# # Foremost configuration file #------------------------------------------------------------------------- # Note the foremost configuration file is provided to support formats which # don't have built-in extraction functions. If the format is built-in to foremost # simply run foremost with -t and provide the format you wish to extract. # # The configuration file is used to control what types of files foremost # searches for. A sample configuration file, foremost.conf, is included with # this distribution. For each file type, the configuration file describes # the file's extension, whether the header and footer are case sensitive, # the maximum file size, and the header and footer for the file. The footer # field is optional, but header, size, case sensitivity, and extension are # not! # # Any line that begins with a '#' is considered a comment and ignored. Thus, # to skip a file type just put a '#' at the beginning of that line # # Headers and footers are decoded before use. To specify a value in # hexadecimal use \x[0-f][0-f], and for octal use \[0-3][0-7][0-7]. Spaces # can be represented by \s. Example: "\x4F\123\I\sCCI" decodes to "OSI CCI". # # To match any single character (aka a wildcard) use a '?'. If you need to # search for the '?' character, you will need to change the 'wildcard' line # *and* every occurrence of the old wildcard character in the configuration # file. Don't forget those hex and octal values! '?' is equal to 0x3f and # \077. # # If you would like to extract files without an extension enter the value # "NONE" in the extension column (note: you can change the value of this # "no suffix" flag by setting the variable FOREMOST_NOEXTENSION_SUFFIX # in foremost.h and recompiling). # # The ASCII option will extract all ASCII printable characters before and after # the keyword provided. 
# # The NEXT keyword after a footer instructs foremost to search forwards for data # that starts with the header provided and terminates or is followed by data in # the footer -- the footer data is not included in the output. The data in the # footer, when used with the NEXT keyword effectively allows you to search for # data that you know for sure should not be in the output file. This method for # example, lets you search for two 'starting' headers in a document that doesn't # have a good ending footer and you can't say exactly what the footer is, but # you know if you see another header, that should end the search and an output # file should be written. # To redefine the wildcard character, change the setting below and all # occurrences in the foremost.conf file. # #wildcard ? # # case size header footer #extension sensitive # #--------------------------------------------------------------------- # EXAMPLE WITH NO SUFFIX #--------------------------------------------------------------------- # # Here is an example of how to use the no extension option. Any files # containing the string "FOREMOST" would be extracted to a file without # an extension (eg: 00000000,00000001) # NONE y 1000 FOREMOST # #--------------------------------------------------------------------- # GRAPHICS FILES #--------------------------------------------------------------------- # # # AOL ART files # art y 150000 \x4a\x47\x04\x0e \xcf\xc7\xcb # art y 150000 \x4a\x47\x03\x0e \xd0\xcb\x00\x00 # # GIF and JPG files (very common) # (NOTE THESE FORMATS HAVE BUILTIN EXTRACTION FUNCTION) # gif y 155000000 \x47\x49\x46\x38\x37\x61 \x00\x3b # gif y 155000000 \x47\x49\x46\x38\x39\x61 \x00\x00\x3b # jpg y 20000000 \xff\xd8\xff\xe0\x00\x10 \xff\xd9 # jpg y 20000000 \xff\xd8\xff\xe1 \xff\xd9 # jpg y 20000000 \xff\xd8 \xff\xd9 # # PNG (used in web pages) # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) # png y 200000 \x50\x4e\x47? 
\xff\xfc\xfd\xfe # # # BMP # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) # bmp y 100000 BM??\x00\x00\x00 # # TIF # tif y 200000000 \x49\x49\x2a\x00 # #--------------------------------------------------------------------- # ANIMATION FILES #--------------------------------------------------------------------- # # AVI (Windows animation and DiVX/MPEG-4 movies) # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) # avi y 4000000 RIFF????AVI # # Apple Quicktime # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) # mov y 4000000 ????????\x6d\x6f\x6f\x76 # mov y 4000000 ????????\x6d\x64\x61\x74 # # MPEG Video # mpg y 4000000 mpg eof # mpg y 20000000 \x00\x00\x01\xba \x00\x00\x01\xb9 # mpg y 20000000 \x00\x00\x01\xb3 \x00\x00\x01\xb7 # # Macromedia Flash # fws y 4000000 FWS # #--------------------------------------------------------------------- # MICROSOFT OFFICE #--------------------------------------------------------------------- # # Word documents # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) # doc y 12500000 \xd0\xcf\x11\xe0\xa1\xb1 # # Outlook files # pst y 400000000 \x21\x42\x4e\xa5\x6f\xb5\xa6 # ost y 400000000 \x21\x42\x44\x4e # # Outlook Express # dbx y 4000000 \xcf\xad\x12\xfe\xc5\xfd\x74\x6f # idx y 4000000 \x4a\x4d\x46\x39 # mbx y 4000000 \x4a\x4d\x46\x36 # #--------------------------------------------------------------------- # WORDPERFECT #--------------------------------------------------------------------- # # wpc y 100000 ?WPC # #--------------------------------------------------------------------- # HTML (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) #--------------------------------------------------------------------- # # htm n 50000 # #--------------------------------------------------------------------- # ADOBE PDF (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) #--------------------------------------------------------------------- # # pdf y 5000000 %PDF- %EOF # # 
#--------------------------------------------------------------------- # AOL (AMERICA ONLINE) #--------------------------------------------------------------------- # # AOL Mailbox # mail y 500000 \x41\x4f\x4c\x56\x4d # # # #--------------------------------------------------------------------- # PGP (PRETTY GOOD PRIVACY) #--------------------------------------------------------------------- # # PGP Disk Files # pgd y 500000 \x50\x47\x50\x64\x4d\x41\x49\x4e\x60\x01 # # Public Key Ring # pgp y 100000 \x99\x00 # Security Ring # pgp y 100000 \x95\x01 # pgp y 100000 \x95\x00 # Encrypted Data or ASCII armored keys # pgp y 100000 \xa6\x00 # (there should be a trailer for this...) # txt y 100000 -----BEGIN\040PGP # # #--------------------------------------------------------------------- # RPM (Linux package format) #--------------------------------------------------------------------- # rpm y 1000000 \xed\xab # # #--------------------------------------------------------------------- # SOUND FILES #--------------------------------------------------------------------- # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) # wav y 200000 RIFF????WAVE # # Real Audio Files # ra y 1000000 \x2e\x72\x61\xfd # ra y 1000000 .RMF # # asf y 8000000 \x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C # # wmv y 20000000 \x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C # # wma y 8000000 \x30\x26\xB2\x75 \x00\x00\x00\xFF # # wma y 8000000 \x30\x26\xB2\x75 \x52\x9A\x12\x46 # # mp3 y 8000000 \xFF\xFB??\x44\x00\x00 # mp3 y 8000000 \x57\x41\x56\45 \x00\x00\xFF\ # mp3 y 8000000 \xFF\xFB\xD0\ \xD1\x35\x51\xCC\ # mp3 y 8000000 \x49\x44\x33\ # mp3 y 8000000 \x4C\x41\x4D\x45\ #--------------------------------------------------------------------- # WINDOWS REGISTRY FILES #--------------------------------------------------------------------- # # Windows NT registry # dat y 4000000 regf # Windows 95 registry # dat y 4000000 CREG # # lnk y 5000 
\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00 # chm y 100000 \x49\x54\x53\x46\x03\x00\x00\x00\x60\x00\x00\x00\x01\x00\x00 # cookie n 4096 id= # rdp y 4096 \xFF\xFE\x73\x00\x63\x00\x72\x00\x65\x00\x65\x00\x6E\x00\x20\x00\x6D # #--------------------------------------------------------------------- # MISCELLANEOUS #--------------------------------------------------------------------- # (NOTE THIS FORMAT HAS BUILTIN EXTRACTION FUNCTION) # zip y 10000000 PK\x03\x04 \x3c\xac # (NOTE THIS FORMAT HAS BUILTIN EXTRACTION FUNCTION) # rar y 10000000 Rar! # # java y 1000000 \xca\xfe\xba\xbe # # cpp y 20000 #include #include ASCII #--------------------------------------------------------------------- # ScanSoft PaperPort "Max" files #--------------------------------------------------------------------- # max y 1000000 \x56\x69\x47\x46\x6b\x1a\x00\x00\x00\x00 \x00\x00\x05\x80\x00\x00 #--------------------------------------------------------------------- # PINs Password Manager program #--------------------------------------------------------------------- # pins y 8000 \x50\x49\x4e\x53\x20\x34\x2e\x32\x30\x0d