"""Randomly overwrite bytes of a file with a fixed value.

Usage: alterrandom.py INPUT OUTPUT PROB SUBSTBYTE

Every byte of INPUT is independently replaced by SUBSTBYTE with probability
PROB / 10000 and the result is written to OUTPUT.
"""
import random
import sys

# PROB is expressed in parts per PROB_SCALE.
PROB_SCALE = 10000


def alter_bytes(data, prob, substbyte, rng=random):
    """Return a copy of *data* with randomly chosen bytes replaced.

    Args:
        data: bytes-like object to corrupt; it is not modified.
        prob: chance, in parts per 10000, that any given byte is replaced.
            Values <= 0 never replace, values >= 10000 always replace.
        substbyte: replacement value, 0..255.
        rng: object providing ``randrange``; defaults to the ``random``
            module.  Pass a seeded ``random.Random`` for reproducible output.

    Returns:
        A new ``bytearray`` of the same length as *data*.

    Raises:
        ValueError: if *substbyte* does not fit in one byte.
    """
    if not 0 <= substbyte <= 0xFF:
        raise ValueError(f"substbyte must be in 0..255, got {substbyte}")
    # randrange(PROB_SCALE) yields exactly PROB_SCALE equally likely values,
    # so the replacement probability is exactly prob / PROB_SCALE (an
    # inclusive randint(0, PROB_SCALE) would make it prob / (PROB_SCALE + 1)).
    return bytearray(
        substbyte if rng.randrange(PROB_SCALE) < prob else byte
        for byte in data
    )


def main(argv=None):
    """Command-line entry point; returns the process exit status."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 4:
        print(
            "Usage: alterrandom.py INPUT OUTPUT PROB SUBSTBYTE",
            file=sys.stderr,
        )
        return 2

    in_path, out_path, prob_arg, byte_arg = args[:4]
    prob = int(prob_arg)
    substbyte = int(byte_arg)

    with open(in_path, "rb") as f:
        indata = f.read()

    with open(out_path, "wb") as f:
        f.write(alter_bytes(indata, prob, substbyte))
    return 0


if __name__ == "__main__":
    sys.exit(main())
0xb93425e5, 0x6dfe410e, + 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, 0x30e349b1, 0xc288cab2, + 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, + 0xe4292d5a, 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 0x417b1dbc, + 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, + 0x67dafa54, 0x95b17957, 0xcba24573, 0x39c9c670, 0x2a993584, + 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, + 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f, + 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, + 0x0f36e6f7, 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, 0xeb1fcbad, + 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, + 0xcdbe2c45, 0x3fd5af46, 0x7198540d, 0x83f3d70e, 0x90a324fa, + 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, + 0xceb018de, 0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, + 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, + 0x563c5f93, 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, 0x92a8fc17, + 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, + 0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db, 0xf94ad42f, + 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, + 0x97baa1ba, 0x84ea524e, 0x7681d14d, 0x2892ed69, 0xdaf96e6a, + 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, + 0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 0x38cc2a06, + 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, + 0x1e6dcdee, 0xec064eed, 0xc38d26c4, 0x31e6a5c7, 0x22b65633, + 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 
0x8ecee914, + 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8, + 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, + 0x07198540, 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, 0xe330a81a, + 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, + 0xc5914ff2, 0x37faccf1, 0x69e9f0d5, 0x9b8273d6, 0x88d28022, + 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, + 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, + 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, + 0xad7d5351 +}; + +static uint32_t crc32c_lookup(uint8_t index) { + return crc32c_table[index & 0xff]; +} + +static void crc32c(uint32_t *crc, const uint8_t *data, size_t len) +{ + int i = 0; + for (i = 0 ; i < len; ++i) { + uint32_t tabval = crc32c_lookup(*crc ^ data[i]); + *crc = tabval ^ (*crc >> 8); + } +} + +static void crc32c_init(uint32_t *crc) { + /* Initial value is 0 */ + *crc = 0xffffffff; +} + +static void crc32c_fini(uint32_t *crc) { + /* Final step is to reverse the CRC Value */ + *crc ^= 0xffffffff; + /* Mask the CRC */ + *crc = ((*crc >> 15) | (*crc << 17)) + 0xa282ead8; +} + /* Logarithm base two of the number */ static uint32_t log2_32(uint32_t n) { int32_t i = 0; @@ -117,7 +215,7 @@ static int32_t parse_literal(uint8_t *cdata, uint32_t cidx, uint32_t clength, if (*idx + clen > length) return -1; - prdebug("Copying literal %d bytes at (u:%d c:%d (%lu))\n", + prdebug("Copying literal %d bytes at (u:%d c:%d (%u))\n", clen, *idx, offsetval, offsetval); memcpy(&data[*idx], &cdata[offsetval], clen); @@ -128,18 +226,28 @@ static int32_t parse_literal(uint8_t *cdata, uint32_t cidx, uint32_t clength, static int offsetread(uint8_t *data, uint32_t *idx, uint32_t length, uint32_t clen, uint32_t coff) { + int ret = 0; uint32_t i; prdebug("Copying %d bytes offset %d (pos: %d)\n", clen, coff, *idx); /* Ignore invalid offset */ if (*idx < 
coff || coff == 0) - return -1; + ret = -1; if (*idx + clen > length) - return -1; + ret = -1; - if (coff >= clen) { + /* Check if we can ignore errors */ + if (ret != 0 && !ignore_offset_errors) { + prerror("Offset error\n"); + } else if (ret != 0 && ignore_offset_errors) { + prinfo("Ignoring offset errors\n"); + for (i = 0; i < clen; ++i) + data[*idx+i] = offset_dummy_byte; + *idx = *idx + clen; + ret = 0; + } else if (coff >= clen) { memcpy(&data[*idx], &data[*idx - coff], clen); *idx += clen; } else { @@ -150,7 +258,7 @@ static int offsetread(uint8_t *data, uint32_t *idx, uint32_t length, memcpy(&data[*idx], &data[*idx - coff], clen % coff); *idx += clen % coff; } - return 0; + return ret; } @@ -224,13 +332,15 @@ static int32_t parse_compressed_type(uint8_t compressed_type, } static int snappy_uncompress(FILE *out, uint8_t *cdata, size_t clength, - uint8_t *data, size_t length, uint32_t *idx) { + uint8_t *data, size_t length, uint32_t *idx, uint32_t *crc) { int32_t off = 0; uint32_t cidx = 0; uint32_t bytes = 0; uint32_t len = 0; uint8_t ctype = 0; + crc32c_init(crc); + *idx = 0; prdebug("Decompressing %ld bytes\n", clength); @@ -248,6 +358,9 @@ static int snappy_uncompress(FILE *out, uint8_t *cdata, size_t clength, off = parse_compressed_type(ctype, cdata, cidx, clength, data, idx, len); if (off < 0) { + /* Calculate CRC */ + crc32c(crc, data, *idx); + crc32c_fini(crc); if (fwrite(data, 1, *idx, out) != *idx) return off; return off; @@ -257,6 +370,9 @@ static int snappy_uncompress(FILE *out, uint8_t *cdata, size_t clength, cidx += off; } + crc32c(crc, data, *idx); + crc32c_fini(crc); + return 0; } @@ -264,6 +380,14 @@ static FILE *open_read_file(const char *file) { FILE *in = stdin; if (strcmp(file, "-") != 0) in = fopen(file, "rb"); + prdebug("Opening IN file: %s\n", file); + if (in == NULL) + return in; + + if (read_offset != 0) { + prinfo("Seeking to offset %d\n", read_offset); + fseek(in, read_offset, SEEK_SET); + } return in; } @@ -271,6 +395,7 @@ static 
FILE *open_write_file(const char *file) { FILE *out = stdout; if (strcmp(file, "-") != 0) out = fopen(file, "wb"); + prdebug("Opening OUT file: %s\n", file); return out; } @@ -314,6 +439,7 @@ static int parse_compressed_data_chunk(FILE *in, FILE *out) { uint32_t c_read_length = 0; uint32_t crc = 0; uint32_t idx = 0; + uint32_t uncompressed_crc = 0; c_data = malloc(MAX_COMPRESSED_DATA_SIZE); if (c_data == NULL) { @@ -362,12 +488,20 @@ static int parse_compressed_data_chunk(FILE *in, FILE *out) { c_read_length = fread(c_data, 1, c_length - 3, in); if ((ret = snappy_uncompress(out, c_data, c_read_length, - data, MAX_UNCOMPRESSED_DATA_SIZE, &idx)) != 0) { + data, MAX_UNCOMPRESSED_DATA_SIZE, &idx, + &uncompressed_crc)) != 0) { goto return_point; } prinfo("End of decompression %lx\n", ftell(in)); + if (crc != uncompressed_crc) { + prinfo("Corrupted File! Expected CRC: %08x Calculated CRC: %08x\n", crc, uncompressed_crc); + if (consider_crc_errors) { + ret = -1; + goto return_point; + } + } if (fwrite(data, 1, idx, out) < idx) { perror("fwrite"); @@ -417,7 +551,62 @@ static int parse_chunk(FILE *in, FILE *out, uint8_t chunktype) { } } -static int snappy_decompress_frame(FILE *in, FILE *out) { +static int snappy_decompress_unframed(FILE *in, FILE *out) { + int ret = 0; + int32_t r = 0; + uint32_t read_head = 0; + uint32_t write_head = 0; + + uint8_t *inbuf, *outbuf; + + int32_t read_size = MAX_COMPRESSED_DATA_SIZE; + int32_t write_size = MAX_COMPRESSED_DATA_SIZE; + + inbuf = malloc(read_size); + if (inbuf == NULL) { + ret = -1; + goto return_point; + } + + outbuf = malloc(write_size); + if (outbuf == NULL) { + ret = -1; + goto free_in; + } + + read_size = fread(inbuf, 1, read_size, in); + if (read_size <= 0) { + ret = read_size; + goto free_out; + } + + while (read_head < read_size) { + /* Skip unvalid compressed types, sledge */ + uint8_t ctype = inbuf[read_head] & 0x03; + r = parse_compressed_type(ctype, inbuf, read_head, read_size, + outbuf, &write_head, write_size); + 
if (r < 0) { + prerror("parse_compressed_type: %d\n", r); + return r; + } + + read_head += r; + prinfo("offset: %u\n", read_head); + } + + r = fwrite(outbuf, 1, write_head, out); + if (r < 0) + ret = r; + +free_out: + free(outbuf); +free_in: + free(inbuf); +return_point: + return ret; +} + +static int snappy_decompress_framed(FILE *in, FILE *out) { int ret = 0; uint8_t chunktype; @@ -430,40 +619,105 @@ static int snappy_decompress_frame(FILE *in, FILE *out) { return ret; } +static void version(const char *progname) { + fprintf(stderr, "%s Version: %s\n", progname, VERSION); +} + static void usage(const char *progname) { - fprintf(stderr, "Usage %s \n", progname); + fprintf(stderr, "Usage %s [options] \n", + progname); fprintf(stderr, " files can be specified as - for stdin or stdout\n"); + fprintf(stderr, " Options:\n"); + fprintf(stderr, " -E --ignore_offset_errors [substitution byte] Ignore any offset errors that occurs\n"); + fprintf(stderr, " -O --read_offset [offset] Start reading file from offset\n"); + fprintf(stderr, " -u --unframed Assume Unframed stream in input file\n"); + fprintf(stderr, " -h --help This Help\n"); + fprintf(stderr, " -v --version Print Version and exit\n"); } int main(int argc, char **argv) { + int c = 0; int ret = 0; FILE *in, *out; - prdebug("Starting snappyturtle\n"); + int option_idx = 0; + static struct option flags[] = { + {"consider_crc_errors", no_argument, 0, 'C'}, + {"ignore_offset_errors", optional_argument, 0, 'E'}, + {"read_offset", required_argument, 0, 'O'}, + {"unframed", no_argument, 0, 'u'}, + {"version", no_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0} + }; - if (argc < 3) { + while (c != -1) { + c = getopt_long(argc, argv, "CO:E::uhv", flags, &option_idx); + switch (c) { + case 'C': + consider_crc_errors = 1; + break; + case 'E': + ignore_offset_errors = 1; + /* Set the dummy byte to the passed value */ + if (optarg != NULL) + offset_dummy_byte = (strtol(optarg, NULL, 0) & 0xff); + break; + case 
#!/bin/sh
# Smoke tests for snappy-fox: build check plus framed decompression of the
# example images.  Each test runs in its own subshell so a directory change
# or failure in one test cannot leak into the next.

set -eu

# Repository root, derived from the script location so the tests can be
# started from any working directory.
ROOT=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)

# Run a command that is REQUIRED to fail.
# `set -e` is ignored for commands prefixed with `!`, so a bare
# `! command` line can never abort the script; this helper turns an
# unexpected success into a real test failure.
expect_failure() {
    if "$@"; then
        echo "[ FAIL ] command unexpectedly succeeded: $*" >&2
        return 1
    fi
}

test000() {
    echo "[Test 000  ] check compilation"
    cd "$ROOT"
    # Standard compilation
    echo "[Test 000 a] Debug compilation "
    make CFLAGS=-DDEBUG clean all
    file ./snappy-fox | grep -q ELF
    echo "[Test 000 b] Normal compilation "
    make clean all
    file ./snappy-fox | grep -q ELF
    echo "[Test 000  ] ok"
}

test001() {
    echo "[Test 001  ]check standard framed run"
    cd "$ROOT"
    echo "[Test 001 a] normal image"
    ./snappy-fox example/exampleimage.snappy example/exampleimage.jpg
    file example/exampleimage.jpg | grep -q JPEG

    # The example image is CRC corrupted
    echo "[Test 001 b] CRC Corruption"
    expect_failure ./snappy-fox --consider_crc_errors \
        example/exampleimage.snappy example/exampleimage.jpg

    # Test on the corrupted image
    echo "[Test 001 c] Corrupted image"
    expect_failure ./snappy-fox \
        example/alteredimage.snappy example/alteredimage.jpg
    echo "[Test 001 c] Corrupted image ignoring offset errors"
    ./snappy-fox --ignore_offset_errors \
        example/alteredimage.snappy example/alteredimage.jpg
    echo "[Test 001  ] ok"
}

( test000 )
( test001 )