snappy-fox/snappy-fox.c

632 lines
16 KiB
C
Raw Normal View History

2021-01-24 12:27:39 +01:00
/**
* Snappy-fox -- Firefox Morgue Cache de-compressor
* Copyright (C) 2021 Davide Berardi <berardi.dav@gmail.com>
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#define _POSIX_C_SOURCE 1
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
2024-02-19 09:49:03 +01:00
#include <getopt.h>
2021-01-24 12:27:39 +01:00
/*
 * Size of the buffer used for compressed input. The value equals
 * (2^24 - 1) - 4, presumably the largest 3-byte chunk length minus the
 * 4-byte checksum -- TODO confirm against the framing format.
 */
#define MAX_COMPRESSED_DATA_SIZE 16777211
/* Size of the buffer used for the output of one framed chunk */
#define MAX_UNCOMPRESSED_DATA_SIZE 65536

/*
 * Logging helpers, all writing to stderr. They are statement-like macros:
 * use them followed by a semicolon. prdebug/prinfo expand to nothing unless
 * DEBUG is defined.
 */
#ifdef DEBUG
#define prdebug(...) \
    do { fprintf(stderr, "[ DEBUG ]"); fprintf(stderr, __VA_ARGS__); } while (0)
#define prinfo(...) \
    do { fprintf(stderr, "[ INFO ]"); fprintf(stderr, __VA_ARGS__); } while (0)
#else
#define prdebug(...) do { } while (0)
#define prinfo(...) do { } while (0)
#endif
#define prbanner(...) fprintf(stderr, __VA_ARGS__)
#define prerror(...) \
    do { fprintf(stderr, "[ ERROR ]"); fprintf(stderr, __VA_ARGS__); } while (0)

/* Version string, normally injected by the build */
#ifndef VERSION
#define VERSION "unknown"
#endif

/* Flags */
/* Unframed file stream, by default assume framed file */
static uint32_t unframed_stream = 0;
/* Ignore offset errors, by default consider them as fatal errors */
static uint32_t ignore_offset_errors = 0;
/* Byte to substitute offset corrupted values with */
static uint8_t offset_dummy_byte = 0xff;
/* Read offset */
static uint32_t read_offset = 0;
2024-02-19 10:10:06 +01:00
/*
 * Bit length of n: the 1-based position of its most significant set bit,
 * i.e. floor(log2(n)) + 1 for n > 0, and 0 when n is 0.
 */
static uint32_t log2_32(uint32_t n) {
    uint32_t bits = 0;

    /* Count how many right shifts it takes to clear the value */
    while (n != 0) {
        bits++;
        n >>= 1;
    }
    return bits;
}
/*
 * Tell whether placing byte c at group position `shift` (that is, shifted
 * left by 7 * shift bits) would need more than 31 bits.
 *
 * Returns 1 when it would overflow, 0 otherwise. `length` is accepted for
 * the caller's convenience but is not used.
 */
static int check_overflow_shift(uint8_t c, uint32_t shift, uint32_t length) {
    uint32_t bits_needed;

    (void)length;

    /* Nothing is shifted, or there is nothing to shift */
    if (shift == 0 || c == 0)
        return 0;

    bits_needed = 7 * shift + log2_32(c);
    return (bits_needed > 31) ? 1 : 0;
}
static uint32_t get_length(uint8_t *data, uint32_t length, uint32_t *bytes) {
2021-01-24 12:27:39 +01:00
uint32_t l = 0;
uint32_t shift = 0;
uint8_t c = 0;
uint8_t cbit = 1;
while (cbit != 0) {
c = *data;
2021-01-24 19:06:43 +01:00
/* Return error */
2021-01-24 18:42:40 +01:00
if (check_overflow_shift(c, shift, length))
return MAX_UNCOMPRESSED_DATA_SIZE + 1;
2021-01-24 12:27:39 +01:00
cbit = c & 0x80;
c = c & ~0x80;
l |= c << (7*shift);
data++;
shift++;
(*bytes)++;
}
return l;
}
/*
 * Decode one literal element and append its bytes to the output.
 *
 * cdata/cidx/clength: compressed buffer, index of the element's tag byte and
 *                     number of valid bytes in the buffer.
 * data/idx/length:    output buffer, current write position (advanced on
 *                     success) and output capacity.
 *
 * The upper six bits of the tag hold (literal length - 1) when below 60;
 * values 60..63 mean that the next 1..4 bytes hold it, least significant
 * byte first.
 *
 * Returns the number of compressed bytes consumed (tag, length bytes and
 * literal data), or -1 when the element does not fit in the input or the
 * literal does not fit in the output.
 */
static int32_t parse_literal(uint8_t *cdata, uint32_t cidx, uint32_t clength,
        uint8_t *data, uint32_t *idx, uint32_t length) {
    uint32_t bytes_to_read = 0;
    uint32_t offsetval = 0;
    uint32_t lenval_u = 0;
    uint32_t clen = 0;
    uint32_t i;

    if (cidx >= clength)
        return -1;

    clen = (uint32_t)(cdata[cidx] & 0xfc) >> 2;
    if (clen >= 60) {
        bytes_to_read = clen - 59;
        /* The extra length bytes must be part of the input */
        if (bytes_to_read > clength - cidx - 1)
            return -1;
        /* Assemble byte by byte so the host byte order does not matter */
        clen = 0;
        for (i = 0; i < bytes_to_read; ++i)
            clen |= (uint32_t)cdata[cidx + 1 + i] << (8 * i);
    }
    /* The stored value is length - 1: 0xffffffff would wrap to zero */
    if (clen == UINT32_MAX)
        return -1;
    clen += 1;

    /* First byte of the literal data; at most clength thanks to the checks */
    offsetval = cidx + bytes_to_read + 1;

    /* The literal bytes themselves must be inside the input */
    if (clen > clength - offsetval)
        return -1;

    /* Check integer overflow */
    lenval_u = clen + bytes_to_read + 1;
    if (lenval_u > (uint32_t)(UINT32_MAX / 2))
        return -1;

    /* ... and they must fit in what is left of the output */
    if (*idx > length || clen > length - *idx)
        return -1;

    prdebug("Copying literal %d bytes at (u:%d c:%d (%u))\n",
        clen, *idx, offsetval, offsetval);
    memcpy(&data[*idx], &cdata[offsetval], clen);
    *idx += clen;

    return (int32_t)lenval_u;
}
static int offsetread(uint8_t *data, uint32_t *idx, uint32_t length,
uint32_t clen, uint32_t coff) {
2024-02-19 11:08:01 +01:00
int ret = 0;
2021-01-24 12:27:39 +01:00
uint32_t i;
prdebug("Copying %d bytes offset %d (pos: %d)\n",
clen, coff, *idx);
/* Ignore invalid offset */
if (*idx < coff || coff == 0)
2024-02-19 11:08:01 +01:00
ret = -1;
2021-01-24 12:27:39 +01:00
if (*idx + clen > length)
2024-02-19 11:08:01 +01:00
ret = -1;
2021-01-24 12:27:39 +01:00
2024-02-19 11:08:01 +01:00
/* Check if we can ignore errors */
if (ret != 0 && !ignore_offset_errors) {
prerror("Offset error\n");
} else if (ret != 0 && ignore_offset_errors) {
prinfo("Ignoring offset errors\n");
for (i = 0; i < clen; ++i)
data[*idx+i] = offset_dummy_byte;
*idx = *idx + clen;
ret = 0;
} else if (coff >= clen) {
2021-01-24 12:27:39 +01:00
memcpy(&data[*idx], &data[*idx - coff], clen);
*idx += clen;
} else {
for (i = 0; i < clen / coff ; ++i) {
memcpy(&data[*idx], &data[*idx - coff], coff);
*idx += coff;
}
memcpy(&data[*idx], &data[*idx - coff], clen % coff);
*idx += clen % coff;
}
2024-02-19 11:08:01 +01:00
return ret;
2021-01-24 12:27:39 +01:00
}
/*
 * Decode a copy element with a one-byte offset field (two bytes in total).
 *
 * Tag bits 2..4 hold (length - 4); tag bits 5..7 are bits 8..10 of the
 * offset and the following byte holds its low eight bits.
 *
 * Returns 2 (bytes consumed) on success, -1 when the element is truncated,
 * or the non-zero result of offsetread() when the copy fails.
 */
static int32_t parse_copy1(uint8_t *cdata, uint32_t cidx, uint32_t clength,
        uint8_t *data, uint32_t *idx, uint32_t length) {
    int ret = 0;
    uint32_t clen = 0;
    uint32_t coff = 0;

    /* Tag plus one offset byte must both be inside the input */
    if (cidx >= clength || clength - cidx < 2)
        return -1;

    clen = (uint32_t)((cdata[cidx] & 0x1c) >> 2) + 4;
    coff = (uint32_t)((cdata[cidx] & 0xe0)) << 3;
    coff |= cdata[cidx + 1];

    if ((ret = offsetread(data, idx, length, clen, coff)) != 0)
        return ret;

    return 2;
}
/*
 * Decode a copy element with a two-byte offset field (three bytes in total).
 *
 * The upper six bits of the tag hold (length - 1); the two following bytes
 * hold the offset, least significant byte first.
 *
 * Returns 3 (bytes consumed) on success, -1 when the element is truncated,
 * or the non-zero result of offsetread() when the copy fails.
 */
static int32_t parse_copy2(uint8_t *cdata, uint32_t cidx, uint32_t clength,
        uint8_t *data, uint32_t *idx, uint32_t length) {
    int ret = 0;
    uint32_t clen = 0;
    uint32_t coff = 0;

    /* Tag plus two offset bytes must all be inside the input */
    if (cidx >= clength || clength - cidx < 3)
        return -1;

    clen = (uint32_t)((cdata[cidx] & 0xfc) >> 2) + 1;
    /* Assemble explicitly so the host byte order does not matter */
    coff = (uint32_t)cdata[cidx + 1]
         | ((uint32_t)cdata[cidx + 2] << 8);

    if ((ret = offsetread(data, idx, length, clen, coff)) != 0)
        return ret;

    return 3;
}
/*
 * Decode a copy element with a four-byte offset field (five bytes in total).
 *
 * The upper six bits of the tag hold (length - 1); the four following bytes
 * hold the offset, least significant byte first.
 *
 * Returns 5 (bytes consumed) on success, -1 when the element is truncated,
 * or the non-zero result of offsetread() when the copy fails.
 */
static int32_t parse_copy4(uint8_t *cdata, uint32_t cidx, uint32_t clength,
        uint8_t *data, uint32_t *idx, uint32_t length) {
    int ret = 0;
    uint32_t clen = 0;
    uint32_t coff = 0;

    /* Tag plus four offset bytes must all be inside the input */
    if (cidx >= clength || clength - cidx < 5)
        return -1;

    clen = (uint32_t)((cdata[cidx] & 0xfc) >> 2) + 1;
    /* Assemble explicitly so the host byte order does not matter */
    coff = (uint32_t)cdata[cidx + 1]
         | ((uint32_t)cdata[cidx + 2] << 8)
         | ((uint32_t)cdata[cidx + 3] << 16)
         | ((uint32_t)cdata[cidx + 4] << 24);

    if ((ret = offsetread(data, idx, length, clen, coff)) != 0)
        return ret;

    return 5;
}
static int32_t parse_compressed_type(uint8_t compressed_type,
uint8_t *cdata, uint32_t cidx, uint32_t clen,
uint8_t *data, uint32_t *idx, uint32_t len) {
switch (compressed_type) {
case 0:
/* Literal stream */
prdebug("Found Literal stream\n");
return parse_literal(cdata, cidx, clen, data, idx, len);
case 1:
/* 1 byte offset */
prdebug("Found single byte offset stream\n");
return parse_copy1(cdata, cidx, clen, data, idx, len);
case 2:
/* 2 byte offset */
prdebug("Found two bytes offset stream\n");
return parse_copy2(cdata, cidx, clen, data, idx, len);
case 3:
/* 4 byte offset */
prdebug("Found four bytes offset stream\n");
return parse_copy4(cdata, cidx, clen, data, idx, len);
default:
prerror("Impossible compressed type!\n");
return -1;
}
}
/*
 * Decompress one block: a varint with the uncompressed length followed by
 * a sequence of literal/copy elements.
 *
 * out:     stream that receives the partial output when decoding fails.
 *          On success nothing is written here; the caller is expected to
 *          write data[0 .. *idx) itself.
 * cdata:   compressed bytes, clength of them are valid.
 * data:    output buffer with room for length bytes.
 * idx:     set to the number of bytes produced in data.
 *
 * Returns 0 on success and a negative value on malformed input.
 */
static int snappy_uncompress(FILE *out, uint8_t *cdata, size_t clength,
        uint8_t *data, size_t length, uint32_t *idx) {
    int32_t off = 0;
    uint32_t cidx = 0;
    uint32_t bytes = 0;
    uint32_t len = 0;
    uint8_t ctype = 0;

    *idx = 0;

    prdebug("Decompressing %ld bytes\n", (long)clength);

    /* No length preamble to read; the helpers index with 32 bits */
    if (clength == 0 || clength > UINT32_MAX)
        return -1;

    len = get_length(cdata, (uint32_t)clength, &bytes);
    prdebug("Uncompressed Length %d\n", len);
    if (len > MAX_UNCOMPRESSED_DATA_SIZE)
        return -1;
    /* The declared length bounds every write: it must fit the buffer */
    if (len > length)
        return -1;

    cidx = bytes;
    while (cidx < clength && *idx < length) {
        ctype = cdata[cidx] & 0x03;
        off = parse_compressed_type(ctype, cdata, cidx, (uint32_t)clength,
                                    data, idx, len);
        if (off < 0) {
            /* Salvage what was decoded before the failure */
            if (fwrite(data, 1, *idx, out) != *idx)
                perror("fwrite");
            return off;
        }
        cidx += (uint32_t)off;
    }

    return 0;
}
/*
 * Open the input stream and position it at read_offset.
 *
 * file: path to open in binary mode, or "-" for stdin.
 *
 * When the stream cannot be repositioned with fseek() (for instance a
 * pipe), read_offset bytes are read and discarded instead.
 *
 * Returns the stream, or NULL when fopen() fails (errno is left as set by
 * fopen so the caller can report it).
 */
static FILE *open_read_file(const char *file) {
    FILE *in = stdin;
    uint32_t left = 0;

    /* Log first so nothing runs between fopen() and the caller's perror() */
    prdebug("Opening IN file: %s\n", file);

    if (strcmp(file, "-") != 0)
        in = fopen(file, "rb");
    if (in == NULL)
        return in;

    if (read_offset != 0) {
        prinfo("Seeking to offset %d\n", read_offset);
        if (fseek(in, (long)read_offset, SEEK_SET) != 0) {
            /* Not seekable: consume the bytes up to the offset instead */
            clearerr(in);
            left = read_offset;
            while (left > 0 && fgetc(in) != EOF)
                left--;
        }
    }

    return in;
}
/*
 * Open the output stream.
 *
 * file: path to create/truncate in binary mode, or "-" for stdout.
 *
 * Returns the stream, or NULL when fopen() fails (errno is left as set by
 * fopen so the caller can report it).
 */
static FILE *open_write_file(const char *file) {
    /* Log first so nothing runs between fopen() and the caller's perror() */
    prdebug("Opening OUT file: %s\n", file);

    if (strcmp(file, "-") == 0)
        return stdout;

    return fopen(file, "wb");
}
/*
 * Close a stream obtained from open_read_file()/open_write_file().
 * stdin and stdout are left open. Returns 0 for those, otherwise the
 * result of fclose().
 */
static int close_file(FILE *f) {
    int is_standard_stream = (f == stdin) || (f == stdout);

    return is_standard_stream ? 0 : fclose(f);
}
/*
 * Read the one-byte chunk type that starts a frame chunk.
 * Returns the byte read, or 0x27 when no byte could be read.
 */
static uint8_t get_chunktype(FILE *in) {
    uint8_t type_byte = 0;
    size_t got = fread(&type_byte, sizeof type_byte, 1, in);

    return (got == 1) ? type_byte : 0x27;
}
/*
 * Consume and validate the body of a stream identifier chunk: nine bytes
 * that must be 06 00 00 followed by the ASCII text "sNaPpY".
 *
 * Returns 0 when the bytes match, -1 on a short read or a mismatch.
 */
static int parse_stream_identifier(FILE *in) {
    static const uint8_t expected[9] = {
        0x06, 0x00, 0x00,
        0x73, 0x4e, 0x61, 0x50, 0x70, 0x59
    };
    uint8_t header[sizeof expected];
    size_t i;

    if (fread(header, sizeof header, 1, in) != 1)
        return -1;

    for (i = 0; i < sizeof expected; ++i) {
        if (header[i] != expected[i])
            return -1;
    }

    return 0;
}
/*
 * Read one compressed data chunk (its type byte has already been consumed)
 * and write the decompressed bytes to out.
 *
 * The chunk is read as a 3-byte length (least significant byte first), a
 * 4-byte checksum and then length - 4 bytes of compressed data. The
 * checksum is read but not verified.
 *
 * Returns 0 on success or when the input ends exactly at the length or
 * checksum field, a negative value on allocation failure, truncated
 * header, invalid length, decompression failure or write failure.
 */
static int parse_compressed_data_chunk(FILE *in, FILE *out) {
    int ret = 0;
    size_t r = 0;
    uint8_t *c_data = NULL;
    uint8_t *data = NULL;
    uint8_t len_bytes[3] = {0};
    uint8_t crc_bytes[4] = {0};
    /* Compressed data */
    uint32_t c_length = 0;
    uint32_t payload = 0;
    uint32_t c_read_length = 0;
    uint32_t idx = 0;

    c_data = malloc(MAX_COMPRESSED_DATA_SIZE);
    if (c_data == NULL) {
        ret = -1;
        goto exit_point;
    }
    data = malloc(MAX_UNCOMPRESSED_DATA_SIZE);
    if (data == NULL) {
        ret = -1;
        goto free_c_data;
    }

    r = fread(len_bytes, 1, sizeof len_bytes, in);
    if (r == 0) {
        ret = 0;
        goto return_point;
    } else if (r < sizeof len_bytes) {
        ret = -1;
        goto return_point;
    }

    r = fread(crc_bytes, 1, sizeof crc_bytes, in);
    if (r == 0) {
        ret = 0;
        goto return_point;
    } else if (r < sizeof crc_bytes) {
        ret = -1;
        goto return_point;
    }

    /* Assemble explicitly so the host byte order does not matter */
    c_length = (uint32_t)len_bytes[0]
             | ((uint32_t)len_bytes[1] << 8)
             | ((uint32_t)len_bytes[2] << 16);

    /*
     * The length covers the checksum too. Anything shorter is malformed and
     * would otherwise underflow the payload size below.
     */
    if (c_length < sizeof crc_bytes) {
        ret = -1;
        goto return_point;
    }
    payload = c_length - (uint32_t)sizeof crc_bytes;

    prdebug("Compressed data chunk, len %d\n", payload);
    if (payload > MAX_COMPRESSED_DATA_SIZE) {
        ret = -1;
        goto return_point;
    }

    /* A short read is tolerated: decode whatever is available */
    c_read_length = (uint32_t)fread(c_data, 1, payload, in);

    if ((ret = snappy_uncompress(out, c_data, c_read_length,
                     data, MAX_UNCOMPRESSED_DATA_SIZE, &idx)) != 0) {
        goto return_point;
    }

    prinfo("End of decompression %lx\n", (unsigned long)ftell(in));

    if (fwrite(data, 1, idx, out) < idx) {
        perror("fwrite");
        ret = -1;
        goto return_point;
    }

return_point:
    free(data);
free_c_data:
    free(c_data);
exit_point:
    return ret;
}
/*
 * Decide what to do with a chunk type that has no dedicated parser.
 *
 * 0x27 (the value get_chunktype() returns when it cannot read a byte) is
 * accepted silently. Types 0x28..0x7f are reported as unskippable and
 * return 1; every other type is reported as skipped and returns 0.
 *
 * NOTE(review): only the type byte is looked at here, the chunk body is not
 * consumed; the ranges also look narrower than the reserved ranges of the
 * framing format -- confirm this is intended.
 */
static int parse_unknown_chunktype(uint8_t chunktype) {
    if (chunktype == 0x27)
        return 0;

    if (chunktype < 0x27 || chunktype > 0x7f) {
        prerror("[frame] Skipping chunk %02hhx\n",
            (unsigned char) chunktype);
        return 0;
    }

    prerror("[frame] Unskippable chunk encountered %02hhx\n",
        (unsigned char) chunktype);
    return 1;
}
/*
 * Handle one frame chunk whose type byte has already been read.
 *
 * 0xff is parsed as a stream identifier and 0x00 as compressed data.
 * 0x01 and 0xfe are not implemented and return -1. Any other type is
 * handed to parse_unknown_chunktype().
 *
 * Returns the result of the selected handler.
 */
static int parse_chunk(FILE *in, FILE *out, uint8_t chunktype) {
    prinfo("Got chunk %d\n", chunktype);

    if (chunktype == 0xff)
        return parse_stream_identifier(in);

    if (chunktype == 0x00)
        return parse_compressed_data_chunk(in, out);

    /* TODO: 0x01 needs a parse_uncompressed_data_chunk(in, out) */
    if (chunktype == 0x01)
        return -1;

    /* TODO: 0xfe needs a parse_padding(in, out) */
    if (chunktype == 0xfe)
        return -1;

    return parse_unknown_chunktype(chunktype);
}
2024-02-19 10:10:06 +01:00
/*
 * Decompress an unframed stream: read up to MAX_COMPRESSED_DATA_SIZE bytes
 * from in, decode them as a plain sequence of literal/copy elements and
 * write the result to out in one go.
 *
 * Returns 0 on success or empty input, a negative value on allocation
 * failure, read error, malformed element or write failure. Nothing is
 * written when decoding fails.
 */
static int snappy_decompress_unframed(FILE *in, FILE *out) {
    int ret = 0;
    int32_t r = 0;
    uint32_t read_head = 0;
    uint32_t write_head = 0;
    uint32_t read_size = 0;
    const uint32_t write_size = MAX_COMPRESSED_DATA_SIZE;
    uint8_t *inbuf = NULL;
    uint8_t *outbuf = NULL;

    inbuf = malloc(MAX_COMPRESSED_DATA_SIZE);
    if (inbuf == NULL) {
        ret = -1;
        goto return_point;
    }
    outbuf = malloc(write_size);
    if (outbuf == NULL) {
        ret = -1;
        goto free_in;
    }

    read_size = (uint32_t)fread(inbuf, 1, MAX_COMPRESSED_DATA_SIZE, in);
    if (read_size == 0) {
        /* Empty input is not an error, a failed read is */
        ret = ferror(in) ? -1 : 0;
        goto free_out;
    }

    while (read_head < read_size) {
        /* The two low bits of the tag byte select the element type */
        uint8_t ctype = inbuf[read_head] & 0x03;

        r = parse_compressed_type(ctype, inbuf, read_head, read_size,
                                  outbuf, &write_head, write_size);
        if (r < 0) {
            prerror("parse_compressed_type: %d\n", r);
            /* Leave through the cleanup labels so both buffers are freed */
            ret = r;
            goto free_out;
        }
        read_head += (uint32_t)r;
        prinfo("offset: %u\n", read_head);
    }

    /* fwrite reports failure through a short count, never a negative one */
    if (fwrite(outbuf, 1, write_head, out) != write_head) {
        perror("fwrite");
        ret = -1;
    }

free_out:
    free(outbuf);
free_in:
    free(inbuf);
return_point:
    return ret;
}
/*
 * Decompress a framed stream: read chunk after chunk from in until end of
 * file, a stream error, or a chunk handler returning non-zero.
 *
 * Returns 0 when the whole input was processed, the non-zero result of the
 * failing chunk handler, or -1 when reading from in failed.
 */
static int snappy_decompress_framed(FILE *in, FILE *out) {
    int ret = 0;
    uint8_t chunktype;

    while (feof(in) == 0 && ferror(in) == 0 && ret == 0) {
        chunktype = get_chunktype(in);
        ret = parse_chunk(in, out, chunktype);
        prdebug("New run %ld %d %d\n", ftell(in), feof(in), ferror(in));
    }

    /* A read error ends the loop too: do not report it as success */
    if (ret == 0 && ferror(in) != 0)
        ret = -1;

    return ret;
}
2024-02-19 09:49:03 +01:00
/* Print the program name and the build's VERSION string to stderr */
static void version(const char *progname) {
    const char *build_version = VERSION;

    fprintf(stderr, "%s Version: %s\n", progname, build_version);
}
2021-01-24 12:27:39 +01:00
/* Print the command line synopsis and the option list to stderr */
static void usage(const char *progname) {
    static const char *const help_lines[] = {
        " files can be specified as - for stdin or stdout\n",
        " Options:\n",
        " -E --ignore_offset_errors [substitution byte] Ignore any offset errors that occurs\n",
        " -O --read_offset [offset] Start reading file from offset\n",
        " -u --unframed Assume Unframed stream in input file\n",
        " -h --help This Help\n",
        " -v --version Print Version and exit\n",
    };
    size_t i;

    fprintf(stderr, "Usage %s [options] <input file> <output file>\n",
        progname);
    for (i = 0; i < sizeof help_lines / sizeof help_lines[0]; ++i)
        fputs(help_lines[i], stderr);
}
int main(int argc, char **argv) {
2024-02-19 09:49:03 +01:00
int c;
2021-01-24 12:27:39 +01:00
int ret = 0;
FILE *in, *out;
2024-02-19 09:49:03 +01:00
int option_idx = 0;
static struct option flags[] = {
2024-02-19 11:08:01 +01:00
{"ignore_offset_errors", optional_argument, 0, 'E'},
2024-02-19 11:19:42 +01:00
{"read_offset", required_argument, 0, 'O'},
2024-02-19 11:08:01 +01:00
{"unframed", no_argument, 0, 'u'},
{"version", no_argument, 0, 'v'},
{"help", no_argument, 0, 'h'},
{0, 0, 0, 0}
2024-02-19 09:49:03 +01:00
};
while (c != -1) {
2024-02-19 11:19:42 +01:00
c = getopt_long(argc, argv, "O:E::uhv", flags, &option_idx);
2024-02-19 09:49:03 +01:00
switch (c) {
2024-02-19 11:08:01 +01:00
case 'E':
ignore_offset_errors = 1;
/* Set the dummy byte to the passed value */
if (optarg != NULL)
offset_dummy_byte = (strtol(optarg, NULL, 0) & 0xff);
break;
2024-02-19 11:19:42 +01:00
case 'O':
if (optarg != NULL)
read_offset = strtol(optarg, NULL, 0);
break;
2024-02-19 10:10:06 +01:00
case 'u':
unframed_stream = 1;
break;
2024-02-19 09:49:03 +01:00
case 'h':
usage(argv[0]);
return 0;
case 'v':
version(argv[0]);
return 0;
default:
prerror("Unknown Option: %c\n", c);
continue;
case -1:
break;
}
}
prdebug("Starting snappy-fox\n");
2021-01-24 12:27:39 +01:00
2024-02-19 11:08:01 +01:00
if (argc - optind < 2) {
2021-01-24 12:27:39 +01:00
usage(argv[0]);
return 1;
}
2021-01-24 12:28:41 +01:00
#ifdef __AFL_LOOP
while (__AFL_LOOP(1000)) {
#endif
2024-02-19 11:08:01 +01:00
in = open_read_file(argv[optind]);
2021-01-24 12:27:39 +01:00
if (in == NULL) {
2024-02-19 10:10:06 +01:00
perror("fopen read");
2021-01-24 12:27:39 +01:00
ret = 1;
goto exit_point;
}
2024-02-19 11:08:01 +01:00
out = open_write_file(argv[optind + 1]);
2021-01-24 12:27:39 +01:00
if (out == NULL) {
2024-02-19 10:10:06 +01:00
perror("fopen write");
2021-01-24 12:27:39 +01:00
ret = 1;
goto close_in;
}
2024-02-19 10:10:06 +01:00
if (unframed_stream == 0)
ret = snappy_decompress_framed(in, out);
else
ret = snappy_decompress_unframed(in, out);
if (ret != 0) {
2021-01-24 12:27:39 +01:00
prerror("decompress %d\n", ret);
goto return_point;
}
return_point:
if (close_file(out) != 0)
perror("close");
close_in:
if (close_file(in) != 0)
perror("close");
exit_point:
prdebug("Exiting %d\n", ret);
2021-01-24 12:28:41 +01:00
#ifdef __AFL_LOOP
}
#endif
2021-01-24 12:27:39 +01:00
return ret;
}
2021-01-24 18:42:40 +01:00