From abf15c735e7b88d1df45d7fcfb129701c8d93568 Mon Sep 17 00:00:00 2001 From: fenix Date: Thu, 23 May 2024 18:57:55 +0200 Subject: [PATCH] Upload files to "/" --- list.cc | 114 +++++ lzip.h | 372 +++++++++++++++++ lzip_index.cc | 218 ++++++++++ lzip_index.h | 93 +++++ main.cc | 1107 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1904 insertions(+) create mode 100644 list.cc create mode 100644 lzip.h create mode 100644 lzip_index.cc create mode 100644 lzip_index.h create mode 100644 main.cc diff --git a/list.cc b/list.cc new file mode 100644 index 0000000..cf68a31 --- /dev/null +++ b/list.cc @@ -0,0 +1,114 @@ +/* Lzip - LZMA lossless data compressor + Copyright (C) 2008-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include + +#include "lzip.h" +#include "lzip_index.h" + + +namespace { + +void list_line( const unsigned long long uncomp_size, + const unsigned long long comp_size, + const char * const input_filename ) + { + if( uncomp_size > 0 ) + std::printf( "%14llu %14llu %6.2f%% %s\n", uncomp_size, comp_size, + 100.0 - ( ( 100.0 * comp_size ) / uncomp_size ), + input_filename ); + else + std::printf( "%14llu %14llu -INF%% %s\n", uncomp_size, comp_size, + input_filename ); + } + +} // end namespace + + +int list_files( const std::vector< std::string > & filenames, + const Cl_options & cl_opts ) + { + unsigned long long total_comp = 0, total_uncomp = 0; + int files = 0, retval = 0; + bool first_post = true; + bool stdin_used = false; + + for( unsigned i = 0; i < filenames.size(); ++i ) + { + const bool from_stdin = ( filenames[i] == "-" ); + if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; } + const char * const input_filename = + from_stdin ? "(stdin)" : filenames[i].c_str(); + struct stat in_stats; // not used + const int infd = from_stdin ? 
STDIN_FILENO : + open_instream( input_filename, &in_stats, false, true ); + if( infd < 0 ) { set_retval( retval, 1 ); continue; } + + const Lzip_index lzip_index( infd, cl_opts ); + close( infd ); + if( lzip_index.retval() != 0 ) + { + show_file_error( input_filename, lzip_index.error().c_str() ); + set_retval( retval, lzip_index.retval() ); + continue; + } + if( verbosity < 0 ) continue; + const unsigned long long udata_size = lzip_index.udata_size(); + const unsigned long long cdata_size = lzip_index.cdata_size(); + total_comp += cdata_size; total_uncomp += udata_size; ++files; + const long members = lzip_index.members(); + if( first_post ) + { + first_post = false; + if( verbosity >= 1 ) std::fputs( " dict memb trail ", stdout ); + std::fputs( " uncompressed compressed saved name\n", stdout ); + } + if( verbosity >= 1 ) + std::printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size() ), + members, lzip_index.file_size() - cdata_size ); + list_line( udata_size, cdata_size, input_filename ); + + if( verbosity >= 2 && members > 1 ) + { + std::fputs( " member data_pos data_size member_pos member_size\n", stdout ); + for( long i = 0; i < members; ++i ) + { + const Block & db = lzip_index.dblock( i ); + const Block & mb = lzip_index.mblock( i ); + std::printf( "%6ld %14llu %14llu %14llu %14llu\n", + i + 1, db.pos(), db.size(), mb.pos(), mb.size() ); + } + first_post = true; // reprint heading after list of members + } + std::fflush( stdout ); + } + if( verbosity >= 0 && files > 1 ) + { + if( verbosity >= 1 ) std::fputs( " ", stdout ); + list_line( total_uncomp, total_comp, "(totals)" ); + std::fflush( stdout ); + } + return retval; + } diff --git a/lzip.h b/lzip.h new file mode 100644 index 0000000..dec033c --- /dev/null +++ b/lzip.h @@ -0,0 +1,372 @@ +/* Lzip - LZMA lossless data compressor + Copyright (C) 2008-2024 Antonio Diaz Diaz. 
//*/ // ends the licence comment if the line above left it open; a plain line comment otherwise
/*
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

// Coder state machine with 12 states. States below 7 mean that the last
// coded symbol was a literal byte ("char"); the set_* members move the
// state after each kind of coded symbol.
class State
  {
  int st;

public:
  enum { states = 12 };
  State() : st( 0 ) {}
  int operator()() const { return st; }
  bool is_char() const { return st < 7; }

  // Transition taken after coding a literal.
  void set_char()
    {
    static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
    st = next[st];
    }
  // Same transition as set_char; reports whether the new state is below 4.
  bool is_char_set_char() { set_char(); return st < 4; }
  void set_char_rep() { st = 8; }
  void set_match() { st = ( st < 7 ) ? 7 : 10; }
  void set_rep() { st = ( st < 7 ) ? 8 : 11; }
  void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
  };


// Size limits and model dimensions shared by the coder.
enum {
  min_dictionary_bits = 12,
  min_dictionary_size = 1 << min_dictionary_bits,	// >= modeled_distances
  max_dictionary_bits = 29,
  max_dictionary_size = 1 << max_dictionary_bits,
  min_member_size = 36,
  literal_context_bits = 3,
  literal_pos_state_bits = 0,				// not used
  pos_state_bits = 2,
  pos_states = 1 << pos_state_bits,
  pos_state_mask = pos_states - 1,

  len_states = 4,
  dis_slot_bits = 6,
  start_dis_model = 4,
  end_dis_model = 14,
  modeled_distances = 1 << ( end_dis_model / 2 ),	// 128
  dis_align_bits = 4,
  dis_align_size = 1 << dis_align_bits,

  len_low_bits = 3,
  len_mid_bits = 3,
  len_high_bits = 8,
  len_low_symbols = 1 << len_low_bits,
  len_mid_symbols = 1 << len_mid_bits,
  len_high_symbols = 1 << len_high_bits,
  max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols,

  min_match_len = 2,					// must be 2
  max_match_len = min_match_len + max_len_symbols - 1,	// 273
  min_match_len_limit = 5 };

// Map a match length to its length state: 0 for min_match_len, growing
// by one per extra byte and saturating at len_states - 1.
inline int get_len_state( const int len )
  { return std::min( len - min_match_len, len_states - 1 ); }

// Literal state: the literal_context_bits most significant bits of the
// previous byte.
inline int get_lit_state( const uint8_t prev_byte )
  { return prev_byte >> ( 8 - literal_context_bits ); }


enum { bit_model_move_bits = 5,
       bit_model_total_bits = 11,
       bit_model_total = 1 << bit_model_total_bits };

// Adaptive probability of one bit, kept in the range of bit_model_total.
// A fresh or reset model holds bit_model_total / 2.
struct Bit_model
  {
  int probability;
  void reset() { probability = bit_model_total / 2; }
  // Reset 'size' consecutive models starting at this one. The caller must
  // make sure that this object is followed in memory by size - 1 more
  // Bit_model objects.
  void reset( const int size )
    { for( int i = 0; i < size; ++i ) this[i].reset(); }
  Bit_model() { reset(); }
  };

// Bit models used to code a match length: two choice bits plus the low,
// mid and high symbol trees.
struct Len_model
  {
  Bit_model choice1;
  Bit_model choice2;
  Bit_model bm_low[pos_states][len_low_symbols];
  Bit_model bm_mid[pos_states][len_mid_symbols];
  Bit_model bm_high[len_high_symbols];

  void reset()
    {
    choice1.reset();
    choice2.reset();
    // each table is reset as one flat run starting at its first element
    bm_low[0][0].reset( pos_states * len_low_symbols );
    bm_mid[0][0].reset( pos_states * len_mid_symbols );
    bm_high[0].reset( len_high_symbols );
    }
  };


// defined in main.cc
extern int verbosity;
+ +class Pretty_print // requires global var 'int verbosity' + { + std::string name_; + std::string padded_name; + const char * const stdin_name; + unsigned longest_name; + mutable bool first_post; + +public: + Pretty_print( const std::vector< std::string > & filenames ) + : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false ) + { + if( verbosity <= 0 ) return; + const unsigned stdin_name_len = std::strlen( stdin_name ); + for( unsigned i = 0; i < filenames.size(); ++i ) + { + const std::string & s = filenames[i]; + const unsigned len = ( s == "-" ) ? stdin_name_len : s.size(); + if( longest_name < len ) longest_name = len; + } + if( longest_name == 0 ) longest_name = stdin_name_len; + } + + void set_name( const std::string & filename ) + { + if( filename.size() && filename != "-" ) name_ = filename; + else name_ = stdin_name; + padded_name = " "; padded_name += name_; padded_name += ": "; + if( longest_name > name_.size() ) + padded_name.append( longest_name - name_.size(), ' ' ); + first_post = true; + } + + void reset() const { if( name_.size() ) first_post = true; } + const char * name() const { return name_.c_str(); } + void operator()( const char * const msg = 0 ) const; + }; + + +class CRC32 + { + uint32_t data[256]; // Table of CRCs of all 8-bit messages. 
+ +public: + CRC32() + { + for( unsigned n = 0; n < 256; ++n ) + { + unsigned c = n; + for( int k = 0; k < 8; ++k ) + { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } + data[n] = c; + } + } + + uint32_t operator[]( const uint8_t byte ) const { return data[byte]; } + + void update_byte( uint32_t & crc, const uint8_t byte ) const + { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } + + // about as fast as it is possible without messing with endianness + void update_buf( uint32_t & crc, const uint8_t * const buffer, + const int size ) const + { + uint32_t c = crc; + for( int i = 0; i < size; ++i ) + c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 ); + crc = c; + } + }; + +extern const CRC32 crc32; + + +inline bool isvalid_ds( const unsigned dictionary_size ) + { return dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size; } + + +inline int real_bits( unsigned value ) + { + int bits = 0; + while( value > 0 ) { value >>= 1; ++bits; } + return bits; + } + + +const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" + +struct Lzip_header + { + enum { size = 6 }; + uint8_t data[size]; // 0-3 magic bytes + // 4 version + // 5 coded dictionary size + + void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; } + bool check_magic() const { return std::memcmp( data, lzip_magic, 4 ) == 0; } + + bool check_prefix( const int sz ) const // detect (truncated) header + { + for( int i = 0; i < sz && i < 4; ++i ) + if( data[i] != lzip_magic[i] ) return false; + return sz > 0; + } + + bool check_corrupt() const // detect corrupt header + { + int matches = 0; + for( int i = 0; i < 4; ++i ) + if( data[i] == lzip_magic[i] ) ++matches; + return matches > 1 && matches < 4; + } + + uint8_t version() const { return data[4]; } + bool check_version() const { return data[4] == 1; } + + unsigned dictionary_size() const + { + unsigned sz = 1 << ( data[5] & 0x1F ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); + 
return sz; + } + + bool dictionary_size( const unsigned sz ) + { + if( !isvalid_ds( sz ) ) return false; + data[5] = real_bits( sz - 1 ); + if( sz > min_dictionary_size ) + { + const unsigned base_size = 1 << data[5]; + const unsigned fraction = base_size / 16; + for( unsigned i = 7; i >= 1; --i ) + if( base_size - ( i * fraction ) >= sz ) + { data[5] |= i << 5; break; } + } + return true; + } + + bool check() const + { return check_magic() && check_version() && + isvalid_ds( dictionary_size() ); } + }; + + +struct Lzip_trailer + { + enum { size = 20 }; + uint8_t data[size]; // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 member size including header and trailer + + unsigned data_crc() const + { + unsigned tmp = 0; + for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + void data_crc( unsigned crc ) + { for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } } + + unsigned long long data_size() const + { + unsigned long long tmp = 0; + for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + void data_size( unsigned long long sz ) + { for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } + + unsigned long long member_size() const + { + unsigned long long tmp = 0; + for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + void member_size( unsigned long long sz ) + { for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } + + bool check_consistency() const // check internal consistency + { + const unsigned crc = data_crc(); + const unsigned long long dsize = data_size(); + if( ( crc == 0 ) != ( dsize == 0 ) ) return false; + const unsigned long long msize = member_size(); + if( msize < min_member_size ) return false; + const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; + if( mlimit > dsize && msize > mlimit ) return false; + const unsigned long long dlimit = 7090 * ( msize - 
26 ) - 1; + if( dlimit > msize && dsize > dlimit ) return false; + return true; + } + }; + + +struct Cl_options // command-line options + { + bool ignore_empty; + bool ignore_marking; + bool ignore_trailing; + bool loose_trailing; + + Cl_options() + : ignore_empty( true ), ignore_marking( true ), + ignore_trailing( true ), loose_trailing( false ) {} + }; + + +struct Error + { + const char * const msg; + explicit Error( const char * const s ) : msg( s ) {} + }; + +inline void set_retval( int & retval, const int new_val ) + { if( retval < new_val ) retval = new_val; } + +const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; +const char * const bad_dict_msg = "Invalid dictionary size in member header."; +const char * const corrupt_mm_msg = "Corrupt header in multimember file."; +const char * const empty_msg = "Empty member not allowed."; +const char * const marking_msg = "Marking data not allowed."; +const char * const trailing_msg = "Trailing data not allowed."; + +// defined in decoder.cc +int readblock( const int fd, uint8_t * const buf, const int size ); +int writeblock( const int fd, const uint8_t * const buf, const int size ); + +// defined in list.cc +int list_files( const std::vector< std::string > & filenames, + const Cl_options & cl_opts ); + +// defined in main.cc +struct stat; +const char * bad_version( const unsigned version ); +const char * format_ds( const unsigned dictionary_size ); +void show_header( const unsigned dictionary_size ); +int open_instream( const char * const name, struct stat * const in_statsp, + const bool one_to_one, const bool reg_only = false ); +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode = 0 ); +void internal_error( const char * const msg ); +class Matchfinder_base; +void show_cprogress( const unsigned long long cfile_size = 0, + const unsigned long long 
partial_size = 0, + const Matchfinder_base * const m = 0, + const Pretty_print * const p = 0 ); +class Range_decoder; +void show_dprogress( const unsigned long long cfile_size = 0, + const unsigned long long partial_size = 0, + const Range_decoder * const d = 0, + const Pretty_print * const p = 0 ); diff --git a/lzip_index.cc b/lzip_index.cc new file mode 100644 index 0000000..6c1caf2 --- /dev/null +++ b/lzip_index.cc @@ -0,0 +1,218 @@ +/* Lzip - LZMA lossless data compressor + Copyright (C) 2008-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lzip.h" +#include "lzip_index.h" + + +namespace { + +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return readblock( fd, buf, size ); + return 0; + } + +} // end namespace + + +bool Lzip_index::check_header( const Lzip_header & header ) + { + if( !header.check_magic() ) + { error_ = bad_magic_msg; retval_ = 2; return false; } + if( !header.check_version() ) + { error_ = bad_version( header.version() ); retval_ = 2; return false; } + if( !isvalid_ds( header.dictionary_size() ) ) + { error_ = bad_dict_msg; retval_ = 2; return false; } + return true; + } + +void Lzip_index::set_errno_error( const char * const msg ) + { + error_ = msg; error_ += std::strerror( errno ); + retval_ = 1; + } + +void Lzip_index::set_num_error( const char * const msg, unsigned long long num ) + { + char buf[80]; + snprintf( buf, sizeof buf, "%s%llu", msg, num ); + error_ = buf; + retval_ = 2; + } + + +bool Lzip_index::read_header( const int fd, Lzip_header & header, + const long long pos, const bool ignore_marking ) + { + if( seek_read( fd, header.data, header.size, pos ) != header.size ) + { set_errno_error( "Error reading member header: " ); return false; } + uint8_t byte; + if( !ignore_marking && readblock( fd, &byte, 1 ) == 1 && byte != 0 ) + { error_ = marking_msg; retval_ = 2; return false; } + return true; + } + + +// If successful, push last member and set pos to member header. 
+bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos, + const Cl_options & cl_opts ) + { + if( pos < min_member_size ) return false; + enum { block_size = 16384, + buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size }; + uint8_t buffer[buffer_size]; + int bsize = pos % block_size; // total bytes in buffer + if( bsize <= buffer_size - block_size ) bsize += block_size; + int search_size = bsize; // bytes to search for trailer + int rd_size = bsize; // bytes to read from file + unsigned long long ipos = pos - rd_size; // aligned to block_size + + while( true ) + { + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { set_errno_error( "Error seeking member trailer: " ); return false; } + const uint8_t max_msb = ( ipos + search_size ) >> 56; + for( int i = search_size; i >= Lzip_trailer::size; --i ) + if( buffer[i-1] <= max_msb ) // most significant byte of member_size + { + const Lzip_trailer & trailer = + *(const Lzip_trailer *)( buffer + i - trailer.size ); + const unsigned long long member_size = trailer.member_size(); + if( member_size == 0 ) // skip trailing zeros + { while( i > trailer.size && buffer[i-9] == 0 ) --i; continue; } + if( member_size > ipos + i || !trailer.check_consistency() ) continue; + Lzip_header header; + if( !read_header( fd, header, ipos + i - member_size, + cl_opts.ignore_marking ) ) return false; + if( !header.check() ) continue; + const Lzip_header & header2 = *(const Lzip_header *)( buffer + i ); + const bool full_h2 = bsize - i >= header.size; + if( header2.check_prefix( bsize - i ) ) // last member + { + if( !full_h2 ) error_ = "Last member in input file is truncated."; + else if( check_header( header2 ) ) + error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; + } + if( !cl_opts.loose_trailing && full_h2 && header2.check_corrupt() ) + { error_ = corrupt_mm_msg; retval_ = 2; return false; } + if( !cl_opts.ignore_trailing ) + { error_ = trailing_msg; 
retval_ = 2; return false; } + const unsigned long long data_size = trailer.data_size(); + if( !cl_opts.ignore_empty && data_size == 0 ) + { error_ = empty_msg; retval_ = 2; return false; } + pos = ipos + i - member_size; // good member + const unsigned dictionary_size = header.dictionary_size(); + if( dictionary_size_ < dictionary_size ) + dictionary_size_ = dictionary_size; + member_vector.push_back( Member( 0, data_size, pos, member_size, + dictionary_size ) ); + return true; + } + if( ipos == 0 ) + { set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); + return false; } + bsize = buffer_size; + search_size = bsize - Lzip_header::size; + rd_size = block_size; + ipos -= rd_size; + std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts ) + : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ) + { + if( insize < 0 ) + { set_errno_error( "Input file is not seekable: " ); return; } + if( insize < min_member_size ) + { error_ = "Input file is too short."; retval_ = 2; return; } + if( insize > INT64_MAX ) + { error_ = "Input file is too long (2^63 bytes or more)."; + retval_ = 2; return; } + + Lzip_header header; + if( !read_header( infd, header, 0, cl_opts.ignore_marking ) || + !check_header( header ) ) return; + + unsigned long long pos = insize; // always points to a header or to EOF + while( pos >= min_member_size ) + { + Lzip_trailer trailer; + if( seek_read( infd, trailer.data, trailer.size, pos - trailer.size ) != + trailer.size ) + { set_errno_error( "Error reading member trailer: " ); break; } + const unsigned long long member_size = trailer.member_size(); + if( member_size > pos || !trailer.check_consistency() ) // bad trailer + { + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, cl_opts ) ) continue; return; } + set_num_error( "Bad trailer at pos ", pos - trailer.size ); break; + } + if( !read_header( infd, header, 
pos - member_size, cl_opts.ignore_marking ) ) + break; + if( !header.check() ) // bad header + { + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, cl_opts ) ) continue; return; } + set_num_error( "Bad header at pos ", pos - member_size ); break; + } + const unsigned long long data_size = trailer.data_size(); + if( !cl_opts.ignore_empty && data_size == 0 ) + { error_ = empty_msg; retval_ = 2; break; } + pos -= member_size; // good member + const unsigned dictionary_size = header.dictionary_size(); + if( dictionary_size_ < dictionary_size ) + dictionary_size_ = dictionary_size; + member_vector.push_back( Member( 0, data_size, pos, member_size, + dictionary_size ) ); + } + if( pos != 0 || member_vector.empty() || retval_ != 0 ) + { + member_vector.clear(); + if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } + return; + } + std::reverse( member_vector.begin(), member_vector.end() ); + for( unsigned long i = 0; ; ++i ) + { + const long long end = member_vector[i].dblock.end(); + if( end < 0 || end > INT64_MAX ) + { + member_vector.clear(); + error_ = "Data in input file is too long (2^63 bytes or more)."; + retval_ = 2; return; + } + if( i + 1 >= member_vector.size() ) break; + member_vector[i+1].dblock.pos( end ); + } + } diff --git a/lzip_index.h b/lzip_index.h new file mode 100644 index 0000000..928a7c7 --- /dev/null +++ b/lzip_index.h @@ -0,0 +1,93 @@ +/* Lzip - LZMA lossless data compressor + Copyright (C) 2008-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef INT64_MAX +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +class Block + { + long long pos_, size_; // pos >= 0, size >= 0, pos + size <= INT64_MAX + +public: + Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} + + long long pos() const { return pos_; } + long long size() const { return size_; } + long long end() const { return pos_ + size_; } + + void pos( const long long p ) { pos_ = p; } + void size( const long long s ) { size_ = s; } + }; + + +class Lzip_index + { + struct Member + { + Block dblock, mblock; // data block, member block + unsigned dictionary_size; + + Member( const long long dpos, const long long dsize, + const long long mpos, const long long msize, + const unsigned dict_size ) + : dblock( dpos, dsize ), mblock( mpos, msize ), + dictionary_size( dict_size ) {} + }; + + std::vector< Member > member_vector; + std::string error_; + const long long insize; + int retval_; + unsigned dictionary_size_; // largest dictionary size in the file + + bool check_header( const Lzip_header & header ); + void set_errno_error( const char * const msg ); + void set_num_error( const char * const msg, unsigned long long num ); + bool read_header( const int fd, Lzip_header & header, const long long pos, + const bool ignore_marking ); + bool skip_trailing_data( const int fd, unsigned long long & pos, + const Cl_options & cl_opts ); + +public: + Lzip_index( const int infd, const Cl_options & cl_opts ); + + long members() const { return member_vector.size(); } + const std::string & error() const { return error_; } + int retval() const { return retval_; } + unsigned dictionary_size() const { return dictionary_size_; } + + long long udata_size() const + { if( member_vector.empty() ) return 0; + return member_vector.back().dblock.end(); } + + long long cdata_size() const + { if( member_vector.empty() ) return 0; + return 
member_vector.back().mblock.end(); } + + // total size including trailing data (if any) + long long file_size() const + { if( insize >= 0 ) return insize; else return 0; } + + const Block & dblock( const long i ) const + { return member_vector[i].dblock; } + const Block & mblock( const long i ) const + { return member_vector[i].mblock; } + unsigned dictionary_size( const long i ) const + { return member_vector[i].dictionary_size; } + }; diff --git a/main.cc b/main.cc new file mode 100644 index 0000000..1a905a8 --- /dev/null +++ b/main.cc @@ -0,0 +1,1107 @@ +/* Lzip - LZMA lossless data compressor + Copyright (C) 2008-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid command-line options, I/O errors, etc), 2 to + indicate a corrupt or invalid input file, 3 for an internal consistency + error (e.g., bug) which caused lzip to panic. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include // CHAR_BIT, SSIZE_MAX +#include +#include +#include +#include +#include +#include +#include +#include +#include // SIZE_MAX +#include +#include +#include +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ +#include +#if defined __MSVCRT__ +#define fchmod(x,y) 0 +#define fchown(x,y,z) 0 +#define strtoull std::strtoul +#define SIGHUP SIGTERM +#define S_ISSOCK(x) 0 +#ifndef S_IRGRP +#define S_IRGRP 0 +#define S_IWGRP 0 +#define S_IROTH 0 +#define S_IWOTH 0 +#endif +#endif +#if defined __DJGPP__ +#define S_ISSOCK(x) 0 +#define S_ISVTX 0 +#endif +#endif + +#include "arg_parser.h" +#include "lzip.h" +#include "decoder.h" +#include "encoder_base.h" +#include "encoder.h" +#include "fast_encoder.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + +#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX ) +#error "Environments where 'size_t' is narrower than 'int' are not supported." +#endif + +int verbosity = 0; + +namespace { + +const char * const program_name = "lzip"; +const char * const program_year = "2024"; +const char * invocation_name = program_name; // default value + +const struct { const char * from; const char * to; } known_extensions[] = { + { ".lz", "" }, + { ".tlz", ".tar" }, + { 0, 0 } }; + +struct Lzma_options + { + int dictionary_size; // 4 KiB .. 512 MiB + int match_len_limit; // 5 .. 273 + }; + +enum Mode { m_compress, m_decompress, m_list, m_test }; + +/* Variables used in signal handler context. + They are not declared volatile because the handler never returns. */ +std::string output_filename; +int outfd = -1; +bool delete_output_on_interrupt = false; + + +void show_help() + { + std::printf( "Lzip is a lossless data compressor with a user interface similar to the one\n" + "of gzip or bzip2. 
Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n" + "chain-Algorithm' (LZMA) stream format to maximize interoperability. The\n" + "maximum dictionary size is 512 MiB so that any lzip file can be decompressed\n" + "on 32-bit machines. Lzip provides accurate and robust 3-factor integrity\n" + "checking. Lzip can compress about as fast as gzip (lzip -0) or compress most\n" + "files more than bzip2 (lzip -9). Decompression speed is intermediate between\n" + "gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery\n" + "perspective. Lzip has been designed, written, and tested with great care to\n" + "replace gzip and bzip2 as the standard general-purpose compressed format for\n" + "Unix-like systems.\n" + "\nUsage: %s [options] [files]\n", invocation_name ); + std::printf( "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --trailing-error exit with error status if trailing data\n" + " -b, --member-size= set member size limit in bytes\n" + " -c, --stdout write to standard output, keep input files\n" + " -d, --decompress decompress, test compressed file integrity\n" + " -f, --force overwrite existing output files\n" + " -F, --recompress force re-compression of compressed files\n" + " -k, --keep keep (don't delete) input files\n" + " -l, --list print (un)compressed file sizes\n" + " -m, --match-length= set match length limit in bytes [36]\n" + " -o, --output= write to , keep input files\n" + " -q, --quiet suppress all messages\n" + " -s, --dictionary-size= set dictionary size limit in bytes [8 MiB]\n" + " -S, --volume-size= set volume size limit in bytes\n" + " -t, --test test compressed file integrity\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -0 .. 
-9 set compression level [default 6]\n" + " --fast alias for -0\n" + " --best alias for -9\n" + " --empty-error exit with error status if empty member in file\n" + " --marking-error exit with error status if 1st LZMA byte not 0\n" + " --loose-trailing allow trailing data seeming corrupt header\n" + "\nIf no file names are given, or if a file is '-', lzip compresses or\n" + "decompresses from standard input to standard output.\n" + "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" + "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12 to\n" + "2^29 bytes.\n" + "\nThe bidimensional parameter space of LZMA can't be mapped to a linear scale\n" + "optimal for all files. If your files are large, very repetitive, etc, you\n" + "may need to use the options --dictionary-size and --match-length directly\n" + "to achieve optimal performance.\n" + "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n" + "'tar -xf foo.tar.lz' or 'lzip -cd foo.tar.lz | tar -xf -'.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems\n" + "(file not found, invalid command-line options, I/O errors, etc), 2 to\n" + "indicate a corrupt or invalid input file, 3 for an internal consistency\n" + "error (e.g., bug) which caused lzip to panic.\n" + "\nThe ideas embodied in lzip are due to (at least) the following people:\n" + "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the\n" + "definition of Markov chains), G.N.N. 
Martin (for the definition of range\n"
               "encoding), Igor Pavlov (for putting all the above together in LZMA), and\n"
               "Julian Seward (for bzip2's CLI).\n"
               "\nReport bugs to lzip-bug@nongnu.org\n"
               "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
  }


// Print the program name and version, the copyright line, and the license
// notice to standard output.
void show_version()
  {
  std::printf( "%s %s\n", program_name, PROGVERSION );
  std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
  std::printf( "License GPLv2+: GNU GPL version 2 or later \n"
               "This is free software: you are free to change and redistribute it.\n"
               "There is NO WARRANTY, to the extent permitted by law.\n" );
  }

} // end namespace

/* Print 'msg' to stderr. The first call after construction/reset also
   prints the padded file name before the message. A null 'msg' prints
   only the name (and flushes stderr). Prints nothing if verbosity < 0. */
void Pretty_print::operator()( const char * const msg ) const
  {
  if( verbosity < 0 ) return;
  if( first_post )
    {
    first_post = false;
    std::fputs( padded_name.c_str(), stderr );
    if( !msg ) std::fflush( stderr );	// make the bare name visible now
    }
  if( msg ) std::fprintf( stderr, "%s\n", msg );
  }


/* Return a message stating that member format 'version' is not supported.
   The result points to a static buffer overwritten by the next call. */
const char * bad_version( const unsigned version )
  {
  static char buf[80];
  snprintf( buf, sizeof buf, "Version %u member format not supported.",
            version );
  return buf;
  }


/* Format 'dictionary_size' as a number followed by an optional binary
   prefix ("Ki", "Mi", "Gi") and 'B'. The number is divided by 1024 while
   it is larger than 9999, or while it is an exact multiple of 1024.
   The result points to a static buffer overwritten by the next call. */
const char * format_ds( const unsigned dictionary_size )
  {
  enum { bufsize = 16, factor = 1024, n = 3 };
  static char buf[bufsize];
  const char * const prefix[n] = { "Ki", "Mi", "Gi" };
  const char * p = "";
  const char * np = " ";		// padding used only when no prefix is printed
  unsigned num = dictionary_size;
  bool exact = ( num % factor == 0 );

  for( int i = 0; i < n && ( num > 9999 || ( exact && num >= factor ) ); ++i )
    { num /= factor; if( num % factor != 0 ) exact = false;
      p = prefix[i]; np = ""; }
  snprintf( buf, bufsize, "%s%4u %sB", np, num, p );
  return buf;
  }


// Print the formatted dictionary size to stderr.
void show_header( const unsigned dictionary_size )
  {
  std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) );
  }

namespace {

/* Format 'num' as decimal text.
   Numbers larger than 1024 that are exact multiples of 1024 (or, failing
   that, of 1000) are reduced and given a binary ("Ki", "Mi", ...) or SI
   ("k", "M", ...) suffix.
   Separate numbers of 5 or more digits in groups of 3 digits using '_'.
   The result points into one of 8 static buffers used in rotation, so up
   to 8 results can be alive at once (e.g., in one printf call). */
const char * format_num3( unsigned long long num )
  {
  enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 };
  const char * const si_prefix = "kMGTPEZYRQ";
  const char * const binary_prefix = "KMGTPEZYRQ";
  static char buffer[buffers][bufsize];	// circle of static buffers for printf
  static int current = 0;

  char * const buf = buffer[current++]; current %= buffers;
  char * p = buf + bufsize - 1;		// fill the buffer backwards
  *p = 0;				// terminator
  if( num > 1024 )
    {
    char prefix = 0;			// try binary first, then si
    for( int i = 0; i < n && num != 0 && num % 1024 == 0; ++i )
      { num /= 1024; prefix = binary_prefix[i]; }
    if( prefix ) *(--p) = 'i';
    else
      for( int i = 0; i < n && num != 0 && num % 1000 == 0; ++i )
        { num /= 1000; prefix = si_prefix[i]; }
    if( prefix ) *(--p) = prefix;
    }
  const bool split = num >= 10000;	// decided after removing the multiplier

  for( int i = 0; ; )
    {
    *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break;
    if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; }
    }
  return p;
  }


// Report to stderr a problem 'msg' with argument 'arg' of 'option_name',
// unless verbosity < 0.
void show_option_error( const char * const arg, const char * const msg,
                        const char * const option_name )
  {
  if( verbosity >= 0 )
    std::fprintf( stderr, "%s: '%s': %s option '%s'.\n",
                  program_name, arg, msg, option_name );
  }


/* Parse the numerical argument 'arg' of option 'option_name'.
   Recognized formats: <num>k, <num>Ki, <num>[MGTPEZYRQ][i]
   Only the first character after the number selects the multiplier and
   only the second is tested for 'i'; any later characters are not examined.
   Returns the value if it is in the range [llimit, ulimit].
   On a missing number, a bad multiplier, or a value out of range, prints
   a diagnostic and exits with status 1. */
unsigned long long getnum( const char * const arg,
                           const char * const option_name,
                           const unsigned long long llimit,
                           const unsigned long long ulimit )
  {
  char * tail;
  errno = 0;
  unsigned long long result = strtoull( arg, &tail, 0 );
  if( tail == arg )			// no digits were converted
    { show_option_error( arg, "Bad or missing numerical argument in",
                         option_name ); std::exit( 1 ); }

  if( !errno && tail[0] )
    {
    const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000;
    int exponent = 0;				// 0 = bad multiplier
    switch( tail[0] )
      {
      case 'Q': exponent = 10; break;
      case 'R': exponent = 9; break;
      case 'Y': exponent = 8; break;
      case 'Z': exponent = 7; break;
      case 'E': exponent = 6; break;
      case 'P': exponent = 5; break;
      case 'T': exponent = 4; break;
      case 'G': exponent = 3; break;
      case 'M': exponent = 2; break;
      case 'K': if( factor == 1024 ) exponent = 1; break;	// only "Ki"
      case 'k': if( factor == 1000 ) exponent = 1; break;	// only "k"
      }
    if( exponent <= 0 )
      { show_option_error( arg, "Bad multiplier in numerical argument of",
                           option_name ); std::exit( 1 ); }
    for( int i = 0; i < exponent; ++i )
      {
      // multiply only while the product can't exceed ulimit (no overflow)
      if( ulimit / factor >= result ) result *= factor;
      else { errno = ERANGE; break; }
      }
    }
  if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
  if( errno )
    {
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in "
                    "option '%s'.\n", program_name, arg, format_num3( llimit ),
                    format_num3( ulimit ), option_name );
    std::exit( 1 );
    }
  return result;
  }


/* Parse a dictionary size argument.
   If 'arg' is just a number between min_dictionary_bits and
   max_dictionary_bits, it is taken as an exponent and 2^bits is returned.
   Else 'arg' is parsed by getnum as a size in bytes limited to the range
   [min_dictionary_size, max_dictionary_size]. */
int get_dict_size( const char * const arg, const char * const option_name )
  {
  char * tail;
  const long bits = std::strtol( arg, &tail, 0 );
  if( bits >= min_dictionary_bits &&
      bits <= max_dictionary_bits && *tail == 0 )
    return 1 << bits;
  return getnum( arg, option_name, min_dictionary_size, max_dictionary_size );
  }


/* Set 'program_mode' to 'new_mode'.
   If 'program_mode' is neither m_compress nor already 'new_mode', prints
   an error plus the help hint and exits with status 1. */
void set_mode( Mode & program_mode, const Mode new_mode )
  {
  if( program_mode != m_compress && program_mode != new_mode )
    {
    show_error( "Only one operation can be specified.", 0, true );
    std::exit( 1 );
    }
  program_mode = new_mode;
  }


/* Return the index in 'known_extensions' of the first entry whose 'from'
   string is a suffix of 'name' ('name' must be longer than the suffix),
   or -1 if none matches. The table ends at the entry with a null 'from'. */
int extension_index( const std::string & name )
  {
  for( int eindex = 0; known_extensions[eindex].from; ++eindex )
    {
    const std::string ext( known_extensions[eindex].from );
    if( name.size() > ext.size() &&
        name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
      return eindex;
    }
  return -1;
  }


/* Set the global 'output_filename' for compression from 'name'.
   "00001" is appended if 'multifile'. The first known extension is then
   appended if 'force_ext' or 'multifile' is set, or if no file names were
   given and the name does not already end in a known extension. */
void set_c_outname( const std::string & name, const bool filenames_given,
                    const bool force_ext, const bool multifile )
  {
  /* zupdate < 1.9 depends on lzip adding the extension '.lz' to name when
     reading from standard input. */
  output_filename = name;
  if( multifile ) output_filename += "00001";
  if( force_ext || multifile ||
      ( !filenames_given && extension_index( output_filename ) < 0 ) )
    output_filename += known_extensions[0].from;
  }


/* Set the global 'output_filename' for decompression from 'name'.
   If 'eindex' identifies a known extension, the 'from' suffix is replaced
   by the corresponding 'to' string. Else ".out" is appended to 'name' and,
   if verbosity >= 1, a notice is printed to stderr. */
void set_d_outname( const std::string & name, const int eindex )
  {
  if( eindex >= 0 )
    {
    const std::string from( known_extensions[eindex].from );
    if( name.size() > from.size() )
      {
      output_filename.assign( name, 0, name.size() - from.size() );
      output_filename += known_extensions[eindex].to;
      return;
      }
    }
  output_filename = name; output_filename += ".out";
  if( verbosity >= 1 )
    std::fprintf( stderr, "%s: %s: Can't guess original name -- using '%s'\n",
                  program_name, name.c_str(), output_filename.c_str() );
  }

} // end namespace

/* Open 'name' for reading and store its status in '*in_statsp'.
   Regular files are always accepted. Block and character devices, FIFOs
   and sockets are accepted only if neither 'reg_only' nor 'one_to_one' is
   set. Returns the file descriptor, or a negative value (after printing a
   diagnostic) if the file can't be opened or is not acceptable. */
int open_instream( const char * const name, struct stat * const in_statsp,
                   const bool one_to_one, const bool reg_only )
  {
  int infd = open( name, O_RDONLY | O_BINARY );
  if( infd < 0 )
    show_file_error( name, "Can't open input file", errno );
  else
    {
    const int i = fstat( infd, in_statsp );
    // NOTE(review): st_mode is read here even if fstat failed; the tests
    // below consult 'mode' only when i == 0.
    const mode_t mode = in_statsp->st_mode;
    const bool can_read = ( i == 0 && !reg_only &&
                            ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
                              S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
    if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
      {
      if( verbosity >= 0 )
        std::fprintf( stderr, "%s: %s: Input file is not a regular file%s.\n",
                      program_name, name, ( can_read && one_to_one ) ?
                      ",\n and neither '-c' nor '-o' were specified" : "" );
      close( infd );
      infd = -1;
      }
    }
  return infd;
  }

namespace {

/* Like open_instream (with reg_only = false), but when compressing without
   'recompress' it first refuses, with a diagnostic, any file whose name
   already has a known extension (eindex >= 0). Returns -1 in that case. */
int open_instream2( const char * const name, struct stat * const in_statsp,
                    const Mode program_mode, const int eindex,
                    const bool one_to_one, const bool recompress )
  {
  if( program_mode == m_compress && !recompress && eindex >= 0 )
    {
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: %s: Input file already has '%s' suffix.\n",
                    program_name, name, known_extensions[eindex].from );
    return -1;
    }
  return open_instream( name, in_statsp, one_to_one, false );
  }


/* Create every missing directory in the directory part of 'name' (the
   last path component is not created). Returns false, with errno set, if
   an existing component is not a directory or if mkdir fails with an
   error other than EEXIST. */
bool make_dirs( const std::string & name )
  {
  int i = name.size();
  while( i > 0 && name[i-1] != '/' ) --i;	// remove last component
  while( i > 0 && name[i-1] == '/' ) --i;	// remove slash(es)
  const int dirsize = i;	// size of dirname without trailing slash(es)

  for( i = 0; i < dirsize; )	// if dirsize == 0, dirname is '/' or empty
    {
    while( i < dirsize && name[i] == '/' ) ++i;
    const int first = i;
    while( i < dirsize && name[i] != '/' ) ++i;
    if( first < i )		// a new component was found; check prefix [0,i)
      {
      const std::string partial( name, 0, i );
      const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
      struct stat st;
      if( stat( partial.c_str(), &st ) == 0 )
        { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } }
      else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST )
        return false;	// if EEXIST, another process created the dir
      }
    }
  return true;
  }


/* Create the file 'output_filename' and store its descriptor in the
   global 'outfd'.
   'force' truncates an existing file; else creation fails if it exists.
   'protect' restricts the creation mode to user read/write and skips the
   creation of intermediate directories.
   On success sets 'delete_output_on_interrupt' and returns true.
   On failure prints a diagnostic and returns false (outfd is -1). */
bool open_outstream( const bool force, const bool protect )
  {
  const mode_t usr_rw = S_IRUSR | S_IWUSR;
  const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
  const mode_t outfd_mode = protect ? usr_rw : all_rw;
  int flags = O_CREAT | O_WRONLY | O_BINARY;
  if( force ) flags |= O_TRUNC; else flags |= O_EXCL;

  outfd = -1;
  if( output_filename.size() &&
      output_filename[output_filename.size()-1] == '/' ) errno = EISDIR;
  else {
    if( !protect && !make_dirs( output_filename ) )
      { show_file_error( output_filename.c_str(),
          "Error creating intermediate directory", errno ); return false; }
    outfd = open( output_filename.c_str(), flags, outfd_mode );
    if( outfd >= 0 ) { delete_output_on_interrupt = true; return true; }
    if( errno == EEXIST )
      { show_file_error( output_filename.c_str(),
          "Output file already exists, skipping." ); return false; }
    }
  show_file_error( output_filename.c_str(), "Can't create output file", errno );
  return false;
  }


// Install 'action' as the handler for SIGHUP, SIGINT, and SIGTERM.
void set_signals( void (*action)(int) )
  {
  std::signal( SIGHUP, action );
  std::signal( SIGINT, action );
  std::signal( SIGTERM, action );
  }


/* Exit with status 'retval'. If 'delete_output_on_interrupt' is set, the
   output file is first closed (if open) and removed. Signals are ignored
   during the cleanup. */
void cleanup_and_fail( const int retval )
  {
  set_signals( SIG_IGN );			// ignore signals
  if( delete_output_on_interrupt )
    {
    delete_output_on_interrupt = false;
    show_file_error( output_filename.c_str(),
                     "Deleting output file, if it exists." );
    if( outfd >= 0 ) { close( outfd ); outfd = -1; }
    if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
      show_error( "warning: deletion of output file failed", errno );
    }
  std::exit( retval );
  }


/* Handler installed through set_signals; reports the signal and exits
   with status 1 after removing the partial output file.
   NOTE(review): calls stdio functions and exit from a signal handler;
   confirm this is acceptable on the target platforms. */
extern "C" void signal_handler( int )
  {
  show_error( "Control-C or similar caught, quitting." );
  cleanup_and_fail( 1 );
  }


/* Refuse to read compressed data from a terminal.
   When decompressing or testing and 'infd' is a tty: prints a diagnostic,
   closes 'infd', raises 'retval' to 2 via set_retval, and returns false.
   Unless testing, it calls cleanup_and_fail instead of returning.
   Returns true in all other cases. */
bool check_tty_in( const char * const input_filename, const int infd,
                   const Mode program_mode, int & retval )
  {
  if( ( program_mode == m_decompress || program_mode == m_test ) &&
      isatty( infd ) )				// for example /dev/tty
    { show_file_error( input_filename,
                       "I won't read compressed data from a terminal." );
      close( infd ); set_retval( retval, 2 );
      if( program_mode != m_test ) cleanup_and_fail( retval );
      return false; }
  return true;
  }

/* Refuse to write compressed data to a terminal.
   Returns false, after printing a diagnostic, if compressing and the
   global 'outfd' is a tty. Returns true otherwise. */
bool check_tty_out( const Mode program_mode )
  {
  if( program_mode == m_compress && isatty( outfd ) )
    { show_file_error( output_filename.size() ?
                       output_filename.c_str() : "(stdout)",
                       "I won't write compressed data to a terminal." );
      return false; }
  return true;
  }


/* Close the global 'outfd' and clear 'delete_output_on_interrupt'.
   If 'in_statsp' is not null, first copy owner and permissions from it to
   the output file, and after closing copy the access and modification
   times. A failure to close is fatal (cleanup_and_fail( 1 )); failures to
   set attributes only produce a warning if verbosity >= 1. */
// Set permissions, owner, and times.
void close_and_set_permissions( const struct stat * const in_statsp )
  {
  bool warning = false;
  if( in_statsp )
    {
    const mode_t mode = in_statsp->st_mode;
    // fchown in many cases returns with EPERM, which can be safely ignored.
    if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
      { if( fchmod( outfd, mode ) != 0 ) warning = true; }
    else
      // owner not changed: don't copy the set-user-ID, set-group-ID,
      // and sticky bits
      if( errno != EPERM ||
          fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
        warning = true;
    }
  if( close( outfd ) != 0 )
    { show_file_error( output_filename.c_str(), "Error closing output file",
                       errno ); cleanup_and_fail( 1 ); }
  outfd = -1;
  delete_output_on_interrupt = false;
  if( in_statsp )
    {
    struct utimbuf t;
    t.actime = in_statsp->st_atime;
    t.modtime = in_statsp->st_mtime;
    if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
    }
  if( warning && verbosity >= 1 )
    show_file_error( output_filename.c_str(),
                     "warning: can't change output file attributes", errno );
  }


/* Increment the 5-character decimal counter that precedes the extension
   in 'output_filename' (as appended by set_c_outname for multifile
   output). Returns false if the name is too short or if the counter
   wraps around to "00000". */
bool next_filename()
  {
  const unsigned name_len = output_filename.size();
  const unsigned ext_len = std::strlen( known_extensions[0].from );
  if( name_len >= ext_len + 5 )				// "*00001.lz"
    for( int i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j )
      {
      // add one to the current digit; on '9' set it to '0' and carry
      if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
      else output_filename[i] = '0';
      }
  return false;
  }


/* Compress the data read from 'infd' and write it to the global 'outfd'
   as one or more members of at most 'member_size' bytes each.
   If 'volume_size' > 0 and an output file is open, the output is split
   into consecutive volume files named by next_filename.
   'zero' selects FLZ_encoder; else LZ_encoder is used with the parameters
   in 'encoder_options'. 'cfile_size' is passed to show_cprogress.
   Returns 0 on success, 1 on error. */
int compress( const unsigned long long cfile_size,
              const unsigned long long member_size,
              const unsigned long long volume_size, const int infd,
              const Lzma_options & encoder_options, const Pretty_print & pp,
              const struct stat * const in_statsp, const bool zero )
  {
  int retval = 0;
  LZ_encoder_base * encoder = 0;		// polymorphic encoder
  if( verbosity >= 1 ) pp();

  if( zero )
    encoder = new FLZ_encoder( infd, outfd );
  else
    {
    Lzip_header header;
    if( header.dictionary_size( encoder_options.dictionary_size ) &&
        encoder_options.match_len_limit >= min_match_len_limit &&
        encoder_options.match_len_limit <= max_match_len )
      encoder = new LZ_encoder( header.dictionary_size(),
                                encoder_options.match_len_limit, infd, outfd );
    else internal_error( "invalid argument to encoder." );
    }

  unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
  while( true )		// encode one member per iteration
    {
    // a member must not exceed the space left in the current volume
    const unsigned long long size = ( volume_size > 0 ) ?
      std::min( member_size, volume_size - partial_volume_size ) : member_size;
    show_cprogress( cfile_size, in_size, encoder, &pp );	// init
    if( !encoder->encode_member( size ) )
      { pp( "Encoder error." ); retval = 1; break; }
    in_size += encoder->data_position();
    out_size += encoder->member_position();
    if( encoder->data_finished() ) break;
    if( volume_size > 0 )
      {
      partial_volume_size += encoder->member_position();
      if( partial_volume_size >= volume_size - min_dictionary_size )
        {
        partial_volume_size = 0;		// start a new volume
        if( delete_output_on_interrupt )
          {
          close_and_set_permissions( in_statsp );
          if( !next_filename() )
            { pp( "Too many volume files." ); retval = 1; break; }
          // NOTE(review): 'in_statsp' (a pointer) is passed as the bool
          // 'protect' argument, so protect is true iff in_statsp != 0.
          if( !open_outstream( true, in_statsp ) ) { retval = 1; break; }
          }
        }
      }
    encoder->reset();
    }

  if( retval == 0 && verbosity >= 1 )
    {
    if( in_size == 0 || out_size == 0 )
      std::fputs( " no data compressed.\n", stderr );
    else
      std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved, "
                            "%llu in, %llu out.\n",
                    (double)in_size / out_size,
                    ( 100.0 * out_size ) / in_size,
                    100.0 - ( ( 100.0 * out_size ) / in_size ),
                    in_size, out_size );
    }
  delete encoder;
  return retval;
  }


// Return the uppercase hex digit for 'value' (0 to 15), or 0 if 'value'
// is larger than 15.
unsigned char xdigit( const unsigned value )	// hex digit for 'value'
  {
  if( value <= 9 ) return '0' + value;
  if( value <= 15 ) return 'A' + value - 10;
  return 0;
  }


/* Show the 'size' bytes of trailing 'data' in hexadecimal and as text
   (non-printable bytes shown as '.') if verbosity >= 4 or if
   'ignore_trailing' <= 0. If 'ignore_trailing' == 0, also print
   'trailing_msg' as a file error. 'all' tells whether 'data' is all the
   trailing data or just its first bytes.
   Returns true if the trailing data is to be ignored
   ('ignore_trailing' > 0). */
bool show_trailing_data( const uint8_t * const data, const int size,
                         const Pretty_print & pp, const bool all,
                         const int ignore_trailing )	// -1 = show
  {
  if( verbosity >= 4 || ignore_trailing <= 0 )
    {
    std::string msg;
    if( !all ) msg = "first bytes of ";
    msg += "trailing data = ";
    for( int i = 0; i < size; ++i )		// hexadecimal dump
      {
      msg += xdigit( data[i] >> 4 );
      msg += xdigit( data[i] & 0x0F );
      msg += ' ';
      }
    msg += '\'';
    for( int i = 0; i < size; ++i )		// text dump between quotes
      { if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; }
    msg += '\'';
    pp( msg.c_str() );
    if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg );
    }
  return ignore_trailing > 0;
  }


/* Decompress all the members read from 'infd', writing the decompressed
   data to the global 'outfd'. Each iteration reads and checks one member
   header and then decodes that member. 'testing' only selects the final
   word printed ("ok" instead of "done"). 'cfile_size' is passed to
   show_dprogress.
   Returns 0 on success, 2 if the input is found to be invalid. */
int decompress( const unsigned long long cfile_size, const int infd,
                const Cl_options & cl_opts, const Pretty_print & pp,
                const bool testing )
  {
  unsigned long long partial_file_pos = 0;
  Range_decoder rdec( infd );
  int retval = 0;

  for( bool first_member = true; ; first_member = false )
    {
    Lzip_header header;
    rdec.reset_member_position();
    const int size = rdec.read_data( header.data, header.size );
    if( rdec.finished() )			// End Of File
      {
      if( first_member )
        { show_file_error( pp.name(), "File ends unexpectedly at member header." );
          retval = 2; }
      else if( header.check_prefix( size ) )
        { pp( "Truncated header in multimember file." );
          show_trailing_data( header.data, size, pp, true, -1 ); retval = 2; }
      else if( size > 0 && !show_trailing_data( header.data, size, pp, true,
                                cl_opts.ignore_trailing ) ) retval = 2;
      break;
      }
    if( !header.check_magic() )
      {
      if( first_member )
        { show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
      else if( !cl_opts.loose_trailing && header.check_corrupt() )
        { pp( corrupt_mm_msg );
          show_trailing_data( header.data, size, pp, false, -1 ); retval = 2; }
      else if( !show_trailing_data( header.data, size, pp, false,
                                    cl_opts.ignore_trailing ) ) retval = 2;
      break;
      }
    if( !header.check_version() )
      { pp( bad_version( header.version() ) ); retval = 2; break; }
    const unsigned dictionary_size = header.dictionary_size();
    if( !isvalid_ds( dictionary_size ) )
      { pp( bad_dict_msg ); retval = 2; break; }

    if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp();

    LZ_decoder decoder( rdec, dictionary_size, outfd );
    show_dprogress( cfile_size, partial_file_pos, &rdec, &pp );	// init
    const int result = decoder.decode_member( cl_opts, pp );
    partial_file_pos += rdec.member_position();
    if( result != 0 )
      {
      if( verbosity >= 0 && result <= 2 )
        {
        pp();
        std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
                      "File ends unexpectedly" : "Decoder error",
                      partial_file_pos );
        }
      else if( result == 5 ) pp( empty_msg );
      else if( result == 6 ) pp( marking_msg );
      retval = 2; break;
      }
    if( verbosity >= 2 )
      { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
    }
  if( verbosity == 1 && retval == 0 )
    std::fputs( testing ? "ok\n" : "done\n", stderr );
  return retval;
  }

} // end namespace


/* Print to stderr the program name and 'msg' (if not empty), followed by
   the description of 'errcode' if it is > 0. If 'help' is true, also
   print a hint about the '--help' option. Prints nothing if
   verbosity < 0. */
void show_error( const char * const msg, const int errcode, const bool help )
  {
  if( verbosity < 0 ) return;
  if( msg && msg[0] )
    std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg,
                  ( errcode > 0 ) ? ": " : "",
                  ( errcode > 0 ) ? std::strerror( errcode ) : "" );
  if( help )
    std::fprintf( stderr, "Try '%s --help' for more information.\n",
                  invocation_name );
  }


/* Print to stderr the program name, 'filename', and 'msg', followed by
   the description of 'errcode' if it is > 0. Prints nothing if
   verbosity < 0. */
void show_file_error( const char * const filename, const char * const msg,
                      const int errcode )
  {
  if( verbosity >= 0 )
    std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
                  ( errcode > 0 ) ? ": " : "",
                  ( errcode > 0 ) ? std::strerror( errcode ) : "" );
  }


// Report an internal error (unless verbosity < 0) and exit with status 3.
void internal_error( const char * const msg )
  {
  if( verbosity >= 0 )
    std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
  std::exit( 3 );
  }


/* Show the compression progress on stderr.
   A call with a non-null 'p' (re)initializes the static state from the
   arguments. Progress is disabled permanently the first time such a call
   finds verbosity < 2 or stderr not being a terminal. Every call made
   while enabled and initialized prints the current position. */
void show_cprogress( const unsigned long long cfile_size,
                     const unsigned long long partial_size,
                     const Matchfinder_base * const m,
                     const Pretty_print * const p )
  {
  static unsigned long long csize = 0;		// file_size / 100
  static unsigned long long psize = 0;
  static const Matchfinder_base * mb = 0;
  static const Pretty_print * pp = 0;
  static bool enabled = true;

  if( !enabled ) return;
  if( p )					// initialize static vars
    {
    if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
    csize = cfile_size; psize = partial_size; mb = m; pp = p;
    }
  if( mb && pp )
    {
    const unsigned long long pos = psize + mb->data_position();
    if( csize > 0 )
      std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 );
    else
      std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
    pp->reset(); (*pp)();			// restore cursor position
    }
  }


/* Show the decompression progress on stderr.
   A call with a non-null 'p' (re)initializes the static state from the
   arguments. Progress is disabled permanently the first time such a call
   finds verbosity < 2 or stderr not being a terminal. While enabled, the
   display is refreshed on the initializing call and then once every 7
   calls. */
void show_dprogress( const unsigned long long cfile_size,
                     const unsigned long long partial_size,
                     const Range_decoder * const d,
                     const Pretty_print * const p )
  {
  static unsigned long long csize = 0;		// file_size / 100
  static unsigned long long psize = 0;
  static const Range_decoder * rdec = 0;
  static const Pretty_print * pp = 0;
  static int counter = 0;
  static bool enabled = true;

  if( !enabled ) return;
  if( p )					// initialize static vars
    {
    if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
    csize = cfile_size; psize = partial_size; rdec = d; pp = p; counter = 0;
    }
  if( rdec && pp && --counter <= 0 )
    {
    const unsigned long long pos = psize + rdec->member_position();
    counter = 7;		// update display every 114688 bytes
    if( csize > 0 )
      std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 );
    else
      std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
    pp->reset(); (*pp)();			// restore cursor position
    }
  }


/* Program entry point.
   Parses the command-line options, then lists, compresses, decompresses,
   or tests each file named in the remaining arguments (standard input if
   none is given or if the name is "-").
   Returns the highest status recorded via set_retval, or 1 directly on
   option and output errors. */
int main( const int argc, const char * const argv[] )
  {
  /* Mapping from gzip/bzip2 style 0..9 compression levels to the
     corresponding LZMA compression parameters. */
  const Lzma_options option_mapping[] =
    {
    { 1 << 16, 16 },		// -0
    { 1 << 20, 5 },		// -1
    { 3 << 19, 6 },		// -2
    { 1 << 21, 8 },		// -3
    { 3 << 20, 12 },		// -4
    { 1 << 22, 20 },		// -5
    { 1 << 23, 36 },		// -6
    { 1 << 24, 68 },		// -7
    { 3 << 23, 132 },		// -8
    { 1 << 25, 273 } };		// -9
  Lzma_options encoder_options = option_mapping[6];	// default = "-6"
  const unsigned long long max_member_size = 0x0008000000000000ULL; // 2 PiB
  const unsigned long long max_volume_size = 0x4000000000000000ULL; // 4 EiB
  unsigned long long member_size = max_member_size;
  unsigned long long volume_size = 0;
  std::string default_output_filename;
  Mode program_mode = m_compress;
  Cl_options cl_opts;		// command-line options
  bool force = false;
  bool keep_input_files = false;
  bool recompress = false;
  bool to_stdout = false;
  bool zero = false;
  if( argc > 0 ) invocation_name = argv[0];

  // codes for the options that have only a long name
  enum { opt_eer = 256, opt_lt, opt_mer };
  const Arg_parser::Option options[] =
    {
    { '0', "fast", Arg_parser::no },
    { '1', 0, Arg_parser::no },
    { '2', 0, Arg_parser::no },
    { '3', 0, Arg_parser::no },
    { '4', 0, Arg_parser::no },
    { '5', 0, Arg_parser::no },
    { '6', 0, Arg_parser::no },
    { '7', 0, Arg_parser::no },
    { '8', 0, Arg_parser::no },
    { '9', "best", Arg_parser::no },
    { 'a', "trailing-error", Arg_parser::no },
    { 'b', "member-size", Arg_parser::yes },
    { 'c', "stdout", Arg_parser::no },
    { 'd', "decompress", Arg_parser::no },
    { 'f', "force", Arg_parser::no },
    { 'F', "recompress", Arg_parser::no },
    { 'h', "help", Arg_parser::no },
    { 'k', "keep", Arg_parser::no },
    { 'l', "list", Arg_parser::no },
    { 'm', "match-length", Arg_parser::yes },
    { 'n', "threads", Arg_parser::yes },
    { 'o', "output", Arg_parser::yes },
    { 'q', "quiet", Arg_parser::no },
    { 's', "dictionary-size", Arg_parser::yes },
    { 'S', "volume-size", Arg_parser::yes },
    { 't', "test", Arg_parser::no },
    { 'v', "verbose", Arg_parser::no },
    { 'V', "version", Arg_parser::no },
    { opt_eer, "empty-error", Arg_parser::no },
    { opt_lt, "loose-trailing", Arg_parser::no },
    { opt_mer, "marking-error", Arg_parser::no },
    { 0, 0, Arg_parser::no } };

  const Arg_parser parser( argc, argv, options );
  if( parser.error().size() )				// bad option
    { show_error( parser.error().c_str(), 0, true ); return 1; }

  int argind = 0;
  for( ; argind < parser.arguments(); ++argind )
    {
    const int code = parser.code( argind );
    if( !code ) break;					// no more options
    const char * const pn = parser.parsed_name( argind ).c_str();
    const std::string & sarg = parser.argument( argind );
    const char * const arg = sarg.c_str();
    switch( code )
      {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
                zero = ( code == '0' );
                encoder_options = option_mapping[code-'0']; break;
      case 'a': cl_opts.ignore_trailing = false; break;
      case 'b': member_size = getnum( arg, pn, 100000, max_member_size ); break;
      case 'c': to_stdout = true; break;
      case 'd': set_mode( program_mode, m_decompress ); break;
      case 'f': force = true; break;
      case 'F': recompress = true; break;
      case 'h': show_help(); return 0;
      case 'k': keep_input_files = true; break;
      case 'l': set_mode( program_mode, m_list ); break;
      case 'm': encoder_options.match_len_limit =
                  getnum( arg, pn, min_match_len_limit, max_match_len );
                zero = false; break;
      case 'n': break;			// accepted; its argument is not used
      case 'o': if( sarg == "-" ) to_stdout = true;
                else { default_output_filename = sarg; } break;
      case 'q': verbosity = -1; break;
      case 's': encoder_options.dictionary_size = get_dict_size( arg, pn );
                zero = false; break;
      case 'S': volume_size = getnum( arg, pn, 100000, max_volume_size ); break;
      case 't': set_mode( program_mode, m_test ); break;
      case 'v': if( verbosity < 4 ) ++verbosity; break;
      case 'V': show_version(); return 0;
      case opt_eer: cl_opts.ignore_empty = false; break;
      case opt_lt: cl_opts.loose_trailing = true; break;
      case opt_mer: cl_opts.ignore_marking = false; break;
      default: internal_error( "uncaught option." );
      }
    } // end process options

#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
  setmode( STDIN_FILENO, O_BINARY );
  setmode( STDOUT_FILENO, O_BINARY );
#endif

  // the remaining arguments are file names; "-" stands for standard input
  std::vector< std::string > filenames;
  bool filenames_given = false;
  for( ; argind < parser.arguments(); ++argind )
    {
    filenames.push_back( parser.argument( argind ) );
    if( filenames.back() != "-" ) filenames_given = true;
    }
  if( filenames.empty() ) filenames.push_back("-");

  if( program_mode == m_list ) return list_files( filenames, cl_opts );

  if( program_mode == m_compress )
    {
    if( volume_size > 0 && !to_stdout && default_output_filename.size() &&
        filenames.size() > 1 )
      { show_error( "Only can compress one file when using '-o' and '-S'.",
                    0, true ); return 1; }
    dis_slots.init();
    prob_prices.init();
    }
  else volume_size = 0;			// volumes are only made when compressing
  if( program_mode == m_test ) to_stdout = false;	// apply overrides
  if( program_mode == m_test || to_stdout ) default_output_filename.clear();

  if( to_stdout && program_mode != m_test )	// check tty only once
    { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
  else outfd = -1;

  // to_file: all the output goes to the single file named with '-o'
  const bool to_file = !to_stdout && program_mode != m_test &&
                       default_output_filename.size();
  if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
    set_signals( signal_handler );

  Pretty_print pp( filenames );

  int failed_tests = 0;
  int retval = 0;
  // one_to_one: each input file produces its own output file
  const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
  bool stdin_used = false;
  struct stat in_stats;
  for( unsigned i = 0; i < filenames.size(); ++i )
    {
    std::string input_filename;		// stays empty when reading from stdin
    int infd;

    pp.set_name( filenames[i] );
    if( filenames[i] == "-" )
      {
      if( stdin_used ) continue; else stdin_used = true;
      infd = STDIN_FILENO;
      if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
      if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
      }
    else
      {
      const int eindex = extension_index( input_filename = filenames[i] );
      infd = open_instream2( input_filename.c_str(), &in_stats, program_mode,
                             eindex, one_to_one, recompress );
      if( infd < 0 ) { set_retval( retval, 1 ); continue; }
      if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
      if( one_to_one )			// open outfd after checking infd
        {
        if( program_mode == m_compress )
          set_c_outname( input_filename, true, true, volume_size > 0 );
        else set_d_outname( input_filename, eindex );
        if( !open_outstream( force, true ) )
          { close( infd ); set_retval( retval, 1 ); continue; }
        }
      }

    if( one_to_one && !check_tty_out( program_mode ) )
      { set_retval( retval, 1 ); return retval; }	// don't delete a tty

    if( to_file && outfd < 0 )		// open outfd after checking infd
      {
      if( program_mode == m_compress ) set_c_outname( default_output_filename,
                                       filenames_given, false, volume_size > 0 );
      else output_filename = default_output_filename;
      if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
        return 1;	// check tty only once and don't try to delete a tty
      }

    // in_stats is consulted only when the input is a named file
    const struct stat * const in_statsp =
      ( input_filename.size() && one_to_one ) ? &in_stats : 0;
    const unsigned long long cfile_size =
      ( input_filename.size() && S_ISREG( in_stats.st_mode ) ) ?
        ( in_stats.st_size + 99 ) / 100 : 0;
    int tmp;
    try {
      if( program_mode == m_compress )
        tmp = compress( cfile_size, member_size, volume_size, infd,
                        encoder_options, pp, in_statsp, zero );
      else
        tmp = decompress( cfile_size, infd, cl_opts, pp, program_mode == m_test );
      }
    catch( std::bad_alloc & )
      { pp( ( program_mode == m_compress ) ?
            "Not enough memory. Try a smaller dictionary size." :
            "Not enough memory." ); tmp = 1; }
    catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; }
    if( close( infd ) != 0 )
      { show_file_error( pp.name(), "Error closing input file", errno );
        set_retval( tmp, 1 ); }
    set_retval( retval, tmp );
    if( tmp )		// a failure is fatal except when testing
      { if( program_mode != m_test ) cleanup_and_fail( retval );
        else ++failed_tests; }

    if( delete_output_on_interrupt && one_to_one )
      close_and_set_permissions( in_statsp );
    // remove the input file unless it must be kept or volumes were made
    if( input_filename.size() && !keep_input_files && one_to_one &&
        ( program_mode != m_compress || volume_size == 0 ) )
      std::remove( input_filename.c_str() );
    }
  if( delete_output_on_interrupt )					// -o
    close_and_set_permissions( ( retval == 0 && !stdin_used &&
      filenames_given && filenames.size() == 1 ) ? &in_stats : 0 );
  else if( outfd >= 0 && close( outfd ) != 0 )				// -c
    {
    show_error( "Error closing stdout", errno );
    set_retval( retval, 1 );
    }
  if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
    std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
                  program_name, failed_tests,
                  ( failed_tests == 1 ) ? "file" : "files" );
  return retval;
  }