diff --git a/PdfForQtLib/sources/pdfjbig2decoder.cpp b/PdfForQtLib/sources/pdfjbig2decoder.cpp index e5a4c37..3dfff34 100644 --- a/PdfForQtLib/sources/pdfjbig2decoder.cpp +++ b/PdfForQtLib/sources/pdfjbig2decoder.cpp @@ -351,11 +351,23 @@ static constexpr PDFJBIG2ArithmeticDecoderQeValue JBIG2_ARITHMETIC_DECODER_QE_VA { 0x56010000, 46, 46, 0 } }; +uint32_t PDFJBIG2ArithmeticDecoder::readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state) +{ + uint32_t byte = 0; + for (int i = 0; i < 8; ++i) + { + byte = (byte << 1) | readBit(context, state); + } + + return byte; +} + void PDFJBIG2ArithmeticDecoder::perform_INITDEC() { // Used figure G.1, in annex G, of specification uint32_t B = m_reader->readUnsignedByte(); - m_c = (B ^ 0xFF) << 16; + m_lastByte = B; + m_c = B << 16; perform_BYTEIN(); m_c = m_c << 7; m_ct -= 7; @@ -365,24 +377,26 @@ void PDFJBIG2ArithmeticDecoder::perform_INITDEC() void PDFJBIG2ArithmeticDecoder::perform_BYTEIN() { // Used figure G.3, in annex G, of specification - const uint32_t B = m_reader->readUnsignedByte(); - if (B == 0xFF) + if (m_lastByte == 0xFF) { const uint32_t B1 = m_reader->look(8); if (B1 > 0x8F) { + m_c += 0xFF00; m_ct = 8; } else { - m_c = m_c + (0xFE00 - (B << 9)); + m_c = m_c + (B1 << 9); m_ct = 7; - m_reader->readUnsignedByte(); + m_lastByte = m_reader->readUnsignedByte(); } } else { - m_c = m_c + (0xFF00 - (B << 8)); + const uint32_t B = m_reader->readUnsignedByte(); + m_lastByte = B; + m_c = m_c + (B << 8); m_ct = 8; } } @@ -402,8 +416,11 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith const uint32_t Qe = QeInfo.Qe; m_a -= Qe; - if (m_c < m_a) + if (m_c >= Qe) { + // We are subtracting this value according to figure E.15 in the specification + m_c -= Qe; + if ((m_a & 0x80000000) == 0) { // We must perform MPS_EXCHANGE algorithm, according to figure E.16, in annex E, of specification @@ -430,9 +447,6 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith } 
else { - m_c -= m_a; - m_a = Qe; - // We must perform LPS_EXCHANGE algorithm, according to figure E.17, in annex E, of specification if (m_a < Qe) { @@ -447,6 +461,8 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith } state->setQeRowIndexAndMPS(context, QeInfo.newLPS, MPS); } + + m_a = Qe; } // Perform RENORMD algorithm, according to figure E.18, in annex E, of specification @@ -1454,4 +1470,9 @@ std::vector PDFJBIG2HuffmanCodeTable::buildPrefixes(c return result; } +uint32_t PDFJBIG2ArithmeticDecoderState::getQe(size_t context) const +{ + return JBIG2_ARITHMETIC_DECODER_QE_VALUES[getQeRowIndex(context)].Qe; +} + } // namespace pdf diff --git a/PdfForQtLib/sources/pdfjbig2decoder.h b/PdfForQtLib/sources/pdfjbig2decoder.h index 9ec5e9d..f17da93 100644 --- a/PdfForQtLib/sources/pdfjbig2decoder.h +++ b/PdfForQtLib/sources/pdfjbig2decoder.h @@ -43,7 +43,7 @@ enum class PDFJBIG2BitOperation /// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used /// to store Qe value index (current row in the table, number 0-46), and lowest 1 bit /// is used to store current MPS value (most probable symbol - 0/1). -class PDFJBIG2ArithmeticDecoderState +class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoderState { public: explicit inline PDFJBIG2ArithmeticDecoderState() = default; @@ -72,6 +72,10 @@ public: return m_state[context] >> 1; } + /// Returns Qe value for the current row index of the given context, according to document ISO/IEC 14492:2001, + /// annex E, table E.1 (Qe values and probability estimation process). Defined out of line (not inline), because the Qe table is local to the implementation file. + uint32_t getQe(size_t context) const; + /// Returns current bit value of MPS (most probable symbol) inline uint8_t getMPS(size_t context) const { @@ -96,13 +100,14 @@ private: /// of decoder described in document ISO/IEC 14492:2001, T.88, annex G (arithmetic decoding /// procedure). It uses 32-bit fixed point arithmetic instead of 16-bit fixed point /// arithmetic described in the specification (it is much faster). 
-class PDFJBIG2ArithmeticDecoder +class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoder { public: explicit inline PDFJBIG2ArithmeticDecoder(PDFBitReader* reader) : m_c(0), m_a(0), m_ct(0), + m_lastByte(0), m_reader(reader) { @@ -110,6 +115,11 @@ public: void initialize() { perform_INITDEC(); } uint32_t readBit(size_t context, PDFJBIG2ArithmeticDecoderState* state) { return perform_DECODE(context, state); } + uint32_t readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state); + + uint32_t getRegisterC() const { return m_c; } + uint32_t getRegisterA() const { return m_a; } + uint32_t getRegisterCT() const { return m_ct; } private: /// Performs INITDEC operation as described in the specification @@ -136,6 +146,9 @@ private: /// Number of current unprocessed bits. uint32_t m_ct; + /// Last byte read from the data source; perform_BYTEIN tests it against 0xFF + uint8_t m_lastByte; + /// Data source to read from PDFBitReader* m_reader; }; diff --git a/PdfForQtLib/sources/pdfutils.h b/PdfForQtLib/sources/pdfutils.h index b872f93..d1220da 100644 --- a/PdfForQtLib/sources/pdfutils.h +++ b/PdfForQtLib/sources/pdfutils.h @@ -89,7 +89,7 @@ private: /// Bit-reader, which can read n-bit unsigned integers from the stream. /// Number of bits can be set in the constructor and is constant. 
-class PDFBitReader +class PDFFORQTLIBSHARED_EXPORT PDFBitReader { public: using Value = uint64_t; diff --git a/UnitTests/tst_lexicalanalyzertest.cpp b/UnitTests/tst_lexicalanalyzertest.cpp index dc4ad5a..7bd3e6d 100644 --- a/UnitTests/tst_lexicalanalyzertest.cpp +++ b/UnitTests/tst_lexicalanalyzertest.cpp @@ -26,6 +26,7 @@ #include "pdffunction.h" #include "pdfdocument.h" #include "pdfexception.h" +#include "pdfjbig2decoder.h" #include @@ -53,6 +54,7 @@ private slots: void test_exponential_function(); void test_stitching_function(); void test_postscript_function(); + void test_jbig2_arithmetic_decoder(); private: void scanWholeStream(const char* stream); @@ -309,7 +311,7 @@ void LexicalAnalyzerTest::test_lzw_filter() // This example is from PDF 1.7 Reference QByteArray byteArray = QByteArray::fromHex("800B6050220C0C8501"); pdf::PDFLzwDecodeFilter filter; - QByteArray decoded = filter.apply(byteArray, nullptr, pdf::PDFObject(), nullptr); + QByteArray decoded = filter.apply(byteArray, [](const pdf::PDFObject& object) -> const pdf::PDFObject& { return object; }, pdf::PDFObject(), nullptr); QByteArray valid = "-----A---B"; QCOMPARE(decoded, valid); @@ -1056,6 +1058,45 @@ void LexicalAnalyzerTest::test_postscript_function() test01("2.0 1 index exch div exch pop", [](double x) { return x / 2.0; }); } +void LexicalAnalyzerTest::test_jbig2_arithmetic_decoder() +{ + std::vector compressed = { 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00, 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47, 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC }; + std::vector decompressed = { 0x00, 0x02, 0x00, 0x51, 0x00, 0x00, 0x00, 0xC0, 0x03, 0x52, 0x87, 0x2A, 0xAA, 0xAA, 0xAA, 0xAA, 0x82, 0xC0, 0x20, 0x00, 0xFC, 0xD7, 0x9E, 0xF6, 0xBF, 0x7F, 0xED, 0x90, 0x4F, 0x46, 0xA3, 0xBF }; + + QByteArray input; + input.append(reinterpret_cast(compressed.data()), static_cast(compressed.size())); + + pdf::PDFBitReader reader(&input, 1); + pdf::PDFJBIG2ArithmeticDecoder 
decoder(&reader); + decoder.initialize(); + + pdf::PDFJBIG2ArithmeticDecoderState state; + state.reset(1); + std::vector decompressedByAD; + decompressedByAD.reserve(decompressed.size()); +/* Disabled debug trace: logs Qe, MPS and registers A, CT, C before each decoded bit, then rewinds reader, state and decoder for the real check below. + for (size_t i = 0; i < decompressed.size() * 8; ++i) + { + uint32_t Qe = state.getQe(0); + uint8_t MPS = state.getMPS(0); + qDebug() << (i - 1) << ", Qe = " << qPrintable(QString("0x%1").arg(Qe, 8, 16, QChar(' '))) << ", MPS = " << MPS << + ", A = " << qPrintable(QString("0x%1").arg(decoder.getRegisterA(), 8, 16, QChar(' '))) << ", CT = " << decoder.getRegisterCT() << + ", C = " << qPrintable(QString("0x%1").arg(decoder.getRegisterC(), 8, 16, QChar(' '))) ; + decoder.readBit(0, &state); + } + + reader.seek(0); + state.reset(1); + decoder.initialize();*/ + + for (size_t i = 0; i < decompressed.size(); ++i) + { + decompressedByAD.push_back(decoder.readByte(0, &state)); + } + + QVERIFY(decompressed == decompressedByAD); +} + void LexicalAnalyzerTest::scanWholeStream(const char* stream) { pdf::PDFLexicalAnalyzer analyzer(stream, stream + strlen(stream));