JBIG2 - arithmetic decoder bugfixing

2025-06-05 21:59:17 +02:00 · 2019-10-29 15:34:09 +01:00
parent 34371706aa
commit 489033c4ce
4 changed files with 89 additions and 14 deletions
--- a/PdfForQtLib/sources/pdfjbig2decoder.cpp
+++ b/PdfForQtLib/sources/pdfjbig2decoder.cpp
@@ -351,11 +351,23 @@ static constexpr PDFJBIG2ArithmeticDecoderQeValue JBIG2_ARITHMETIC_DECODER_QE_VA
    { 0x56010000, 46, 46, 0 }
 };
 /// Decodes eight consecutive bits using the given context and probability
 /// state, and packs them into one value. The first decoded bit ends up as
 /// the most significant bit of the resulting byte.
 /// \param context Context index passed to each bit-decoding step
 /// \param state Arithmetic decoder state passed to each bit-decoding step
 /// \returns Value composed of the eight decoded bits
 uint32_t PDFJBIG2ArithmeticDecoder::readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state)
 {
    uint32_t value = 0;
    int remainingBits = 8;
    while (remainingBits-- > 0)
    {
        // Make room for the next bit, then place it at the lowest position
        value <<= 1;
        value |= readBit(context, state);
    }
    return value;
 }
 void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
 {
    // Used figure G.1, in annex G, of specification
    uint32_t B = m_reader->readUnsignedByte();
-    m_c = (B ^ 0xFF) << 16;
+    m_lastByte = B;
    m_c = B << 16;
    perform_BYTEIN();
    m_c = m_c << 7;
    m_ct -= 7;
@@ -365,24 +377,26 @@ void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
 void PDFJBIG2ArithmeticDecoder::perform_BYTEIN()
 {
    // Used figure G.3, in annex G, of specification
-    const uint32_t B = m_reader->readUnsignedByte();
+    if (m_lastByte == 0xFF)
    if (B == 0xFF)
    {
        const uint32_t B1 = m_reader->look(8);
        if (B1 > 0x8F)
        {
            m_c += 0xFF00;
            m_ct = 8;
        }
        else
        {
-            m_c = m_c + (0xFE00 - (B << 9));
+            m_c = m_c + (B1 << 9);
            m_ct = 7;
-            m_reader->readUnsignedByte();
+            m_lastByte = m_reader->readUnsignedByte();
        }
    }
    else
    {
-        m_c = m_c + (0xFF00 - (B << 8));
+        const uint32_t B = m_reader->readUnsignedByte();
        m_lastByte = B;
        m_c = m_c + (B << 8);
        m_ct = 8;
    }
 }
@@ -402,8 +416,11 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
    const uint32_t Qe = QeInfo.Qe;
    m_a -= Qe;
-    if (m_c < m_a)
+    if (m_c >= Qe)
    {
        // We are subtracting this value according to figure E.15 in the specification
        m_c -= Qe;
        if ((m_a & 0x80000000) == 0)
        {
            // We must perform MPS_EXCHANGE algorithm, according to figure E.16, in annex E, of specification
@@ -430,9 +447,6 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
    }
    else
    {
        m_c -= m_a;
        m_a = Qe;
        // We must perform LPS_EXCHANGE algorithm, according to figure E.17, in annex E, of specification
        if (m_a < Qe)
        {
@@ -447,6 +461,8 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
            }
            state->setQeRowIndexAndMPS(context, QeInfo.newLPS, MPS);
        }
        m_a = Qe;
    }
    // Perform RENORMD algorithm, according to figure E.18, in annex E, of specification
@@ -1454,4 +1470,9 @@ std::vector<PDFJBIG2HuffmanTableEntry> PDFJBIG2HuffmanCodeTable::buildPrefixes(c
    return result;
 }
 /// Returns the Qe value stored in the table row that is currently
 /// selected for the given context.
 uint32_t PDFJBIG2ArithmeticDecoderState::getQe(size_t context) const
 {
    // Row of the Qe table currently assigned to this context
    const auto& currentRow = JBIG2_ARITHMETIC_DECODER_QE_VALUES[getQeRowIndex(context)];
    return currentRow.Qe;
 }
 }   // namespace pdf
--- a/PdfForQtLib/sources/pdfjbig2decoder.h
+++ b/PdfForQtLib/sources/pdfjbig2decoder.h
@@ -43,7 +43,7 @@ enum class PDFJBIG2BitOperation
 /// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used
 /// to store Qe value index (current row in the table, number 0-46), and lowest 1 bit
 /// is used to store current MPS value (most probable symbol - 0/1).
-class PDFJBIG2ArithmeticDecoderState
+class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoderState
 {
 public:
    explicit inline PDFJBIG2ArithmeticDecoderState() = default;
@@ -72,6 +72,10 @@ public:
        return m_state[context] >> 1;
    }
    /// Returns the Qe value of the table row currently selected for the given
    /// context, according to document ISO/IEC 14492:2001, annex E, table E.1
    /// (Qe values and probability estimation process).
    /// Deliberately not declared inline: the definition lives in the source file,
    /// and an inline function must be defined in every translation unit using it.
    /// \param context Context index, for which the Qe value is returned
    uint32_t getQe(size_t context) const;
    /// Returns current bit value of MPS (most probable symbol)
    inline uint8_t getMPS(size_t context) const
    {
@@ -96,13 +100,14 @@ private:
 /// of decoder described in document ISO/IEC 14492:2001, T.88, annex G (arithmetic decoding
 /// procedure). It uses 32-bit fixed point arithmetic instead of 16-bit fixed point
 /// arithmetic described in the specification (it is much faster).
-class PDFJBIG2ArithmeticDecoder
+class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoder
 {
 public:
    explicit inline PDFJBIG2ArithmeticDecoder(PDFBitReader* reader) :
        m_c(0),
        m_a(0),
        m_ct(0),
        m_lastByte(0),
        m_reader(reader)
    {
@@ -110,6 +115,11 @@ public:
    void initialize() { perform_INITDEC(); }
    uint32_t readBit(size_t context, PDFJBIG2ArithmeticDecoderState* state) { return perform_DECODE(context, state); }
    uint32_t readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state);
    uint32_t getRegisterC() const { return m_c; }
    uint32_t getRegisterA() const { return m_a; }
    uint32_t getRegisterCT() const { return m_ct; }
 private:
    /// Performs INITDEC operation as described in the specification
@@ -136,6 +146,9 @@ private:
    /// Number of current unprocessed bits.
    uint32_t m_ct;
    /// Last processed byte
    uint8_t m_lastByte;
    /// Data source to read from
    PDFBitReader* m_reader;
 };
--- a/PdfForQtLib/sources/pdfutils.h
+++ b/PdfForQtLib/sources/pdfutils.h
@@ -89,7 +89,7 @@ private:
 /// Bit-reader, which can read n-bit unsigned integers from the stream.
 /// Number of bits can be set in the constructor and is constant.
-class PDFBitReader
+class PDFFORQTLIBSHARED_EXPORT PDFBitReader
 {
 public:
    using Value = uint64_t;
--- a/UnitTests/tst_lexicalanalyzertest.cpp
+++ b/UnitTests/tst_lexicalanalyzertest.cpp
@@ -26,6 +26,7 @@
 #include "pdffunction.h"
 #include "pdfdocument.h"
 #include "pdfexception.h"
 #include "pdfjbig2decoder.h"
 #include <regex>
@@ -53,6 +54,7 @@ private slots:
    void test_exponential_function();
    void test_stitching_function();
    void test_postscript_function();
    void test_jbig2_arithmetic_decoder();
 private:
    void scanWholeStream(const char* stream);
@@ -309,7 +311,7 @@ void LexicalAnalyzerTest::test_lzw_filter()
    // This example is from PDF 1.7 Reference
    QByteArray byteArray = QByteArray::fromHex("800B6050220C0C8501");
    pdf::PDFLzwDecodeFilter filter;
-    QByteArray decoded = filter.apply(byteArray, nullptr, pdf::PDFObject(), nullptr);
+    QByteArray decoded = filter.apply(byteArray, [](const pdf::PDFObject& object) -> const pdf::PDFObject& { return object; }, pdf::PDFObject(), nullptr);
    QByteArray valid = "-----A---B";
    QCOMPARE(decoded, valid);
@@ -1056,6 +1058,45 @@ void LexicalAnalyzerTest::test_postscript_function()
    test01("2.0 1 index exch div exch pop", [](double x) { return x / 2.0; });
 }
 /// Feeds a fixed arithmetic-coded byte sequence into the JBIG2 arithmetic
 /// decoder and verifies that decoding it byte by byte (single context 0)
 /// yields the expected output sequence.
 void LexicalAnalyzerTest::test_jbig2_arithmetic_decoder()
 {
    // Encoded input and the bytes it is expected to decode to
    const std::vector<uint8_t> encodedData = { 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00, 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47, 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC };
    const std::vector<uint8_t> expectedData = { 0x00, 0x02, 0x00, 0x51, 0x00, 0x00, 0x00, 0xC0, 0x03, 0x52, 0x87, 0x2A, 0xAA, 0xAA, 0xAA, 0xAA, 0x82, 0xC0, 0x20, 0x00, 0xFC, 0xD7, 0x9E, 0xF6, 0xBF, 0x7F, 0xED, 0x90, 0x4F, 0x46, 0xA3, 0xBF };
    // Wrap the encoded bytes into a byte array, which the bit reader reads from
    QByteArray input(reinterpret_cast<const char*>(encodedData.data()), static_cast<int>(encodedData.size()));
    pdf::PDFBitReader reader(&input, 1);
    pdf::PDFJBIG2ArithmeticDecoder decoder(&reader);
    decoder.initialize();
    pdf::PDFJBIG2ArithmeticDecoderState state;
    state.reset(1);
    std::vector<uint8_t> decodedData;
    decodedData.reserve(expectedData.size());
 /*
    // Disabled debugging aid: dumps decoder registers before each decoded bit,
    // then rewinds the reader and reinitializes the decoder for the real test.
    for (size_t i = 0; i < expectedData.size() * 8; ++i)
    {
        uint32_t Qe = state.getQe(0);
        uint8_t MPS = state.getMPS(0);
        qDebug() << (i - 1) << ", Qe = " << qPrintable(QString("0x%1").arg(Qe, 8, 16, QChar(' '))) << ", MPS = " << MPS <<
                    ", A = " << qPrintable(QString("0x%1").arg(decoder.getRegisterA(), 8, 16, QChar(' '))) << ", CT = " << decoder.getRegisterCT() <<
                    ", C = " <<  qPrintable(QString("0x%1").arg(decoder.getRegisterC(), 8, 16, QChar(' '))) ;
        decoder.readBit(0, &state);
    }
    reader.seek(0);
    state.reset(1);
    decoder.initialize();*/
    // Decode exactly as many bytes as the expected output contains
    while (decodedData.size() < expectedData.size())
    {
        const uint32_t decodedByte = decoder.readByte(0, &state);
        decodedData.push_back(static_cast<uint8_t>(decodedByte));
    }
    QVERIFY(expectedData == decodedData);
 }
 void LexicalAnalyzerTest::scanWholeStream(const char* stream)
 {
    pdf::PDFLexicalAnalyzer analyzer(stream, stream + strlen(stream));