JBIG2 - arithmetic decoder bugfixing

This commit is contained in:
Jakub Melka
2019-10-29 15:34:09 +01:00
parent 34371706aa
commit 489033c4ce
4 changed files with 89 additions and 14 deletions

View File

@ -351,11 +351,23 @@ static constexpr PDFJBIG2ArithmeticDecoderQeValue JBIG2_ARITHMETIC_DECODER_QE_VA
{ 0x56010000, 46, 46, 0 } { 0x56010000, 46, 46, 0 }
}; };
/// Decodes eight consecutive bits with the arithmetic decoder and packs them
/// into a single value. The first decoded bit ends up as the most significant
/// bit of the result, the last decoded bit as the least significant one.
/// \param context Context index, forwarded unchanged to readBit for every bit
/// \param state Decoder state, forwarded unchanged to readBit for every bit
/// \returns Value assembled from the eight decoded bits
uint32_t PDFJBIG2ArithmeticDecoder::readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state)
{
    uint32_t result = 0;
    int remainingBits = 8;
    while (remainingBits-- > 0)
    {
        // Make room for the next bit, then place it in the lowest position.
        const uint32_t bit = readBit(context, state);
        result = (result << 1) | bit;
    }
    return result;
}
void PDFJBIG2ArithmeticDecoder::perform_INITDEC() void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
{ {
// Used figure G.1, in annex G, of specification // Used figure G.1, in annex G, of specification
uint32_t B = m_reader->readUnsignedByte(); uint32_t B = m_reader->readUnsignedByte();
m_c = (B ^ 0xFF) << 16; m_lastByte = B;
m_c = B << 16;
perform_BYTEIN(); perform_BYTEIN();
m_c = m_c << 7; m_c = m_c << 7;
m_ct -= 7; m_ct -= 7;
@ -365,24 +377,26 @@ void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
void PDFJBIG2ArithmeticDecoder::perform_BYTEIN() void PDFJBIG2ArithmeticDecoder::perform_BYTEIN()
{ {
// Used figure G.3, in annex G, of specification // Used figure G.3, in annex G, of specification
const uint32_t B = m_reader->readUnsignedByte(); if (m_lastByte == 0xFF)
if (B == 0xFF)
{ {
const uint32_t B1 = m_reader->look(8); const uint32_t B1 = m_reader->look(8);
if (B1 > 0x8F) if (B1 > 0x8F)
{ {
m_c += 0xFF00;
m_ct = 8; m_ct = 8;
} }
else else
{ {
m_c = m_c + (0xFE00 - (B << 9)); m_c = m_c + (B1 << 9);
m_ct = 7; m_ct = 7;
m_reader->readUnsignedByte(); m_lastByte = m_reader->readUnsignedByte();
} }
} }
else else
{ {
m_c = m_c + (0xFF00 - (B << 8)); const uint32_t B = m_reader->readUnsignedByte();
m_lastByte = B;
m_c = m_c + (B << 8);
m_ct = 8; m_ct = 8;
} }
} }
@ -402,8 +416,11 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
const uint32_t Qe = QeInfo.Qe; const uint32_t Qe = QeInfo.Qe;
m_a -= Qe; m_a -= Qe;
if (m_c < m_a) if (m_c >= Qe)
{ {
// We are subtracting this value according to figure E.15 in the specification
m_c -= Qe;
if ((m_a & 0x80000000) == 0) if ((m_a & 0x80000000) == 0)
{ {
// We must perform MPS_EXCHANGE algorithm, according to figure E.16, in annex E, of specification // We must perform MPS_EXCHANGE algorithm, according to figure E.16, in annex E, of specification
@ -430,9 +447,6 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
} }
else else
{ {
m_c -= m_a;
m_a = Qe;
// We must perform LPS_EXCHANGE algorithm, according to figure E.17, in annex E, of specification // We must perform LPS_EXCHANGE algorithm, according to figure E.17, in annex E, of specification
if (m_a < Qe) if (m_a < Qe)
{ {
@ -447,6 +461,8 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
} }
state->setQeRowIndexAndMPS(context, QeInfo.newLPS, MPS); state->setQeRowIndexAndMPS(context, QeInfo.newLPS, MPS);
} }
m_a = Qe;
} }
// Perform RENORMD algorithm, according to figure E.18, in annex E, of specification // Perform RENORMD algorithm, according to figure E.18, in annex E, of specification
@ -1454,4 +1470,9 @@ std::vector<PDFJBIG2HuffmanTableEntry> PDFJBIG2HuffmanCodeTable::buildPrefixes(c
return result; return result;
} }
/// Returns the Qe value of the table row currently selected for the given context.
/// The row is obtained from getQeRowIndex(context) and looked up in
/// JBIG2_ARITHMETIC_DECODER_QE_VALUES.
/// \param context Context index whose current Qe value is requested
uint32_t PDFJBIG2ArithmeticDecoderState::getQe(size_t context) const
{
    const auto rowIndex = getQeRowIndex(context);
    const auto& row = JBIG2_ARITHMETIC_DECODER_QE_VALUES[rowIndex];
    return row.Qe;
}
} // namespace pdf } // namespace pdf

View File

@ -43,7 +43,7 @@ enum class PDFJBIG2BitOperation
/// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used /// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used
/// to store Qe value index (current row in the table, number 0-46), and lowest 1 bit /// to store Qe value index (current row in the table, number 0-46), and lowest 1 bit
/// is used to store current MPS value (most probable symbol - 0/1). /// is used to store current MPS value (most probable symbol - 0/1).
class PDFJBIG2ArithmeticDecoderState class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoderState
{ {
public: public:
explicit inline PDFJBIG2ArithmeticDecoderState() = default; explicit inline PDFJBIG2ArithmeticDecoderState() = default;
@ -72,6 +72,10 @@ public:
return m_state[context] >> 1; return m_state[context] >> 1;
} }
/// Returns the Qe value of the row currently selected for the given context, according to document ISO/IEC 14492:2001,
/// annex E, table E.1 (Qe values and probability estimation process).
inline uint32_t getQe(size_t context) const;
/// Returns current bit value of MPS (most probable symbol) /// Returns current bit value of MPS (most probable symbol)
inline uint8_t getMPS(size_t context) const inline uint8_t getMPS(size_t context) const
{ {
@ -96,13 +100,14 @@ private:
/// of decoder described in document ISO/IEC 14492:2001, T.88, annex G (arithmetic decoding /// of decoder described in document ISO/IEC 14492:2001, T.88, annex G (arithmetic decoding
/// procedure). It uses 32-bit fixed point arithmetic instead of 16-bit fixed point /// procedure). It uses 32-bit fixed point arithmetic instead of 16-bit fixed point
/// arithmetic described in the specification (it is much faster). /// arithmetic described in the specification (it is much faster).
class PDFJBIG2ArithmeticDecoder class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoder
{ {
public: public:
explicit inline PDFJBIG2ArithmeticDecoder(PDFBitReader* reader) : explicit inline PDFJBIG2ArithmeticDecoder(PDFBitReader* reader) :
m_c(0), m_c(0),
m_a(0), m_a(0),
m_ct(0), m_ct(0),
m_lastByte(0),
m_reader(reader) m_reader(reader)
{ {
@ -110,6 +115,11 @@ public:
void initialize() { perform_INITDEC(); } void initialize() { perform_INITDEC(); }
uint32_t readBit(size_t context, PDFJBIG2ArithmeticDecoderState* state) { return perform_DECODE(context, state); } uint32_t readBit(size_t context, PDFJBIG2ArithmeticDecoderState* state) { return perform_DECODE(context, state); }
uint32_t readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state);
uint32_t getRegisterC() const { return m_c; }
uint32_t getRegisterA() const { return m_a; }
uint32_t getRegisterCT() const { return m_ct; }
private: private:
/// Performs INITDEC operation as described in the specification /// Performs INITDEC operation as described in the specification
@ -136,6 +146,9 @@ private:
/// Number of current unprocessed bits. /// Number of current unprocessed bits.
uint32_t m_ct; uint32_t m_ct;
/// Last processed byte
uint8_t m_lastByte;
/// Data source to read from /// Data source to read from
PDFBitReader* m_reader; PDFBitReader* m_reader;
}; };

View File

@ -89,7 +89,7 @@ private:
/// Bit-reader, which can read n-bit unsigned integers from the stream. /// Bit-reader, which can read n-bit unsigned integers from the stream.
/// Number of bits can be set in the constructor and is constant. /// Number of bits can be set in the constructor and is constant.
class PDFBitReader class PDFFORQTLIBSHARED_EXPORT PDFBitReader
{ {
public: public:
using Value = uint64_t; using Value = uint64_t;

View File

@ -26,6 +26,7 @@
#include "pdffunction.h" #include "pdffunction.h"
#include "pdfdocument.h" #include "pdfdocument.h"
#include "pdfexception.h" #include "pdfexception.h"
#include "pdfjbig2decoder.h"
#include <regex> #include <regex>
@ -53,6 +54,7 @@ private slots:
void test_exponential_function(); void test_exponential_function();
void test_stitching_function(); void test_stitching_function();
void test_postscript_function(); void test_postscript_function();
void test_jbig2_arithmetic_decoder();
private: private:
void scanWholeStream(const char* stream); void scanWholeStream(const char* stream);
@ -309,7 +311,7 @@ void LexicalAnalyzerTest::test_lzw_filter()
// This example is from PDF 1.7 Reference // This example is from PDF 1.7 Reference
QByteArray byteArray = QByteArray::fromHex("800B6050220C0C8501"); QByteArray byteArray = QByteArray::fromHex("800B6050220C0C8501");
pdf::PDFLzwDecodeFilter filter; pdf::PDFLzwDecodeFilter filter;
QByteArray decoded = filter.apply(byteArray, nullptr, pdf::PDFObject(), nullptr); QByteArray decoded = filter.apply(byteArray, [](const pdf::PDFObject& object) -> const pdf::PDFObject& { return object; }, pdf::PDFObject(), nullptr);
QByteArray valid = "-----A---B"; QByteArray valid = "-----A---B";
QCOMPARE(decoded, valid); QCOMPARE(decoded, valid);
@ -1056,6 +1058,45 @@ void LexicalAnalyzerTest::test_postscript_function()
test01("2.0 1 index exch div exch pop", [](double x) { return x / 2.0; }); test01("2.0 1 index exch div exch pop", [](double x) { return x / 2.0; });
} }
void LexicalAnalyzerTest::test_jbig2_arithmetic_decoder()
{
std::vector<uint8_t> compressed = { 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00, 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47, 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC };
std::vector<uint8_t> decompressed = { 0x00, 0x02, 0x00, 0x51, 0x00, 0x00, 0x00, 0xC0, 0x03, 0x52, 0x87, 0x2A, 0xAA, 0xAA, 0xAA, 0xAA, 0x82, 0xC0, 0x20, 0x00, 0xFC, 0xD7, 0x9E, 0xF6, 0xBF, 0x7F, 0xED, 0x90, 0x4F, 0x46, 0xA3, 0xBF };
QByteArray input;
input.append(reinterpret_cast<char*>(compressed.data()), static_cast<int>(compressed.size()));
pdf::PDFBitReader reader(&input, 1);
pdf::PDFJBIG2ArithmeticDecoder decoder(&reader);
decoder.initialize();
pdf::PDFJBIG2ArithmeticDecoderState state;
state.reset(1);
std::vector<uint8_t> decompressedByAD;
decompressedByAD.reserve(decompressed.size());
/*
for (size_t i = 0; i < decompressed.size() * 8; ++i)
{
uint32_t Qe = state.getQe(0);
uint8_t MPS = state.getMPS(0);
qDebug() << (i - 1) << ", Qe = " << qPrintable(QString("0x%1").arg(Qe, 8, 16, QChar(' '))) << ", MPS = " << MPS <<
", A = " << qPrintable(QString("0x%1").arg(decoder.getRegisterA(), 8, 16, QChar(' '))) << ", CT = " << decoder.getRegisterCT() <<
", C = " << qPrintable(QString("0x%1").arg(decoder.getRegisterC(), 8, 16, QChar(' '))) ;
decoder.readBit(0, &state);
}
reader.seek(0);
state.reset(1);
decoder.initialize();*/
for (size_t i = 0; i < decompressed.size(); ++i)
{
decompressedByAD.push_back(decoder.readByte(0, &state));
}
QVERIFY(decompressed == decompressedByAD);
}
void LexicalAnalyzerTest::scanWholeStream(const char* stream) void LexicalAnalyzerTest::scanWholeStream(const char* stream)
{ {
pdf::PDFLexicalAnalyzer analyzer(stream, stream + strlen(stream)); pdf::PDFLexicalAnalyzer analyzer(stream, stream + strlen(stream));