JBIG2 - arithmetic decoder bugfixing

This commit is contained in:
Jakub Melka
2019-10-29 15:34:09 +01:00
parent 34371706aa
commit 489033c4ce
4 changed files with 89 additions and 14 deletions

View File

@ -351,11 +351,23 @@ static constexpr PDFJBIG2ArithmeticDecoderQeValue JBIG2_ARITHMETIC_DECODER_QE_VA
{ 0x56010000, 46, 46, 0 }
};
/// Decodes eight consecutive bits with the given context and packs them
/// into a single value. The first decoded bit ends up as the most
/// significant of the eight returned bits.
uint32_t PDFJBIG2ArithmeticDecoder::readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state)
{
    uint32_t result = 0;
    int remainingBits = 8;
    while (remainingBits-- > 0)
    {
        // Make room for the next bit, then append it at the lowest position
        result <<= 1;
        result |= readBit(context, state);
    }
    return result;
}
void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
{
// Used figure G.1, in annex G, of specification
uint32_t B = m_reader->readUnsignedByte();
m_c = (B ^ 0xFF) << 16;
m_lastByte = B;
m_c = B << 16;
perform_BYTEIN();
m_c = m_c << 7;
m_ct -= 7;
@ -365,24 +377,26 @@ void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
/// Performs BYTEIN operation as described in the specification: feeds the next
/// compressed byte into the code register C and reloads the bit counter CT.
/// The previously consumed byte is tracked in m_lastByte, because the handling
/// of the next byte depends on whether the previous one was 0xFF.
void PDFJBIG2ArithmeticDecoder::perform_BYTEIN()
{
    // Used figure G.3, in annex G, of specification
    if (m_lastByte == 0xFF)
    {
        // Inspect the byte following 0xFF first; it is consumed only in the
        // stuffed-byte branch below (look() is expected to peek, not advance).
        const uint32_t B1 = m_reader->look(8);
        if (B1 > 0x8F)
        {
            // 0xFF followed by a value above 0x8F is treated as a marker code
            // by the specification: nothing is consumed, and the decoder is
            // fed with 1-bits instead.
            m_c += 0xFF00;
            m_ct = 8;
        }
        else
        {
            // Byte after 0xFF carries only 7 bits of data (bit stuffing),
            // hence the shift by 9 and the counter being set to 7.
            m_c += B1 << 9;
            m_ct = 7;
            m_lastByte = m_reader->readUnsignedByte();
        }
    }
    else
    {
        // Ordinary case: consume one whole byte and remember it for next time
        const uint32_t B = m_reader->readUnsignedByte();
        m_lastByte = B;
        m_c += B << 8;
        m_ct = 8;
    }
}
@ -402,8 +416,11 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
const uint32_t Qe = QeInfo.Qe;
m_a -= Qe;
if (m_c < m_a)
if (m_c >= Qe)
{
// We are subtracting this value according to figure E.15 in the specification
m_c -= Qe;
if ((m_a & 0x80000000) == 0)
{
// We must perform MPS_EXCHANGE algorithm, according to figure E.16, in annex E, of specification
@ -430,9 +447,6 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
}
else
{
m_c -= m_a;
m_a = Qe;
// We must perform LPS_EXCHANGE algorithm, according to figure E.17, in annex E, of specification
if (m_a < Qe)
{
@ -447,6 +461,8 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
}
state->setQeRowIndexAndMPS(context, QeInfo.newLPS, MPS);
}
m_a = Qe;
}
// Perform RENORMD algorithm, according to figure E.18, in annex E, of specification
@ -1454,4 +1470,9 @@ std::vector<PDFJBIG2HuffmanTableEntry> PDFJBIG2HuffmanCodeTable::buildPrefixes(c
return result;
}
/// Looks up the Qe value of the table row currently selected for \p context.
uint32_t PDFJBIG2ArithmeticDecoderState::getQe(size_t context) const
{
    const auto rowIndex = getQeRowIndex(context);
    const auto& row = JBIG2_ARITHMETIC_DECODER_QE_VALUES[rowIndex];
    return row.Qe;
}
} // namespace pdf

View File

@ -43,7 +43,7 @@ enum class PDFJBIG2BitOperation
/// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used
/// to store Qe value index (current row in the table, number 0-46), and lowest 1 bit
/// is used to store current MPS value (most probable symbol - 0/1).
class PDFJBIG2ArithmeticDecoderState
class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoderState
{
public:
explicit inline PDFJBIG2ArithmeticDecoderState() = default;
@ -72,6 +72,10 @@ public:
return m_state[context] >> 1;
}
/// Returns Qe value of the table row currently selected for the given context,
/// according to document ISO/IEC 14492:2001, annex E, table E.1 (Qe values and
/// probability estimation process).
/// Deliberately not declared inline: the definition is out-of-line in the
/// implementation file, and an inline function must be defined in every
/// translation unit that uses it.
uint32_t getQe(size_t context) const;
/// Returns current bit value of MPS (most probable symbol)
inline uint8_t getMPS(size_t context) const
{
@ -96,13 +100,14 @@ private:
/// of decoder described in document ISO/IEC 14492:2001, T.88, annex G (arithmetic decoding
/// procedure). It uses 32-bit fixed point arithmetic instead of 16-bit fixed point
/// arithmetic described in the specification (it is much faster).
class PDFJBIG2ArithmeticDecoder
class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoder
{
public:
explicit inline PDFJBIG2ArithmeticDecoder(PDFBitReader* reader) :
m_c(0),
m_a(0),
m_ct(0),
m_lastByte(0),
m_reader(reader)
{
@ -110,6 +115,11 @@ public:
void initialize() { perform_INITDEC(); }
uint32_t readBit(size_t context, PDFJBIG2ArithmeticDecoderState* state) { return perform_DECODE(context, state); }
uint32_t readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state);
uint32_t getRegisterC() const { return m_c; }
uint32_t getRegisterA() const { return m_a; }
uint32_t getRegisterCT() const { return m_ct; }
private:
/// Performs INITDEC operation as described in the specification
@ -136,6 +146,9 @@ private:
/// Number of current unprocessed bits.
uint32_t m_ct;
/// Last processed byte
uint8_t m_lastByte;
/// Data source to read from
PDFBitReader* m_reader;
};

View File

@ -89,7 +89,7 @@ private:
/// Bit-reader, which can read n-bit unsigned integers from the stream.
/// Number of bits can be set in the constructor and is constant.
class PDFBitReader
class PDFFORQTLIBSHARED_EXPORT PDFBitReader
{
public:
using Value = uint64_t;

View File

@ -26,6 +26,7 @@
#include "pdffunction.h"
#include "pdfdocument.h"
#include "pdfexception.h"
#include "pdfjbig2decoder.h"
#include <regex>
@ -53,6 +54,7 @@ private slots:
void test_exponential_function();
void test_stitching_function();
void test_postscript_function();
void test_jbig2_arithmetic_decoder();
private:
void scanWholeStream(const char* stream);
@ -309,7 +311,7 @@ void LexicalAnalyzerTest::test_lzw_filter()
// This example is from PDF 1.7 Reference
QByteArray byteArray = QByteArray::fromHex("800B6050220C0C8501");
pdf::PDFLzwDecodeFilter filter;
QByteArray decoded = filter.apply(byteArray, nullptr, pdf::PDFObject(), nullptr);
QByteArray decoded = filter.apply(byteArray, [](const pdf::PDFObject& object) -> const pdf::PDFObject& { return object; }, pdf::PDFObject(), nullptr);
QByteArray valid = "-----A---B";
QCOMPARE(decoded, valid);
@ -1056,6 +1058,45 @@ void LexicalAnalyzerTest::test_postscript_function()
test01("2.0 1 index exch div exch pop", [](double x) { return x / 2.0; });
}
void LexicalAnalyzerTest::test_jbig2_arithmetic_decoder()
{
std::vector<uint8_t> compressed = { 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00, 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47, 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC };
std::vector<uint8_t> decompressed = { 0x00, 0x02, 0x00, 0x51, 0x00, 0x00, 0x00, 0xC0, 0x03, 0x52, 0x87, 0x2A, 0xAA, 0xAA, 0xAA, 0xAA, 0x82, 0xC0, 0x20, 0x00, 0xFC, 0xD7, 0x9E, 0xF6, 0xBF, 0x7F, 0xED, 0x90, 0x4F, 0x46, 0xA3, 0xBF };
QByteArray input;
input.append(reinterpret_cast<char*>(compressed.data()), static_cast<int>(compressed.size()));
pdf::PDFBitReader reader(&input, 1);
pdf::PDFJBIG2ArithmeticDecoder decoder(&reader);
decoder.initialize();
pdf::PDFJBIG2ArithmeticDecoderState state;
state.reset(1);
std::vector<uint8_t> decompressedByAD;
decompressedByAD.reserve(decompressed.size());
/*
for (size_t i = 0; i < decompressed.size() * 8; ++i)
{
uint32_t Qe = state.getQe(0);
uint8_t MPS = state.getMPS(0);
qDebug() << (i - 1) << ", Qe = " << qPrintable(QString("0x%1").arg(Qe, 8, 16, QChar(' '))) << ", MPS = " << MPS <<
", A = " << qPrintable(QString("0x%1").arg(decoder.getRegisterA(), 8, 16, QChar(' '))) << ", CT = " << decoder.getRegisterCT() <<
", C = " << qPrintable(QString("0x%1").arg(decoder.getRegisterC(), 8, 16, QChar(' '))) ;
decoder.readBit(0, &state);
}
reader.seek(0);
state.reset(1);
decoder.initialize();*/
for (size_t i = 0; i < decompressed.size(); ++i)
{
decompressedByAD.push_back(decoder.readByte(0, &state));
}
QVERIFY(decompressed == decompressedByAD);
}
void LexicalAnalyzerTest::scanWholeStream(const char* stream)
{
pdf::PDFLexicalAnalyzer analyzer(stream, stream + strlen(stream));