mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
JBIG2 - arithmetic decoder bugfixing
This commit is contained in:
@ -351,11 +351,23 @@ static constexpr PDFJBIG2ArithmeticDecoderQeValue JBIG2_ARITHMETIC_DECODER_QE_VA
|
||||
{ 0x56010000, 46, 46, 0 }
|
||||
};
|
||||
|
||||
uint32_t PDFJBIG2ArithmeticDecoder::readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state)
|
||||
{
|
||||
uint32_t byte = 0;
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
byte = (byte << 1) | readBit(context, state);
|
||||
}
|
||||
|
||||
return byte;
|
||||
}
|
||||
|
||||
void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
|
||||
{
|
||||
// Used figure G.1, in annex G, of specification
|
||||
uint32_t B = m_reader->readUnsignedByte();
|
||||
m_c = (B ^ 0xFF) << 16;
|
||||
m_lastByte = B;
|
||||
m_c = B << 16;
|
||||
perform_BYTEIN();
|
||||
m_c = m_c << 7;
|
||||
m_ct -= 7;
|
||||
@ -365,24 +377,26 @@ void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
|
||||
void PDFJBIG2ArithmeticDecoder::perform_BYTEIN()
|
||||
{
|
||||
// Used figure G.3, in annex G, of specification
|
||||
const uint32_t B = m_reader->readUnsignedByte();
|
||||
if (B == 0xFF)
|
||||
if (m_lastByte == 0xFF)
|
||||
{
|
||||
const uint32_t B1 = m_reader->look(8);
|
||||
if (B1 > 0x8F)
|
||||
{
|
||||
m_c += 0xFF00;
|
||||
m_ct = 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_c = m_c + (0xFE00 - (B << 9));
|
||||
m_c = m_c + (B1 << 9);
|
||||
m_ct = 7;
|
||||
m_reader->readUnsignedByte();
|
||||
m_lastByte = m_reader->readUnsignedByte();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_c = m_c + (0xFF00 - (B << 8));
|
||||
const uint32_t B = m_reader->readUnsignedByte();
|
||||
m_lastByte = B;
|
||||
m_c = m_c + (B << 8);
|
||||
m_ct = 8;
|
||||
}
|
||||
}
|
||||
@ -402,8 +416,11 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
|
||||
const uint32_t Qe = QeInfo.Qe;
|
||||
m_a -= Qe;
|
||||
|
||||
if (m_c < m_a)
|
||||
if (m_c >= Qe)
|
||||
{
|
||||
// We are subtracting this value according to figure E.15 in the specification
|
||||
m_c -= Qe;
|
||||
|
||||
if ((m_a & 0x80000000) == 0)
|
||||
{
|
||||
// We must perform MPS_EXCHANGE algorithm, according to figure E.16, in annex E, of specification
|
||||
@ -430,9 +447,6 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
|
||||
}
|
||||
else
|
||||
{
|
||||
m_c -= m_a;
|
||||
m_a = Qe;
|
||||
|
||||
// We must perform LPS_EXCHANGE algorithm, according to figure E.17, in annex E, of specification
|
||||
if (m_a < Qe)
|
||||
{
|
||||
@ -447,6 +461,8 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
|
||||
}
|
||||
state->setQeRowIndexAndMPS(context, QeInfo.newLPS, MPS);
|
||||
}
|
||||
|
||||
m_a = Qe;
|
||||
}
|
||||
|
||||
// Perform RENORMD algorithm, according to figure E.18, in annex E, of specification
|
||||
@ -1454,4 +1470,9 @@ std::vector<PDFJBIG2HuffmanTableEntry> PDFJBIG2HuffmanCodeTable::buildPrefixes(c
|
||||
return result;
|
||||
}
|
||||
|
||||
uint32_t PDFJBIG2ArithmeticDecoderState::getQe(size_t context) const
|
||||
{
|
||||
return JBIG2_ARITHMETIC_DECODER_QE_VALUES[getQeRowIndex(context)].Qe;
|
||||
}
|
||||
|
||||
} // namespace pdf
|
||||
|
@ -43,7 +43,7 @@ enum class PDFJBIG2BitOperation
|
||||
/// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used
|
||||
/// to store Qe value index (current row in the table, number 0-46), and lowest 1 bit
|
||||
/// is used to store current MPS value (most probable symbol - 0/1).
|
||||
class PDFJBIG2ArithmeticDecoderState
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoderState
|
||||
{
|
||||
public:
|
||||
explicit inline PDFJBIG2ArithmeticDecoderState() = default;
|
||||
@ -72,6 +72,10 @@ public:
|
||||
return m_state[context] >> 1;
|
||||
}
|
||||
|
||||
/// Returns Qe value for the current row index of the given context, according to document ISO/IEC 14492:2001,
|
||||
/// annex E, table E.1 (Qe values and probability estimation process).
|
||||
// Not declared inline: the definition lives in the implementation file, and an inline
// function would have to be defined in every translation unit that calls it.
uint32_t getQe(size_t context) const;
|
||||
|
||||
/// Returns current bit value of MPS (most probable symbol)
|
||||
inline uint8_t getMPS(size_t context) const
|
||||
{
|
||||
@ -96,13 +100,14 @@ private:
|
||||
/// of decoder described in document ISO/IEC 14492:2001, T.88, annex G (arithmetic decoding
|
||||
/// procedure). It uses 32-bit fixed point arithmetic instead of 16-bit fixed point
|
||||
/// arithmetic described in the specification (it is much faster).
|
||||
class PDFJBIG2ArithmeticDecoder
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFJBIG2ArithmeticDecoder
|
||||
{
|
||||
public:
|
||||
explicit inline PDFJBIG2ArithmeticDecoder(PDFBitReader* reader) :
|
||||
m_c(0),
|
||||
m_a(0),
|
||||
m_ct(0),
|
||||
m_lastByte(0),
|
||||
m_reader(reader)
|
||||
{
|
||||
|
||||
@ -110,6 +115,11 @@ public:
|
||||
|
||||
/// Initializes the decoder by running the INITDEC procedure.
void initialize() { perform_INITDEC(); }
|
||||
/// Decodes a single bit for the given context by running the DECODE procedure
/// and returns its result.
uint32_t readBit(size_t context, PDFJBIG2ArithmeticDecoderState* state) { return perform_DECODE(context, state); }
|
||||
/// Decodes eight bits using readBit() and packs them into one value; the first
/// decoded bit becomes the most significant of the eight.
uint32_t readByte(size_t context, PDFJBIG2ArithmeticDecoderState* state);
|
||||
|
||||
/// Returns the current value of the C register (m_c).
uint32_t getRegisterC() const { return m_c; }
|
||||
/// Returns the current value of the A register (m_a).
uint32_t getRegisterA() const { return m_a; }
|
||||
/// Returns the current value of the CT counter (m_ct).
uint32_t getRegisterCT() const { return m_ct; }
|
||||
|
||||
private:
|
||||
/// Performs INITDEC operation as described in the specification
|
||||
@ -136,6 +146,9 @@ private:
|
||||
/// Number of current unprocessed bits.
|
||||
uint32_t m_ct;
|
||||
|
||||
/// Last processed byte
|
||||
uint8_t m_lastByte;
|
||||
|
||||
/// Data source to read from
|
||||
PDFBitReader* m_reader;
|
||||
};
|
||||
|
@ -89,7 +89,7 @@ private:
|
||||
|
||||
/// Bit-reader, which can read n-bit unsigned integers from the stream.
|
||||
/// Number of bits can be set in the constructor and is constant.
|
||||
class PDFBitReader
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFBitReader
|
||||
{
|
||||
public:
|
||||
using Value = uint64_t;
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "pdffunction.h"
|
||||
#include "pdfdocument.h"
|
||||
#include "pdfexception.h"
|
||||
#include "pdfjbig2decoder.h"
|
||||
|
||||
#include <regex>
|
||||
|
||||
@ -53,6 +54,7 @@ private slots:
|
||||
void test_exponential_function();
|
||||
void test_stitching_function();
|
||||
void test_postscript_function();
|
||||
void test_jbig2_arithmetic_decoder();
|
||||
|
||||
private:
|
||||
void scanWholeStream(const char* stream);
|
||||
@ -309,7 +311,7 @@ void LexicalAnalyzerTest::test_lzw_filter()
|
||||
// This example is from PDF 1.7 Reference
|
||||
QByteArray byteArray = QByteArray::fromHex("800B6050220C0C8501");
|
||||
pdf::PDFLzwDecodeFilter filter;
|
||||
QByteArray decoded = filter.apply(byteArray, nullptr, pdf::PDFObject(), nullptr);
|
||||
QByteArray decoded = filter.apply(byteArray, [](const pdf::PDFObject& object) -> const pdf::PDFObject& { return object; }, pdf::PDFObject(), nullptr);
|
||||
QByteArray valid = "-----A---B";
|
||||
|
||||
QCOMPARE(decoded, valid);
|
||||
@ -1056,6 +1058,45 @@ void LexicalAnalyzerTest::test_postscript_function()
|
||||
test01("2.0 1 index exch div exch pop", [](double x) { return x / 2.0; });
|
||||
}
|
||||
|
||||
void LexicalAnalyzerTest::test_jbig2_arithmetic_decoder()
|
||||
{
|
||||
std::vector<uint8_t> compressed = { 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00, 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47, 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC };
|
||||
std::vector<uint8_t> decompressed = { 0x00, 0x02, 0x00, 0x51, 0x00, 0x00, 0x00, 0xC0, 0x03, 0x52, 0x87, 0x2A, 0xAA, 0xAA, 0xAA, 0xAA, 0x82, 0xC0, 0x20, 0x00, 0xFC, 0xD7, 0x9E, 0xF6, 0xBF, 0x7F, 0xED, 0x90, 0x4F, 0x46, 0xA3, 0xBF };
|
||||
|
||||
QByteArray input;
|
||||
input.append(reinterpret_cast<char*>(compressed.data()), static_cast<int>(compressed.size()));
|
||||
|
||||
pdf::PDFBitReader reader(&input, 1);
|
||||
pdf::PDFJBIG2ArithmeticDecoder decoder(&reader);
|
||||
decoder.initialize();
|
||||
|
||||
pdf::PDFJBIG2ArithmeticDecoderState state;
|
||||
state.reset(1);
|
||||
std::vector<uint8_t> decompressedByAD;
|
||||
decompressedByAD.reserve(decompressed.size());
|
||||
/*
|
||||
for (size_t i = 0; i < decompressed.size() * 8; ++i)
|
||||
{
|
||||
uint32_t Qe = state.getQe(0);
|
||||
uint8_t MPS = state.getMPS(0);
|
||||
qDebug() << (i - 1) << ", Qe = " << qPrintable(QString("0x%1").arg(Qe, 8, 16, QChar(' '))) << ", MPS = " << MPS <<
|
||||
", A = " << qPrintable(QString("0x%1").arg(decoder.getRegisterA(), 8, 16, QChar(' '))) << ", CT = " << decoder.getRegisterCT() <<
|
||||
", C = " << qPrintable(QString("0x%1").arg(decoder.getRegisterC(), 8, 16, QChar(' '))) ;
|
||||
decoder.readBit(0, &state);
|
||||
}
|
||||
|
||||
reader.seek(0);
|
||||
state.reset(1);
|
||||
decoder.initialize();*/
|
||||
|
||||
for (size_t i = 0; i < decompressed.size(); ++i)
|
||||
{
|
||||
decompressedByAD.push_back(decoder.readByte(0, &state));
|
||||
}
|
||||
|
||||
QVERIFY(decompressed == decompressedByAD);
|
||||
}
|
||||
|
||||
void LexicalAnalyzerTest::scanWholeStream(const char* stream)
|
||||
{
|
||||
pdf::PDFLexicalAnalyzer analyzer(stream, stream + strlen(stream));
|
||||
|
Reference in New Issue
Block a user