JBIG2 - symbol dictionary (first part)

This commit is contained in:
Jakub Melka
2019-10-31 15:47:33 +01:00
parent 2e359d63d3
commit a77bfbd896
2 changed files with 694 additions and 36 deletions

View File

@ -22,32 +22,6 @@
namespace pdf namespace pdf
{ {
// NOTE(review): presumably sentinel markers for the "lower range" and "out-of-band"
// rows of a huffman table; they are not referenced in the visible code - confirm
// their purpose before relying on them.
static constexpr uint16_t HUFFMAN_LOW_VALUE = 0xFFFE;
static constexpr uint16_t HUFFMAN_OOB_VALUE = 0xFFFF;
/// Single row of a JBIG2 huffman table. The struct is a plain aggregate and is
/// initialized positionally (value, prefixBitLength, rangeBitLength, prefix, type),
/// so the order of the data members must not be changed.
struct PDFJBIG2HuffmanTableEntry
{
/// Kind of the table row
enum class Type : uint8_t
{
Standard,
Negative,
OutOfBand
};
/// Returns true, if current row represents interval (-∞, value). In this case,
/// a 32bit number must be read and subtracted from the base value.
bool isLowValue() const { return type == Type::Negative; }
/// Returns true, if current row represents out-of-band value
bool isOutOfBand() const { return type == Type::OutOfBand; }
int32_t value = 0; ///< Base value
uint16_t prefixBitLength = 0; ///< Bit length of prefix
uint16_t rangeBitLength = 0; ///< Bit length of additional value
uint16_t prefix = 0; ///< Bit prefix of the huffman code
Type type = Type::Standard; ///< Type of the value
};
static constexpr PDFJBIG2HuffmanTableEntry PDFJBIG2StandardHuffmanTable_A[] = static constexpr PDFJBIG2HuffmanTableEntry PDFJBIG2StandardHuffmanTable_A[] =
{ {
{ 0, 1, 4, 0b0, PDFJBIG2HuffmanTableEntry::Type::Standard}, { 0, 1, 4, 0b0, PDFJBIG2HuffmanTableEntry::Type::Standard},
@ -362,6 +336,100 @@ uint32_t PDFJBIG2ArithmeticDecoder::readByte(size_t context, PDFJBIG2ArithmeticD
return byte; return byte;
} }
int32_t PDFJBIG2ArithmeticDecoder::getIAID(uint32_t size, PDFJBIG2ArithmeticDecoderState* state)
{
// Algorithm A.3 in annex A in the specification
uint32_t PREV = 1;
for (uint32_t i = 0; i < size; ++i)
{
uint32_t bit = readBit(PREV, state);
PREV = (PREV << 1) | bit;
}
// Jakub Melka: we must subtract 1 << size, because at the start of the algorithm,
// PREV is initialized to 1, which we don't want in the result, so we subtract the value.
return int32_t(PREV) - int32_t(1 << size);
}
/// Decodes a signed integer using the arithmetic integer decoding procedure
/// (algorithm A.2 in annex A in the specification).
/// \param state Arithmetic decoder state (contexts) used for decoding
/// \returns Decoded integer, or std::nullopt, if the sign bit is set and the
///          decoded magnitude is zero (this combination is the out-of-band value)
std::optional<int32_t> PDFJBIG2ArithmeticDecoder::getSignedInteger(PDFJBIG2ArithmeticDecoderState* state)
{
    // Context index (called PREV in the specification). It starts at 1 and
    // accumulates the decoded bits; once it reaches 9 bits, only the lowest
    // 8 bits are kept and bit 8 is forced to 1.
    uint32_t context = 1;

    auto decodeBit = [this, &context, state]()
    {
        const uint32_t decoded = readBit(context, state);
        const uint32_t extended = (context << 1) | decoded;
        context = (context < 256) ? extended : ((extended & 0x01FF) | 0x0100);

        Q_ASSERT(context < 512);
        return decoded;
    };

    auto decodeBits = [&decodeBit](uint32_t count)
    {
        uint32_t accumulated = 0;
        while (count-- > 0)
        {
            accumulated = (accumulated << 1) | decodeBit();
        }
        return accumulated;
    };

    // First decoded bit is the sign of the number
    const uint32_t sign = decodeBit();

    // The magnitude is stored in one of six ranges. Each of the first five ranges
    // is selected by a zero bit, a one bit means "try the next range". If none
    // of them is selected, the last (32 bit) range is used.
    struct MagnitudeRange
    {
        uint32_t bits;
        uint32_t offset;
    };
    const MagnitudeRange selectableRanges[] = { { 2, 0 }, { 4, 4 }, { 6, 20 }, { 8, 84 }, { 12, 340 } };

    uint32_t magnitude = 0;
    bool isRangeSelected = false;
    for (const MagnitudeRange& range : selectableRanges)
    {
        if (!decodeBit())
        {
            magnitude = decodeBits(range.bits) + range.offset;
            isRangeSelected = true;
            break;
        }
    }

    if (!isRangeSelected)
    {
        magnitude = decodeBits(32) + 4436;
    }

    if (!sign)
    {
        return magnitude;
    }

    if (magnitude == 0)
    {
        // "Negative zero" is the out-of-band value
        return std::nullopt;
    }

    return -static_cast<int32_t>(magnitude);
}
void PDFJBIG2ArithmeticDecoder::perform_INITDEC() void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
{ {
// Used figure G.1, in annex G, of specification // Used figure G.1, in annex G, of specification
@ -835,7 +903,274 @@ void PDFJBIG2Decoder::processStream()
void PDFJBIG2Decoder::processSymbolDictionary(const PDFJBIG2SegmentHeader& header) void PDFJBIG2Decoder::processSymbolDictionary(const PDFJBIG2SegmentHeader& header)
{ {
// TODO: JBIG2 - processSymbolDictionary /* 7.4.2.2 step 1) */
PDFJBIG2SymbolDictionaryDecodingParameters parameters;
const uint16_t symbolDictionaryFlags = m_reader.readUnsignedWord();
parameters.SDHUFF = symbolDictionaryFlags & 0x0001;
parameters.SDREFAGG = symbolDictionaryFlags & 0x0002;
parameters.SDHUFFDH = (symbolDictionaryFlags >> 2) & 0x0003;
parameters.SDHUFFDH = (symbolDictionaryFlags >> 4) & 0x0003;
parameters.SDHUFFBMSIZE = (symbolDictionaryFlags >> 6) & 0x0001;
parameters.SDHUFFAGGINST = (symbolDictionaryFlags >> 7) & 0x0001;
parameters.isArithmeticCodingStateUsed = (symbolDictionaryFlags >> 8) & 0x0001;
parameters.isArithmeticCodingStateRetained = (symbolDictionaryFlags >> 9) & 0x0001;
parameters.SDTEMPLATE = (symbolDictionaryFlags >> 10) & 0x0003;
parameters.SDRTEMPLATE = (symbolDictionaryFlags >> 12) & 0x0001;
parameters.SDAT = readATTemplatePixelPositions((parameters.SDHUFF == 0) ? ((parameters.SDTEMPLATE == 0) ? 4 : 1) : 0);
parameters.SDRAT = readATTemplatePixelPositions((parameters.SDREFAGG == 1 && parameters.SDRTEMPLATE == 0) ? 2 : 0);
parameters.SDNUMEXSYMS = m_reader.readUnsignedInt();
parameters.SDNUMNEWSYMS = m_reader.readUnsignedInt();
/* sanity checks */
if ((symbolDictionaryFlags >> 13) != 0)
{
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid flags for symbol dictionary segment."));
}
if (!parameters.SDHUFF || !parameters.SDREFAGG)
{
if (parameters.SDHUFFAGGINST != 0)
{
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid flags for symbol dictionary segment."));
}
}
if (!parameters.SDHUFF)
{
if (parameters.SDHUFFDH != 0 || parameters.SDHUFFDH != 0 || parameters.SDHUFFBMSIZE != 0 || parameters.SDHUFFAGGINST != 0)
{
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid flags for symbol dictionary segment."));
}
}
else
{
if (!parameters.SDREFAGG && (parameters.isArithmeticCodingStateUsed || parameters.isArithmeticCodingStateRetained || parameters.SDRTEMPLATE != 0))
{
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid flags for symbol dictionary segment."));
}
if (parameters.SDTEMPLATE != 0)
{
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid flags for symbol dictionary segment."));
}
}
/* 7.4.2.2 step 2) */
PDFJBIG2ReferencedSegments references = getReferencedSegments(header);
for (const PDFJBIG2SymbolDictionary* dictionary : references.symbolDictionaries)
{
const std::vector<PDFJBIG2Bitmap>& bitmaps = dictionary->getBitmaps();
parameters.SDINSYMS.reserve(parameters.SDINSYMS.size() + bitmaps.size());
for (const PDFJBIG2Bitmap& bitmap : bitmaps)
{
parameters.SDINSYMS.push_back(&bitmap);
}
}
parameters.SDNUMINSYMS = static_cast<uint32_t>(parameters.SDINSYMS.size());
/* 7.4.2.1.6 - huffman table selection */
if (parameters.SDHUFF)
{
size_t currentUserCodeTableIndex = 0;
auto getUserTable = [&](void) -> PDFJBIG2HuffmanDecoder
{
if (currentUserCodeTableIndex < references.codeTables.size())
{
return PDFJBIG2HuffmanDecoder(&m_reader, references.codeTables[currentUserCodeTableIndex++]);
}
else
{
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid user huffman code table."));
}
return PDFJBIG2HuffmanDecoder();
};
switch (parameters.SDHUFFDH)
{
case 0:
parameters.SDHUFFDH_Decoder = PDFJBIG2HuffmanDecoder(&m_reader, std::begin(PDFJBIG2StandardHuffmanTable_D), std::end(PDFJBIG2StandardHuffmanTable_D));
break;
case 1:
parameters.SDHUFFDH_Decoder = PDFJBIG2HuffmanDecoder(&m_reader, std::begin(PDFJBIG2StandardHuffmanTable_E), std::end(PDFJBIG2StandardHuffmanTable_E));
break;
case 3:
parameters.SDHUFFDH_Decoder = getUserTable();
break;
default:
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid user huffman code table."));
}
switch (parameters.SDHUFFDW)
{
case 0:
parameters.SDHUFFDW_Decoder = PDFJBIG2HuffmanDecoder(&m_reader, std::begin(PDFJBIG2StandardHuffmanTable_B), std::end(PDFJBIG2StandardHuffmanTable_B));
break;
case 1:
parameters.SDHUFFDW_Decoder = PDFJBIG2HuffmanDecoder(&m_reader, std::begin(PDFJBIG2StandardHuffmanTable_C), std::end(PDFJBIG2StandardHuffmanTable_C));
break;
case 3:
parameters.SDHUFFDW_Decoder = getUserTable();
break;
default:
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid user huffman code table."));
}
switch (parameters.SDHUFFBMSIZE)
{
case 0:
parameters.SDHUFFBMSIZE_Decoder = PDFJBIG2HuffmanDecoder(&m_reader, std::begin(PDFJBIG2StandardHuffmanTable_A), std::end(PDFJBIG2StandardHuffmanTable_A));
break;
case 1:
parameters.SDHUFFBMSIZE_Decoder = getUserTable();
break;
default:
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid user huffman code table."));
}
switch (parameters.SDHUFFAGGINST)
{
case 0:
parameters.SDHUFFAGGINST_Decoder = PDFJBIG2HuffmanDecoder(&m_reader, std::begin(PDFJBIG2StandardHuffmanTable_A), std::end(PDFJBIG2StandardHuffmanTable_A));
break;
case 1:
parameters.SDHUFFAGGINST_Decoder = getUserTable();
break;
default:
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid user huffman code table."));
}
if (currentUserCodeTableIndex != references.codeTables.size())
{
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid number of huffam code table - %1 unused.").arg(references.codeTables.size() - currentUserCodeTableIndex));
}
}
else
{
/* 7.4.2.2 step 3) and 4) - initialize arithmetic encoder */
if (parameters.isArithmeticCodingStateUsed)
{
if (references.symbolDictionaries.empty())
{
throw PDFException(PDFTranslationContext::tr("JBIG2 trying to use aritmetic decoder context from previous symbol dictionary, but it doesn't exist."));
}
resetArithmeticStatesGeneric(parameters.SDTEMPLATE, &references.symbolDictionaries.back()->getGenericState());
}
else
{
resetArithmeticStatesGeneric(parameters.SDTEMPLATE, nullptr);
}
if (parameters.SDREFAGG)
{
if (parameters.isArithmeticCodingStateUsed)
{
if (references.symbolDictionaries.empty())
{
throw PDFException(PDFTranslationContext::tr("JBIG2 trying to use aritmetic decoder context from previous symbol dictionary, but it doesn't exist."));
}
resetArithmeticStatesGenericRefinement(parameters.SDRTEMPLATE, &references.symbolDictionaries.back()->getGenericRefinementState());
}
else
{
resetArithmeticStatesGenericRefinement(parameters.SDRTEMPLATE, nullptr);
}
}
}
PDFJBIG2ArithmeticDecoder decoder(&m_reader);
PDFJBIG2ArithmeticDecoderState IADH;
PDFJBIG2ArithmeticDecoderState IADW;
if (!parameters.SDHUFF)
{
decoder.initialize();
IADH.reset(9);
IADW.reset(9);
}
/* 6.5.5 - algorithm for decoding symbol dictionary */
/* 6.5.5 step 1) - create output bitmaps */
parameters.SDNEWSYMS.resize(parameters.SDNUMNEWSYMS);
/* 6.5.5 step 2) - initalize width array */
if (parameters.SDHUFF == 1 && parameters.SDREFAGG == 0)
{
parameters.SDNEWSYMWIDTHS.resize(parameters.SDNUMNEWSYMS, 0);
}
/* 6.5.5 step 3) - initalize variables to zero */
uint32_t HCHEIGHT = 0;
uint32_t NSYMSDECODED = 0;
/* 6.5.5 step 4) - read all bitmaps */
while (NSYMSDECODED < parameters.SDNUMNEWSYMS)
{
/* 6.5.5 step 4) b) - decode height class delta height according to 6.5.6 */
int32_t HCDH = checkInteger(parameters.SDHUFF ? parameters.SDHUFFDH_Decoder.readSignedInteger() : decoder.getSignedInteger(&IADH));
HCHEIGHT += HCDH;
uint32_t SYMWIDTH = 0;
uint32_t TOTWIDTH = 0;
uint32_t HCFIRSTSYM = NSYMSDECODED;
/* 6.5.5 step 4) c) - read height class */
while (NSYMSDECODED < parameters.SDNUMNEWSYMS)
{
/* 6.5.5 step 4) c) i) - Delta width acc. to 6.5.7 */
std::optional<int32_t> DW = parameters.SDHUFF ? parameters.SDHUFFDW_Decoder.readSignedInteger() : decoder.getSignedInteger(&IADW);
if (!DW.has_value())
{
// All symbols of this height class have been decoded
break;
}
if (NSYMSDECODED >= parameters.SDNUMNEWSYMS)
{
throw PDFException(PDFTranslationContext::tr("JBIG2 symbol height class has more symbols, than defined in the symbol dictionary header."));
}
SYMWIDTH += *DW;
TOTWIDTH += SYMWIDTH;
if (parameters.SDHUFF == 0 || parameters.SDREFAGG == 1)
{
/* 6.5.5 step 4) c) ii) - read bitmap acc. to 6.5.8 */
// TODO: JBIG2 read bitmap
}
else
{
/* 6.5.5 step 4) c) iii) - update value of widths */
parameters.SDNEWSYMWIDTHS[NSYMSDECODED] = SYMWIDTH;
}
/* 6.5.5 step 4) c) iv) - update decoded symbols counter */
++NSYMSDECODED;
}
/* 6.5.5 step 4) d) - create collective bitmap (if it does exist) */
// TODO: JBIG2 - create collective bitmap
}
/* 6.5.5 step 5) - determine exports */
// TODO: JBIG2 - dodelat
} }
void PDFJBIG2Decoder::processTextRegion(const PDFJBIG2SegmentHeader& header) void PDFJBIG2Decoder::processTextRegion(const PDFJBIG2SegmentHeader& header)
@ -873,7 +1208,7 @@ void PDFJBIG2Decoder::processGenericRegion(const PDFJBIG2SegmentHeader& header)
{ {
// We will use arithmetic coding, read template pixels and reset arithmetic coder state // We will use arithmetic coding, read template pixels and reset arithmetic coder state
parameters.ATXY = readATTemplatePixelPositions((parameters.GBTEMPLATE == 0) ? 4 : 1); parameters.ATXY = readATTemplatePixelPositions((parameters.GBTEMPLATE == 0) ? 4 : 1);
resetArithmeticStatesGeneric(parameters.GBTEMPLATE); resetArithmeticStatesGeneric(parameters.GBTEMPLATE, nullptr);
} }
// Determine segment data length // Determine segment data length
@ -988,7 +1323,7 @@ void PDFJBIG2Decoder::processGenericRefinementRegion(const PDFJBIG2SegmentHeader
throw PDFException(PDFTranslationContext::tr("JBIG2 - invalid referred bitmap size [%1 x %2] instead of [%3 x %4] for generic refinement region.").arg(GRREFERENCE.getWidth()).arg(GRREFERENCE.getHeight()).arg(field.width).arg(field.height)); throw PDFException(PDFTranslationContext::tr("JBIG2 - invalid referred bitmap size [%1 x %2] instead of [%3 x %4] for generic refinement region.").arg(GRREFERENCE.getWidth()).arg(GRREFERENCE.getHeight()).arg(field.width).arg(field.height));
} }
resetArithmeticStatesGenericRefinement(GRTEMPLATE); resetArithmeticStatesGenericRefinement(GRTEMPLATE, nullptr);
PDFJBIG2BitmapRefinementDecodingParameters parameters; PDFJBIG2BitmapRefinementDecodingParameters parameters;
parameters.GRTEMPLATE = GRTEMPLATE; parameters.GRTEMPLATE = GRTEMPLATE;
@ -1594,7 +1929,7 @@ PDFJBIG2ATPositions PDFJBIG2Decoder::readATTemplatePixelPositions(int count)
return result; return result;
} }
void PDFJBIG2Decoder::resetArithmeticStatesGeneric(const uint8_t templateMode) void PDFJBIG2Decoder::resetArithmeticStatesGeneric(const uint8_t templateMode, const PDFJBIG2ArithmeticDecoderState* state)
{ {
uint8_t bits = 0; uint8_t bits = 0;
switch (templateMode) switch (templateMode)
@ -1617,10 +1952,17 @@ void PDFJBIG2Decoder::resetArithmeticStatesGeneric(const uint8_t templateMode)
break; break;
} }
if (!state)
{
m_arithmeticDecoderStates[Generic].reset(bits); m_arithmeticDecoderStates[Generic].reset(bits);
}
else
{
m_arithmeticDecoderStates[Generic].reset(bits, *state);
}
} }
void PDFJBIG2Decoder::resetArithmeticStatesGenericRefinement(const uint8_t templateMode) void PDFJBIG2Decoder::resetArithmeticStatesGenericRefinement(const uint8_t templateMode, const PDFJBIG2ArithmeticDecoderState* state)
{ {
uint8_t bits = 0; uint8_t bits = 0;
switch (templateMode) switch (templateMode)
@ -1638,7 +1980,14 @@ void PDFJBIG2Decoder::resetArithmeticStatesGenericRefinement(const uint8_t templ
break; break;
} }
if (!state)
{
m_arithmeticDecoderStates[Refinement].reset(bits); m_arithmeticDecoderStates[Refinement].reset(bits);
}
else
{
m_arithmeticDecoderStates[Refinement].reset(bits, *state);
}
} }
void PDFJBIG2Decoder::skipSegment(const PDFJBIG2SegmentHeader& header) void PDFJBIG2Decoder::skipSegment(const PDFJBIG2SegmentHeader& header)
@ -1653,6 +2002,42 @@ void PDFJBIG2Decoder::skipSegment(const PDFJBIG2SegmentHeader& header)
} }
} }
/// Resolves the segments referred by the given segment header and sorts them by kind
/// (bitmaps, huffman code tables, symbol dictionaries). The order of the referred
/// segments is preserved within each kind.
/// \param header Header, from which referred segments are read
/// \throws PDFException, if referred segment is not stored in the decoder, or is null
PDFJBIG2ReferencedSegments PDFJBIG2Decoder::getReferencedSegments(const PDFJBIG2SegmentHeader& header) const
{
    PDFJBIG2ReferencedSegments result;

    for (const uint32_t segmentId : header.getReferredSegments())
    {
        const auto found = m_segments.find(segmentId);
        if (found == m_segments.cend() || !found->second)
        {
            throw PDFException(PDFTranslationContext::tr("JBIG2 invalid referred segment %1 referenced by segment %2.").arg(segmentId).arg(header.getSegmentNumber()));
        }

        const PDFJBIG2Segment* segment = found->second.get();

        if (const PDFJBIG2Bitmap* bitmap = segment->asBitmap())
        {
            result.bitmaps.push_back(bitmap);
            continue;
        }

        if (const PDFJBIG2HuffmanCodeTable* codeTable = segment->asHuffmanCodeTable())
        {
            result.codeTables.push_back(codeTable);
            continue;
        }

        if (const PDFJBIG2SymbolDictionary* dictionary = segment->asSymbolDictionary())
        {
            result.symbolDictionaries.push_back(dictionary);
            continue;
        }

        // Segment of an unknown kind - it is only reported in debug builds
        Q_ASSERT(false);
    }

    return result;
}
void PDFJBIG2Decoder::checkBitmapSize(const uint32_t size) void PDFJBIG2Decoder::checkBitmapSize(const uint32_t size)
{ {
if (size > MAX_BITMAP_SIZE) if (size > MAX_BITMAP_SIZE)
@ -1679,6 +2064,20 @@ void PDFJBIG2Decoder::checkRegionSegmentInformationField(const PDFJBIG2RegionSeg
} }
} }
/// Unwraps the decoded integer.
/// \param value Decoded integer, or empty optional
/// \returns Value stored in the optional
/// \throws PDFException, if the optional is empty
int32_t PDFJBIG2Decoder::checkInteger(std::optional<int32_t> value)
{
    if (!value)
    {
        throw PDFException(PDFTranslationContext::tr("JBIG2 can't read integer."));
    }

    return *value;
}
PDFJBIG2Bitmap::PDFJBIG2Bitmap() : PDFJBIG2Bitmap::PDFJBIG2Bitmap() :
m_width(0), m_width(0),
m_height(0) m_height(0)
@ -1859,4 +2258,76 @@ PDFJBIG2Segment::~PDFJBIG2Segment()
} }
/// Constructs the decoder from a huffman code table. The entries of the table
/// are copied into the decoder, and the decoded range is set to this copy.
/// \param reader Data source to read from
/// \param table Huffman code table, from which the entries are copied
PDFJBIG2HuffmanDecoder::PDFJBIG2HuffmanDecoder(PDFBitReader* reader, const PDFJBIG2HuffmanCodeTable* table) :
    m_reader(reader),
    m_entries(table->getEntries())
{
    if (m_entries.empty())
    {
        // Nothing to decode with, range stays empty (decoder is not valid)
        return;
    }

    m_begin = m_entries.data();
    m_end = m_begin + m_entries.size();
}
bool PDFJBIG2HuffmanDecoder::isOutOfBandSupported() const
{
if (!isValid())
{
return false;
}
for (auto it = m_begin; it != m_end; ++it)
{
if (it->isOutOfBand())
{
return true;
}
}
return false;
}
std::optional<int32_t> PDFJBIG2HuffmanDecoder::readSignedInteger()
{
uint32_t prefixBitCount = 0;
uint32_t prefix = 0;
for (const PDFJBIG2HuffmanTableEntry* it = m_begin; it != m_end; ++it)
{
// Align prefix with current bit value
Q_ASSERT(prefixBitCount <= it->prefixBitLength);
while (prefixBitCount < it->prefixBitLength)
{
prefix = (prefix << 1) | m_reader->read(1);
++prefixBitCount;
}
if (prefix == it->prefix)
{
// We have found value. Now, there are three cases:
// 1) Out of band value
// 2) Negative value
// 3) Standard value
if (it->isOutOfBand())
{
return std::nullopt;
}
else if (it->isLowValue())
{
return it->value - m_reader->read(32);
}
else if (it->rangeBitLength == 0)
{
return it->value;
}
else
{
return it->value + m_reader->read(it->rangeBitLength);
}
}
}
return std::nullopt;
}
} // namespace pdf } // namespace pdf

View File

@ -21,11 +21,14 @@
#include "pdfutils.h" #include "pdfutils.h"
#include "pdfcolorspaces.h" #include "pdfcolorspaces.h"
#include <optional>
namespace pdf namespace pdf
{ {
class PDFJBIG2Bitmap; class PDFJBIG2Bitmap;
class PDFRenderErrorReporter; class PDFRenderErrorReporter;
class PDFJBIG2HuffmanCodeTable; class PDFJBIG2HuffmanCodeTable;
class PDFJBIG2SymbolDictionary;
struct PDFJBIG2HuffmanTableEntry; struct PDFJBIG2HuffmanTableEntry;
@ -39,6 +42,29 @@ enum class PDFJBIG2BitOperation
Replace Replace
}; };
/// Single row of a JBIG2 huffman table. The struct is a plain aggregate and is
/// initialized positionally (value, prefixBitLength, rangeBitLength, prefix, type),
/// so the order of the data members must not be changed.
struct PDFJBIG2HuffmanTableEntry
{
/// Kind of the table row
enum class Type : uint8_t
{
Standard,
Negative,
OutOfBand
};
/// Returns true, if current row represents interval (-∞, value). In this case,
/// a 32bit number must be read and subtracted from the base value.
bool isLowValue() const { return type == Type::Negative; }
/// Returns true, if current row represents out-of-band value
bool isOutOfBand() const { return type == Type::OutOfBand; }
int32_t value = 0; ///< Base value
uint16_t prefixBitLength = 0; ///< Bit length of prefix
uint16_t rangeBitLength = 0; ///< Bit length of additional value
uint16_t prefix = 0; ///< Bit prefix of the huffman code
Type type = Type::Standard; ///< Type of the value
};
/// Arithmetic decoder state for JBIG2 data streams. It contains state for context, /// Arithmetic decoder state for JBIG2 data streams. It contains state for context,
/// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used /// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used
/// to store Qe value index (current row in the table, number 0-46), and lowest 1 bit /// to store Qe value index (current row in the table, number 0-46), and lowest 1 bit
@ -64,6 +90,15 @@ public:
} }
} }
/// Resets the context by calling reset(bits), and then overwrites the leading
/// part of the state with the state of another context. If the sizes of the
/// states differ, only the common leading part is copied.
/// \param bits Passed to reset(bits)
/// \param other Context, from which the state is copied
inline void reset(const uint8_t bits, const PDFJBIG2ArithmeticDecoderState& other)
{
    reset(bits);

    const size_t copiedCount = qMin(m_state.size(), other.m_state.size());
    const auto sourceBegin = other.m_state.begin();
    std::copy(sourceBegin, sourceBegin + copiedCount, m_state.begin());
}
/// Returns row index to Qe value table, according to document ISO/IEC 14492:2001, /// Returns row index to Qe value table, according to document ISO/IEC 14492:2001,
/// annex E, table E.1 (Qe values and probability estimation process). /// annex E, table E.1 (Qe values and probability estimation process).
inline uint8_t getQeRowIndex(size_t context) const inline uint8_t getQeRowIndex(size_t context) const
@ -121,6 +156,9 @@ public:
uint32_t getRegisterA() const { return m_a; } uint32_t getRegisterA() const { return m_a; }
uint32_t getRegisterCT() const { return m_ct; } uint32_t getRegisterCT() const { return m_ct; }
/// Decodes a value of \p size bits, bit by bit (algorithm A.3 in annex A
/// in the specification).
/// \param size Number of bits to decode
/// \param state Arithmetic decoder state used for decoding
int32_t getIAID(uint32_t size, PDFJBIG2ArithmeticDecoderState* state);
/// Decodes a signed integer (algorithm A.2 in annex A in the specification).
/// \param state Arithmetic decoder state used for decoding
/// \returns Decoded integer, or std::nullopt, if the sign bit is set
///          and the decoded magnitude is zero
std::optional<int32_t> getSignedInteger(PDFJBIG2ArithmeticDecoderState* state);
private: private:
/// Performs INITDEC operation as described in the specification /// Performs INITDEC operation as described in the specification
void perform_INITDEC(); void perform_INITDEC();
@ -223,13 +261,15 @@ public:
virtual const PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() const { return nullptr; } virtual const PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() const { return nullptr; }
virtual PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() { return nullptr; } virtual PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() { return nullptr; }
/// Returns this segment as symbol dictionary. This base implementation
/// returns nullptr, the symbol dictionary class overrides it to return itself.
virtual const PDFJBIG2SymbolDictionary* asSymbolDictionary() const { return nullptr; }
/// Non-const variant of the function above, this base implementation returns nullptr.
virtual PDFJBIG2SymbolDictionary* asSymbolDictionary() { return nullptr; }
}; };
class PDFJBIG2HuffmanCodeTable : public PDFJBIG2Segment class PDFJBIG2HuffmanCodeTable : public PDFJBIG2Segment
{ {
public: public:
explicit PDFJBIG2HuffmanCodeTable(std::vector<PDFJBIG2HuffmanTableEntry>&& entries); explicit PDFJBIG2HuffmanCodeTable(std::vector<PDFJBIG2HuffmanTableEntry>&& entries);
virtual ~PDFJBIG2HuffmanCodeTable(); virtual ~PDFJBIG2HuffmanCodeTable();
virtual const PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() const override { return this; } virtual const PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() const override { return this; }
@ -246,6 +286,43 @@ private:
std::vector<PDFJBIG2HuffmanTableEntry> m_entries; std::vector<PDFJBIG2HuffmanTableEntry> m_entries;
}; };
/// Huffman decoder - can decode integers / out of band values from huffman table.
class PDFJBIG2HuffmanDecoder
{
public:
explicit inline PDFJBIG2HuffmanDecoder() = default;
/// Constructs huffman decoder from static tables, so no memory are allocated (vector is empty)
explicit inline PDFJBIG2HuffmanDecoder(PDFBitReader* reader, const PDFJBIG2HuffmanTableEntry* begin, const PDFJBIG2HuffmanTableEntry* end) :
m_reader(reader),
m_begin(begin),
m_end(end)
{
}
/// Constructs huffman decoder from huffman code table, in this case, memory is allocated
explicit PDFJBIG2HuffmanDecoder(PDFBitReader* reader, const PDFJBIG2HuffmanCodeTable* table);
/// Returns true, if huffman table is valid (and usable)
bool isValid() const { return m_begin != m_end; }
/// Returns true, if huffman table has out-of-band value
bool isOutOfBandSupported() const;
/// Tries to read signed integer using the table and current reader.
/// \returns Integer, or out-of-band value, using the std::optional semantics
std::optional<int32_t> readSignedInteger();
private:
/// Data source to read from
PDFBitReader* m_reader = nullptr;
const PDFJBIG2HuffmanTableEntry* m_begin = nullptr;
const PDFJBIG2HuffmanTableEntry* m_end = nullptr;
std::vector<PDFJBIG2HuffmanTableEntry> m_entries;
};
class PDFFORQTLIBSHARED_EXPORT PDFJBIG2Bitmap : public PDFJBIG2Segment class PDFFORQTLIBSHARED_EXPORT PDFJBIG2Bitmap : public PDFJBIG2Segment
{ {
public: public:
@ -309,6 +386,40 @@ private:
std::vector<uint8_t> m_data; std::vector<uint8_t> m_data;
}; };
class PDFJBIG2SymbolDictionary : public PDFJBIG2Segment
{
public:
explicit inline PDFJBIG2SymbolDictionary() = default;
explicit inline PDFJBIG2SymbolDictionary(std::vector<PDFJBIG2Bitmap>&& bitmaps,
PDFJBIG2ArithmeticDecoderState&& genericState,
PDFJBIG2ArithmeticDecoderState&& genericRefinementState) :
m_bitmaps(qMove(bitmaps)),
m_genericState(qMove(genericState)),
m_genericRefinementState(qMove(genericRefinementState))
{
}
virtual const PDFJBIG2SymbolDictionary* asSymbolDictionary() const override { return this; }
virtual PDFJBIG2SymbolDictionary* asSymbolDictionary() override { return this; }
const std::vector<PDFJBIG2Bitmap>& getBitmaps() const { return m_bitmaps; }
const PDFJBIG2ArithmeticDecoderState& getGenericState() const { return m_genericState; }
const PDFJBIG2ArithmeticDecoderState& getGenericRefinementState() const { return m_genericRefinementState; }
private:
std::vector<PDFJBIG2Bitmap> m_bitmaps;
PDFJBIG2ArithmeticDecoderState m_genericState;
PDFJBIG2ArithmeticDecoderState m_genericRefinementState;
};
/// Segments referred by a segment header, sorted by kind. The structure holds
/// only non-owning pointers to the segments.
struct PDFJBIG2ReferencedSegments
{
/// Referred bitmap segments
std::vector<const PDFJBIG2Bitmap*> bitmaps;
/// Referred huffman code table segments
std::vector<const PDFJBIG2HuffmanCodeTable*> codeTables;
/// Referred symbol dictionary segments
std::vector<const PDFJBIG2SymbolDictionary*> symbolDictionaries;
};
/// Region segment information field, see chapter 7.4.1 in the specification /// Region segment information field, see chapter 7.4.1 in the specification
struct PDFJBIG2RegionSegmentInformationField struct PDFJBIG2RegionSegmentInformationField
{ {
@ -391,6 +502,71 @@ struct PDFJBIG2BitmapRefinementDecodingParameters
PDFJBIG2ATPositions GRAT = { }; PDFJBIG2ATPositions GRAT = { };
}; };
/// Info structure for symbol dictionary decoding procedure. Members are named
/// after the variables used in the specification.
struct PDFJBIG2SymbolDictionaryDecodingParameters
{
/// If true, huffman encoding is used to decode dictionary,
/// otherwise arithmetic decoding is used to decode dictionary.
bool SDHUFF = false;
/// If true, each symbol is refinement/aggregate. If false,
/// then symbols are ordinary bitmaps.
bool SDREFAGG = false;
/// Table selector for huffman decoding of delta height
/// (0 - standard table D, 1 - standard table E, 3 - user table)
uint8_t SDHUFFDH = 0;
/// Table selector for huffman decoding of delta width
/// (0 - standard table B, 1 - standard table C, 3 - user table)
uint8_t SDHUFFDW = 0;
/// Table selector for the SDHUFFBMSIZE decoder
/// (0 - standard table A, 1 - user table)
uint8_t SDHUFFBMSIZE = 0;
/// Table selector for the SDHUFFAGGINST decoder
/// (0 - standard table A, 1 - user table)
uint8_t SDHUFFAGGINST = 0;
/// Is statistics for arithmetic coding used from previous symbol dictionary?
bool isArithmeticCodingStateUsed = false;
/// Is statistics for arithmetic coding symbols retained for future use?
bool isArithmeticCodingStateRetained = false;
/// Template for decoding (generic)
uint8_t SDTEMPLATE = 0;
/// Template for decoding refinements
uint8_t SDRTEMPLATE = 0;
/// Adaptive template pixel positions (generic), read only if SDHUFF is false
PDFJBIG2ATPositions SDAT = { };
/// Adaptive template pixel positions (refinement), read only
/// if SDREFAGG is true and SDRTEMPLATE is zero
PDFJBIG2ATPositions SDRAT = { };
/// Number of exported symbols
uint32_t SDNUMEXSYMS = 0;
/// Number of new symbols
uint32_t SDNUMNEWSYMS = 0;
/// Huffman decoders selected by the table selectors above (set only if SDHUFF is true)
PDFJBIG2HuffmanDecoder SDHUFFDH_Decoder;
PDFJBIG2HuffmanDecoder SDHUFFDW_Decoder;
PDFJBIG2HuffmanDecoder SDHUFFBMSIZE_Decoder;
PDFJBIG2HuffmanDecoder SDHUFFAGGINST_Decoder;
/// Input bitmaps (non-owning pointers to bitmaps of referred symbol dictionaries)
std::vector<const PDFJBIG2Bitmap*> SDINSYMS;
/// Number of input bitmaps
uint32_t SDNUMINSYMS = 0;
/// Output bitmaps
std::vector<PDFJBIG2Bitmap> SDNEWSYMS;
/// Widths of the new symbols (filled only if SDHUFF is true and SDREFAGG is false)
std::vector<int32_t> SDNEWSYMWIDTHS;
};
/// Decoder of JBIG2 data streams. Decodes the black/white monochrome image. /// Decoder of JBIG2 data streams. Decodes the black/white monochrome image.
/// Handles also global segments. Decoder decodes data using the specification /// Handles also global segments. Decoder decodes data using the specification
/// ISO/IEC 14492:2001, T.88. /// ISO/IEC 14492:2001, T.88.
@ -478,15 +654,26 @@ private:
PDFJBIG2ATPositions readATTemplatePixelPositions(int count); PDFJBIG2ATPositions readATTemplatePixelPositions(int count);
/// Reset arithmetic decoder stats for generic /// Reset arithmetic decoder stats for generic
void resetArithmeticStatesGeneric(const uint8_t templateMode); /// \param templateMode Template mode
/// \param state State to copy from (can be nullptr)
void resetArithmeticStatesGeneric(const uint8_t templateMode, const PDFJBIG2ArithmeticDecoderState* state);
/// Reset arithmetic decoder stats for generic refinement /// Reset arithmetic decoder stats for generic refinement
void resetArithmeticStatesGenericRefinement(const uint8_t templateMode); /// \param templateMode Template mode
/// \param state State to copy from (can be nullptr)
void resetArithmeticStatesGenericRefinement(const uint8_t templateMode, const PDFJBIG2ArithmeticDecoderState* state);
/// Skip segment data
void skipSegment(const PDFJBIG2SegmentHeader& header); void skipSegment(const PDFJBIG2SegmentHeader& header);
/// Returns structure containing referenced segments. If segment numbers
/// are wrong, or invalid segments appears, then exception is thrown.
/// \param header Header, from which referred segments are read
PDFJBIG2ReferencedSegments getReferencedSegments(const PDFJBIG2SegmentHeader& header) const;
static void checkBitmapSize(const uint32_t size); static void checkBitmapSize(const uint32_t size);
static void checkRegionSegmentInformationField(const PDFJBIG2RegionSegmentInformationField& field); static void checkRegionSegmentInformationField(const PDFJBIG2RegionSegmentInformationField& field);
static int32_t checkInteger(std::optional<int32_t> value);
QByteArray m_data; QByteArray m_data;
QByteArray m_globalData; QByteArray m_globalData;