JBIG2 decoder patch. Moves the PDFJBIG2HuffmanCodeTable, PDFJBIG2SymbolDictionary and PDFJBIG2SymbolDictionaryDecodingParameters definitions from pdfjbig2decoder.h into pdfjbig2decoder.cpp, adds PDFJBIG2PatternDictionary together with the asPatternDictionary() accessors and PDFJBIG2ReferencedSegments::getPatternBitmaps(), implements processPatternDictionary(), and begins processHalftoneRegion(), which still ends by throwing the "JBIG2 NOT IMPLEMENTED." exception.
Review note: HGX and HGY are read with readSignedInt(), so they are declared int32_t instead of uint32_t and negative values are preserved.
diff --git a/PdfForQtLib/sources/pdfjbig2decoder.cpp b/PdfForQtLib/sources/pdfjbig2decoder.cpp index aa1bee7..f8b6ae5 100644 --- a/PdfForQtLib/sources/pdfjbig2decoder.cpp +++ b/PdfForQtLib/sources/pdfjbig2decoder.cpp @@ -22,6 +22,72 @@ namespace pdf { +class PDFJBIG2HuffmanCodeTable : public PDFJBIG2Segment +{ +public: + explicit PDFJBIG2HuffmanCodeTable(std::vector&& entries); + virtual ~PDFJBIG2HuffmanCodeTable(); + + virtual const PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() const override { return this; } + virtual PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() override { return this; } + + const std::vector& getEntries() const { return m_entries; } + + /// Builds prefixes using algorithm in annex B.3 of specification. Unused rows are removed. + /// Rows are sorted according to the criteria. Prefixes are then filled. + /// \param entries Entries for building the table + static std::vector buildPrefixes(const std::vector& entries); + +private: + std::vector m_entries; +}; + +class PDFJBIG2SymbolDictionary : public PDFJBIG2Segment +{ +public: + explicit inline PDFJBIG2SymbolDictionary() = default; + explicit inline PDFJBIG2SymbolDictionary(std::vector&& bitmaps, + PDFJBIG2ArithmeticDecoderState&& genericState, + PDFJBIG2ArithmeticDecoderState&& genericRefinementState) : + m_bitmaps(qMove(bitmaps)), + m_genericState(qMove(genericState)), + m_genericRefinementState(qMove(genericRefinementState)) + { + + } + + virtual const PDFJBIG2SymbolDictionary* asSymbolDictionary() const override { return this; } + virtual PDFJBIG2SymbolDictionary* asSymbolDictionary() override { return this; } + + const std::vector& getBitmaps() const { return m_bitmaps; } + const PDFJBIG2ArithmeticDecoderState& getGenericState() const { return m_genericState; } + const PDFJBIG2ArithmeticDecoderState& getGenericRefinementState() const { return m_genericRefinementState; } + +private: + std::vector m_bitmaps; + PDFJBIG2ArithmeticDecoderState m_genericState; + PDFJBIG2ArithmeticDecoderState 
m_genericRefinementState; +}; + +class PDFJBIG2PatternDictionary : public PDFJBIG2Segment +{ +public: + explicit inline PDFJBIG2PatternDictionary() = default; + explicit inline PDFJBIG2PatternDictionary(std::vector&& bitmaps) : + m_bitmaps(qMove(bitmaps)) + { + + } + + virtual const PDFJBIG2PatternDictionary* asPatternDictionary() const override { return this; } + virtual PDFJBIG2PatternDictionary* asPatternDictionary() override { return this; } + + const std::vector& getBitmaps() const { return m_bitmaps; } + +private: + std::vector m_bitmaps; +}; + /// Structure containing arithmetic decoder states struct PDFJBIG2ArithmeticDecoderStates { @@ -300,6 +366,72 @@ struct PDFJBIG2BitmapRefinementDecodingParameters PDFJBIG2ArithmeticDecoder* decoder = nullptr; }; +/// Info structure for symbol dictionary decoding procedure +struct PDFJBIG2SymbolDictionaryDecodingParameters +{ + /// If true, huffman encoding is used to decode dictionary, + /// otherwise arithmetic decoding is used to decode dictionary. + bool SDHUFF = false; + + /// If true, each symbol is refinement/aggregate. If false, + /// then symbols are ordinary bitmaps. + bool SDREFAGG = false; + + /// Table selector for huffman table encoding (height) + uint8_t SDHUFFDH = 0; + + /// Table selector for huffman table encoding (width) + uint8_t SDHUFFDW = 0; + + /// Table selector for huffman table encoding + uint8_t SDHUFFBMSIZE = 0; + + /// Table selector for huffman table encoding + uint8_t SDHUFFAGGINST = 0; + + /// Is statistics for arithmetic coding used from previous symbol dictionary? + bool isArithmeticCodingStateUsed = false; + + /// Is statistics for arithmetic coding symbols retained for future use? 
+ bool isArithmeticCodingStateRetained = false; + + /// Template for decoding + uint8_t SDTEMPLATE = 0; + + /// Template for decoding refinements + uint8_t SDRTEMPLATE = 0; + + /// Adaptive pixel positions + PDFJBIG2ATPositions SDAT = { }; + + /// Adaptive pixel positions + PDFJBIG2ATPositions SDRAT = { }; + + /// Number of exported symbols + uint32_t SDNUMEXSYMS = 0; + + /// Number of new symbols + uint32_t SDNUMNEWSYMS = 0; + + PDFJBIG2HuffmanDecoder SDHUFFDH_Decoder; + PDFJBIG2HuffmanDecoder SDHUFFDW_Decoder; + PDFJBIG2HuffmanDecoder SDHUFFBMSIZE_Decoder; + PDFJBIG2HuffmanDecoder SDHUFFAGGINST_Decoder; + PDFJBIG2HuffmanDecoder EXRUNLENGTH_Decoder; + + /// Input bitmaps + std::vector SDINSYMS; + + /// Number of input bitmaps + uint32_t SDNUMINSYMS = 0; + + /// Output bitmaps + std::vector SDNEWSYMS; + + /// Widths + std::vector SDNEWSYMWIDTHS; +}; + static constexpr PDFJBIG2HuffmanTableEntry PDFJBIG2StandardHuffmanTable_A[] = { { 0, 1, 4, 0b0, PDFJBIG2HuffmanTableEntry::Type::Standard}, @@ -2024,12 +2156,118 @@ void PDFJBIG2Decoder::processTextRegion(const PDFJBIG2SegmentHeader& header) void PDFJBIG2Decoder::processPatternDictionary(const PDFJBIG2SegmentHeader& header) { - // TODO: JBIG2 - processPatternDictionary - throw PDFException(PDFTranslationContext::tr("JBIG2 NOT IMPLEMENTED.")); + const int segmentStartPosition = m_reader.getPosition(); + const uint8_t flags = m_reader.readUnsignedByte(); + const uint8_t HDPW = m_reader.readUnsignedByte(); + const uint8_t HDPH = m_reader.readUnsignedByte(); + const uint32_t GRAYMAX = m_reader.readUnsignedInt(); + const bool HDMMR = flags & 0x01; + const uint8_t HDTEMPLATE = (flags >> 1) & 0x03; + + if ((flags & 0b11111000) != 0) + { + throw PDFException(PDFTranslationContext::tr("JBIG2 invalid pattern dictionary flags.")); + } + + QByteArray mmrData; + PDFJBIG2ArithmeticDecoder arithmeticDecoder(&m_reader); + PDFJBIG2ArithmeticDecoderState genericState; + if (!HDMMR) + { + arithmeticDecoder.initialize(); + 
PDFJBIG2ArithmeticDecoderStates::resetArithmeticStatesGeneric(&genericState, HDTEMPLATE, nullptr); + } + else + { + // Determine segment data length + const int segmentDataStartPosition = m_reader.getPosition(); + const int segmentHeaderBytes = segmentDataStartPosition - segmentStartPosition; + if (header.isSegmentDataLengthDefined()) + { + int segmentDataBytes = header.getSegmentDataLength() - segmentHeaderBytes; + mmrData = m_reader.readSubstream(segmentDataBytes); + } + else + { + throw PDFException(PDFTranslationContext::tr("JBIG2 unknown data length for pattern dictionary.")); + } + } + + PDFJBIG2BitmapDecodingParameters parameters; + parameters.MMR = HDMMR; + parameters.GBW = (GRAYMAX + 1) * HDPW; + parameters.GBH = HDPH; + parameters.GBTEMPLATE = HDTEMPLATE; + parameters.TPGDON = false; + parameters.SKIP = nullptr; + parameters.GBAT[0] = { -static_cast(HDPW), 0 }; + parameters.GBAT[1] = { -3, -1 }; + parameters.GBAT[2] = { 2, -2 }; + parameters.GBAT[3] = { -2, -2 }; + parameters.arithmeticDecoder = &arithmeticDecoder; + parameters.arithmeticDecoderState = &genericState; + parameters.data = qMove(mmrData); + + PDFJBIG2Bitmap collectiveBitmap = readBitmap(parameters); + + if (!HDMMR) + { + arithmeticDecoder.finalize(); + } + + if (collectiveBitmap.getWidth() != parameters.GBW || collectiveBitmap.getHeight() != parameters.GBH) + { + throw PDFException(PDFTranslationContext::tr("JBIG2 invalid pattern dictionary collective bitmap.")); + } + + std::vector bitmaps; + bitmaps.reserve(GRAYMAX + 1); + + int offsetX = 0; + for (uint32_t i = 0; i <= GRAYMAX; ++i) + { + bitmaps.push_back(collectiveBitmap.getSubbitmap(offsetX, 0, HDPW, HDPH)); + offsetX += HDPW; + } + + m_segments[header.getSegmentNumber()] = std::make_unique(qMove(bitmaps)); } void PDFJBIG2Decoder::processHalftoneRegion(const PDFJBIG2SegmentHeader& header) { + PDFJBIG2RegionSegmentInformationField field = readRegionSegmentInformationField(); + const uint8_t flags = m_reader.readUnsignedByte(); + const 
bool HMMR = flags & 0x01; + const uint8_t HTEMPLATE = (flags >> 1) & 0x03; + const bool HENABLESKIP = flags & 0x08; + const uint8_t HCOMBOOP = (flags >> 4) & 0x07; + const uint8_t HDEFPIXEL = (flags >> 7) & 0x01; + const uint32_t HGW = m_reader.readUnsignedInt(); + const uint32_t HGH = m_reader.readUnsignedInt(); + const int32_t HGX = m_reader.readSignedInt(); + const int32_t HGY = m_reader.readSignedInt(); + const uint16_t HRX = m_reader.readUnsignedWord(); + const uint16_t HRY = m_reader.readUnsignedWord(); + + PDFJBIG2ReferencedSegments references = getReferencedSegments(header); + if (references.patternDictionaries.size() != 1) + { + throw PDFException(PDFTranslationContext::tr("JBIG2 invalid referenced pattern dictionaries for halftone segment.")); + } + + std::vector HPATS = references.getPatternBitmaps(); + const uint32_t HNUMPATS = static_cast(HPATS.size()); + + if (!HNUMPATS) + { + throw PDFException(PDFTranslationContext::tr("JBIG2 invalid patterns for halftone segment.")); + } + + const PDFJBIG2Bitmap* firstBitmap = HPATS.front(); + const int HPW = firstBitmap->getWidth(); + const int HPH = firstBitmap->getHeight(); + + // TODO: JBIG2 - processHalftoneRegion throw PDFException(PDFTranslationContext::tr("JBIG2 NOT IMPLEMENTED.")); } @@ -3038,6 +3276,10 @@ PDFJBIG2ReferencedSegments PDFJBIG2Decoder::getReferencedSegments(const PDFJBIG2 { segments.symbolDictionaries.push_back(symbolDictionary); } + else if (const PDFJBIG2PatternDictionary* patternDictionary = referredSegment->asPatternDictionary()) + { + segments.patternDictionaries.push_back(patternDictionary); + } else { Q_ASSERT(false); @@ -3377,6 +3619,23 @@ std::vector PDFJBIG2ReferencedSegments::getSymbolBitmaps( return result; } +std::vector PDFJBIG2ReferencedSegments::getPatternBitmaps() const +{ + std::vector result; + + for (const PDFJBIG2PatternDictionary* dictionary : patternDictionaries) + { + const std::vector& bitmaps = dictionary->getBitmaps(); + result.reserve(result.size() + 
bitmaps.size()); + for (const PDFJBIG2Bitmap& bitmap : bitmaps) + { + result.push_back(&bitmap); + } + } + + return result; +} + PDFJBIG2HuffmanDecoder PDFJBIG2ReferencedSegments::getUserTable(PDFBitReader* reader) { if (currentUserCodeTableIndex < codeTables.size()) diff --git a/PdfForQtLib/sources/pdfjbig2decoder.h b/PdfForQtLib/sources/pdfjbig2decoder.h index c74a88c..07d2ddb 100644 --- a/PdfForQtLib/sources/pdfjbig2decoder.h +++ b/PdfForQtLib/sources/pdfjbig2decoder.h @@ -29,6 +29,7 @@ class PDFJBIG2Bitmap; class PDFRenderErrorReporter; class PDFJBIG2HuffmanCodeTable; class PDFJBIG2SymbolDictionary; +class PDFJBIG2PatternDictionary; struct PDFJBIG2HuffmanTableEntry; struct PDFJBIG2BitmapDecodingParameters; @@ -271,26 +272,9 @@ public: virtual const PDFJBIG2SymbolDictionary* asSymbolDictionary() const { return nullptr; } virtual PDFJBIG2SymbolDictionary* asSymbolDictionary() { return nullptr; } -}; -class PDFJBIG2HuffmanCodeTable : public PDFJBIG2Segment -{ -public: - explicit PDFJBIG2HuffmanCodeTable(std::vector&& entries); - virtual ~PDFJBIG2HuffmanCodeTable(); - - virtual const PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() const override { return this; } - virtual PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() override { return this; } - - const std::vector& getEntries() const { return m_entries; } - - /// Builds prefixes using algorithm in annex B.3 of specification. Unused rows are removed. - /// Rows are sorted according the criteria. Prefixes are then filled. - /// \param entries Entries for building the table - static std::vector buildPrefixes(const std::vector& entries); - -private: - std::vector m_entries; + virtual const PDFJBIG2PatternDictionary* asPatternDictionary() const { return nullptr; } + virtual PDFJBIG2PatternDictionary* asPatternDictionary() { return nullptr; } }; /// Huffman decoder - can decode integers / out of band values from huffman table. 
@@ -396,43 +380,20 @@ private: std::vector m_data; }; -class PDFJBIG2SymbolDictionary : public PDFJBIG2Segment -{ -public: - explicit inline PDFJBIG2SymbolDictionary() = default; - explicit inline PDFJBIG2SymbolDictionary(std::vector&& bitmaps, - PDFJBIG2ArithmeticDecoderState&& genericState, - PDFJBIG2ArithmeticDecoderState&& genericRefinementState) : - m_bitmaps(qMove(bitmaps)), - m_genericState(qMove(genericState)), - m_genericRefinementState(qMove(genericRefinementState)) - { - - } - - virtual const PDFJBIG2SymbolDictionary* asSymbolDictionary() const override { return this; } - virtual PDFJBIG2SymbolDictionary* asSymbolDictionary() override { return this; } - - const std::vector& getBitmaps() const { return m_bitmaps; } - const PDFJBIG2ArithmeticDecoderState& getGenericState() const { return m_genericState; } - const PDFJBIG2ArithmeticDecoderState& getGenericRefinementState() const { return m_genericRefinementState; } - -private: - std::vector m_bitmaps; - PDFJBIG2ArithmeticDecoderState m_genericState; - PDFJBIG2ArithmeticDecoderState m_genericRefinementState; -}; - struct PDFJBIG2ReferencedSegments { std::vector bitmaps; std::vector codeTables; std::vector symbolDictionaries; + std::vector patternDictionaries; size_t currentUserCodeTableIndex = 0; /// Returns symbol bitmaps from all symbol dictionaries std::vector getSymbolBitmaps() const; + /// Returns pattern bitmaps from all pattern dictionaries + std::vector getPatternBitmaps() const; + /// Returns current user huffman table according the index. If index /// is out of range, then exception is thrown. PDFJBIG2HuffmanDecoder getUserTable(PDFBitReader* reader); @@ -457,72 +418,6 @@ struct PDFJBIG2ATPosition using PDFJBIG2ATPositions = std::array; -/// Info structure for symbol dictionary decoding procedure -struct PDFJBIG2SymbolDictionaryDecodingParameters -{ - /// If true, huffman encoding is used to decode dictionary, - /// otherwise arithmetic decoding is used to decode dictionary. 
- bool SDHUFF = false; - - /// If true, each symbol is refinement/aggregate. If false, - /// then symbols are ordinary bitmaps. - bool SDREFAGG = false; - - /// Table selector for huffman table encoding (height) - uint8_t SDHUFFDH = 0; - - /// Table selector for huffman table encoding (width) - uint8_t SDHUFFDW = 0; - - /// Table selector for huffman table encoding - uint8_t SDHUFFBMSIZE = 0; - - /// Table selector for huffman table encoding - uint8_t SDHUFFAGGINST = 0; - - /// Is statistics for arithmetic coding used from previous symbol dictionary? - bool isArithmeticCodingStateUsed = false; - - /// Is statistics for arithmetic coding symbols retained for future use? - bool isArithmeticCodingStateRetained = false; - - /// Template for decoding - uint8_t SDTEMPLATE = 0; - - /// Template for decoding refinements - uint8_t SDRTEMPLATE = 0; - - /// Adaptative pixel positions - PDFJBIG2ATPositions SDAT = { }; - - /// Adaptative pixel positions - PDFJBIG2ATPositions SDRAT = { }; - - /// Number of exported symbols - uint32_t SDNUMEXSYMS = 0; - - /// Number of new symbols - uint32_t SDNUMNEWSYMS = 0; - - PDFJBIG2HuffmanDecoder SDHUFFDH_Decoder; - PDFJBIG2HuffmanDecoder SDHUFFDW_Decoder; - PDFJBIG2HuffmanDecoder SDHUFFBMSIZE_Decoder; - PDFJBIG2HuffmanDecoder SDHUFFAGGINST_Decoder; - PDFJBIG2HuffmanDecoder EXRUNLENGTH_Decoder; - - /// Input bitmaps - std::vector SDINSYMS; - - /// Number of input bitmaps - uint32_t SDNUMINSYMS = 0; - - /// Output bitmaps - std::vector SDNEWSYMS; - - /// Widths - std::vector SDNEWSYMWIDTHS; -}; - /// Decoder of JBIG2 data streams. Decodes the black/white monochrome image. /// Handles also global segments. Decoder decodes data using the specification /// ISO/IEC 14492:2001, T.88.