From e20dfe6a5cbe4ee9c438c436554b08379e1e9c76 Mon Sep 17 00:00:00 2001 From: Jakub Melka Date: Sat, 12 Oct 2019 18:10:25 +0200 Subject: [PATCH] CCITT fax decoder - second part --- PdfForQtLib/sources/pdfccittfaxdecoder.cpp | 277 ++++++++++++++++++++- PdfForQtLib/sources/pdfccittfaxdecoder.h | 69 ++++- PdfForQtLib/sources/pdfimage.cpp | 5 +- PdfForQtLib/sources/pdfutils.cpp | 6 + PdfForQtLib/sources/pdfutils.h | 5 + 5 files changed, 358 insertions(+), 4 deletions(-) diff --git a/PdfForQtLib/sources/pdfccittfaxdecoder.cpp b/PdfForQtLib/sources/pdfccittfaxdecoder.cpp index 0ba6234..ddb67df 100644 --- a/PdfForQtLib/sources/pdfccittfaxdecoder.cpp +++ b/PdfForQtLib/sources/pdfccittfaxdecoder.cpp @@ -17,6 +17,7 @@ #include "pdfccittfaxdecoder.h" +#include "pdfexception.h" namespace pdf { @@ -27,6 +28,42 @@ constexpr uint8_t operator "" _bitlength() return sizeof...(Digits); } +enum CCITT_2D_Code_Mode +{ + Pass, + Horizontal, + Vertical_3L, + Vertical_2L, + Vertical_1L, + Vertical_0, + Vertical_1R, + Vertical_2R, + Vertical_3R, + Invalid +}; + +struct PDFCCITT2DModeInfo +{ + CCITT_2D_Code_Mode mode; + uint16_t code; + uint8_t bits; +}; + +static constexpr uint8_t MAX_2D_MODE_BIT_LENGTH = 7; + +static constexpr PDFCCITT2DModeInfo CCITT_2D_CODE_MODES[] = +{ + { Pass, 0b0001, 0001_bitlength }, + { Horizontal, 0b001, 001_bitlength }, + { Vertical_3L, 0b0000010, 0000010_bitlength }, + { Vertical_2L, 0b000010, 000010_bitlength }, + { Vertical_1L, 0b010, 010_bitlength }, + { Vertical_0, 0b1, 1_bitlength }, + { Vertical_1R, 0b011, 011_bitlength }, + { Vertical_2R, 0b000011, 000011_bitlength }, + { Vertical_3R, 0b0000011, 0000011_bitlength } +}; + struct PDFCCITTCode { uint16_t length; @@ -260,12 +297,224 @@ static constexpr PDFCCITTCode CCITT_BLACK_CODES[] = { { 2560, 0b000000011111, 000000011111_bitlength } }; -PDFCCITTFaxDecoder::PDFCCITTFaxDecoder(const QByteArray* stream) : - m_reader(stream, 1) +PDFCCITTFaxDecoder::PDFCCITTFaxDecoder(const QByteArray* stream, const 
PDFCCITTFaxDecoderParameters& parameters) : + m_reader(stream, 1), + m_parameters(parameters) { } +PDFImageData PDFCCITTFaxDecoder::decode() +{ + PDFBitWriter writer(1); + std::vector codingLine; + std::vector referenceLine; + + int row = 0; + const size_t lineSize = m_parameters.columns + 2; + codingLine.resize(lineSize, m_parameters.columns); + referenceLine.resize(lineSize, m_parameters.columns); + bool isUsing2DEncoding = m_parameters.K < 0; + bool isEndOfLineOccured = m_parameters.hasEndOfLine; + + auto updateIsUsing2DEncoding = [this, &isUsing2DEncoding]() + { + if (m_parameters.K > 0) + { + // Mixed encoding + isUsing2DEncoding = !m_reader.read(1); + } + }; + + isEndOfLineOccured = skipFillAndEOL() || isEndOfLineOccured; + updateIsUsing2DEncoding(); + + while (!m_reader.isAtEnd()) + { + int a0_index = 0; + bool isCurrentPixelBlack = false; + + + if (isUsing2DEncoding) + { + int b1_index = 0; + + // 2D encoding + while (codingLine[a0_index] < m_parameters.columns) + { + CCITT_2D_Code_Mode mode = get2DMode(); + switch (mode) + { + case Pass: + { + // In this mode, we set a0 to b2 (from the reference line). In pass mode, + // we do not change the pixel color. Why are we adding 2 to b1_index? + // We want to skip both b1 and b2, because they will be to the left of the new a0. + const size_t b2_index = b1_index + 1; + if (b2_index < referenceLine.size()) + { + addPixels(codingLine, a0_index, referenceLine[b2_index], isCurrentPixelBlack, false); + b1_index += 2; + } + else + { + throw PDFException(PDFTranslationContext::tr("CCITT b2 index out of range.")); + } + + break; + } + + case Horizontal: + { + // We scan two run lengths.
+ int a0a1 = getRunLength(!isCurrentPixelBlack); + int a1a2 = getRunLength(isCurrentPixelBlack); + + addPixels(codingLine, a0_index, codingLine[a0_index] + a0a1, isCurrentPixelBlack, false); + addPixels(codingLine, a0_index, codingLine[a0_index] + a1a2, !isCurrentPixelBlack, false); + + while (referenceLine[b1_index] <= codingLine[a0_index] && b1_index < m_parameters.columns) + { + // We do not want to change the color (b1 should have opposite color of a0, + // should be first changing element of reference line right of a0). + b1_index += 2; + } + + break; + } + + case Vertical_3L: + case Vertical_2L: + case Vertical_1L: + case Vertical_0: + case Vertical_1R: + case Vertical_2R: + case Vertical_3R: + { + const int32_t a1 = static_cast(referenceLine[b1_index]) + mode - static_cast(Vertical_0); + + if (a1 < 0 || a1 > m_parameters.columns) + { + throw PDFException(PDFTranslationContext::tr("Invalid vertical encoding data in CCITT stream.")); + } + + addPixels(codingLine, a0_index, static_cast(a1), isCurrentPixelBlack, mode < Vertical_0); + isCurrentPixelBlack = !isCurrentPixelBlack; + + if (codingLine[a0_index] < m_parameters.columns) + { + ++b1_index; + + while (referenceLine[b1_index] <= codingLine[a0_index] && b1_index < m_parameters.columns) + { + // We do not want to change the color (b1 should have opposite color of a0, + // should be first changing element of reference line right of a0). 
+ b1_index += 2; + } + } + + break; + } + + default: + Q_ASSERT(false); + break; + } + } + } + else + { + // Simple 1D encoding + while (codingLine[a0_index] < m_parameters.columns) + { + const uint32_t sequenceLength = getRunLength(!isCurrentPixelBlack); + addPixels(codingLine, a0_index, codingLine[a0_index] + sequenceLength, isCurrentPixelBlack, false); + isCurrentPixelBlack = !isCurrentPixelBlack; + } + } + + // Write the line to the output buffer + isCurrentPixelBlack = false; + int index = 0; + for (int i = 0; i < m_parameters.columns; ++i) + { + if (i == codingLine[index]) + { + isCurrentPixelBlack = !isCurrentPixelBlack; + ++index; + } + + writer.write((isCurrentPixelBlack != m_parameters.hasBlackIsOne) ? 0 : 1); + } + writer.finishLine(); + + ++row; + + if (!m_parameters.hasEndOfBlock && row == m_parameters.rows) + { + // We have reached the number of rows, stop reading the data + break; + } + // TODO: continue here ("pokracovat zde") + + std::swap(codingLine, referenceLine); + + } +} + +void PDFCCITTFaxDecoder::skipFill() +{ + // This function skips zero bits (because codewords have at most 12 bits, + // we use a 12-bit lookahead to ensure that we do not break the data sequence). + + while (m_reader.look(12) == 0) + { + m_reader.read(1); + } +} + +bool PDFCCITTFaxDecoder::skipEOL() +{ + if (m_reader.look(12) == 1) + { + m_reader.read(12); + return true; + } + + return false; +} + +void PDFCCITTFaxDecoder::addPixels(std::vector& line, int& a0_index, int a1, bool isCurrentPixelBlack, bool isA1LeftOfA0Allowed) +{ + if (a1 > line[a0_index]) + { + if (a1 > m_parameters.columns) + { + throw PDFException(PDFTranslationContext::tr("Invalid index of CCITT changing element a1: a1 = %1, columns = %2.").arg(a1).arg(m_parameters.columns)); + } + + // If we are changing the color, increment a0_index. a0_index == 0 is white, a0_index == 1 is black, etc., + // sequence of white and black runs alternates.
+ if ((a0_index & 1) != isCurrentPixelBlack) + { + ++a0_index; + } + + line[a0_index] = a1; + } + else if (isA1LeftOfA0Allowed && a1 < line[a0_index]) + { + // We want to find first index, for which it holds: + // a1 > line[a0_index - 1], so if we set line[a0_index] = a1, + // then we get a valid increasing sequence. + while (a0_index > 0 && a1 <= line[a0_index - 1]) + { + --a0_index; + } + line[a0_index] = a1; + } +} + uint32_t PDFCCITTFaxDecoder::getRunLength(bool white) { uint32_t value = 0; @@ -322,7 +571,31 @@ uint32_t PDFCCITTFaxDecoder::getCode(const PDFCCITTCode* codes, size_t codeCount } } + throw PDFException(PDFTranslationContext::tr("Invalid CCITT run length code word.")); return 0; } +CCITT_2D_Code_Mode PDFCCITTFaxDecoder::get2DMode() +{ + uint32_t code = 0; + uint8_t bits = 0; + + while (bits <= MAX_2D_MODE_BIT_LENGTH) + { + code = (code << 1) + m_reader.read(1); + ++bits; + + for (const PDFCCITT2DModeInfo& info : CCITT_2D_CODE_MODES) + { + if (info.bits == bits && info.code == code) + { + return info.mode; + } + } + } + + throw PDFException(PDFTranslationContext::tr("Invalid CCITT 2D mode.")); + return Invalid; +} + } // namespace pdf diff --git a/PdfForQtLib/sources/pdfccittfaxdecoder.h b/PdfForQtLib/sources/pdfccittfaxdecoder.h index d79047d..0764000 100644 --- a/PdfForQtLib/sources/pdfccittfaxdecoder.h +++ b/PdfForQtLib/sources/pdfccittfaxdecoder.h @@ -19,18 +19,84 @@ #define PDFCCITTFAXDECODER_H #include "pdfutils.h" +#include "pdfimage.h" namespace pdf { struct PDFCCITTCode; +struct PDFCCITTFaxDecoderParameters +{ + /// Type of encoding. Has this meaning: + /// K < 0 - pure two dimensional encoding (Group 4) + /// K = 0 - pure one dimensional encoding + /// K > 0 - mixed encoding; one dimensional encoded line can be followed by at most K - 1 two dimensional encoded lines + PDFInteger K = 0; + + /// Pixel width of the image. Default value is 1728. + PDFInteger columns = 1728; + + /// Pixel height of the image. 
This value can be zero or absent; in that case, + /// an end-of-block pattern must be present and end the stream. + PDFInteger rows = 0; + + /// This parameter is ignored in this library. If positive, and \p hasEndOfLine is true, + /// and K is nonnegative, then if an error occurs, the end-of-line pattern is searched for and + /// data are copied from the previous line, or are set to white, if the previous line is also damaged. + PDFInteger damagedRowsBeforeError = 0; + + /// Flag indicating that end-of-line patterns are required in the encoded data. + /// The stream filter must always accept end-of-line patterns, but requires them only + /// if this flag is set to true. + bool hasEndOfLine = false; + + /// Flag indicating that lines are byte aligned, i.e. 0 bits are inserted before each line + /// to achieve byte alignment. + bool hasEncodedByteAlign = false; + + /// Flag indicating that the filter expects the data to be terminated by an end-of-block bit pattern. + /// In this case, the \p rows parameter is ignored. Otherwise, the rows parameter is used, or the image + /// is terminated by the end of the data stream, whichever occurs first. The end of block is marked + /// as end-of-facsimile block (EOFB), or return to control (RTC), according to the K parameter. + bool hasEndOfBlock = true; + + /// If this flag is true, then 1 means black pixel, 0 white pixel. Otherwise, if false, + /// then 0 means black pixel and 1 white pixel. + bool hasBlackIsOne = false; +}; + +enum CCITT_2D_Code_Mode; + class PDFCCITTFaxDecoder { public: - explicit PDFCCITTFaxDecoder(const QByteArray* stream); + explicit PDFCCITTFaxDecoder(const QByteArray* stream, const PDFCCITTFaxDecoderParameters& parameters); + + PDFImageData decode(); private: + /// Skip zero bits at the start + void skipFill(); + + /// Skip end-of-line, if it occurred. Returns true, if EOL was skipped. + bool skipEOL(); + + /// Skip fill bits and then try to skip EOL. If EOL is found, then + /// true is returned, otherwise false is returned.
+ bool skipFillAndEOL() { skipFill(); return skipEOL(); } + + /// Add pixels to the line. + /// \param line Line with changing element indices + /// \param a0_index Reference changing element index (index to the \p line array) + /// \param a1 Current changing element index (column index, not index to the \p line array) + /// \param isCurrentPixelBlack Are pixels black? + /// \param isA1LeftOfA0Allowed Allow a1 to be left of a0 (not a0_index, but line[a0_index], which is a0) + void addPixels(std::vector& line, int& a0_index, int a1, bool isCurrentPixelBlack, bool isA1LeftOfA0Allowed); + + /// Get 2D mode from the stream + CCITT_2D_Code_Mode get2DMode(); + uint32_t getRunLength(bool white); uint32_t getWhiteCode(); @@ -39,6 +105,7 @@ private: uint32_t getCode(const PDFCCITTCode* codes, size_t codeCount); PDFBitReader m_reader; + PDFCCITTFaxDecoderParameters m_parameters; }; } // namespace pdf diff --git a/PdfForQtLib/sources/pdfimage.cpp b/PdfForQtLib/sources/pdfimage.cpp index 69ada7c..cab3619 100644 --- a/PdfForQtLib/sources/pdfimage.cpp +++ b/PdfForQtLib/sources/pdfimage.cpp @@ -97,6 +97,7 @@ PDFImage PDFImage::createImage(const PDFDocument* document, const PDFStream* str else if (object.isStream()) { // TODO: Implement Mask Image + PDFImage maskImage = createImage(document, object.getStream(), colorSpace, false, errorReporter); maskingType = PDFImageData::MaskingType::Image; throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Mask image is not implemented.")); } @@ -492,7 +493,7 @@ PDFImage PDFImage::createImage(const PDFDocument* document, const PDFStream* str } else if (imageFilterName == "CCITTFaxDecode" || imageFilterName == "CCF") { - throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Not implemented image filter 'CCITFaxDecode'.")); + throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Not implemented image filter 'CCITTFaxDecode'.")); } else if 
(imageFilterName == "JBIG2Decode") { @@ -658,4 +659,6 @@ OPJ_OFF_T PDFJPEG2000ImageData::skip(OPJ_OFF_T p_nb_bytes, void* p_user_data) return length; } +// Implement image rendering intent + } // namespace pdf diff --git a/PdfForQtLib/sources/pdfutils.cpp b/PdfForQtLib/sources/pdfutils.cpp index cb9e3cb..ad4aeee 100644 --- a/PdfForQtLib/sources/pdfutils.cpp +++ b/PdfForQtLib/sources/pdfutils.cpp @@ -56,6 +56,12 @@ PDFBitReader::Value PDFBitReader::read(PDFBitReader::Value bits) return value; } +PDFBitReader::Value PDFBitReader::look(PDFBitReader::Value bits) const +{ + PDFBitReader temp(*this); + return temp.read(bits); +} + void PDFBitReader::seek(qint64 position) { if (position < m_stream->size()) diff --git a/PdfForQtLib/sources/pdfutils.h b/PdfForQtLib/sources/pdfutils.h index 5620860..a8da857 100644 --- a/PdfForQtLib/sources/pdfutils.h +++ b/PdfForQtLib/sources/pdfutils.h @@ -107,6 +107,11 @@ public: /// then exception is thrown. Value read(Value bits); + /// Reads single n-bit value from the stream. If stream hasn't enough data, + /// then exception is thrown. State of the stream is not changed, i.e., read + /// bits are reverted back. + Value look(Value bits) const; + /// Seeks the desired position in the data stream. If position can't be seeked, /// then exception is thrown. void seek(qint64 position);