mirror of https://github.com/JakubMelka/PDF4QT.git
CCITT fax decoder - second part
This commit is contained in:
parent
ec5785d52a
commit
e20dfe6a5c
|
@ -17,6 +17,7 @@
|
||||||
|
|
||||||
|
|
||||||
#include "pdfccittfaxdecoder.h"
|
#include "pdfccittfaxdecoder.h"
|
||||||
|
#include "pdfexception.h"
|
||||||
|
|
||||||
namespace pdf
|
namespace pdf
|
||||||
{
|
{
|
||||||
|
@ -27,6 +28,42 @@ constexpr uint8_t operator "" _bitlength()
|
||||||
return sizeof...(Digits);
|
return sizeof...(Digits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Coding modes of the two-dimensional CCITT scheme. The seven vertical modes
/// are numbered consecutively around Vertical_0, so the expression
/// (mode - Vertical_0) gives the signed horizontal offset in the range -3 .. +3
/// (negative values are the "left" modes, positive values the "right" modes).
enum CCITT_2D_Code_Mode
{
    Pass        = 0,    ///< Pass mode
    Horizontal  = 1,    ///< Horizontal mode (two explicit run lengths follow)
    Vertical_3L = 2,    ///< Vertical mode, offset -3
    Vertical_2L = 3,    ///< Vertical mode, offset -2
    Vertical_1L = 4,    ///< Vertical mode, offset -1
    Vertical_0  = 5,    ///< Vertical mode, offset 0
    Vertical_1R = 6,    ///< Vertical mode, offset +1
    Vertical_2R = 7,    ///< Vertical mode, offset +2
    Vertical_3R = 8,    ///< Vertical mode, offset +3
    Invalid     = 9     ///< No valid mode
};
|
||||||
|
|
||||||
|
/// One entry of the table of two-dimensional mode code words: the mode,
/// the value of its code word and the length of the code word in bits.
struct PDFCCITT2DModeInfo
|
||||||
|
{
|
||||||
|
/// Mode selected by the code word
CCITT_2D_Code_Mode mode;
|
||||||
|
/// Value of the code word
uint16_t code;
|
||||||
|
/// Number of bits of the code word (leading zero bits are included)
uint8_t bits;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Length, in bits, of the longest code word in CCITT_2D_CODE_MODES
/// (the seven-bit Vertical_3L and Vertical_3R code words).
static constexpr uint8_t MAX_2D_MODE_BIT_LENGTH = 7;
|
||||||
|
|
||||||
|
/// Code words of the two-dimensional coding modes. Each entry holds the mode,
/// the value of the code word and its length in bits. The length is produced
/// by the _bitlength literal, which counts the digits as written, so the
/// leading zeros of a code word are included in its length.
static constexpr PDFCCITT2DModeInfo CCITT_2D_CODE_MODES[] =
|
||||||
|
{
|
||||||
|
{ Pass, 0b0001, 0001_bitlength },
|
||||||
|
{ Horizontal, 0b001, 001_bitlength },
|
||||||
|
{ Vertical_3L, 0b0000010, 0000010_bitlength },
|
||||||
|
{ Vertical_2L, 0b000010, 000010_bitlength },
|
||||||
|
{ Vertical_1L, 0b010, 010_bitlength },
|
||||||
|
{ Vertical_0, 0b1, 1_bitlength },
|
||||||
|
{ Vertical_1R, 0b011, 011_bitlength },
|
||||||
|
{ Vertical_2R, 0b000011, 000011_bitlength },
|
||||||
|
{ Vertical_3R, 0b0000011, 0000011_bitlength }
|
||||||
|
};
|
||||||
|
|
||||||
struct PDFCCITTCode
|
struct PDFCCITTCode
|
||||||
{
|
{
|
||||||
uint16_t length;
|
uint16_t length;
|
||||||
|
@ -260,12 +297,224 @@ static constexpr PDFCCITTCode CCITT_BLACK_CODES[] = {
|
||||||
{ 2560, 0b000000011111, 000000011111_bitlength }
|
{ 2560, 0b000000011111, 000000011111_bitlength }
|
||||||
};
|
};
|
||||||
|
|
||||||
PDFCCITTFaxDecoder::PDFCCITTFaxDecoder(const QByteArray* stream) :
|
PDFCCITTFaxDecoder::PDFCCITTFaxDecoder(const QByteArray* stream, const PDFCCITTFaxDecoderParameters& parameters) :
|
||||||
m_reader(stream, 1)
|
m_reader(stream, 1),
|
||||||
|
m_parameters(parameters)
|
||||||
{
|
{
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decodes the CCITT stream row by row and writes the pixels (one bit per pixel)
/// to a bit writer.
///
/// Each row is described by a "coding line": an increasing sequence of column
/// indices of changing elements. An even array index marks a change to black,
/// an odd index a change to white, and the sequence ends with the value
/// 'columns'. The previously decoded row is kept as the "reference line" for
/// two-dimensional coding; the first row uses an all-white reference line.
///
/// \note The decoder is unfinished in this revision: handling of the data
///       between two rows is missing, and the decoded rows are not yet turned
///       into a PDFImageData, so the function ends by throwing an exception
///       instead of running off its end without a return value.
///
/// \throws PDFException if the stream contains invalid data, if the number
///         of columns is not positive, and (for now) always after decoding.
PDFImageData PDFCCITTFaxDecoder::decode()
{
    if (m_parameters.columns <= 0)
    {
        // A non-positive width would produce a bogus (huge) line buffer size
        throw PDFException(PDFTranslationContext::tr("Invalid number of columns in CCITT stream: %1.").arg(m_parameters.columns));
    }

    PDFBitWriter writer(1);

    const int columns = static_cast<int>(m_parameters.columns);

    // Two extra entries leave room for the terminating sentinels
    const size_t lineSize = static_cast<size_t>(m_parameters.columns) + 2;
    std::vector<int> codingLine(lineSize, columns);
    std::vector<int> referenceLine(lineSize, columns);

    int row = 0;
    bool isUsing2DEncoding = m_parameters.K < 0;
    bool isEndOfLineOccured = m_parameters.hasEndOfLine;

    auto updateIsUsing2DEncoding = [this, &isUsing2DEncoding]()
    {
        if (m_parameters.K > 0)
        {
            // Mixed encoding: a zero tag bit selects two-dimensional coding
            isUsing2DEncoding = !m_reader.read(1);
        }
    };

    // Moves b1_index to the first changing element of the reference line, which
    // lies right of a0. The index advances by two, so the color of the changing
    // element is preserved. The index is tested before the element is read, so
    // the reference line is never accessed out of range.
    auto advanceB1 = [&referenceLine, &codingLine, columns](int& b1_index, int a0_index)
    {
        while (b1_index < columns && referenceLine[b1_index] <= codingLine[a0_index])
        {
            b1_index += 2;
        }
    };

    isEndOfLineOccured = skipFillAndEOL() || isEndOfLineOccured;
    updateIsUsing2DEncoding();

    while (!m_reader.isAtEnd())
    {
        int a0_index = 0;
        bool isCurrentPixelBlack = false;

        // Every row starts with a0 at column zero. Without this reset the first
        // entry would still hold a value of an older row (or the sentinel), and
        // the decoding loops below would not decode anything.
        codingLine[0] = 0;

        if (isUsing2DEncoding)
        {
            int b1_index = 0;

            // 2D encoding
            while (codingLine[a0_index] < columns)
            {
                const CCITT_2D_Code_Mode mode = get2DMode();
                switch (mode)
                {
                    case Pass:
                    {
                        // In pass mode, a0 is moved to b2 of the reference line and the
                        // pixel color does not change. Both b1 and b2 end up left of
                        // the new a0, so b1_index is advanced by two.
                        const size_t b2_index = static_cast<size_t>(b1_index) + 1;
                        if (b2_index >= referenceLine.size())
                        {
                            throw PDFException(PDFTranslationContext::tr("CCITT b2 index out of range."));
                        }

                        addPixels(codingLine, a0_index, referenceLine[b2_index], isCurrentPixelBlack, false);
                        b1_index += 2;
                        break;
                    }

                    case Horizontal:
                    {
                        // Two run lengths follow: the first one has the current color,
                        // the second one the opposite color.
                        const int a0a1 = getRunLength(!isCurrentPixelBlack);
                        const int a1a2 = getRunLength(isCurrentPixelBlack);

                        addPixels(codingLine, a0_index, codingLine[a0_index] + a0a1, isCurrentPixelBlack, false);
                        addPixels(codingLine, a0_index, codingLine[a0_index] + a1a2, !isCurrentPixelBlack, false);

                        advanceB1(b1_index, a0_index);
                        break;
                    }

                    case Vertical_3L:
                    case Vertical_2L:
                    case Vertical_1L:
                    case Vertical_0:
                    case Vertical_1R:
                    case Vertical_2R:
                    case Vertical_3R:
                    {
                        // Pass mode can move b1_index behind the end of the reference line
                        if (static_cast<size_t>(b1_index) >= referenceLine.size())
                        {
                            throw PDFException(PDFTranslationContext::tr("CCITT b1 index out of range."));
                        }

                        // The vertical modes are numbered consecutively around Vertical_0,
                        // the difference is the signed offset of a1 from b1.
                        const int32_t offset = static_cast<int32_t>(mode) - static_cast<int32_t>(Vertical_0);
                        const int32_t a1 = static_cast<int32_t>(referenceLine[b1_index]) + offset;

                        if (a1 < 0 || a1 > columns)
                        {
                            throw PDFException(PDFTranslationContext::tr("Invalid vertical encoding data in CCITT stream."));
                        }

                        // Only the "left" modes are allowed to place a1 left of a0
                        addPixels(codingLine, a0_index, a1, isCurrentPixelBlack, mode < Vertical_0);
                        isCurrentPixelBlack = !isCurrentPixelBlack;

                        if (codingLine[a0_index] < columns)
                        {
                            ++b1_index;
                            advanceB1(b1_index, a0_index);
                        }

                        break;
                    }

                    default:
                        Q_ASSERT(false);
                        break;
                }
            }
        }
        else
        {
            // Simple 1D encoding: white and black runs alternate, starting with white
            while (codingLine[a0_index] < columns)
            {
                const uint32_t sequenceLength = getRunLength(!isCurrentPixelBlack);
                addPixels(codingLine, a0_index, codingLine[a0_index] + sequenceLength, isCurrentPixelBlack, false);
                isCurrentPixelBlack = !isCurrentPixelBlack;
            }
        }

        // The coding line ends at a0_index (its value is 'columns' now). Entries
        // behind it can hold stale values; overwrite them with the sentinel,
        // because this line becomes the reference line of the next row.
        for (size_t i = static_cast<size_t>(a0_index) + 1; i < lineSize; ++i)
        {
            codingLine[i] = columns;
        }

        // Write the line to the output buffer
        isCurrentPixelBlack = false;
        int index = 0;
        for (int i = 0; i < columns; ++i)
        {
            if (i == codingLine[index])
            {
                isCurrentPixelBlack = !isCurrentPixelBlack;
                ++index;
            }

            writer.write((isCurrentPixelBlack != m_parameters.hasBlackIsOne) ? 0 : 1);
        }
        writer.finishLine();

        ++row;

        if (!m_parameters.hasEndOfBlock && row == m_parameters.rows)
        {
            // We have reached number of rows, stop reading the data
            break;
        }

        // TODO: Continue here (the original revision has a "continue here" marker
        // at this place). NOTE(review): presumably the end-of-line / end-of-block
        // patterns, byte alignment and the tag bit of mixed encoding have to be
        // processed here before the next row is decoded - confirm.

        std::swap(codingLine, referenceLine);
    }

    // TODO: Create the image data from the rows stored in the bit writer. Until
    // that is implemented, report an error rather than leave the function
    // without returning a value.
    throw PDFException(PDFTranslationContext::tr("CCITT fax decoder is not fully implemented."));
}
|
||||||
|
|
||||||
|
void PDFCCITTFaxDecoder::skipFill()
|
||||||
|
{
|
||||||
|
// This functions skips zero bits (because codewords have at most 12 bits,
|
||||||
|
// we use 12 bit lookahead to ensure, that we do not broke data sequence).
|
||||||
|
|
||||||
|
while (m_reader.look(12) == 0)
|
||||||
|
{
|
||||||
|
m_reader.read(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PDFCCITTFaxDecoder::skipEOL()
|
||||||
|
{
|
||||||
|
if (m_reader.look(12) == 1)
|
||||||
|
{
|
||||||
|
m_reader.read(12);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PDFCCITTFaxDecoder::addPixels(std::vector<int>& line, int& a0_index, int a1, bool isCurrentPixelBlack, bool isA1LeftOfA0Allowed)
|
||||||
|
{
|
||||||
|
if (a1 > line[a0_index])
|
||||||
|
{
|
||||||
|
if (a1 > m_parameters.columns)
|
||||||
|
{
|
||||||
|
throw PDFException(PDFTranslationContext::tr("Invalid index of CCITT changing element a1: a1 = %1, columns = %2.").arg(a1).arg(m_parameters.columns));
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we are changing the color, increment a0_index. a0_index == 0 is white, a0_index == 1 is black, etc.,
|
||||||
|
// sequence of white and black runs alternates.
|
||||||
|
if ((a0_index & 1) != isCurrentPixelBlack)
|
||||||
|
{
|
||||||
|
++a0_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
line[a0_index] = a1;
|
||||||
|
}
|
||||||
|
else if (isA1LeftOfA0Allowed && a1 < line[a0_index])
|
||||||
|
{
|
||||||
|
// We want to find first index, for which it holds:
|
||||||
|
// a1 > line[a0_index - 1], so if we set line[a0_index] = a1,
|
||||||
|
// then we get a valid increasing sequence.
|
||||||
|
while (a0_index > 0 && a1 <= line[a0_index - 1])
|
||||||
|
{
|
||||||
|
--a0_index;
|
||||||
|
}
|
||||||
|
line[a0_index] = a1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t PDFCCITTFaxDecoder::getRunLength(bool white)
|
uint32_t PDFCCITTFaxDecoder::getRunLength(bool white)
|
||||||
{
|
{
|
||||||
uint32_t value = 0;
|
uint32_t value = 0;
|
||||||
|
@ -322,7 +571,31 @@ uint32_t PDFCCITTFaxDecoder::getCode(const PDFCCITTCode* codes, size_t codeCount
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
throw PDFException(PDFTranslationContext::tr("Invalid CCITT run length code word."));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads a code word of a two-dimensional mode from the stream. Bits are read
/// one by one, and after each bit the table CCITT_2D_CODE_MODES is searched
/// for a code word of the same length and value.
/// \returns Mode of the first matching code word
/// \throws PDFException if no code word matches within MAX_2D_MODE_BIT_LENGTH bits
CCITT_2D_Code_Mode PDFCCITTFaxDecoder::get2DMode()
{
    uint32_t code = 0;

    // At most MAX_2D_MODE_BIT_LENGTH bits are read; no code word is longer,
    // so reading another bit could only consume data of the following code.
    for (uint8_t bits = 1; bits <= MAX_2D_MODE_BIT_LENGTH; ++bits)
    {
        code = (code << 1) + m_reader.read(1);

        for (const PDFCCITT2DModeInfo& info : CCITT_2D_CODE_MODES)
        {
            if (info.bits == bits && info.code == code)
            {
                return info.mode;
            }
        }
    }

    throw PDFException(PDFTranslationContext::tr("Invalid CCITT 2D mode."));
}
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
|
|
@ -19,18 +19,84 @@
|
||||||
#define PDFCCITTFAXDECODER_H
|
#define PDFCCITTFAXDECODER_H
|
||||||
|
|
||||||
#include "pdfutils.h"
|
#include "pdfutils.h"
|
||||||
|
#include "pdfimage.h"
|
||||||
|
|
||||||
namespace pdf
|
namespace pdf
|
||||||
{
|
{
|
||||||
|
|
||||||
struct PDFCCITTCode;
|
struct PDFCCITTCode;
|
||||||
|
|
||||||
|
struct PDFCCITTFaxDecoderParameters
|
||||||
|
{
|
||||||
|
/// Type of encoding. Has this meaning:
|
||||||
|
/// K < 0 - pure two dimensional encoding (Group 4)
|
||||||
|
/// K = 0 - pure one dimensional encoding
|
||||||
|
/// K > 0 - mixed encoding; one dimensional encoded line can be followed by at most K - 1 two dimensional encoded lines
|
||||||
|
PDFInteger K = 0;
|
||||||
|
|
||||||
|
/// Pixel width of the image. Default value is 1728.
|
||||||
|
PDFInteger columns = 1728;
|
||||||
|
|
||||||
|
/// Pixel height of the image. This value can be zero or be absent, in this case,
|
||||||
|
/// end of block pattern must be present and end the stream.
|
||||||
|
PDFInteger rows = 0;
|
||||||
|
|
||||||
|
/// This parameter is ignored in this library. If positive, and \p hasEndOfLine is true,
|
||||||
|
/// and K is nonnegative, then if error occurs, end-of-line pattern is searched and
|
||||||
|
/// data are copied from previous line, or are set to white, if previous line is also damaged.
|
||||||
|
PDFInteger damagedRowsBeforeError = 0;
|
||||||
|
|
||||||
|
/// Flag indicating, that end of line patterns are required in the encoded data.
|
||||||
|
/// Stream filter must always accept end of line patterns, but require them only,
|
||||||
|
/// if this flag is set to true.
|
||||||
|
bool hasEndOfLine = false;
|
||||||
|
|
||||||
|
/// Flag indicating that lines are byte aligned, i.e. 0 bits are inserted before each line
|
||||||
|
/// to achieve byte alignment.
|
||||||
|
bool hasEncodedByteAlign = false;
|
||||||
|
|
||||||
|
/// Flag indicating, that filter excepts the data be terminated by end of block bit pattern.
|
||||||
|
/// In this case, \p rows parameter is ignored. Otherwise, rows parameter is used, or image
|
||||||
|
/// is terminated by end of data stream, whichever occurs first. The end of block is marked
|
||||||
|
/// as end-of-facsimile block (EOFB), or return to control (RTC), according the K parameter.
|
||||||
|
bool hasEndOfBlock = true;
|
||||||
|
|
||||||
|
/// If this flag is true, then 1 means black pixel, 0 white pixel. Otherwise, if false,
|
||||||
|
/// then 0 means black pixel and 1 white pixel.
|
||||||
|
bool hasBlackIsOne = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
// NOTE(review): ISO C++ does not permit an opaque declaration of an unscoped
// enumeration without a fixed underlying type; this line compiles only where
// the compiler accepts it as an extension (e.g. MSVC). Consider giving the
// enumeration an explicit underlying type, both here and at its definition.
enum CCITT_2D_Code_Mode;
|
||||||
|
|
||||||
class PDFCCITTFaxDecoder
|
class PDFCCITTFaxDecoder
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit PDFCCITTFaxDecoder(const QByteArray* stream);
|
explicit PDFCCITTFaxDecoder(const QByteArray* stream, const PDFCCITTFaxDecoderParameters& parameters);
|
||||||
|
|
||||||
|
PDFImageData decode();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/// Skip zero bits at the start
|
||||||
|
void skipFill();
|
||||||
|
|
||||||
|
/// Skip end-of-line, if occured. Returns true, if EOL was skipped.
|
||||||
|
bool skipEOL();
|
||||||
|
|
||||||
|
/// Skip fill bits and then try to skip EOL. If EOL is found, then
|
||||||
|
/// true is returned, otherwise false is returned.
|
||||||
|
bool skipFillAndEOL() { skipFill(); return skipEOL(); }
|
||||||
|
|
||||||
|
/// Add pixels to the line.
|
||||||
|
/// \param line Line with changing element indices
|
||||||
|
/// \param a0_index Reference changing element index (index to the \p line array)
|
||||||
|
/// \param a1 Current changing element index (column index, not index to the \p line array)
|
||||||
|
/// \param isCurrentPixelBlack Are pixels black?
|
||||||
|
/// \param isA1LeftOfA0Allowed Allow a1 to be left of a0 (not a0_index, but line[a0_index], which is a0)
|
||||||
|
void addPixels(std::vector<int>& line, int& a0_index, int a1, bool isCurrentPixelBlack, bool isA1LeftOfA0Allowed);
|
||||||
|
|
||||||
|
/// Get 2D mode from the stream
|
||||||
|
CCITT_2D_Code_Mode get2DMode();
|
||||||
|
|
||||||
uint32_t getRunLength(bool white);
|
uint32_t getRunLength(bool white);
|
||||||
|
|
||||||
uint32_t getWhiteCode();
|
uint32_t getWhiteCode();
|
||||||
|
@ -39,6 +105,7 @@ private:
|
||||||
uint32_t getCode(const PDFCCITTCode* codes, size_t codeCount);
|
uint32_t getCode(const PDFCCITTCode* codes, size_t codeCount);
|
||||||
|
|
||||||
PDFBitReader m_reader;
|
PDFBitReader m_reader;
|
||||||
|
PDFCCITTFaxDecoderParameters m_parameters;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
|
|
@ -97,6 +97,7 @@ PDFImage PDFImage::createImage(const PDFDocument* document, const PDFStream* str
|
||||||
else if (object.isStream())
|
else if (object.isStream())
|
||||||
{
|
{
|
||||||
// TODO: Implement Mask Image
|
// TODO: Implement Mask Image
|
||||||
|
PDFImage maskImage = createImage(document, object.getStream(), colorSpace, false, errorReporter);
|
||||||
maskingType = PDFImageData::MaskingType::Image;
|
maskingType = PDFImageData::MaskingType::Image;
|
||||||
throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Mask image is not implemented."));
|
throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Mask image is not implemented."));
|
||||||
}
|
}
|
||||||
|
@ -492,7 +493,7 @@ PDFImage PDFImage::createImage(const PDFDocument* document, const PDFStream* str
|
||||||
}
|
}
|
||||||
else if (imageFilterName == "CCITTFaxDecode" || imageFilterName == "CCF")
|
else if (imageFilterName == "CCITTFaxDecode" || imageFilterName == "CCF")
|
||||||
{
|
{
|
||||||
throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Not implemented image filter 'CCITFaxDecode'."));
|
throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Not implemented image filter 'CCITTFaxDecode'."));
|
||||||
}
|
}
|
||||||
else if (imageFilterName == "JBIG2Decode")
|
else if (imageFilterName == "JBIG2Decode")
|
||||||
{
|
{
|
||||||
|
@ -658,4 +659,6 @@ OPJ_OFF_T PDFJPEG2000ImageData::skip(OPJ_OFF_T p_nb_bytes, void* p_user_data)
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Implement image rendering intent
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
|
|
@ -56,6 +56,12 @@ PDFBitReader::Value PDFBitReader::read(PDFBitReader::Value bits)
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the value of the next \p bits bits without consuming them. The value
/// is read from a temporary copy of this reader, so this reader is not changed.
PDFBitReader::Value PDFBitReader::look(PDFBitReader::Value bits) const
{
    return PDFBitReader(*this).read(bits);
}
|
||||||
|
|
||||||
void PDFBitReader::seek(qint64 position)
|
void PDFBitReader::seek(qint64 position)
|
||||||
{
|
{
|
||||||
if (position < m_stream->size())
|
if (position < m_stream->size())
|
||||||
|
|
|
@ -107,6 +107,11 @@ public:
|
||||||
/// then exception is thrown.
|
/// then exception is thrown.
|
||||||
Value read(Value bits);
|
Value read(Value bits);
|
||||||
|
|
||||||
|
/// Reads single n-bit value from the stream. If stream hasn't enough data,
|
||||||
|
/// then exception is thrown. State of the stream is not changed, i.e., read
|
||||||
|
/// bits are reverted back.
|
||||||
|
Value look(Value bits) const;
|
||||||
|
|
||||||
/// Seeks the desired position in the data stream. If position can't be seeked,
|
/// Seeks the desired position in the data stream. If position can't be seeked,
|
||||||
/// then exception is thrown.
|
/// then exception is thrown.
|
||||||
void seek(qint64 position);
|
void seek(qint64 position);
|
||||||
|
|
Loading…
Reference in New Issue