JBIG2 - some basic segment interpretations

This commit is contained in:
Jakub Melka 2019-10-27 15:52:50 +01:00
parent abbe7a920a
commit 6a0cdec4e4
5 changed files with 418 additions and 21 deletions

View File

@ -669,9 +669,7 @@ PDFImage PDFImage::createImage(const PDFDocument* document,
}
PDFJBIG2Decoder decoder(qMove(data), qMove(globalData), errorReporter);
decoder.decode();
// TODO: Finish JBIG2 decoder
image.m_imageData = decoder.decode(maskingType);
}
else if (colorSpace || isSoftMask)
{

View File

@ -35,16 +35,16 @@ struct PDFJBIG2HuffmanTableEntry
/// Returns true, if current row represents interval (-∞, value),
/// it means 32bit number must be read and
bool isLowValue() const { return rangeBitLength == 0xFFFE; }
bool isLowValue() const { return type == Type::Negative; }
/// Returns true, if current row represents out-of-band value
bool isOutOfBand() const { return rangeBitLength == HUFFMAN_OOB_VALUE; }
bool isOutOfBand() const { return type == Type::OutOfBand; }
int32_t value; ///< Base value
uint16_t prefixBitLength; ///< Bit length of prefix
uint16_t rangeBitLength; ///< Bit length of additional value
uint16_t prefix; ///< Bit prefix of the huffman code
Type type; ///< Type of the value
int32_t value = 0; ///< Base value
uint16_t prefixBitLength = 0; ///< Bit length of prefix
uint16_t rangeBitLength = 0; ///< Bit length of additional value
uint16_t prefix = 0; ///< Bit prefix of the huffman code
Type type = Type::Standard; ///< Type of the value
};
static constexpr PDFJBIG2HuffmanTableEntry PDFJBIG2StandardHuffmanTable_A[] =
@ -353,7 +353,7 @@ static constexpr PDFJBIG2ArithmeticDecoderQeValue JBIG2_ARITHMETIC_DECODER_QE_VA
void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
{
// Used figure G.1, in annex G, of specification
uint32_t B = m_reader->read(8);
uint32_t B = m_reader->readUnsignedByte();
m_c = (B ^ 0xFF) << 16;
perform_BYTEIN();
m_c = m_c << 7;
@ -364,7 +364,7 @@ void PDFJBIG2ArithmeticDecoder::perform_INITDEC()
void PDFJBIG2ArithmeticDecoder::perform_BYTEIN()
{
// Used figure G.3, in annex G, of specification
const uint32_t B = m_reader->read(8);
const uint32_t B = m_reader->readUnsignedByte();
if (B == 0xFF)
{
const uint32_t B1 = m_reader->look(8);
@ -376,7 +376,7 @@ void PDFJBIG2ArithmeticDecoder::perform_BYTEIN()
{
m_c = m_c + (0xFE00 - (B << 9));
m_ct = 7;
m_reader->read(8);
m_reader->readUnsignedByte();
}
}
else
@ -470,8 +470,8 @@ PDFJBIG2SegmentHeader PDFJBIG2SegmentHeader::read(PDFBitReader* reader)
PDFJBIG2SegmentHeader header;
// Parse segment headers and segment flags
header.m_segmentNumber = reader->read(32);
const uint8_t flags = reader->read(8);
header.m_segmentNumber = reader->readUnsignedInt();
const uint8_t flags = reader->readUnsignedByte();
const uint8_t type = flags & 0x3F;
const bool isPageAssociationSize4ByteLong = flags & 0x40;
@ -479,7 +479,7 @@ PDFJBIG2SegmentHeader PDFJBIG2SegmentHeader::read(PDFBitReader* reader)
// these bits. Data format is described in chapter 7.2.4 of the specification. According
// the specification, values 5 or 6 can't be in bits 6,7,8, of the first byte. If these
// occurs, exception is thrown.
uint32_t retentionField = reader->read(8);
uint32_t retentionField = reader->readUnsignedByte();
uint32_t referredSegmentsCount = retentionField >> 5; // Bits 6,7,8
if (referredSegmentsCount == 5 || referredSegmentsCount == 6)
@ -518,7 +518,7 @@ PDFJBIG2SegmentHeader PDFJBIG2SegmentHeader::read(PDFBitReader* reader)
}
header.m_pageAssociation = reader->read(isPageAssociationSize4ByteLong ? 32 : 8);
header.m_segmentDataLength = reader->read(32);
header.m_segmentDataLength = reader->readUnsignedInt();
header.m_lossless = type & 0x01;
header.m_immediate = type & 0x02;
@ -591,7 +591,7 @@ PDFJBIG2SegmentHeader PDFJBIG2SegmentHeader::read(PDFBitReader* reader)
return header;
}
void PDFJBIG2Decoder::decode()
PDFImageData PDFJBIG2Decoder::decode(PDFImageData::MaskingType maskingType)
{
for (const QByteArray* data : { &m_globalData, &m_data })
{
@ -601,6 +601,27 @@ void PDFJBIG2Decoder::decode()
processStream();
}
}
if (m_pageBitmap.isValid())
{
PDFBitWriter writer(1);
const size_t columns = m_pageBitmap.getWidth();
const size_t rows = m_pageBitmap.getHeight();
for (size_t row = 0; row < rows; ++row)
{
for (size_t column = 0; column < columns; ++column)
{
writer.write(m_pageBitmap.getPixel(column, row));
}
writer.finishLine();
}
return PDFImageData(1, 1, static_cast<uint32_t>(columns), static_cast<uint32_t>(rows), static_cast<uint32_t>((columns + 7) / 8), maskingType, writer.takeByteArray(), { }, { }, { });
}
return PDFImageData();
}
void PDFJBIG2Decoder::processStream()
@ -691,10 +712,179 @@ void PDFJBIG2Decoder::processStream()
}
}
void PDFJBIG2Decoder::processSymbolDictionary(const PDFJBIG2SegmentHeader& header)
{
// TODO: JBIG2 - processSymbolDictionary
}
void PDFJBIG2Decoder::processTextRegion(const PDFJBIG2SegmentHeader& header)
{
// TODO: JBIG2 - processTextRegion
}
void PDFJBIG2Decoder::processPatternDictionary(const PDFJBIG2SegmentHeader& header)
{
// TODO: JBIG2 - processPatternDictionary
}
void PDFJBIG2Decoder::processHalftoneRegion(const PDFJBIG2SegmentHeader& header)
{
// TODO: JBIG2 - processHalftoneRegion
}
void PDFJBIG2Decoder::processGenericRegion(const PDFJBIG2SegmentHeader& header)
{
// TODO: JBIG2 - processGenericRegion
}
void PDFJBIG2Decoder::processGenericRefinementRegion(const PDFJBIG2SegmentHeader& header)
{
// TODO: JBIG2 - processGenericRefinementRegion
}
/// Processes the page information segment. Reads, in this order, the page width,
/// page height, resolution (8 bytes, skipped), one byte of flags and two bytes
/// of striping information. From the flags it stores the default pixel value,
/// the default composition operator and whether that operator can be overridden,
/// and finally it allocates the page bitmap filled with the default pixel value.
/// An unknown page height (0xFFFFFFFF) is treated as zero rows, so the resulting
/// page bitmap is empty (invalid) in that case.
/// \throws PDFException if the page width or height exceeds the maximal bitmap size
void PDFJBIG2Decoder::processPageInformation(const PDFJBIG2SegmentHeader&)
{
    const uint32_t width = m_reader.readUnsignedInt();
    const uint32_t height = m_reader.readUnsignedInt();

    // Skip 8 bytes - resolution. We do not need the resolution values.
    m_reader.skipBytes(sizeof(uint32_t) * 2);

    const uint8_t flags = m_reader.readUnsignedByte();

    // Striping information is not used, but it must be consumed from the stream
    m_reader.readUnsignedWord();

    m_pageDefaultPixelValue = (flags & 0x04) ? 0xFF : 0x00;
    m_pageDefaultCompositionOperatorOverriden = (flags & 0x40) != 0;

    // Bits 3 and 4 of the flags select the default composition operator
    const uint8_t defaultOperator = (flags >> 3) & 0b11;
    switch (defaultOperator)
    {
        case 0:
            m_pageDefaultCompositionOperator = PDFJBIG2BitOperation::Or;
            break;

        case 1:
            m_pageDefaultCompositionOperator = PDFJBIG2BitOperation::And;
            break;

        case 2:
            m_pageDefaultCompositionOperator = PDFJBIG2BitOperation::Xor;
            break;

        case 3:
            m_pageDefaultCompositionOperator = PDFJBIG2BitOperation::NotXor;
            break;

        default:
            Q_ASSERT(false);
            break;
    }

    const uint32_t correctedWidth = width;
    const uint32_t correctedHeight = (height != 0xFFFFFFFF) ? height : 0;

    checkBitmapSize(correctedWidth);
    checkBitmapSize(correctedHeight);

    // The bitmap must be created from the validated (corrected) dimensions. Using
    // the raw height would bypass the size check for the "unknown height" marker
    // 0xFFFFFFFF and request an enormous allocation.
    m_pageBitmap = PDFJBIG2Bitmap(correctedWidth, correctedHeight, m_pageDefaultPixelValue);
}
/// Handles the end-of-page segment. The segment must carry no data; a warning
/// is reported, because such segment is not expected in a PDF document.
/// \param header Header of the processed segment
/// \throws PDFException if the segment declares a nonzero data length
void PDFJBIG2Decoder::processEndOfPage(const PDFJBIG2SegmentHeader& header)
{
    const auto extraDataLength = header.getSegmentDataLength();
    if (extraDataLength != 0)
    {
        throw PDFException(PDFTranslationContext::tr("JBIG2 end-of-page segment shouldn't contain any data, but has extra data of %1 bytes.").arg(extraDataLength));
    }

    // Segment is valid, but according to the specification it should not appear
    // in a PDF, so the user is informed by a warning.
    m_errorReporter->reportRenderError(RenderErrorType::Warning, PDFTranslationContext::tr("JBIG2 end-of-page segment detected and ignored."));
}
/// Handles the end-of-stripe segment. Its content is not interpreted,
/// the segment data are only skipped in the input stream.
/// \param header Header of the processed segment
void PDFJBIG2Decoder::processEndOfStripe(const PDFJBIG2SegmentHeader& header)
{
    // Nothing to decode here, only move past the segment data
    skipSegment(header);
}
/// Handles the end-of-file segment. The segment must carry no data; a warning
/// is reported, because such segment is not expected in a PDF document.
/// \param header Header of the processed segment
/// \throws PDFException if the segment declares a nonzero data length
void PDFJBIG2Decoder::processEndOfFile(const PDFJBIG2SegmentHeader& header)
{
    const auto extraDataLength = header.getSegmentDataLength();
    if (extraDataLength != 0)
    {
        throw PDFException(PDFTranslationContext::tr("JBIG2 end-of-file segment shouldn't contain any data, but has extra data of %1 bytes.").arg(extraDataLength));
    }

    // Segment is valid, but according to the specification it should not appear
    // in a PDF, so the user is informed by a warning.
    m_errorReporter->reportRenderError(RenderErrorType::Warning, PDFTranslationContext::tr("JBIG2 end-of-file segment detected and ignored."));
}
/// Handles the profiles segment. Its content is not interpreted,
/// the segment data are only skipped in the input stream.
/// \param header Header of the processed segment
void PDFJBIG2Decoder::processProfiles(const PDFJBIG2SegmentHeader& header)
{
    // Profiles are not needed for decoding, only move past the segment data
    skipSegment(header);
}
/// Processes the code table segment, which defines a custom huffman table. Reads
/// the flags byte, the lowest and the highest value of the table, and then the table
/// lines: regular lines covering the interval [low, high), the lower range line,
/// the upper range line and (if flagged) the out-of-band line. Prefixes are then
/// assigned by PDFJBIG2HuffmanCodeTable::buildPrefixes and the table is stored
/// into the segment map under the number of this segment.
/// \param header Header of the processed segment
/// \throws PDFException if the low value can't be decremented without underflow,
///         or if a regular line declares a range length which can't be represented
void PDFJBIG2Decoder::processCodeTables(const PDFJBIG2SegmentHeader& header)
{
    const uint8_t flags = m_reader.readUnsignedByte();
    const int32_t htLow = m_reader.readSignedInt();
    const int32_t htHigh = m_reader.readSignedInt();

    if (htLow == std::numeric_limits<int32_t>::min())
    {
        // Check for underflow, we subtract 1 from htLow value
        throw PDFException(PDFTranslationContext::tr("JBIG2 underflow of the low value in huffman table."));
    }

    // Flags: bit 0 - table has out-of-band line, bits 1-3 - size of prefix length
    // field minus one, bits 4-6 - size of range length field minus one.
    const bool hasOOB = flags & 0x01;
    const PDFBitReader::Value htps = ((flags >> 1) & 0b111) + 1;
    const PDFBitReader::Value htrs = ((flags >> 4) & 0b111) + 1;

    std::vector<PDFJBIG2HuffmanTableEntry> table;
    table.reserve(32);

    // Read standard values. The running lower bound is kept in 64 bits, because
    // the range length comes from the (untrusted) stream and adding 2^rangeBitLength
    // to a 32-bit value could overflow.
    constexpr uint16_t MAX_RANGE_BIT_LENGTH = 32;
    int64_t currentRangeLow = htLow;
    while (currentRangeLow < htHigh)
    {
        PDFJBIG2HuffmanTableEntry entry;
        entry.prefixBitLength = m_reader.read(htps);
        entry.rangeBitLength = m_reader.read(htrs);

        if (entry.rangeBitLength > MAX_RANGE_BIT_LENGTH)
        {
            // The range length field can hold values up to 255, shifting by such amount is undefined
            throw PDFException(PDFTranslationContext::tr("JBIG2 invalid range bit length in huffman table."));
        }

        // Safe cast: htLow <= currentRangeLow < htHigh, both bounds are 32-bit values
        entry.value = static_cast<int32_t>(currentRangeLow);
        currentRangeLow += static_cast<int64_t>(1) << entry.rangeBitLength;
        table.push_back(entry);
    }

    // Read "low" value
    PDFJBIG2HuffmanTableEntry lowEntry;
    lowEntry.prefixBitLength = m_reader.read(htps);
    lowEntry.rangeBitLength = 32;
    lowEntry.value = htLow - 1;
    lowEntry.type = PDFJBIG2HuffmanTableEntry::Type::Negative;
    table.push_back(lowEntry);

    // Read "high" value
    PDFJBIG2HuffmanTableEntry highEntry;
    highEntry.prefixBitLength = m_reader.read(htps);
    highEntry.rangeBitLength = 32;
    highEntry.value = htHigh;
    table.push_back(highEntry);

    // Read out-of-band value, if we have it
    if (hasOOB)
    {
        PDFJBIG2HuffmanTableEntry oobEntry;
        oobEntry.prefixBitLength = m_reader.read(htps);
        oobEntry.type = PDFJBIG2HuffmanTableEntry::Type::OutOfBand;
        table.push_back(oobEntry);
    }

    table = PDFJBIG2HuffmanCodeTable::buildPrefixes(table);
    m_segments[header.getSegmentNumber()] = std::make_unique<PDFJBIG2HuffmanCodeTable>(qMove(table));
}
void PDFJBIG2Decoder::processExtension(const PDFJBIG2SegmentHeader& header)
{
// We will read the extension header, and check "Necessary bit"
const uint32_t extensionHeader = m_reader.read(32);
const uint32_t extensionHeader = m_reader.readUnsignedInt();
if (extensionHeader & 0x8000000)
{
const uint32_t extensionCode = extensionHeader & 0x3FFFFFFF;
@ -711,4 +901,100 @@ void PDFJBIG2Decoder::processExtension(const PDFJBIG2SegmentHeader& header)
}
}
/// Skips data of the given segment in the input stream. It is possible
/// only for segments which have their data length defined.
/// \param header Header of the segment to be skipped
/// \throws PDFException if the data length of the segment is not defined
void PDFJBIG2Decoder::skipSegment(const PDFJBIG2SegmentHeader& header)
{
    if (!header.isSegmentDataLengthDefined())
    {
        throw PDFException(PDFTranslationContext::tr("JBIG2 segment with unknown data length can't be skipped."));
    }

    m_reader.skipBytes(header.getSegmentDataLength());
}
/// Verifies, that a bitmap dimension (width or height) does not
/// exceed the limit MAX_BITMAP_SIZE.
/// \param size Checked dimension
/// \throws PDFException if the dimension is larger than MAX_BITMAP_SIZE
void PDFJBIG2Decoder::checkBitmapSize(const uint32_t size)
{
    const bool isWithinLimit = size <= MAX_BITMAP_SIZE;
    if (isWithinLimit)
    {
        return;
    }

    throw PDFException(PDFTranslationContext::tr("JBIG2 maximum bitmap size exceeded (%1 > %2).").arg(size).arg(MAX_BITMAP_SIZE));
}
/// Constructs an empty bitmap with zero width, zero height and no pixel data
PDFJBIG2Bitmap::PDFJBIG2Bitmap() :
    m_width(0),
    m_height(0),
    m_data()
{

}
/// Constructs a bitmap of given dimensions, all pixels are set to zero
/// \param width Width of the bitmap in pixels
/// \param height Height of the bitmap in pixels
PDFJBIG2Bitmap::PDFJBIG2Bitmap(size_t width, size_t height) :
    m_width(width),
    m_height(height)
{
    // One byte is stored for each pixel
    const size_t pixelCount = width * height;
    m_data.assign(pixelCount, 0);
}
/// Constructs a bitmap of given dimensions, all pixels are set to the given value
/// \param width Width of the bitmap in pixels
/// \param height Height of the bitmap in pixels
/// \param fill Value stored into each pixel
PDFJBIG2Bitmap::PDFJBIG2Bitmap(size_t width, size_t height, uint8_t fill) :
    m_width(width),
    m_height(height)
{
    // One byte is stored for each pixel
    const size_t pixelCount = width * height;
    m_data.assign(pixelCount, fill);
}
/// Constructs the huffman code table, taking ownership of the passed entries
/// \param entries Table entries, they are moved into the table
PDFJBIG2HuffmanCodeTable::PDFJBIG2HuffmanCodeTable(std::vector<PDFJBIG2HuffmanTableEntry>&& entries) :
    m_entries(qMove(entries))
{

}
/// Destructor is defined out of line, no extra cleanup is performed
PDFJBIG2HuffmanCodeTable::~PDFJBIG2HuffmanCodeTable() = default;
std::vector<PDFJBIG2HuffmanTableEntry> PDFJBIG2HuffmanCodeTable::buildPrefixes(const std::vector<PDFJBIG2HuffmanTableEntry>& entries)
{
std::vector<PDFJBIG2HuffmanTableEntry> result = entries;
result.erase(std::remove_if(result.begin(), result.end(), [](const PDFJBIG2HuffmanTableEntry& entry) { return entry.prefixBitLength == 0; }), result.end());
std::stable_sort(result.begin(), result.end(), [](const PDFJBIG2HuffmanTableEntry& l, const PDFJBIG2HuffmanTableEntry& r) { return l.prefixBitLength < r.prefixBitLength; });
if (!result.empty())
{
result[0].prefix = 0;
// Strategy: we will have variable prefix containing actual prefix value. If we are changing
// the number of bits, then we must update "FIRSTCODE" variable as in the specification, i.e.
// compute FIRSTCODE[current bit length] = (FIRSTCODE[previous bit length] + #number of items) * 2.
// Number of items is automatically computed by incrementing the variable prefix, so at the end
// of each cycle, when we are about to shift number of bits in next cycle, we have computed
// variable (FIRSTCODE[last bit length] + #number of items), so in next cycle, we just do a bit shift.
uint16_t prefix = 1;
uint16_t count = 1;
for (uint32_t i = 1; i < result.size(); ++i)
{
const uint16_t bitShift = result[i].prefixBitLength - result[i - 1].prefixBitLength;
if (bitShift > 0)
{
// Bit length of the prefix changed, we must shift the prefix by amount of new bits
prefix = prefix << bitShift;
count = 0;
}
result[i].prefix = prefix;
++prefix;
++count;
if (count > (1 << result[i].prefixBitLength))
{
// We have "overflow" of values, for binary number with prefixBitLength digits (0/1), we can
// have only 2^prefixBitLength values, which we exceeded. This is unrecoverable error.
throw PDFException(PDFTranslationContext::tr("JBIG2 overflow of prefix bit values in huffman table."));
}
}
}
return result;
}
} // namespace pdf

View File

@ -19,10 +19,24 @@
#define PDFJBIG2DECODER_H
#include "pdfutils.h"
#include "pdfcolorspaces.h"
namespace pdf
{
class PDFJBIG2Bitmap;
class PDFRenderErrorReporter;
class PDFJBIG2HuffmanCodeTable;
struct PDFJBIG2HuffmanTableEntry;
/// Bit operation used as the composition operator of the decoder. The page
/// information segment selects one of Or, And, Xor, NotXor as the page default
/// (flag values 0, 1, 2, 3 in this order).
enum class PDFJBIG2BitOperation
{
/// No valid operation, initial value of the decoder before page information is read
Invalid,
/// Selected by flag value 0
Or,
/// Selected by flag value 1
And,
/// Selected by flag value 2
Xor,
/// Selected by flag value 3
NotXor
};
/// Arithmetic decoder state for JBIG2 data streams. It contains state for context,
/// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used
@ -168,6 +182,68 @@ private:
std::vector<uint32_t> m_referredSegments;
};
/// Base class for decoded JBIG2 segments. The accessor functions return nullptr
/// by default; a derived class overrides the pair matching its own type, so the
/// concrete type of a segment can be queried without a dynamic cast.
class PDFJBIG2Segment
{
public:
    explicit PDFJBIG2Segment() = default;
    virtual ~PDFJBIG2Segment() = default;

    /// Returns this segment as a bitmap, or nullptr, if it is not a bitmap
    virtual const PDFJBIG2Bitmap* asBitmap() const { return nullptr; }
    virtual PDFJBIG2Bitmap* asBitmap() { return nullptr; }

    /// Returns this segment as a huffman code table, or nullptr, if it is not a code table
    virtual const PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() const { return nullptr; }
    virtual PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() { return nullptr; }
};
class PDFJBIG2HuffmanCodeTable : public PDFJBIG2Segment
{
public:
explicit PDFJBIG2HuffmanCodeTable(std::vector<PDFJBIG2HuffmanTableEntry>&& entries);
virtual ~PDFJBIG2HuffmanCodeTable();
virtual const PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() const override { return this; }
virtual PDFJBIG2HuffmanCodeTable* asHuffmanCodeTable() override { return this; }
const std::vector<PDFJBIG2HuffmanTableEntry>& getEntries() const { return m_entries; }
/// Builds prefixes using algorithm in annex B.3 of specification. Unused rows are removed.
/// Rows are sorted according the criteria. Prefixes are then filled.
/// \param entries Entries for building the table
static std::vector<PDFJBIG2HuffmanTableEntry> buildPrefixes(const std::vector<PDFJBIG2HuffmanTableEntry>& entries);
private:
std::vector<PDFJBIG2HuffmanTableEntry> m_entries;
};
class PDFJBIG2Bitmap : public PDFJBIG2Segment
{
public:
explicit PDFJBIG2Bitmap();
explicit PDFJBIG2Bitmap(size_t width, size_t height);
explicit PDFJBIG2Bitmap(size_t width, size_t height, uint8_t fill);
virtual const PDFJBIG2Bitmap* asBitmap() const override { return this; }
virtual PDFJBIG2Bitmap* asBitmap() override { return this; }
inline size_t getWidth() const { return m_width; }
inline size_t getHeight() const { return m_height; }
inline size_t getPixelCount() const { return m_width * m_height; }
inline uint8_t getPixel(size_t x, size_t y) const { return m_data[y * m_width + x]; }
inline void setPixel(size_t x, size_t y, uint8_t value) { m_data[y * m_width + x] = value; }
inline void fill(uint8_t value) { std::fill(m_data.begin(), m_data.end(), value); }
inline void fillZero() { fill(0); }
inline void fillOne() { fill(0xFF); }
inline bool isValid() const { return getPixelCount() > 0; }
private:
size_t m_width;
size_t m_height;
std::vector<uint8_t> m_data;
};
/// Decoder of JBIG2 data streams. Decodes the black/white monochrome image.
/// Handles also global segments. Decoder decodes data using the specification
/// ISO/IEC 14492:2001, T.88.
@ -178,14 +254,22 @@ public:
m_data(qMove(data)),
m_globalData(qMove(globalData)),
m_errorReporter(errorReporter),
m_reader(nullptr, 8)
m_reader(nullptr, 8),
m_pageDefaultPixelValue(0),
m_pageDefaultCompositionOperator(PDFJBIG2BitOperation::Invalid),
m_pageDefaultCompositionOperatorOverriden(false)
{
}
void decode();
/// Decodes image interpreting the data as JBIG2 data stream. If image cannot
/// be decoded, exception is thrown (or invalid PDFImageData is returned).
/// \param maskingType Image masking type
PDFImageData decode(PDFImageData::MaskingType maskingType);
private:
static constexpr const uint32_t MAX_BITMAP_SIZE = 65536;
/// Processes current data stream (reads all data from the stream, interprets
/// them as segments and processes the segments).
void processStream();
@ -204,10 +288,21 @@ private:
void processCodeTables(const PDFJBIG2SegmentHeader& header);
void processExtension(const PDFJBIG2SegmentHeader& header);
void skipSegment(const PDFJBIG2SegmentHeader& header);
static void checkBitmapSize(const uint32_t size);
QByteArray m_data;
QByteArray m_globalData;
PDFRenderErrorReporter* m_errorReporter;
PDFBitReader m_reader;
std::map<uint32_t, std::unique_ptr<PDFJBIG2Segment>> m_segments;
/// Page default pixel value
uint8_t m_pageDefaultPixelValue;
PDFJBIG2BitOperation m_pageDefaultCompositionOperator;
bool m_pageDefaultCompositionOperatorOverriden;
PDFJBIG2Bitmap m_pageBitmap;
};
} // namespace pdf

View File

@ -120,6 +120,12 @@ bool PDFBitReader::isAtEnd() const
return (m_position >= m_stream->size()) && m_bitsInBuffer == 0;
}
int32_t PDFBitReader::readSignedInt()
{
    // Read raw 32 bits and interpret the same bit pattern as a signed number
    const uint32_t rawBits = read(32);
    return static_cast<int32_t>(rawBits);
}
PDFBitWriter::PDFBitWriter(Value bitsPerComponent) :
m_bitsPerComponent(bitsPerComponent),
m_mask((static_cast<Value>(1) << m_bitsPerComponent) - static_cast<Value>(1)),

View File

@ -135,6 +135,18 @@ public:
/// result of this function is sometimes inaccurate)
int getPosition() const { return m_position; }
/// Reads 32 bits from the stream and returns them interpreted
/// as signed 32-bit integer
int32_t readSignedInt();
/// Reads 32 bits from the stream and returns them as unsigned 32-bit integer
uint32_t readUnsignedInt()
{
    return static_cast<uint32_t>(read(32));
}
/// Reads 16 bits from the stream and returns them as unsigned 16-bit integer
uint16_t readUnsignedWord()
{
    return static_cast<uint16_t>(read(16));
}
/// Reads 8 bits from the stream and returns them as unsigned 8-bit integer
uint8_t readUnsignedByte()
{
    return static_cast<uint8_t>(read(8));
}
private:
const QByteArray* m_stream;
int m_position;