JBIG2 - Generic refinement region

This commit is contained in:
Jakub Melka 2019-10-30 17:44:04 +01:00
parent a60b13c3a0
commit 2e359d63d3
5 changed files with 465 additions and 14 deletions

View File

@ -89,19 +89,15 @@ void MainWindow::on_actionAdd_JBIG2_image_triggered()
try
{
if (data.startsWith( "\x97\x4A\x42\x32\x0D\x0A\x1A\x0A"))
{
data = data.mid(13);
}
pdf::PDFJBIG2Decoder decoder(data, QByteArray(), this);
pdf::PDFImageData imageData = decoder.decode(pdf::PDFImageData::MaskingType::None);
pdf::PDFImageData imageData = decoder.decodeFileStream();
if (imageData.isValid())
{
QImage image(imageData.getWidth(), imageData.getHeight(), QImage::Format_Mono);
const uchar* sourceData = reinterpret_cast<const uchar*>(imageData.getData().constData());
std::copy(sourceData, sourceData + imageData.getData().size(), image.bits());
Q_ASSERT(imageData.getData().size() == image.byteCount());
std::transform(sourceData, sourceData + imageData.getData().size(), image.bits(), [](const uchar value) { return ~value; });
addImage(file.fileName(), qMove(image));
}
}

View File

@ -646,6 +646,105 @@ PDFImageData PDFJBIG2Decoder::decode(PDFImageData::MaskingType maskingType)
return PDFImageData();
}
PDFImageData PDFJBIG2Decoder::decodeFileStream()
{
m_reader = PDFBitReader(&m_data, 8);
constexpr const char* JBIG2_FILE_HEADER = "\x97\x4A\x42\x32\x0D\x0A\x1A\x0A";
if (!m_data.startsWith(JBIG2_FILE_HEADER))
{
throw PDFException(PDFTranslationContext::tr("Invalid JBIG2 file header."));
}
m_reader.skipBytes(std::strlen(JBIG2_FILE_HEADER));
// File flags
const uint8_t fileFlags = m_reader.readUnsignedByte();
if (fileFlags & 0xFC)
{
// Jakub Melka: According the specification, bits 2-7 should be reserved and zero.
// If they are nonzero, probably a new version of JBIG2 format exists, but
// is not decodable by this decoder. So, in this case, we don't do decoding
// and report error immediately.
throw PDFException(PDFTranslationContext::tr("Invalid JBIG2 file header flags."));
}
const bool isFileOrganizationSequential = fileFlags & 0x01;
const bool isUknownNumberOfPages = fileFlags & 0x02;
if (isUknownNumberOfPages)
{
throw PDFException(PDFTranslationContext::tr("Invalid JBIG2 file - unknown number of pages."));
}
const uint32_t numberOfPages = m_reader.readUnsignedInt();
if (numberOfPages != 1)
{
throw PDFException(PDFTranslationContext::tr("Invalid JBIG2 file - invalid number of pages (%1).").arg(numberOfPages));
}
if (isFileOrganizationSequential)
{
// We are lucky, file organization is sequential. Just copy the data.
m_data = m_reader.readSubstream(-1);
}
else
{
// We must transform random organization to the sequential one
QByteArray sequentialData;
struct SegmentInfo
{
PDFJBIG2SegmentHeader header;
QByteArray headerData;
QByteArray segmentData;
};
std::vector<SegmentInfo> segmentInfos;
while (true)
{
SegmentInfo segmentInfo;
const int headerStartPosition = m_reader.getPosition();
segmentInfo.header = PDFJBIG2SegmentHeader::read(&m_reader);
const int headerEndPosition = m_reader.getPosition();
segmentInfo.headerData = m_data.mid(headerStartPosition, headerEndPosition - headerStartPosition);
segmentInfos.push_back(qMove(segmentInfo));
if (segmentInfo.header.getSegmentType() == JBIG2SegmentType::EndOfFile)
{
break;
}
}
for (SegmentInfo& info : segmentInfos)
{
if (!info.header.isSegmentDataLengthDefined())
{
throw PDFException(PDFTranslationContext::tr("Invalid JBIG2 file - segment length is not defined."));
}
info.segmentData = m_reader.readSubstream(info.header.getSegmentDataLength());
}
for (const SegmentInfo& info : segmentInfos)
{
if (info.header.getSegmentType() == JBIG2SegmentType::EndOfPage ||
info.header.getSegmentType() == JBIG2SegmentType::EndOfFile)
{
continue;
}
sequentialData.append(info.headerData);
sequentialData.append(info.segmentData);
}
m_data = qMove(sequentialData);
}
return decode(PDFImageData::MaskingType::None);
}
void PDFJBIG2Decoder::processStream()
{
while (!m_reader.isAtEnd())
@ -841,7 +940,83 @@ void PDFJBIG2Decoder::processGenericRegion(const PDFJBIG2SegmentHeader& header)
void PDFJBIG2Decoder::processGenericRefinementRegion(const PDFJBIG2SegmentHeader& header)
{
// TODO: JBIG2 - processGenericRefinementRegion
PDFJBIG2RegionSegmentInformationField field = readRegionSegmentInformationField();
const uint8_t flags = m_reader.readUnsignedByte();
if ((flags & 0b11111100) != 0)
{
throw PDFException(PDFTranslationContext::tr("JBIG2 - invalid flags for generic refinement region."));
}
const uint8_t GRTEMPLATE = flags & 0x01;
const bool TPGRON = flags & 0x02;
PDFJBIG2ATPositions GRAT = { };
if (GRTEMPLATE == 0)
{
GRAT = readATTemplatePixelPositions(2);
}
PDFJBIG2Bitmap GRREFERENCE;
const std::vector<uint32_t>& referredSegments = header.getReferredSegments();
switch (referredSegments.size())
{
case 0:
{
// According the specification, operator must be REPLACE
if (field.operation != PDFJBIG2BitOperation::Replace)
{
throw PDFException(PDFTranslationContext::tr("JBIG2 - operation must be REPLACE for generic refinement region."));
}
GRREFERENCE = m_pageBitmap.getSubbitmap(field.offsetX, field.offsetY, field.width, field.height);
break;
}
case 1:
{
GRREFERENCE = getBitmap(referredSegments.front(), true);
break;
}
default:
throw PDFException(PDFTranslationContext::tr("JBIG2 - invalid referred segments (%1) for generic refinement region.").arg(referredSegments.size()));
}
if (GRREFERENCE.getWidth() != field.width || GRREFERENCE.getHeight() != field.height)
{
throw PDFException(PDFTranslationContext::tr("JBIG2 - invalid referred bitmap size [%1 x %2] instead of [%3 x %4] for generic refinement region.").arg(GRREFERENCE.getWidth()).arg(GRREFERENCE.getHeight()).arg(field.width).arg(field.height));
}
resetArithmeticStatesGenericRefinement(GRTEMPLATE);
PDFJBIG2BitmapRefinementDecodingParameters parameters;
parameters.GRTEMPLATE = GRTEMPLATE;
parameters.TPGRON = TPGRON;
parameters.GRW = field.width;
parameters.GRH = field.height;
parameters.GRAT = GRAT;
parameters.arithmeticDecoderState = &m_arithmeticDecoderStates[Refinement];
parameters.GRREFERENCE = &GRREFERENCE;
parameters.GRREFERENCEX = 0;
parameters.GRREFERENCEY = 0;
PDFJBIG2Bitmap refinementBitmap = readRefinementBitmap(parameters);
if (refinementBitmap.isValid())
{
if (header.isImmediate())
{
m_pageBitmap.paint(refinementBitmap, field.offsetX, field.offsetY, field.operation, m_pageSizeUndefined, m_pageDefaultPixelValue);
}
else
{
m_segments[header.getSegmentNumber()] = std::make_unique<PDFJBIG2Bitmap>(qMove(refinementBitmap));
}
}
else
{
throw PDFException(PDFTranslationContext::tr("JBIG2 - invalid bitmap for generic refinement region."));
}
}
void PDFJBIG2Decoder::processPageInformation(const PDFJBIG2SegmentHeader&)
@ -889,7 +1064,7 @@ void PDFJBIG2Decoder::processPageInformation(const PDFJBIG2SegmentHeader&)
checkBitmapSize(correctedWidth);
checkBitmapSize(correctedHeight);
m_pageBitmap = PDFJBIG2Bitmap(width, height, m_pageDefaultPixelValue);
m_pageBitmap = PDFJBIG2Bitmap(correctedWidth, correctedHeight, m_pageDefaultPixelValue);
}
void PDFJBIG2Decoder::processEndOfPage(const PDFJBIG2SegmentHeader& header)
@ -1004,6 +1179,37 @@ void PDFJBIG2Decoder::processExtension(const PDFJBIG2SegmentHeader& header)
}
}
PDFJBIG2Bitmap PDFJBIG2Decoder::getBitmap(const uint32_t segmentIndex, bool remove)
{
PDFJBIG2Bitmap result;
auto it = m_segments.find(segmentIndex);
if (it != m_segments.cend())
{
PDFJBIG2Bitmap* bitmap = it->second->asBitmap();
if (!bitmap)
{
throw PDFException(PDFTranslationContext::tr("JBIG2 segment %1 is not a bitmap.").arg(segmentIndex));
}
if (remove)
{
result = qMove(*bitmap);
m_segments.erase(it);
}
else
{
result = *bitmap;
}
return result;
}
throw PDFException(PDFTranslationContext::tr("JBIG2 bitmap segment %1 not found.").arg(segmentIndex));
return result;
}
PDFJBIG2Bitmap PDFJBIG2Decoder::readBitmap(const PDFJBIG2BitmapDecodingParameters& parameters)
{
if (parameters.MMR)
@ -1014,6 +1220,8 @@ PDFJBIG2Bitmap PDFJBIG2Decoder::readBitmap(const PDFJBIG2BitmapDecodingParameter
ccittParameters.columns = parameters.width;
ccittParameters.rows = parameters.height;
ccittParameters.hasEndOfBlock = false;
ccittParameters.decode = { 1.0, 0.0 };
ccittParameters.hasBlackIsOne = true;
PDFCCITTFaxDecoder decoder(&parameters.data, ccittParameters);
PDFImageData data = decoder.decode();
@ -1200,14 +1408,139 @@ PDFJBIG2Bitmap PDFJBIG2Decoder::readBitmap(const PDFJBIG2BitmapDecodingParameter
return PDFJBIG2Bitmap();
}
PDFJBIG2Bitmap PDFJBIG2Decoder::readRefinementBitmap(const PDFJBIG2BitmapRefinementDecodingParameters& parameters)
{
// Use algorithm described in 6.3.5.6
PDFJBIG2Bitmap GRREG(parameters.GRW, parameters.GRH, 0x00);
// Use arithmetic encoding. For templates, we fill bytes from right to left, from bottom to top bits,
// filling from lowest bit to highest bit. We will have a maximum of 13 bits.
uint32_t LTP = 0;
const uint32_t LTPContext = !parameters.GRTEMPLATE ? 0b0000100000000 : 0b0010000000;
PDFBitReader reader(m_reader.getStream(), 1);
PDFJBIG2ArithmeticDecoder decoder(&reader);
decoder.initialize();
auto createContext = [&](int x, int y) -> uint16_t
{
uint16_t pixelContext = 0;
uint16_t pixelContextShift = 0;
auto createContextBit = [&](const PDFJBIG2Bitmap* bitmap, int offsetX, int offsetY)
{
uint16_t bit = bitmap->getPixelSafe(offsetX, offsetY) ? 1 : 0;
bit = bit << pixelContextShift;
pixelContext |= bit;
++pixelContextShift;
};
if (!parameters.GRTEMPLATE)
{
// 13-bit context
createContextBit(&GRREG, x - 1, y);
createContextBit(&GRREG, x + 1, y - 1);
createContextBit(&GRREG, x + 0, y - 1);
createContextBit(&GRREG, x + parameters.GRAT[0].x, y + parameters.GRAT[0].y);
const int refX = x - parameters.GRREFERENCEX;
const int refY = y - parameters.GRREFERENCEY;
createContextBit(parameters.GRREFERENCE, refX + 1, refY + 1);
createContextBit(parameters.GRREFERENCE, refX + 0, refY + 1);
createContextBit(parameters.GRREFERENCE, refX - 1, refY + 1);
createContextBit(parameters.GRREFERENCE, refX + 1, refY + 0);
createContextBit(parameters.GRREFERENCE, refX + 0, refY + 0);
createContextBit(parameters.GRREFERENCE, refX - 1, refY + 0);
createContextBit(parameters.GRREFERENCE, refX + 1, refY - 1);
createContextBit(parameters.GRREFERENCE, refX + 0, refY - 1);
createContextBit(parameters.GRREFERENCE, refX + parameters.GRAT[1].x, refY + parameters.GRAT[1].y);
}
else
{
// 10-bit context
createContextBit(&GRREG, x - 1, y);
createContextBit(&GRREG, x + 1, y - 1);
createContextBit(&GRREG, x + 0, y - 1);
createContextBit(&GRREG, x - 1, y - 1);
const int refX = x - parameters.GRREFERENCEX;
const int refY = y - parameters.GRREFERENCEY;
createContextBit(parameters.GRREFERENCE, refX + 1, refY + 1);
createContextBit(parameters.GRREFERENCE, refX + 0, refY + 1);
createContextBit(parameters.GRREFERENCE, refX + 1, refY + 0);
createContextBit(parameters.GRREFERENCE, refX + 0, refY + 0);
createContextBit(parameters.GRREFERENCE, refX - 1, refY + 0);
createContextBit(parameters.GRREFERENCE, refX + 0, refY - 1);
}
return pixelContext;
};
auto evaluateTPGRPIX = [&](int x, int y, uint8_t& value) -> bool
{
const int refX = x - parameters.GRREFERENCEX;
const int refY = y - parameters.GRREFERENCEY;
value = parameters.GRREFERENCE->getPixelSafe(refX, refY);
return parameters.GRREFERENCE->getPixelSafe(refX - 1, refY - 1) == value &&
parameters.GRREFERENCE->getPixelSafe(refX + 0, refY - 1) == value &&
parameters.GRREFERENCE->getPixelSafe(refX + 1, refY - 1) == value &&
parameters.GRREFERENCE->getPixelSafe(refX - 1, refY + 0) == value &&
parameters.GRREFERENCE->getPixelSafe(refX + 1, refY + 0) == value &&
parameters.GRREFERENCE->getPixelSafe(refX - 1, refY + 1) == value &&
parameters.GRREFERENCE->getPixelSafe(refX + 0, refY + 1) == value &&
parameters.GRREFERENCE->getPixelSafe(refX + 1, refY + 1) == value;
};
for (int32_t y = 0; y < static_cast<int32_t>(parameters.GRH); ++y)
{
if (parameters.TPGRON)
{
LTP = LTP ^ decoder.readBit(LTPContext, parameters.arithmeticDecoderState);
if (LTP)
{
}
}
if (!LTP)
{
for (int32_t x = 0; x < static_cast<int32_t>(parameters.GRW); ++x)
{
GRREG.setPixel(x, y, (decoder.readBit(createContext(x, y), parameters.arithmeticDecoderState)) ? 0xFF : 0x00);
}
}
else
{
for (int32_t x = 0; x < static_cast<int32_t>(parameters.GRW); ++x)
{
uint8_t TPGRVAL = 0;
if (evaluateTPGRPIX(x, y, TPGRVAL))
{
GRREG.setPixel(x, y, TPGRVAL);
}
else
{
GRREG.setPixel(x, y, (decoder.readBit(createContext(x, y), parameters.arithmeticDecoderState)) ? 0xFF : 0x00);
}
}
}
}
return GRREG;
}
PDFJBIG2RegionSegmentInformationField PDFJBIG2Decoder::readRegionSegmentInformationField()
{
PDFJBIG2RegionSegmentInformationField result;
result.width = m_reader.readUnsignedInt();
result.height = m_reader.readUnsignedInt();
result.offsetX = m_reader.readUnsignedInt();
result.offsetY = m_reader.readUnsignedInt();
result.offsetX = m_reader.readSignedInt();
result.offsetY = m_reader.readSignedInt();
// Parse flags
const uint8_t flags = m_reader.readUnsignedByte();
@ -1287,6 +1620,27 @@ void PDFJBIG2Decoder::resetArithmeticStatesGeneric(const uint8_t templateMode)
m_arithmeticDecoderStates[Generic].reset(bits);
}
void PDFJBIG2Decoder::resetArithmeticStatesGenericRefinement(const uint8_t templateMode)
{
uint8_t bits = 0;
switch (templateMode)
{
case 0:
bits = 13;
break;
case 1:
bits = 10;
break;
default:
Q_ASSERT(false);
break;
}
m_arithmeticDecoderStates[Refinement].reset(bits);
}
void PDFJBIG2Decoder::skipSegment(const PDFJBIG2SegmentHeader& header)
{
if (header.isSegmentDataLengthDefined())
@ -1351,6 +1705,21 @@ PDFJBIG2Bitmap::~PDFJBIG2Bitmap()
}
PDFJBIG2Bitmap PDFJBIG2Bitmap::getSubbitmap(int offsetX, int offsetY, int width, int height) const
{
PDFJBIG2Bitmap result(width, height, 0x00);
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
result.setPixel(x, y, getPixelSafe(x + offsetX, y + offsetY));
}
}
return result;
}
void PDFJBIG2Bitmap::paint(const PDFJBIG2Bitmap& bitmap, int offsetX, int offsetY, PDFJBIG2BitOperation operation, bool expandY, const uint8_t expandPixel)
{
if (!bitmap.isValid())

View File

@ -195,6 +195,9 @@ public:
/// Returns true, if segmend data length is defined
inline bool isSegmentDataLengthDefined() const { return m_segmentDataLength != 0xFFFFFFFF; }
/// Returns referred segments
inline const std::vector<uint32_t>& getReferredSegments() const { return m_referredSegments; }
/// Reads the segment header from the data stream. If error occurs, then
/// exception is thrown.
static PDFJBIG2SegmentHeader read(PDFBitReader* reader);
@ -276,6 +279,14 @@ public:
inline bool isValid() const { return getPixelCount() > 0; }
/// Returns subbitmap of this bitmap. If some pixels of subbitmap are outside
/// of current bitmap, then they are reset to zero.
/// \param offsetX Horizontal offset of subbitmap
/// \param offsetY Vertical offset of subbitmap
/// \param width Width of subbitmap
/// \param height Height of subbitmap
PDFJBIG2Bitmap getSubbitmap(int offsetX, int offsetY, int width, int height) const;
/// Paints another bitmap onto this bitmap. If bitmap is invalid, nothing is done.
/// If \p expandY is true, height of target bitmap is expanded to fit source draw area.
/// \param bitmap Bitmap to be painted on this
@ -303,8 +314,8 @@ struct PDFJBIG2RegionSegmentInformationField
{
uint32_t width = 0;
uint32_t height = 0;
uint32_t offsetX = 0;
uint32_t offsetY = 0;
int32_t offsetX = 0;
int32_t offsetY = 0;
PDFJBIG2BitOperation operation = PDFJBIG2BitOperation::Invalid;
};
@ -349,6 +360,37 @@ struct PDFJBIG2BitmapDecodingParameters
const PDFJBIG2Bitmap* SKIP = nullptr;
};
/// Info structure for refinement bitmap decoding parameters
struct PDFJBIG2BitmapRefinementDecodingParameters
{
/// Template mode used (0/1)
uint8_t GRTEMPLATE = 0;
/// Prediction (same as previous row)
bool TPGRON = false;
/// Bitmap width
uint32_t GRW = 0;
/// Bitmap height
uint32_t GRH = 0;
/// Reference bitmap
const PDFJBIG2Bitmap* GRREFERENCE = nullptr;
/// Offset x
int32_t GRREFERENCEX = 0;
/// Offset y
int32_t GRREFERENCEY = 0;
/// State of arithmetic decoder
PDFJBIG2ArithmeticDecoderState* arithmeticDecoderState = nullptr;
/// Positions of adaptative pixels
PDFJBIG2ATPositions GRAT = { };
};
/// Decoder of JBIG2 data streams. Decodes the black/white monochrome image.
/// Handles also global segments. Decoder decodes data using the specification
/// ISO/IEC 14492:2001, T.88.
@ -382,12 +424,18 @@ public:
/// \param maskingType Image masking type
PDFImageData decode(PDFImageData::MaskingType maskingType);
/// Decodes image interpreting the data as JBIG2 file stream (not data stream).
/// Decoding procedure also handles file header/file flags and number of pages.
/// If number of pages is invalid, then exception is thrown.
PDFImageData decodeFileStream();
private:
static constexpr const uint32_t MAX_BITMAP_SIZE = 65536;
enum ArithmeticDecoderStates
{
Generic,
Refinement,
EndState
};
@ -409,10 +457,20 @@ private:
void processCodeTables(const PDFJBIG2SegmentHeader& header);
void processExtension(const PDFJBIG2SegmentHeader& header);
/// Reads the bitmap using decoding parameters
/// Returns bitmap for given segment index. If bitmap is not found, or segment
/// is of different type, then exception is thrown.
/// \param segmentIndex Segment index with bitmap
/// \param remove Remove the segment?
PDFJBIG2Bitmap getBitmap(const uint32_t segmentIndex, bool remove);
/// Reads bitmap using decoding parameters
/// \param parameters Decoding parameters
PDFJBIG2Bitmap readBitmap(const PDFJBIG2BitmapDecodingParameters& parameters);
/// Reads refined bitmap using decoding parameters
/// \param parameters Decoding parameters
PDFJBIG2Bitmap readRefinementBitmap(const PDFJBIG2BitmapRefinementDecodingParameters& parameters);
/// Reads the region segment information field (see chapter 7.4.1)
PDFJBIG2RegionSegmentInformationField readRegionSegmentInformationField();
@ -422,6 +480,9 @@ private:
/// Reset arithmetic decoder stats for generic
void resetArithmeticStatesGeneric(const uint8_t templateMode);
/// Reset arithmetic decoder stats for generic refinement
void resetArithmeticStatesGenericRefinement(const uint8_t templateMode);
void skipSegment(const PDFJBIG2SegmentHeader& header);
static void checkBitmapSize(const uint32_t size);

View File

@ -132,6 +132,26 @@ int8_t PDFBitReader::readSignedByte()
return *reinterpret_cast<const int8_t*>(&value);
}
QByteArray PDFBitReader::readSubstream(int length)
{
if (m_bitsInBuffer)
{
throw PDFException(PDFTranslationContext::tr("Can't get substream - remaining %1 bits in buffer.").arg(m_bitsInBuffer));
}
QByteArray result = m_stream->mid(m_position, length);
if (length == -1)
{
m_position = m_stream->size();
}
else
{
skipBytes(length);
}
return result;
}
PDFBitWriter::PDFBitWriter(Value bitsPerComponent) :
m_bitsPerComponent(bitsPerComponent),
m_mask((static_cast<Value>(1) << m_bitsPerComponent) - static_cast<Value>(1)),

View File

@ -153,6 +153,11 @@ public:
/// Return underlying byte stream
const QByteArray* getStream() const { return m_stream; }
/// Reads substream from current stream. This function works only on byte boundary,
/// otherwise exception is thrown.
/// \param length Length of the substream. Can be -1, in this case, all remaining data is read.
QByteArray readSubstream(int length);
private:
const QByteArray* m_stream;
int m_position;