mirror of https://github.com/JakubMelka/PDF4QT.git
JBIG2 - basic reading of segments
This commit is contained in:
parent
ee4e21320e
commit
abbe7a920a
|
@ -20,6 +20,7 @@
|
|||
#include "pdfconstants.h"
|
||||
#include "pdfexception.h"
|
||||
#include "pdfutils.h"
|
||||
#include "pdfjbig2decoder.h"
|
||||
#include "pdfccittfaxdecoder.h"
|
||||
|
||||
#include <openjpeg.h>
|
||||
|
@ -656,7 +657,21 @@ PDFImage PDFImage::createImage(const PDFDocument* document,
|
|||
}
|
||||
else if (imageFilterName == "JBIG2Decode")
|
||||
{
|
||||
throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Not implemented image filter 'JBIG2Decode'."));
|
||||
QByteArray data = document->getDecodedStream(stream);
|
||||
QByteArray globalData;
|
||||
if (filterParamsDictionary)
|
||||
{
|
||||
const PDFObject& globalDataObject = document->getObject(filterParamsDictionary->get("JBIG2Globals"));
|
||||
if (globalDataObject.isStream())
|
||||
{
|
||||
globalData = document->getDecodedStream(globalDataObject.getStream());
|
||||
}
|
||||
}
|
||||
|
||||
PDFJBIG2Decoder decoder(qMove(data), qMove(globalData), errorReporter);
|
||||
decoder.decode();
|
||||
|
||||
// TODO: Finish JBIG2 decoder
|
||||
}
|
||||
else if (colorSpace || isSoftMask)
|
||||
{
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdfjbig2decoder.h"
|
||||
#include "pdfexception.h"
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
@ -464,9 +465,250 @@ uint32_t PDFJBIG2ArithmeticDecoder::perform_DECODE(size_t context, PDFJBIG2Arith
|
|||
return D;
|
||||
}
|
||||
|
||||
PDFJBIG2Decoder::PDFJBIG2Decoder()
|
||||
/// Reads one JBIG2 segment header from the reader: segment number, segment flags,
/// referred segments, page association and segment data length. The layout follows
/// chapter 7.2 of the specification.
/// \param reader Bit reader positioned at the first byte of the segment header
/// \returns Parsed segment header
/// \throws PDFException if the referred segments count field is invalid, or if
///         the segment type is unknown
PDFJBIG2SegmentHeader PDFJBIG2SegmentHeader::read(PDFBitReader* reader)
{
    PDFJBIG2SegmentHeader header;

    // Parse segment number and segment flags. Low 6 bits of the flags are the segment
    // type, bit 0x40 selects the width of the page association field.
    header.m_segmentNumber = reader->read(32);
    const uint8_t flags = reader->read(8);
    const uint8_t type = flags & 0x3F;
    const bool isPageAssociationSize4ByteLong = flags & 0x40;

    // Now parse the referred-to segments. Retain flags are not used, so these bits are
    // skipped. Data format is described in chapter 7.2.4 of the specification. According
    // to the specification, values 5 or 6 can't be in the top three bits of the first
    // byte. If they occur, an exception is thrown.
    uint32_t retentionField = reader->read(8);
    uint32_t referredSegmentsCount = retentionField >> 5; // Top three bits

    if (referredSegmentsCount == 5 || referredSegmentsCount == 6)
    {
        throw PDFException(PDFTranslationContext::tr("JBIG2 invalid header - bad referred segments."));
    }

    if (referredSegmentsCount == 7)
    {
        // This signals that the long form is used (more than 4 referred segments). The field
        // is a 32-bit value, where bits 0-28 are the number of referred segments, and
        // bits 29-31 should all be set to 1.
        retentionField = (retentionField << 24) | reader->read(24);
        referredSegmentsCount = retentionField & 0x1FFFFFFF;

        if ((retentionField & 0xE0000000) != 0xE0000000)
        {
            throw PDFException(PDFTranslationContext::tr("JBIG2 invalid header - bad referred segments."));
        }

        // According to the specification, the retention field is 4 + ceil((R + 1) / 8) bytes long.
        // Four bytes are already read, so ceil((R + 1) / 8) = (R + 1 + 7) / 8 bytes remain to be
        // skipped. It is R + 1 bits, not R bits, because one bit is the retain flag of this segment.
        const uint32_t bytesToSkip = (referredSegmentsCount + 8) / 8;
        reader->skipBytes(bytesToSkip);
    }

    // Read referred segment numbers. According to chapter 7.2.5 of the specification, referred
    // segments have a segment number lower than the actual segment number. So, if the segment
    // number is less than or equal to 256, an 8-bit value stores each referred segment number;
    // if it is less than or equal to 65536, a 16-bit value is used; otherwise a 32-bit value is used.
    //
    // The count comes straight from the stream and can be as large as 2^29 - 1, so only
    // a bounded amount of memory is preallocated. The vector still grows on demand in the loop.
    constexpr uint32_t maxPreallocatedReferredSegments = 1024;
    header.m_referredSegments.reserve(referredSegmentsCount < maxPreallocatedReferredSegments ? referredSegmentsCount : maxPreallocatedReferredSegments);
    const PDFBitReader::Value referredSegmentNumberBits = (header.m_segmentNumber <= 256) ? 8 : ((header.m_segmentNumber <= 65536) ? 16 : 32);
    for (uint32_t i = 0; i < referredSegmentsCount; ++i)
    {
        header.m_referredSegments.push_back(reader->read(referredSegmentNumberBits));
    }

    header.m_pageAssociation = reader->read(isPageAssociationSize4ByteLong ? 32 : 8);
    header.m_segmentDataLength = reader->read(32);

    // Bit 0 of the type is stored as the lossless flag, bit 1 as the immediate flag
    header.m_lossless = type & 0x01;
    header.m_immediate = type & 0x02;

    // Map the raw type code to the segment type
    switch (type)
    {
        case 0:
            header.m_segmentType = JBIG2SegmentType::SymbolDictionary;
            break;

        case 4:
        case 6:
        case 7:
            header.m_segmentType = JBIG2SegmentType::TextRegion;
            break;

        case 16:
            header.m_segmentType = JBIG2SegmentType::PatternDictionary;
            break;

        case 20:
        case 22:
        case 23:
            header.m_segmentType = JBIG2SegmentType::HalftoneRegion;
            break;

        case 36:
        case 38:
        case 39:
            header.m_segmentType = JBIG2SegmentType::GenericRegion;
            break;

        case 40:
        case 42:
        case 43:
            header.m_segmentType = JBIG2SegmentType::GenericRefinementRegion;
            break;

        case 48:
            header.m_segmentType = JBIG2SegmentType::PageInformation;
            break;

        case 49:
            header.m_segmentType = JBIG2SegmentType::EndOfPage;
            break;

        case 50:
            header.m_segmentType = JBIG2SegmentType::EndOfStripe;
            break;

        case 51:
            header.m_segmentType = JBIG2SegmentType::EndOfFile;
            break;

        case 52:
            header.m_segmentType = JBIG2SegmentType::Profiles;
            break;

        case 53:
            header.m_segmentType = JBIG2SegmentType::Tables;
            break;

        case 62:
            header.m_segmentType = JBIG2SegmentType::Extension;
            break;

        default:
            throw PDFException(PDFTranslationContext::tr("JBIG2 invalid segment type %1.").arg(type));
    }

    return header;
}
|
||||
|
||||
void PDFJBIG2Decoder::decode()
|
||||
{
|
||||
for (const QByteArray* data : { &m_globalData, &m_data })
|
||||
{
|
||||
if (!data->isEmpty())
|
||||
{
|
||||
m_reader = PDFBitReader(data, 8);
|
||||
processStream();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PDFJBIG2Decoder::processStream()
|
||||
{
|
||||
while (!m_reader.isAtEnd())
|
||||
{
|
||||
// Read the segment header, then process the segment data
|
||||
PDFJBIG2SegmentHeader segmentHeader = PDFJBIG2SegmentHeader::read(&m_reader);
|
||||
const int64_t segmentDataStartPosition = m_reader.getPosition();
|
||||
|
||||
switch (segmentHeader.getSegmentType())
|
||||
{
|
||||
case JBIG2SegmentType::SymbolDictionary:
|
||||
processSymbolDictionary(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::TextRegion:
|
||||
processTextRegion(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::PatternDictionary:
|
||||
processPatternDictionary(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::HalftoneRegion:
|
||||
processHalftoneRegion(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::GenericRegion:
|
||||
processGenericRegion(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::GenericRefinementRegion:
|
||||
processGenericRefinementRegion(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::PageInformation:
|
||||
processPageInformation(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::EndOfPage:
|
||||
processEndOfPage(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::EndOfStripe:
|
||||
processEndOfStripe(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::EndOfFile:
|
||||
processEndOfFile(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::Profiles:
|
||||
processProfiles(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::Tables:
|
||||
processCodeTables(segmentHeader);
|
||||
break;
|
||||
|
||||
case JBIG2SegmentType::Extension:
|
||||
processExtension(segmentHeader);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw PDFException(PDFTranslationContext::tr("JBIG2 invalid segment type %1.").arg(static_cast<uint32_t>(segmentHeader.getSegmentType())));
|
||||
}
|
||||
|
||||
// Make sure, that all data are processed by segment header. Positive offset means,
|
||||
// that we did not read all the data bytes. Negative offset means, that we read more
|
||||
// bytes in segment handler, that the segment has specified.
|
||||
if (segmentHeader.isSegmentDataLengthDefined())
|
||||
{
|
||||
const int64_t offset = static_cast<int64_t>(segmentDataStartPosition) + static_cast<int64_t>(segmentHeader.getSegmentDataLength()) - static_cast<int64_t>(m_reader.getPosition());
|
||||
if (offset > 0)
|
||||
{
|
||||
m_errorReporter->reportRenderError(RenderErrorType::Warning, PDFTranslationContext::tr("JBIG2 bad segment data - handler doesn't process all segment data - %1 bytes left.").arg(offset));
|
||||
}
|
||||
else if (offset < 0)
|
||||
{
|
||||
// This is fatal error, we have read data, which doesn't belong to this segment
|
||||
throw PDFException(PDFTranslationContext::tr("JBIG2 bad segment data - handler reads %1 bytes past segment end.").arg(-offset));
|
||||
}
|
||||
|
||||
// Always seek to the right position
|
||||
m_reader.seek(segmentDataStartPosition + segmentHeader.getSegmentDataLength());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PDFJBIG2Decoder::processExtension(const PDFJBIG2SegmentHeader& header)
|
||||
{
|
||||
// We will read the extension header, and check "Necessary bit"
|
||||
const uint32_t extensionHeader = m_reader.read(32);
|
||||
if (extensionHeader & 0x8000000)
|
||||
{
|
||||
const uint32_t extensionCode = extensionHeader & 0x3FFFFFFF;
|
||||
throw PDFException(PDFTranslationContext::tr("JBIG2 unknown extension %1 necessary for decoding the image.").arg(extensionCode));
|
||||
}
|
||||
|
||||
if (header.isSegmentDataLengthDefined())
|
||||
{
|
||||
m_reader.skipBytes(header.getSegmentDataLength() - 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw PDFException(PDFTranslationContext::tr("JBIG2 segment with unknown extension has not defined length."));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
namespace pdf
|
||||
{
|
||||
class PDFRenderErrorReporter;
|
||||
|
||||
/// Arithmetic decoder state for JBIG2 data streams. It contains state for context,
|
||||
/// state is stored as 8-bit value, where only 7 bits are used. 6 bits are used
|
||||
|
@ -111,10 +112,102 @@ private:
|
|||
PDFBitReader* m_reader;
|
||||
};
|
||||
|
||||
/// Kind of a JBIG2 segment. The chapter numbers refer to the JBIG2 specification.
enum class JBIG2SegmentType : uint32_t
{
    Invalid                 = 0,
    SymbolDictionary        = 1,    ///< Chapter 7.4.2
    TextRegion              = 2,    ///< Chapter 7.4.3
    PatternDictionary       = 3,    ///< Chapter 7.4.4
    HalftoneRegion          = 4,    ///< Chapter 7.4.5
    GenericRegion           = 5,    ///< Chapter 7.4.6
    GenericRefinementRegion = 6,    ///< Chapter 7.4.7
    PageInformation         = 7,    ///< Chapter 7.4.8
    EndOfPage               = 8,    ///< Chapter 7.4.9
    EndOfStripe             = 9,    ///< Chapter 7.4.10
    EndOfFile               = 10,   ///< Chapter 7.4.11
    Profiles                = 11,   ///< Chapter 7.4.12
    Tables                  = 12,   ///< Chapter 7.4.13
    Extension               = 13    ///< Chapter 7.4.14
};
|
||||
|
||||
class PDFJBIG2SegmentHeader
|
||||
{
|
||||
public:
|
||||
explicit inline PDFJBIG2SegmentHeader() = default;
|
||||
|
||||
/// Returns segment type
|
||||
inline JBIG2SegmentType getSegmentType() const { return m_segmentType; }
|
||||
|
||||
/// Returns segment number
|
||||
inline uint32_t getSegmentNumber() const { return m_segmentNumber; }
|
||||
|
||||
/// Returns segment data length (or 0xFFFFFFFF, if length is not defined)
|
||||
/// \sa isSegmentDataLengthDefined
|
||||
inline uint32_t getSegmentDataLength() const { return m_segmentDataLength; }
|
||||
|
||||
/// Returns true, if segment is immediate (direct paint on page's bitmap)
|
||||
inline bool isImmediate() const { return m_immediate; }
|
||||
|
||||
/// Returns true, if segment is lossless
|
||||
inline bool isLossless() const { return m_lossless; }
|
||||
|
||||
/// Returns true, if segmend data length is defined
|
||||
inline bool isSegmentDataLengthDefined() const { return m_segmentDataLength != 0xFFFFFFFF; }
|
||||
|
||||
/// Reads the segment header from the data stream. If error occurs, then
|
||||
/// exception is thrown.
|
||||
static PDFJBIG2SegmentHeader read(PDFBitReader* reader);
|
||||
|
||||
private:
|
||||
uint32_t m_segmentNumber = 0;
|
||||
uint32_t m_pageAssociation = 0;
|
||||
uint32_t m_segmentDataLength = 0;
|
||||
JBIG2SegmentType m_segmentType = JBIG2SegmentType::Invalid;
|
||||
bool m_immediate = false;
|
||||
bool m_lossless = false;
|
||||
std::vector<uint32_t> m_referredSegments;
|
||||
};
|
||||
|
||||
/// Decoder of JBIG2 data streams. Decodes the black/white monochrome image.
|
||||
/// Handles also global segments. Decoder decodes data using the specification
|
||||
/// ISO/IEC 14492:2001, T.88.
|
||||
class PDFJBIG2Decoder
|
||||
{
|
||||
public:
|
||||
PDFJBIG2Decoder();
|
||||
explicit inline PDFJBIG2Decoder(QByteArray data, QByteArray globalData, PDFRenderErrorReporter* errorReporter) :
|
||||
m_data(qMove(data)),
|
||||
m_globalData(qMove(globalData)),
|
||||
m_errorReporter(errorReporter),
|
||||
m_reader(nullptr, 8)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void decode();
|
||||
|
||||
private:
|
||||
/// Processes current data stream (reads all data from the stream, interprets
|
||||
/// them as segments and processes the segments).
|
||||
void processStream();
|
||||
|
||||
void processSymbolDictionary(const PDFJBIG2SegmentHeader& header);
|
||||
void processTextRegion(const PDFJBIG2SegmentHeader& header);
|
||||
void processPatternDictionary(const PDFJBIG2SegmentHeader& header);
|
||||
void processHalftoneRegion(const PDFJBIG2SegmentHeader& header);
|
||||
void processGenericRegion(const PDFJBIG2SegmentHeader& header);
|
||||
void processGenericRefinementRegion(const PDFJBIG2SegmentHeader& header);
|
||||
void processPageInformation(const PDFJBIG2SegmentHeader& header);
|
||||
void processEndOfPage(const PDFJBIG2SegmentHeader& header);
|
||||
void processEndOfStripe(const PDFJBIG2SegmentHeader& header);
|
||||
void processEndOfFile(const PDFJBIG2SegmentHeader& header);
|
||||
void processProfiles(const PDFJBIG2SegmentHeader& header);
|
||||
void processCodeTables(const PDFJBIG2SegmentHeader& header);
|
||||
void processExtension(const PDFJBIG2SegmentHeader& header);
|
||||
|
||||
QByteArray m_data;
|
||||
QByteArray m_globalData;
|
||||
PDFRenderErrorReporter* m_errorReporter;
|
||||
PDFBitReader m_reader;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -78,7 +78,7 @@ PDFBitReader::Value PDFBitReader::look(Value bits) const
|
|||
|
||||
void PDFBitReader::seek(qint64 position)
|
||||
{
|
||||
if (position < m_stream->size())
|
||||
if (position <= m_stream->size())
|
||||
{
|
||||
m_position = position;
|
||||
m_buffer = 0;
|
||||
|
@ -90,6 +90,22 @@ void PDFBitReader::seek(qint64 position)
|
|||
}
|
||||
}
|
||||
|
||||
/// Skips the given number of bytes in the data stream.
/// \param bytes Number of bytes to be skipped
void PDFBitReader::skipBytes(Value bytes)
{
    if (m_bitsInBuffer != 0)
    {
        // Some bits are still waiting in the buffer, so the bytes
        // are consumed by reading them one after another.
        for (Value remaining = bytes; remaining > 0; --remaining)
        {
            read(8);
        }
        return;
    }

    // The buffer is empty, so a simple seek to the new position is enough
    seek(m_position + bytes);
}
|
||||
|
||||
void PDFBitReader::alignToBytes()
|
||||
{
|
||||
const Value remainder = m_bitsInBuffer % 8;
|
||||
|
|
|
@ -96,6 +96,12 @@ public:
|
|||
|
||||
explicit PDFBitReader(const QByteArray* stream, Value bitsPerComponent);
|
||||
|
||||
PDFBitReader(const PDFBitReader&) = default;
|
||||
PDFBitReader(PDFBitReader&&) = default;
|
||||
|
||||
PDFBitReader& operator=(const PDFBitReader&) = default;
|
||||
PDFBitReader& operator=(PDFBitReader&&) = default;
|
||||
|
||||
/// Returns maximal value of n-bit unsigned integer.
|
||||
Value max() const { return m_maximalValue; }
|
||||
|
||||
|
@ -116,18 +122,25 @@ public:
|
|||
/// then exception is thrown.
|
||||
void seek(qint64 position);
|
||||
|
||||
/// Skips desired number of bytes
|
||||
void skipBytes(Value bytes);
|
||||
|
||||
/// Seeks data to the byte boundary (number of processed bits is divisible by 8)
|
||||
void alignToBytes();
|
||||
|
||||
/// Returns true, if we are at the end of the data stream (no more data can be read)
|
||||
bool isAtEnd() const;
|
||||
|
||||
/// Returns position in the data stream (byte position, not bit position, so
|
||||
/// result of this function is sometimes inaccurate)
|
||||
int getPosition() const { return m_position; }
|
||||
|
||||
private:
|
||||
const QByteArray* m_stream;
|
||||
int m_position;
|
||||
|
||||
const Value m_bitsPerComponent;
|
||||
const Value m_maximalValue;
|
||||
Value m_bitsPerComponent;
|
||||
Value m_maximalValue;
|
||||
|
||||
Value m_buffer;
|
||||
Value m_bitsInBuffer;
|
||||
|
|
Loading…
Reference in New Issue