Parsing cross-reference streams

This commit is contained in:
Jakub Melka 2019-06-28 18:11:05 +02:00
parent 48f4a24923
commit 8c130ca013
6 changed files with 360 additions and 119 deletions

View File

@ -42,104 +42,33 @@ static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN = "Unknown"
QByteArray PDFDocument::getDecodedStream(const PDFStream* stream) const QByteArray PDFDocument::getDecodedStream(const PDFStream* stream) const
{ {
const PDFDictionary* dictionary = stream->getDictionary(); return PDFStreamFilterStorage::getDecodedStream(stream, std::bind(&PDFDocument::getObject, this, std::placeholders::_1));
}
// Retrieve filters const PDFDictionary* PDFDocument::getTrailerDictionary() const
PDFObject filters; {
if (dictionary->hasKey(PDF_STREAM_DICT_FILTER)) const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
// Trailer object should be dictionary/stream here. It is verified in the document reader.
Q_ASSERT(trailerDictionary.isDictionary() || trailerDictionary.isStream());
if (trailerDictionary.isDictionary())
{ {
filters = getObject(dictionary->get(PDF_STREAM_DICT_FILTER)); return trailerDictionary.getDictionary();
} }
else if (dictionary->hasKey(PDF_STREAM_DICT_FILE_FILTER)) else if (trailerDictionary.isStream())
{ {
filters = getObject(dictionary->get(PDF_STREAM_DICT_FILE_FILTER)); return trailerDictionary.getStream()->getDictionary();
} }
// Retrieve filter parameters return nullptr;
PDFObject filterParameters;
if (dictionary->hasKey(PDF_STREAM_DICT_DECODE_PARMS))
{
filterParameters = getObject(dictionary->get(PDF_STREAM_DICT_DECODE_PARMS));
}
else if (dictionary->hasKey(PDF_STREAM_DICT_FDECODE_PARMS))
{
filterParameters = getObject(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS));
}
std::vector<const PDFStreamFilter*> filterObjects;
std::vector<PDFObject> filterParameterObjects;
if (filters.isName())
{
filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
}
else if (filters.isArray())
{
const PDFArray* filterArray = filters.getArray();
const size_t filterCount = filterArray->getCount();
for (size_t i = 0; i < filterCount; ++i)
{
const PDFObject& object = getObject(filterArray->getItem(i));
if (object.isName())
{
filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString()));
}
else
{
return QByteArray();
}
}
}
else if (!filters.isNull())
{
return QByteArray();
}
if (filterParameters.isArray())
{
const PDFArray* filterParameterArray = filterParameters.getArray();
const size_t filterParameterCount = filterParameterArray->getCount();
for (size_t i = 0; i < filterParameterCount; ++i)
{
const PDFObject& object = getObject(filterParameterArray->getItem(i));
filterParameterObjects.push_back(object);
}
}
else
{
filterParameterObjects.push_back(filterParameters);
}
filterParameterObjects.resize(filterObjects.size());
std::reverse(filterObjects.begin(), filterObjects.end());
std::reverse(filterParameterObjects.begin(), filterParameterObjects.end());
QByteArray result = *stream->getContent();
for (size_t i = 0, count = filterObjects.size(); i < count; ++i)
{
const PDFStreamFilter* streamFilter = filterObjects[i];
const PDFObject& streamFilterParameters = filterParameterObjects[i];
if (streamFilter)
{
result = streamFilter->apply(result, this, streamFilterParameters);
}
}
return result;
} }
void PDFDocument::init() void PDFDocument::init()
{ {
initInfo(); initInfo();
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary(); const PDFDictionary* dictionary = getTrailerDictionary();
// Trailer object should be dictionary here. It is verified in the document reader.
Q_ASSERT(trailerDictionary.isDictionary());
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
Q_ASSERT(dictionary); Q_ASSERT(dictionary);
m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this); m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this);
@ -147,12 +76,8 @@ void PDFDocument::init()
void PDFDocument::initInfo() void PDFDocument::initInfo()
{ {
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
// Trailer object should be dictionary here. It is verified in the document reader. // Trailer object should be dictionary here. It is verified in the document reader.
Q_ASSERT(trailerDictionary.isDictionary()); const PDFDictionary* dictionary = getTrailerDictionary();
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
Q_ASSERT(dictionary); Q_ASSERT(dictionary);
if (dictionary->hasKey(PDF_DOCUMENT_INFO_ENTRY)) if (dictionary->hasKey(PDF_DOCUMENT_INFO_ENTRY))

View File

@ -317,6 +317,9 @@ public:
/// \param stream Stream to be decoded /// \param stream Stream to be decoded
QByteArray getDecodedStream(const PDFStream* stream) const; QByteArray getDecodedStream(const PDFStream* stream) const;
/// Returns the trailer dictionary
const PDFDictionary* getTrailerDictionary() const;
private: private:
friend class PDFDocumentReader; friend class PDFDocumentReader;

View File

@ -796,7 +796,7 @@ PDFObject PDFParser::getObject()
error(tr("Stream length is not specified.")); error(tr("Stream length is not specified."));
} }
PDFObject lengthObject = m_context->getObject(dictionary->get(PDF_STREAM_DICT_LENGTH)); PDFObject lengthObject = m_context ? m_context->getObject(dictionary->get(PDF_STREAM_DICT_LENGTH)) : dictionary->get(PDF_STREAM_DICT_LENGTH);
if (!lengthObject.isInt()) if (!lengthObject.isInt())
{ {
error(tr("Bad value of stream length. It should be an integer number.")); error(tr("Bad value of stream length. It should be an integer number."));
@ -817,7 +817,7 @@ PDFObject PDFParser::getObject()
// from the external file. // from the external file.
if (dictionary->hasKey(PDF_STREAM_DICT_FILE_SPECIFICATION)) if (dictionary->hasKey(PDF_STREAM_DICT_FILE_SPECIFICATION))
{ {
PDFObject fileName = m_context->getObject(dictionary->get(PDF_STREAM_DICT_FILE_SPECIFICATION)); PDFObject fileName = m_context ? m_context->getObject(dictionary->get(PDF_STREAM_DICT_FILE_SPECIFICATION)) : dictionary->get(PDF_STREAM_DICT_FILE_SPECIFICATION);
if (!fileName.isString()) if (!fileName.isString())
{ {

View File

@ -16,8 +16,8 @@
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>. // along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdfstreamfilters.h" #include "pdfstreamfilters.h"
#include "pdfdocument.h"
#include "pdfexception.h" #include "pdfexception.h"
#include "pdfconstants.h"
#include "pdfparser.h" #include "pdfparser.h"
#include <QtEndian> #include <QtEndian>
@ -25,9 +25,9 @@
namespace pdf namespace pdf
{ {
QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
{ {
Q_UNUSED(document); Q_UNUSED(objectFetcher);
Q_UNUSED(parameters); Q_UNUSED(parameters);
const int indexOfEnd = data.indexOf('>'); const int indexOfEnd = data.indexOf('>');
@ -50,9 +50,9 @@ QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data, const PDFDocum
return QByteArray::fromHex(QByteArray::fromRawData(data.constData(), size)); return QByteArray::fromHex(QByteArray::fromRawData(data.constData(), size));
} }
QByteArray PDFAscii85DecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const QByteArray PDFAscii85DecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
{ {
Q_UNUSED(document); Q_UNUSED(objectFetcher);
Q_UNUSED(parameters); Q_UNUSED(parameters);
const unsigned char* dataBegin = reinterpret_cast<const unsigned char*>(data.constData()); const unsigned char* dataBegin = reinterpret_cast<const unsigned char*>(data.constData());
@ -333,19 +333,28 @@ uint32_t PDFLzwStreamDecoder::getCode()
return code; return code;
} }
QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
{ {
uint32_t early = 1; uint32_t early = 1;
const PDFObject& dereferencedParameters = document->getObject(parameters); const PDFObject& dereferencedParameters = objectFetcher(parameters);
if (dereferencedParameters.isDictionary()) if (dereferencedParameters.isDictionary())
{ {
const PDFDictionary* dictionary = dereferencedParameters.getDictionary(); const PDFDictionary* dictionary = dereferencedParameters.getDictionary();
PDFDocumentDataLoaderDecorator loader(document); PDFInteger predictor = 1;
early = loader.readInteger(dictionary->get("EarlyChange"), 1); const PDFObject& predictorObject = objectFetcher(dictionary->get("Predictor"));
if (predictorObject.isInt())
{
predictor = predictorObject.getInteger();
}
const PDFObject& earlyChangeObject = objectFetcher(dictionary->get("EarlyChange"));
if (earlyChangeObject.isInt())
{
early = earlyChangeObject.getInteger();
}
PDFInteger predictor = loader.readInteger(dictionary->get("Predictor"), 1);
if (predictor != 1) if (predictor != 1)
{ {
// TODO: Implement Predictor algorithm // TODO: Implement Predictor algorithm
@ -357,15 +366,19 @@ QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data, const PDFDocument*
return decoder.decompress(); return decoder.decompress();
} }
QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
{ {
const PDFObject& dereferencedParameters = document->getObject(parameters); const PDFObject& dereferencedParameters = objectFetcher(parameters);
if (dereferencedParameters.isDictionary()) if (dereferencedParameters.isDictionary())
{ {
const PDFDictionary* dictionary = dereferencedParameters.getDictionary(); const PDFDictionary* dictionary = dereferencedParameters.getDictionary();
PDFDocumentDataLoaderDecorator loader(document); PDFInteger predictor = 1;
PDFInteger predictor = loader.readInteger(dictionary->get("Predictor"), 1); const PDFObject& predictorObject = objectFetcher(dictionary->get("Predictor"));
if (predictorObject.isInt())
{
predictor = predictorObject.getInteger();
}
if (predictor != 1) if (predictor != 1)
{ {
@ -385,9 +398,9 @@ QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data, const PDFDocument
return qUncompress(dataToUncompress); return qUncompress(dataToUncompress);
} }
QByteArray PDFRunLengthDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const QByteArray PDFRunLengthDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
{ {
Q_UNUSED(document); Q_UNUSED(objectFetcher);
Q_UNUSED(parameters); Q_UNUSED(parameters);
QByteArray result; QByteArray result;
@ -439,6 +452,101 @@ const PDFStreamFilter* PDFStreamFilterStorage::getFilter(const QByteArray& filte
return nullptr; return nullptr;
} }
/// Decodes the content of the stream using the filter chain declared in the stream's dictionary.
/// The filter (or array of filters) and the decode parameters are looked up in the dictionary
/// and every such object is passed through \p objectFetcher before it is inspected.
/// \param stream Stream containing the data
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
/// \returns Decoded data. Empty byte array is returned, if the filter specification is
///          malformed (filter is neither a name, an array of names, nor null).
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher)
{
    const PDFDictionary* dictionary = stream->getDictionary();

    // Retrieve filters
    PDFObject filters;
    if (dictionary->hasKey(PDF_STREAM_DICT_FILTER))
    {
        filters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FILTER));
    }
    else if (dictionary->hasKey(PDF_STREAM_DICT_FILE_FILTER))
    {
        filters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FILE_FILTER));
    }

    // Retrieve filter parameters
    PDFObject filterParameters;
    if (dictionary->hasKey(PDF_STREAM_DICT_DECODE_PARMS))
    {
        filterParameters = objectFetcher(dictionary->get(PDF_STREAM_DICT_DECODE_PARMS));
    }
    else if (dictionary->hasKey(PDF_STREAM_DICT_FDECODE_PARMS))
    {
        filterParameters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS));
    }

    std::vector<const PDFStreamFilter*> filterObjects;
    std::vector<PDFObject> filterParameterObjects;

    if (filters.isName())
    {
        filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
    }
    else if (filters.isArray())
    {
        const PDFArray* filterArray = filters.getArray();
        const size_t filterCount = filterArray->getCount();
        filterObjects.reserve(filterCount);
        for (size_t i = 0; i < filterCount; ++i)
        {
            const PDFObject& object = objectFetcher(filterArray->getItem(i));
            if (!object.isName())
            {
                // Each item of the filter array must be a name
                return QByteArray();
            }

            filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString()));
        }
    }
    else if (!filters.isNull())
    {
        // Filter entry is present, but it is neither a name nor an array
        return QByteArray();
    }

    if (filterParameters.isArray())
    {
        const PDFArray* filterParameterArray = filterParameters.getArray();
        const size_t filterParameterCount = filterParameterArray->getCount();
        filterParameterObjects.reserve(filterParameterCount);
        for (size_t i = 0; i < filterParameterCount; ++i)
        {
            filterParameterObjects.push_back(objectFetcher(filterParameterArray->getItem(i)));
        }
    }
    else
    {
        filterParameterObjects.push_back(filterParameters);
    }

    // Pair each filter with its parameters; missing parameters become default (null) objects,
    // surplus parameters are dropped.
    filterParameterObjects.resize(filterObjects.size());

    // The filter array lists the filters in the order, in which they have to be applied
    // to decode the data (the first filter in the array undoes the last encoding step).
    // Therefore the filters are applied front to back, the order must not be reversed.
    QByteArray result = *stream->getContent();
    for (size_t i = 0, count = filterObjects.size(); i < count; ++i)
    {
        const PDFStreamFilter* streamFilter = filterObjects[i];
        const PDFObject& streamFilterParameters = filterParameterObjects[i];

        // Filters for which getFilter returned nullptr are skipped
        if (streamFilter)
        {
            result = streamFilter->apply(result, objectFetcher, streamFilterParameters);
        }
    }

    return result;
}
/// Decodes the stream without any object fetching - every object of the stream's
/// dictionary is used exactly as it is stored.
/// \param stream Stream containing the data
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream)
{
    // Identity fetcher, which hands each object back unchanged
    const PDFObjectFetcher identityFetcher = [](const PDFObject& object) -> const PDFObject&
    {
        return object;
    };

    return getDecodedStream(stream, identityFetcher);
}
PDFStreamFilterStorage::PDFStreamFilterStorage() PDFStreamFilterStorage::PDFStreamFilterStorage()
{ {
// Initialize map with the filters // Initialize map with the filters

View File

@ -23,12 +23,14 @@
#include <QByteArray> #include <QByteArray>
#include <memory> #include <memory>
#include <functional>
namespace pdf namespace pdf
{ {
class PDFDocument;
class PDFStreamFilter; class PDFStreamFilter;
using PDFObjectFetcher = std::function<const PDFObject&(const PDFObject&)>;
/// Storage for stream filters. Can retrieve stream filters by name. Using singleton /// Storage for stream filters. Can retrieve stream filters by name. Using singleton
/// design pattern. Use static methods to retrieve filters. /// design pattern. Use static methods to retrieve filters.
class PDFStreamFilterStorage class PDFStreamFilterStorage
@ -39,6 +41,16 @@ public:
/// \param filterName Name of the filter to be retrieved. /// \param filterName Name of the filter to be retrieved.
static const PDFStreamFilter* getFilter(const QByteArray& filterName); static const PDFStreamFilter* getFilter(const QByteArray& filterName);
/// Returns decoded data from the stream. Filters and their parameters are read from
/// the stream's dictionary; each of these objects is passed through the object fetcher first.
/// \param stream Stream containing the data
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
/// \returns Decoded data, or empty byte array, if the filter entry is neither a name, an array of names, nor null
static QByteArray getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher);
/// Returns decoded data from the stream, without object fetching. Objects found
/// in the stream's dictionary are used as they are.
/// \param stream Stream containing the data
static QByteArray getDecodedStream(const PDFStream* stream);
private: private:
explicit PDFStreamFilterStorage(); explicit PDFStreamFilterStorage();
@ -58,7 +70,19 @@ public:
explicit PDFStreamFilter() = default; explicit PDFStreamFilter() = default;
virtual ~PDFStreamFilter() = default; virtual ~PDFStreamFilter() = default;
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const = 0; /// Apply with object fetcher
/// \param data Stream data to be decoded
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
/// \param parameters Stream parameters
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const = 0;
/// Apply without object fetcher - it assumes that no references exist in the stream's dictionary.
/// Forwards to the three-argument overload with a fetcher, which returns each object unchanged.
/// \param data Stream data to be decoded
/// \param parameters Stream parameters
inline QByteArray apply(const QByteArray& data, const PDFObject& parameters) const
{
    const PDFObjectFetcher passThroughFetcher = [](const PDFObject& object) -> const PDFObject&
    {
        return object;
    };

    return apply(data, passThroughFetcher, parameters);
}
}; };
class PDFFORQTLIBSHARED_EXPORT PDFAsciiHexDecodeFilter : public PDFStreamFilter class PDFFORQTLIBSHARED_EXPORT PDFAsciiHexDecodeFilter : public PDFStreamFilter
@ -67,7 +91,7 @@ public:
explicit PDFAsciiHexDecodeFilter() = default; explicit PDFAsciiHexDecodeFilter() = default;
virtual ~PDFAsciiHexDecodeFilter() override = default; virtual ~PDFAsciiHexDecodeFilter() override = default;
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override; virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
}; };
class PDFFORQTLIBSHARED_EXPORT PDFAscii85DecodeFilter : public PDFStreamFilter class PDFFORQTLIBSHARED_EXPORT PDFAscii85DecodeFilter : public PDFStreamFilter
@ -76,7 +100,7 @@ public:
explicit PDFAscii85DecodeFilter() = default; explicit PDFAscii85DecodeFilter() = default;
virtual ~PDFAscii85DecodeFilter() override = default; virtual ~PDFAscii85DecodeFilter() override = default;
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override; virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
}; };
class PDFFORQTLIBSHARED_EXPORT PDFLzwDecodeFilter : public PDFStreamFilter class PDFFORQTLIBSHARED_EXPORT PDFLzwDecodeFilter : public PDFStreamFilter
@ -85,7 +109,7 @@ public:
explicit PDFLzwDecodeFilter() = default; explicit PDFLzwDecodeFilter() = default;
virtual ~PDFLzwDecodeFilter() override = default; virtual ~PDFLzwDecodeFilter() override = default;
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override; virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
}; };
class PDFFORQTLIBSHARED_EXPORT PDFFlateDecodeFilter : public PDFStreamFilter class PDFFORQTLIBSHARED_EXPORT PDFFlateDecodeFilter : public PDFStreamFilter
@ -94,7 +118,7 @@ public:
explicit PDFFlateDecodeFilter() = default; explicit PDFFlateDecodeFilter() = default;
virtual ~PDFFlateDecodeFilter() override = default; virtual ~PDFFlateDecodeFilter() override = default;
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override; virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
}; };
class PDFFORQTLIBSHARED_EXPORT PDFRunLengthDecodeFilter : public PDFStreamFilter class PDFFORQTLIBSHARED_EXPORT PDFRunLengthDecodeFilter : public PDFStreamFilter
@ -103,7 +127,7 @@ public:
explicit PDFRunLengthDecodeFilter() = default; explicit PDFRunLengthDecodeFilter() = default;
virtual ~PDFRunLengthDecodeFilter() override = default; virtual ~PDFRunLengthDecodeFilter() override = default;
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override; virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
}; };
} // namespace pdf } // namespace pdf

View File

@ -19,6 +19,7 @@
#include "pdfconstants.h" #include "pdfconstants.h"
#include "pdfexception.h" #include "pdfexception.h"
#include "pdfparser.h" #include "pdfparser.h"
#include "pdfstreamfilters.h"
#include <stack> #include <stack>
@ -27,7 +28,7 @@ namespace pdf
void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset) void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset)
{ {
PDFParser parser(byteArray, context, PDFParser::None); PDFParser parser(byteArray, context, PDFParser::AllowStreams);
m_entries.clear(); m_entries.clear();
@ -96,6 +97,11 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
throw PDFParserException(tr("Bad format of reference table entry.")); throw PDFParserException(tr("Bad format of reference table entry."));
} }
if (static_cast<size_t>(objectNumber) >= m_entries.size())
{
throw PDFParserException(tr("Bad format of reference table entry."));
}
Entry entry; Entry entry;
if (occupied) if (occupied)
{ {
@ -137,13 +143,188 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
workSet.push(previousOffset.getInteger()); workSet.push(previousOffset.getInteger());
} }
if (dictionary->hasKey(PDF_XREF_TRAILER_XREFSTM)) const PDFObject& xrefstmObject = dictionary->get(PDF_XREF_TRAILER_XREFSTM);
if (xrefstmObject.isInt())
{ {
throw PDFParserException(tr("Hybrid reference tables not supported.")); workSet.push(xrefstmObject.getInteger());
} }
} }
else else
{ {
// Try to read cross-reference stream
PDFObject crossReferenceStreamObjectNumber = parser.getObject();
PDFObject crossReferenceStreamGeneration = parser.getObject();
if (!crossReferenceStreamObjectNumber.isInt() || !crossReferenceStreamGeneration.isInt())
{
throw PDFParserException(tr("Invalid format of reference table."));
}
if (!parser.fetchCommand(PDF_OBJECT_START_MARK))
{
throw PDFParserException(tr("Invalid format of reference table."));
}
PDFObject crossReferenceObject = parser.getObject();
if (!parser.fetchCommand(PDF_OBJECT_END_MARK))
{
throw PDFParserException(tr("Invalid format of reference table."));
}
if (crossReferenceObject.isStream())
{
const PDFStream* crossReferenceStream = crossReferenceObject.getStream();
const PDFDictionary* crossReferenceStreamDictionary = crossReferenceStream->getDictionary();
const PDFObject typeObject = crossReferenceStreamDictionary->get("Type");
if (typeObject.isName() && typeObject.getString() == "XRef")
{
PDFObject sizeObject = crossReferenceStreamDictionary->get("Size");
if (!sizeObject.isInt() || sizeObject.getInteger() < 0)
{
throw PDFParserException(tr("Invalid format of cross-reference stream."));
}
const PDFInteger desiredSize = sizeObject.getInteger();
if (static_cast<PDFInteger>(m_entries.size()) < desiredSize)
{
m_entries.resize(desiredSize);
}
PDFObject prevObject = crossReferenceStreamDictionary->get("Prev");
if (prevObject.isInt())
{
workSet.push(prevObject.getInteger());
}
// Do not overwrite trailer dictionary, if it was already loaded.
if (m_trailerDictionary.isNull())
{
m_trailerDictionary = crossReferenceObject;
}
// Reads the array of integers stored under the given key of the cross-reference stream
// dictionary. If the entry is not an array, a copy of the default values is returned.
// Throws PDFParserException, if the array contains an item, which is not an integer.
auto readIntegerArray = [crossReferenceStreamDictionary](const char* key, auto defaultValues) -> std::vector<PDFInteger>
{
    const PDFObject& object = crossReferenceStreamDictionary->get(key);
    if (!object.isArray())
    {
        return std::vector<PDFInteger>(defaultValues.begin(), defaultValues.end());
    }

    const PDFArray* array = object.getArray();
    const size_t itemCount = array->getCount();

    std::vector<PDFInteger> values;
    values.reserve(itemCount);

    for (size_t index = 0; index < itemCount; ++index)
    {
        const PDFObject& item = array->getItem(index);
        if (!item.isInt())
        {
            throw PDFParserException(tr("Invalid format of cross-reference stream."));
        }

        values.push_back(item.getInteger());
    }

    return values;
};
std::vector<PDFInteger> indexArray = readIntegerArray("Index", std::initializer_list<PDFInteger>{ PDFInteger(0), PDFInteger(desiredSize) });
std::vector<PDFInteger> wArray = readIntegerArray("W", std::vector<PDFInteger>());
if (wArray.size() != 3 || indexArray.empty() || (indexArray.size() % 2 != 0))
{
throw PDFParserException(tr("Invalid format of cross-reference stream."));
}
const int columnTypeBytes = wArray[0];
const int columnObjectNumberOrByteOffsetBytes = wArray[1];
const int columnGenerationNumberOrObjectIndexBytes = wArray[2];
const size_t blockCount = indexArray.size() / 2;
QByteArray data = PDFStreamFilterStorage::getDecodedStream(crossReferenceStream);
QDataStream dataStream(&data, QIODevice::ReadOnly);
dataStream.setByteOrder(QDataStream::BigEndian);
// Reads a big-endian unsigned number of the given width (in bytes) from the data stream.
// Width zero means the field is not stored in the stream; the default value is returned
// in that case. The width comes from the untrusted "W" array, so it is validated: a negative
// width, or a width which doesn't fit into the 64-bit accumulator, is rejected.
// Throws PDFParserException on invalid width or when the stream runs out of data.
auto readNumber = [&dataStream](int bytes, PDFInteger defaultValue) -> PDFInteger
{
    if (bytes == 0)
    {
        return defaultValue;
    }

    if (bytes < 0 || bytes > static_cast<int>(sizeof(uint64_t)))
    {
        throw PDFParserException(tr("Invalid format of cross-reference stream."));
    }

    uint64_t value = 0;
    for (int i = 0; i < bytes; ++i)
    {
        uint8_t byte = 0;
        dataStream >> byte;

        // Check, if stream is OK (we don't read past the end of the stream,
        // data aren't corrupted etc.)
        if (dataStream.status() != QDataStream::Ok)
        {
            throw PDFParserException(tr("Invalid format of cross-reference stream - not enough data in the stream."));
        }

        value = (value << 8) + byte;
    }

    return static_cast<PDFInteger>(value);
};
// Process the subsections of the cross-reference stream. Each pair of the "Index" array
// holds the first object number of the subsection and the number of its entries.
for (size_t i = 0; i < blockCount; ++i)
{
    const PDFInteger firstObjectNumber = indexArray[2 * i];
    const PDFInteger count = indexArray[2 * i + 1];

    // Both values come from the (untrusted) file. Negative values would index the entry
    // table out of bounds, too large values would overflow the computation of the end
    // of the subsection (or exceed the capacity of the entry table).
    const PDFInteger maxEntryCount = static_cast<PDFInteger>(m_entries.max_size());
    if (firstObjectNumber < 0 || count < 0 || firstObjectNumber > maxEntryCount || count > maxEntryCount - firstObjectNumber)
    {
        throw PDFParserException(tr("Invalid format of cross-reference stream."));
    }

    // One past the last object number of this subsection
    const PDFInteger endObjectNumber = firstObjectNumber + count;
    if (static_cast<PDFInteger>(m_entries.size()) < endObjectNumber)
    {
        m_entries.resize(endObjectNumber);
    }

    for (PDFInteger objectNumber = firstObjectNumber; objectNumber < endObjectNumber; ++objectNumber)
    {
        // Keep full width of the values - byte offset can exceed range of the 32-bit integer.
        // All three fields must always be read, to keep position in the stream correct.
        const PDFInteger itemType = readNumber(columnTypeBytes, 1);
        const PDFInteger itemObjectNumberOfObjectStreamOrByteOffset = readNumber(columnObjectNumberOrByteOffsetBytes, 0);
        const PDFInteger itemGenerationNumberOrObjectIndex = readNumber(columnGenerationNumberOrObjectIndexBytes, 0);

        switch (itemType)
        {
            case 0:
                // Free object
                break;

            case 1:
            {
                // Object stored directly in the file, second field is the byte offset,
                // third field is the generation number.
                Entry entry;
                entry.reference = PDFObjectReference(objectNumber, itemGenerationNumberOrObjectIndex);
                entry.offset = itemObjectNumberOfObjectStreamOrByteOffset;
                entry.type = EntryType::Occupied;

                // Do not overwrite entry, which has been already loaded
                if (m_entries[objectNumber].type == EntryType::Free)
                {
                    m_entries[objectNumber] = std::move(entry);
                }
                break;
            }

            case 2:
                // Object stored in an object stream - such entries are not loaded here,
                // the entry is left untouched.
                break;

            default:
                // According to the specification, treat object of unknown type as null object
                break;
        }
    }
}
}
continue;
}
throw PDFParserException(tr("Invalid format of reference table.")); throw PDFParserException(tr("Invalid format of reference table."));
} }
} }