mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-02-05 20:03:40 +01:00
Parsing cross-reference streams
This commit is contained in:
parent
48f4a24923
commit
8c130ca013
@ -42,104 +42,33 @@ static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN = "Unknown"
|
||||
|
||||
QByteArray PDFDocument::getDecodedStream(const PDFStream* stream) const
|
||||
{
|
||||
const PDFDictionary* dictionary = stream->getDictionary();
|
||||
return PDFStreamFilterStorage::getDecodedStream(stream, std::bind(&PDFDocument::getObject, this, std::placeholders::_1));
|
||||
}
|
||||
|
||||
// Retrieve filters
|
||||
PDFObject filters;
|
||||
if (dictionary->hasKey(PDF_STREAM_DICT_FILTER))
|
||||
const PDFDictionary* PDFDocument::getTrailerDictionary() const
|
||||
{
|
||||
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
|
||||
|
||||
// Trailer object should be dictionary/stream here. It is verified in the document reader.
|
||||
Q_ASSERT(trailerDictionary.isDictionary() || trailerDictionary.isStream());
|
||||
|
||||
if (trailerDictionary.isDictionary())
|
||||
{
|
||||
filters = getObject(dictionary->get(PDF_STREAM_DICT_FILTER));
|
||||
return trailerDictionary.getDictionary();
|
||||
}
|
||||
else if (dictionary->hasKey(PDF_STREAM_DICT_FILE_FILTER))
|
||||
else if (trailerDictionary.isStream())
|
||||
{
|
||||
filters = getObject(dictionary->get(PDF_STREAM_DICT_FILE_FILTER));
|
||||
return trailerDictionary.getStream()->getDictionary();
|
||||
}
|
||||
|
||||
// Retrieve filter parameters
|
||||
PDFObject filterParameters;
|
||||
if (dictionary->hasKey(PDF_STREAM_DICT_DECODE_PARMS))
|
||||
{
|
||||
filterParameters = getObject(dictionary->get(PDF_STREAM_DICT_DECODE_PARMS));
|
||||
}
|
||||
else if (dictionary->hasKey(PDF_STREAM_DICT_FDECODE_PARMS))
|
||||
{
|
||||
filterParameters = getObject(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS));
|
||||
}
|
||||
|
||||
std::vector<const PDFStreamFilter*> filterObjects;
|
||||
std::vector<PDFObject> filterParameterObjects;
|
||||
|
||||
if (filters.isName())
|
||||
{
|
||||
filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
|
||||
}
|
||||
else if (filters.isArray())
|
||||
{
|
||||
const PDFArray* filterArray = filters.getArray();
|
||||
const size_t filterCount = filterArray->getCount();
|
||||
for (size_t i = 0; i < filterCount; ++i)
|
||||
{
|
||||
const PDFObject& object = getObject(filterArray->getItem(i));
|
||||
if (object.isName())
|
||||
{
|
||||
filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString()));
|
||||
}
|
||||
else
|
||||
{
|
||||
return QByteArray();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!filters.isNull())
|
||||
{
|
||||
return QByteArray();
|
||||
}
|
||||
|
||||
if (filterParameters.isArray())
|
||||
{
|
||||
const PDFArray* filterParameterArray = filterParameters.getArray();
|
||||
const size_t filterParameterCount = filterParameterArray->getCount();
|
||||
for (size_t i = 0; i < filterParameterCount; ++i)
|
||||
{
|
||||
const PDFObject& object = getObject(filterParameterArray->getItem(i));
|
||||
filterParameterObjects.push_back(object);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
filterParameterObjects.push_back(filterParameters);
|
||||
}
|
||||
|
||||
filterParameterObjects.resize(filterObjects.size());
|
||||
std::reverse(filterObjects.begin(), filterObjects.end());
|
||||
std::reverse(filterParameterObjects.begin(), filterParameterObjects.end());
|
||||
|
||||
QByteArray result = *stream->getContent();
|
||||
|
||||
for (size_t i = 0, count = filterObjects.size(); i < count; ++i)
|
||||
{
|
||||
const PDFStreamFilter* streamFilter = filterObjects[i];
|
||||
const PDFObject& streamFilterParameters = filterParameterObjects[i];
|
||||
|
||||
if (streamFilter)
|
||||
{
|
||||
result = streamFilter->apply(result, this, streamFilterParameters);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void PDFDocument::init()
|
||||
{
|
||||
initInfo();
|
||||
|
||||
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
|
||||
|
||||
// Trailer object should be dictionary here. It is verified in the document reader.
|
||||
Q_ASSERT(trailerDictionary.isDictionary());
|
||||
|
||||
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
|
||||
const PDFDictionary* dictionary = getTrailerDictionary();
|
||||
Q_ASSERT(dictionary);
|
||||
|
||||
m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this);
|
||||
@ -147,12 +76,8 @@ void PDFDocument::init()
|
||||
|
||||
void PDFDocument::initInfo()
|
||||
{
|
||||
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
|
||||
|
||||
// Trailer object should be dictionary here. It is verified in the document reader.
|
||||
Q_ASSERT(trailerDictionary.isDictionary());
|
||||
|
||||
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
|
||||
const PDFDictionary* dictionary = getTrailerDictionary();
|
||||
Q_ASSERT(dictionary);
|
||||
|
||||
if (dictionary->hasKey(PDF_DOCUMENT_INFO_ENTRY))
|
||||
|
@ -317,6 +317,9 @@ public:
|
||||
/// \param stream Stream to be decoded
|
||||
QByteArray getDecodedStream(const PDFStream* stream) const;
|
||||
|
||||
/// Returns the trailer dictionary
|
||||
const PDFDictionary* getTrailerDictionary() const;
|
||||
|
||||
private:
|
||||
friend class PDFDocumentReader;
|
||||
|
||||
|
@ -796,7 +796,7 @@ PDFObject PDFParser::getObject()
|
||||
error(tr("Stream length is not specified."));
|
||||
}
|
||||
|
||||
PDFObject lengthObject = m_context->getObject(dictionary->get(PDF_STREAM_DICT_LENGTH));
|
||||
PDFObject lengthObject = m_context ? m_context->getObject(dictionary->get(PDF_STREAM_DICT_LENGTH)) : dictionary->get(PDF_STREAM_DICT_LENGTH);
|
||||
if (!lengthObject.isInt())
|
||||
{
|
||||
error(tr("Bad value of stream length. It should be an integer number."));
|
||||
@ -817,7 +817,7 @@ PDFObject PDFParser::getObject()
|
||||
// from the external file.
|
||||
if (dictionary->hasKey(PDF_STREAM_DICT_FILE_SPECIFICATION))
|
||||
{
|
||||
PDFObject fileName = m_context->getObject(dictionary->get(PDF_STREAM_DICT_FILE_SPECIFICATION));
|
||||
PDFObject fileName = m_context ? m_context->getObject(dictionary->get(PDF_STREAM_DICT_FILE_SPECIFICATION)) : dictionary->get(PDF_STREAM_DICT_FILE_SPECIFICATION);
|
||||
|
||||
if (!fileName.isString())
|
||||
{
|
||||
|
@ -16,8 +16,8 @@
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdfstreamfilters.h"
|
||||
#include "pdfdocument.h"
|
||||
#include "pdfexception.h"
|
||||
#include "pdfconstants.h"
|
||||
#include "pdfparser.h"
|
||||
|
||||
#include <QtEndian>
|
||||
@ -25,9 +25,9 @@
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
||||
QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||
{
|
||||
Q_UNUSED(document);
|
||||
Q_UNUSED(objectFetcher);
|
||||
Q_UNUSED(parameters);
|
||||
|
||||
const int indexOfEnd = data.indexOf('>');
|
||||
@ -50,9 +50,9 @@ QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data, const PDFDocum
|
||||
return QByteArray::fromHex(QByteArray::fromRawData(data.constData(), size));
|
||||
}
|
||||
|
||||
QByteArray PDFAscii85DecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
||||
QByteArray PDFAscii85DecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||
{
|
||||
Q_UNUSED(document);
|
||||
Q_UNUSED(objectFetcher);
|
||||
Q_UNUSED(parameters);
|
||||
|
||||
const unsigned char* dataBegin = reinterpret_cast<const unsigned char*>(data.constData());
|
||||
@ -333,19 +333,28 @@ uint32_t PDFLzwStreamDecoder::getCode()
|
||||
return code;
|
||||
}
|
||||
|
||||
QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
||||
QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||
{
|
||||
uint32_t early = 1;
|
||||
|
||||
const PDFObject& dereferencedParameters = document->getObject(parameters);
|
||||
const PDFObject& dereferencedParameters = objectFetcher(parameters);
|
||||
if (dereferencedParameters.isDictionary())
|
||||
{
|
||||
const PDFDictionary* dictionary = dereferencedParameters.getDictionary();
|
||||
|
||||
PDFDocumentDataLoaderDecorator loader(document);
|
||||
early = loader.readInteger(dictionary->get("EarlyChange"), 1);
|
||||
PDFInteger predictor = 1;
|
||||
const PDFObject& predictorObject = objectFetcher(dictionary->get("Predictor"));
|
||||
if (predictorObject.isInt())
|
||||
{
|
||||
predictor = predictorObject.getInteger();
|
||||
}
|
||||
|
||||
const PDFObject& earlyChangeObject = objectFetcher(dictionary->get("EarlyChange"));
|
||||
if (earlyChangeObject.isInt())
|
||||
{
|
||||
early = earlyChangeObject.getInteger();
|
||||
}
|
||||
|
||||
PDFInteger predictor = loader.readInteger(dictionary->get("Predictor"), 1);
|
||||
if (predictor != 1)
|
||||
{
|
||||
// TODO: Implement Predictor algorithm
|
||||
@ -357,15 +366,19 @@ QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data, const PDFDocument*
|
||||
return decoder.decompress();
|
||||
}
|
||||
|
||||
QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
||||
QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||
{
|
||||
const PDFObject& dereferencedParameters = document->getObject(parameters);
|
||||
const PDFObject& dereferencedParameters = objectFetcher(parameters);
|
||||
if (dereferencedParameters.isDictionary())
|
||||
{
|
||||
const PDFDictionary* dictionary = dereferencedParameters.getDictionary();
|
||||
|
||||
PDFDocumentDataLoaderDecorator loader(document);
|
||||
PDFInteger predictor = loader.readInteger(dictionary->get("Predictor"), 1);
|
||||
PDFInteger predictor = 1;
|
||||
const PDFObject& predictorObject = objectFetcher(dictionary->get("Predictor"));
|
||||
if (predictorObject.isInt())
|
||||
{
|
||||
predictor = predictorObject.getInteger();
|
||||
}
|
||||
|
||||
if (predictor != 1)
|
||||
{
|
||||
@ -385,9 +398,9 @@ QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data, const PDFDocument
|
||||
return qUncompress(dataToUncompress);
|
||||
}
|
||||
|
||||
QByteArray PDFRunLengthDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
||||
QByteArray PDFRunLengthDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||
{
|
||||
Q_UNUSED(document);
|
||||
Q_UNUSED(objectFetcher);
|
||||
Q_UNUSED(parameters);
|
||||
|
||||
QByteArray result;
|
||||
@ -439,6 +452,101 @@ const PDFStreamFilter* PDFStreamFilterStorage::getFilter(const QByteArray& filte
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Decodes the content of the stream by applying the filters named in its dictionary.
///
/// The filter list is taken from the PDF_STREAM_DICT_FILTER entry of the stream dictionary,
/// or from PDF_STREAM_DICT_FILE_FILTER if the former is not present. The filter parameters
/// are taken from PDF_STREAM_DICT_DECODE_PARMS, or from PDF_STREAM_DICT_FDECODE_PARMS if
/// the former is not present. Filters are applied in the order in which they are listed
/// (the PDF specification states that multiple filters are specified in the order in which
/// they are to be applied), each one with the parameter object stored at the same position.
/// Filter names for which getFilter() returns a null pointer are skipped.
///
/// \param stream Stream containing the data
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
/// \returns Decoded data, or an empty byte array, if the filter entry is neither null,
///          a name, nor an array consisting only of names
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher)
{
    const PDFDictionary* dictionary = stream->getDictionary();

    // Retrieve filters
    PDFObject filters;
    if (dictionary->hasKey(PDF_STREAM_DICT_FILTER))
    {
        filters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FILTER));
    }
    else if (dictionary->hasKey(PDF_STREAM_DICT_FILE_FILTER))
    {
        filters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FILE_FILTER));
    }

    // Retrieve filter parameters
    PDFObject filterParameters;
    if (dictionary->hasKey(PDF_STREAM_DICT_DECODE_PARMS))
    {
        filterParameters = objectFetcher(dictionary->get(PDF_STREAM_DICT_DECODE_PARMS));
    }
    else if (dictionary->hasKey(PDF_STREAM_DICT_FDECODE_PARMS))
    {
        filterParameters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS));
    }

    std::vector<const PDFStreamFilter*> filterObjects;
    std::vector<PDFObject> filterParameterObjects;

    if (filters.isName())
    {
        filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
    }
    else if (filters.isArray())
    {
        const PDFArray* filterArray = filters.getArray();
        const size_t filterCount = filterArray->getCount();
        filterObjects.reserve(filterCount);

        for (size_t i = 0; i < filterCount; ++i)
        {
            const PDFObject& object = objectFetcher(filterArray->getItem(i));
            if (!object.isName())
            {
                // Every item of the filter array must be a name
                return QByteArray();
            }

            filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString()));
        }
    }
    else if (!filters.isNull())
    {
        // Filter entry is present, but it is neither a name, nor an array
        return QByteArray();
    }

    if (filterParameters.isArray())
    {
        const PDFArray* filterParameterArray = filterParameters.getArray();
        const size_t filterParameterCount = filterParameterArray->getCount();
        filterParameterObjects.reserve(filterParameterCount);

        for (size_t i = 0; i < filterParameterCount; ++i)
        {
            filterParameterObjects.push_back(objectFetcher(filterParameterArray->getItem(i)));
        }
    }
    else
    {
        filterParameterObjects.push_back(filterParameters);
    }

    // Pad (or truncate) the parameters, so that each filter has a parameter object
    // at the same index. Missing parameters are default-constructed objects.
    filterParameterObjects.resize(filterObjects.size());

    QByteArray result = *stream->getContent();

    // Apply the filters in the listed order - the first listed filter is the first
    // one to be applied to the raw stream data when decoding.
    for (size_t i = 0, count = filterObjects.size(); i < count; ++i)
    {
        if (const PDFStreamFilter* streamFilter = filterObjects[i])
        {
            result = streamFilter->apply(result, objectFetcher, filterParameterObjects[i]);
        }
    }

    return result;
}
|
||||
|
||||
/// Decodes the stream without any object fetching. Every object found in the stream
/// dictionary is used as it is.
/// \param stream Stream containing the data
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream)
{
    // Identity fetcher - hands every object back unchanged
    const PDFObjectFetcher identityFetcher = [](const PDFObject& object) -> const PDFObject& { return object; };
    return getDecodedStream(stream, identityFetcher);
}
|
||||
|
||||
PDFStreamFilterStorage::PDFStreamFilterStorage()
|
||||
{
|
||||
// Initialize map with the filters
|
||||
|
@ -23,12 +23,14 @@
|
||||
#include <QByteArray>
|
||||
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
class PDFDocument;
|
||||
class PDFStreamFilter;
|
||||
|
||||
using PDFObjectFetcher = std::function<const PDFObject&(const PDFObject&)>;
|
||||
|
||||
/// Storage for stream filters. Can retrieve stream filters by name. Using singleton
|
||||
/// design pattern. Use static methods to retrieve filters.
|
||||
class PDFStreamFilterStorage
|
||||
@ -39,6 +41,16 @@ public:
|
||||
/// \param filterName Name of the filter to be retrieved.
|
||||
static const PDFStreamFilter* getFilter(const QByteArray& filterName);
|
||||
|
||||
/// Returns decoded data from the stream
|
||||
/// \param stream Stream containing the data
|
||||
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
|
||||
static QByteArray getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher);
|
||||
|
||||
/// Returns decoded data from the stream, without object fetching
|
||||
/// \param stream Stream containing the data
|
||||
/// \note No object fetcher is used - objects in the stream dictionary are taken as they are, references are not resolved.
|
||||
static QByteArray getDecodedStream(const PDFStream* stream);
|
||||
|
||||
private:
|
||||
explicit PDFStreamFilterStorage();
|
||||
|
||||
@ -58,7 +70,19 @@ public:
|
||||
explicit PDFStreamFilter() = default;
|
||||
virtual ~PDFStreamFilter() = default;
|
||||
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const = 0;
|
||||
/// Apply with object fetcher
|
||||
/// \param data Stream data to be decoded
|
||||
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
|
||||
/// \param parameters Stream parameters
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const = 0;
|
||||
|
||||
/// Apply without object fetcher - it assumes no references exists in the streams dictionary
|
||||
/// \param data Stream data to be decoded
|
||||
/// \param parameters Stream parameters
|
||||
inline QByteArray apply(const QByteArray& data, const PDFObject& parameters) const
|
||||
{
|
||||
return apply(data, [](const PDFObject& object) -> const PDFObject& { return object; }, parameters);
|
||||
}
|
||||
};
|
||||
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFAsciiHexDecodeFilter : public PDFStreamFilter
|
||||
@ -67,7 +91,7 @@ public:
|
||||
explicit PDFAsciiHexDecodeFilter() = default;
|
||||
virtual ~PDFAsciiHexDecodeFilter() override = default;
|
||||
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||
};
|
||||
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFAscii85DecodeFilter : public PDFStreamFilter
|
||||
@ -76,7 +100,7 @@ public:
|
||||
explicit PDFAscii85DecodeFilter() = default;
|
||||
virtual ~PDFAscii85DecodeFilter() override = default;
|
||||
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||
};
|
||||
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFLzwDecodeFilter : public PDFStreamFilter
|
||||
@ -85,7 +109,7 @@ public:
|
||||
explicit PDFLzwDecodeFilter() = default;
|
||||
virtual ~PDFLzwDecodeFilter() override = default;
|
||||
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||
};
|
||||
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFFlateDecodeFilter : public PDFStreamFilter
|
||||
@ -94,7 +118,7 @@ public:
|
||||
explicit PDFFlateDecodeFilter() = default;
|
||||
virtual ~PDFFlateDecodeFilter() override = default;
|
||||
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||
};
|
||||
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFRunLengthDecodeFilter : public PDFStreamFilter
|
||||
@ -103,7 +127,7 @@ public:
|
||||
explicit PDFRunLengthDecodeFilter() = default;
|
||||
virtual ~PDFRunLengthDecodeFilter() override = default;
|
||||
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
||||
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "pdfconstants.h"
|
||||
#include "pdfexception.h"
|
||||
#include "pdfparser.h"
|
||||
#include "pdfstreamfilters.h"
|
||||
|
||||
#include <stack>
|
||||
|
||||
@ -27,7 +28,7 @@ namespace pdf
|
||||
|
||||
void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset)
|
||||
{
|
||||
PDFParser parser(byteArray, context, PDFParser::None);
|
||||
PDFParser parser(byteArray, context, PDFParser::AllowStreams);
|
||||
|
||||
m_entries.clear();
|
||||
|
||||
@ -96,6 +97,11 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
|
||||
throw PDFParserException(tr("Bad format of reference table entry."));
|
||||
}
|
||||
|
||||
if (static_cast<size_t>(objectNumber) >= m_entries.size())
|
||||
{
|
||||
throw PDFParserException(tr("Bad format of reference table entry."));
|
||||
}
|
||||
|
||||
Entry entry;
|
||||
if (occupied)
|
||||
{
|
||||
@ -137,13 +143,188 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
|
||||
workSet.push(previousOffset.getInteger());
|
||||
}
|
||||
|
||||
if (dictionary->hasKey(PDF_XREF_TRAILER_XREFSTM))
|
||||
const PDFObject& xrefstmObject = dictionary->get(PDF_XREF_TRAILER_XREFSTM);
|
||||
if (xrefstmObject.isInt())
|
||||
{
|
||||
throw PDFParserException(tr("Hybrid reference tables not supported."));
|
||||
workSet.push(xrefstmObject.getInteger());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Try to read cross-reference stream
|
||||
PDFObject crossReferenceStreamObjectNumber = parser.getObject();
|
||||
PDFObject crossReferenceStreamGeneration = parser.getObject();
|
||||
|
||||
if (!crossReferenceStreamObjectNumber.isInt() || !crossReferenceStreamGeneration.isInt())
|
||||
{
|
||||
throw PDFParserException(tr("Invalid format of reference table."));
|
||||
}
|
||||
|
||||
if (!parser.fetchCommand(PDF_OBJECT_START_MARK))
|
||||
{
|
||||
throw PDFParserException(tr("Invalid format of reference table."));
|
||||
}
|
||||
|
||||
PDFObject crossReferenceObject = parser.getObject();
|
||||
|
||||
if (!parser.fetchCommand(PDF_OBJECT_END_MARK))
|
||||
{
|
||||
throw PDFParserException(tr("Invalid format of reference table."));
|
||||
}
|
||||
|
||||
if (crossReferenceObject.isStream())
|
||||
{
|
||||
const PDFStream* crossReferenceStream = crossReferenceObject.getStream();
|
||||
const PDFDictionary* crossReferenceStreamDictionary = crossReferenceStream->getDictionary();
|
||||
const PDFObject typeObject = crossReferenceStreamDictionary->get("Type");
|
||||
if (typeObject.isName() && typeObject.getString() == "XRef")
|
||||
{
|
||||
PDFObject sizeObject = crossReferenceStreamDictionary->get("Size");
|
||||
if (!sizeObject.isInt() || sizeObject.getInteger() < 0)
|
||||
{
|
||||
throw PDFParserException(tr("Invalid format of cross-reference stream."));
|
||||
}
|
||||
|
||||
const PDFInteger desiredSize = sizeObject.getInteger();
|
||||
if (static_cast<PDFInteger>(m_entries.size()) < desiredSize)
|
||||
{
|
||||
m_entries.resize(desiredSize);
|
||||
}
|
||||
|
||||
PDFObject prevObject = crossReferenceStreamDictionary->get("Prev");
|
||||
if (prevObject.isInt())
|
||||
{
|
||||
workSet.push(prevObject.getInteger());
|
||||
}
|
||||
|
||||
// Do not overwrite trailer dictionary, if it was already loaded.
|
||||
if (m_trailerDictionary.isNull())
|
||||
{
|
||||
m_trailerDictionary = crossReferenceObject;
|
||||
}
|
||||
|
||||
auto readIntegerArray = [crossReferenceStreamDictionary](const char* key, auto defaultValues) -> std::vector<PDFInteger>
|
||||
{
|
||||
std::vector<PDFInteger> result;
|
||||
|
||||
const PDFObject& object = crossReferenceStreamDictionary->get(key);
|
||||
if (object.isArray())
|
||||
{
|
||||
const PDFArray* array = object.getArray();
|
||||
result.reserve(array->getCount());
|
||||
|
||||
for (size_t i = 0, count = array->getCount(); i < count; ++i)
|
||||
{
|
||||
const PDFObject& itemObject = array->getItem(i);
|
||||
if (itemObject.isInt())
|
||||
{
|
||||
result.push_back(itemObject.getInteger());
|
||||
}
|
||||
else
|
||||
{
|
||||
throw PDFParserException(tr("Invalid format of cross-reference stream."));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
result = defaultValues;
|
||||
}
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
std::vector<PDFInteger> indexArray = readIntegerArray("Index", std::initializer_list<PDFInteger>{ PDFInteger(0), PDFInteger(desiredSize) });
|
||||
std::vector<PDFInteger> wArray = readIntegerArray("W", std::vector<PDFInteger>());
|
||||
|
||||
if (wArray.size() != 3 || indexArray.empty() || (indexArray.size() % 2 != 0))
|
||||
{
|
||||
throw PDFParserException(tr("Invalid format of cross-reference stream."));
|
||||
}
|
||||
|
||||
const int columnTypeBytes = wArray[0];
|
||||
const int columnObjectNumberOrByteOffsetBytes = wArray[1];
|
||||
const int columnGenerationNumberOrObjectIndexBytes = wArray[2];
|
||||
const size_t blockCount = indexArray.size() / 2;
|
||||
|
||||
QByteArray data = PDFStreamFilterStorage::getDecodedStream(crossReferenceStream);
|
||||
QDataStream dataStream(&data, QIODevice::ReadOnly);
|
||||
dataStream.setByteOrder(QDataStream::BigEndian);
|
||||
|
||||
auto readNumber = [&dataStream](int bytes, PDFInteger defaultValue) -> PDFInteger
|
||||
{
|
||||
if (bytes)
|
||||
{
|
||||
uint64_t value = 0;
|
||||
|
||||
while (bytes--)
|
||||
{
|
||||
uint8_t byte = 0;
|
||||
dataStream >> byte;
|
||||
value = (value << 8) + byte;
|
||||
|
||||
// Check that the stream is OK (we don't read past the end of the stream,
|
||||
// data aren't corrupted etc.)
|
||||
if (dataStream.status() != QDataStream::Ok)
|
||||
{
|
||||
throw PDFParserException(tr("Invalid format of cross-reference stream - not enough data in the stream."));
|
||||
}
|
||||
}
|
||||
|
||||
return static_cast<PDFInteger>(value);
|
||||
}
|
||||
return defaultValue;
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < blockCount; ++i)
|
||||
{
|
||||
PDFInteger firstObjectNumber = indexArray[2 * i];
|
||||
PDFInteger count = indexArray[2 * i + 1];
|
||||
|
||||
const PDFInteger lastObjectIndex = firstObjectNumber + count - 1;
|
||||
const PDFInteger desiredSize = lastObjectIndex + 1;
|
||||
|
||||
if (static_cast<PDFInteger>(m_entries.size()) < desiredSize)
|
||||
{
|
||||
m_entries.resize(desiredSize);
|
||||
}
|
||||
|
||||
for (PDFInteger objectNumber = firstObjectNumber; objectNumber <= lastObjectIndex; ++ objectNumber)
|
||||
{
|
||||
int itemType = readNumber(columnTypeBytes, 1);
|
||||
int itemObjectNumberOfObjectStreamOrByteOffset = readNumber(columnObjectNumberOrByteOffsetBytes, 0);
|
||||
int itemGenerationNumberOrObjectIndex = readNumber(columnGenerationNumberOrObjectIndexBytes, 0);
|
||||
|
||||
switch (itemType)
|
||||
{
|
||||
case 0:
|
||||
// Free object
|
||||
break;
|
||||
case 1:
|
||||
{
|
||||
Entry entry;
|
||||
entry.reference = PDFObjectReference(objectNumber, itemGenerationNumberOrObjectIndex);
|
||||
entry.offset = itemObjectNumberOfObjectStreamOrByteOffset;
|
||||
entry.type = EntryType::Occupied;
|
||||
|
||||
if (m_entries[objectNumber].type == EntryType::Free)
|
||||
{
|
||||
m_entries[objectNumber] = std::move(entry);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
default:
|
||||
// According to the specification, treat this object as null object
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
throw PDFParserException(tr("Invalid format of reference table."));
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user