mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-02-11 01:00:35 +01:00
Parsing cross-reference streams
This commit is contained in:
parent
48f4a24923
commit
8c130ca013
@ -42,104 +42,33 @@ static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN = "Unknown"
|
|||||||
|
|
||||||
QByteArray PDFDocument::getDecodedStream(const PDFStream* stream) const
|
QByteArray PDFDocument::getDecodedStream(const PDFStream* stream) const
|
||||||
{
|
{
|
||||||
const PDFDictionary* dictionary = stream->getDictionary();
|
return PDFStreamFilterStorage::getDecodedStream(stream, std::bind(&PDFDocument::getObject, this, std::placeholders::_1));
|
||||||
|
}
|
||||||
|
|
||||||
// Retrieve filters
|
const PDFDictionary* PDFDocument::getTrailerDictionary() const
|
||||||
PDFObject filters;
|
{
|
||||||
if (dictionary->hasKey(PDF_STREAM_DICT_FILTER))
|
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
|
||||||
|
|
||||||
|
// Trailer object should be dictionary/stream here. It is verified in the document reader.
|
||||||
|
Q_ASSERT(trailerDictionary.isDictionary() || trailerDictionary.isStream());
|
||||||
|
|
||||||
|
if (trailerDictionary.isDictionary())
|
||||||
{
|
{
|
||||||
filters = getObject(dictionary->get(PDF_STREAM_DICT_FILTER));
|
return trailerDictionary.getDictionary();
|
||||||
}
|
}
|
||||||
else if (dictionary->hasKey(PDF_STREAM_DICT_FILE_FILTER))
|
else if (trailerDictionary.isStream())
|
||||||
{
|
{
|
||||||
filters = getObject(dictionary->get(PDF_STREAM_DICT_FILE_FILTER));
|
return trailerDictionary.getStream()->getDictionary();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retrieve filter parameters
|
return nullptr;
|
||||||
PDFObject filterParameters;
|
|
||||||
if (dictionary->hasKey(PDF_STREAM_DICT_DECODE_PARMS))
|
|
||||||
{
|
|
||||||
filterParameters = getObject(dictionary->get(PDF_STREAM_DICT_DECODE_PARMS));
|
|
||||||
}
|
|
||||||
else if (dictionary->hasKey(PDF_STREAM_DICT_FDECODE_PARMS))
|
|
||||||
{
|
|
||||||
filterParameters = getObject(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<const PDFStreamFilter*> filterObjects;
|
|
||||||
std::vector<PDFObject> filterParameterObjects;
|
|
||||||
|
|
||||||
if (filters.isName())
|
|
||||||
{
|
|
||||||
filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
|
|
||||||
}
|
|
||||||
else if (filters.isArray())
|
|
||||||
{
|
|
||||||
const PDFArray* filterArray = filters.getArray();
|
|
||||||
const size_t filterCount = filterArray->getCount();
|
|
||||||
for (size_t i = 0; i < filterCount; ++i)
|
|
||||||
{
|
|
||||||
const PDFObject& object = getObject(filterArray->getItem(i));
|
|
||||||
if (object.isName())
|
|
||||||
{
|
|
||||||
filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString()));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return QByteArray();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (!filters.isNull())
|
|
||||||
{
|
|
||||||
return QByteArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (filterParameters.isArray())
|
|
||||||
{
|
|
||||||
const PDFArray* filterParameterArray = filterParameters.getArray();
|
|
||||||
const size_t filterParameterCount = filterParameterArray->getCount();
|
|
||||||
for (size_t i = 0; i < filterParameterCount; ++i)
|
|
||||||
{
|
|
||||||
const PDFObject& object = getObject(filterParameterArray->getItem(i));
|
|
||||||
filterParameterObjects.push_back(object);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
filterParameterObjects.push_back(filterParameters);
|
|
||||||
}
|
|
||||||
|
|
||||||
filterParameterObjects.resize(filterObjects.size());
|
|
||||||
std::reverse(filterObjects.begin(), filterObjects.end());
|
|
||||||
std::reverse(filterParameterObjects.begin(), filterParameterObjects.end());
|
|
||||||
|
|
||||||
QByteArray result = *stream->getContent();
|
|
||||||
|
|
||||||
for (size_t i = 0, count = filterObjects.size(); i < count; ++i)
|
|
||||||
{
|
|
||||||
const PDFStreamFilter* streamFilter = filterObjects[i];
|
|
||||||
const PDFObject& streamFilterParameters = filterParameterObjects[i];
|
|
||||||
|
|
||||||
if (streamFilter)
|
|
||||||
{
|
|
||||||
result = streamFilter->apply(result, this, streamFilterParameters);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PDFDocument::init()
|
void PDFDocument::init()
|
||||||
{
|
{
|
||||||
initInfo();
|
initInfo();
|
||||||
|
|
||||||
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
|
const PDFDictionary* dictionary = getTrailerDictionary();
|
||||||
|
|
||||||
// Trailer object should be dictionary here. It is verified in the document reader.
|
|
||||||
Q_ASSERT(trailerDictionary.isDictionary());
|
|
||||||
|
|
||||||
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
|
|
||||||
Q_ASSERT(dictionary);
|
Q_ASSERT(dictionary);
|
||||||
|
|
||||||
m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this);
|
m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this);
|
||||||
@ -147,12 +76,8 @@ void PDFDocument::init()
|
|||||||
|
|
||||||
void PDFDocument::initInfo()
|
void PDFDocument::initInfo()
|
||||||
{
|
{
|
||||||
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
|
|
||||||
|
|
||||||
// Trailer object should be dictionary here. It is verified in the document reader.
|
// Trailer object should be a dictionary or a stream here (its dictionary is used). It is verified in the document reader.
|
||||||
Q_ASSERT(trailerDictionary.isDictionary());
|
const PDFDictionary* dictionary = getTrailerDictionary();
|
||||||
|
|
||||||
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
|
|
||||||
Q_ASSERT(dictionary);
|
Q_ASSERT(dictionary);
|
||||||
|
|
||||||
if (dictionary->hasKey(PDF_DOCUMENT_INFO_ENTRY))
|
if (dictionary->hasKey(PDF_DOCUMENT_INFO_ENTRY))
|
||||||
|
@ -317,6 +317,9 @@ public:
|
|||||||
/// \param stream Stream to be decoded
|
/// \param stream Stream to be decoded
|
||||||
QByteArray getDecodedStream(const PDFStream* stream) const;
|
QByteArray getDecodedStream(const PDFStream* stream) const;
|
||||||
|
|
||||||
|
/// Returns the trailer dictionary
|
||||||
|
const PDFDictionary* getTrailerDictionary() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class PDFDocumentReader;
|
friend class PDFDocumentReader;
|
||||||
|
|
||||||
|
@ -796,7 +796,7 @@ PDFObject PDFParser::getObject()
|
|||||||
error(tr("Stream length is not specified."));
|
error(tr("Stream length is not specified."));
|
||||||
}
|
}
|
||||||
|
|
||||||
PDFObject lengthObject = m_context->getObject(dictionary->get(PDF_STREAM_DICT_LENGTH));
|
PDFObject lengthObject = m_context ? m_context->getObject(dictionary->get(PDF_STREAM_DICT_LENGTH)) : dictionary->get(PDF_STREAM_DICT_LENGTH);
|
||||||
if (!lengthObject.isInt())
|
if (!lengthObject.isInt())
|
||||||
{
|
{
|
||||||
error(tr("Bad value of stream length. It should be an integer number."));
|
error(tr("Bad value of stream length. It should be an integer number."));
|
||||||
@ -817,7 +817,7 @@ PDFObject PDFParser::getObject()
|
|||||||
// from the external file.
|
// from the external file.
|
||||||
if (dictionary->hasKey(PDF_STREAM_DICT_FILE_SPECIFICATION))
|
if (dictionary->hasKey(PDF_STREAM_DICT_FILE_SPECIFICATION))
|
||||||
{
|
{
|
||||||
PDFObject fileName = m_context->getObject(dictionary->get(PDF_STREAM_DICT_FILE_SPECIFICATION));
|
PDFObject fileName = m_context ? m_context->getObject(dictionary->get(PDF_STREAM_DICT_FILE_SPECIFICATION)) : dictionary->get(PDF_STREAM_DICT_FILE_SPECIFICATION);
|
||||||
|
|
||||||
if (!fileName.isString())
|
if (!fileName.isString())
|
||||||
{
|
{
|
||||||
|
@ -16,8 +16,8 @@
|
|||||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
#include "pdfstreamfilters.h"
|
#include "pdfstreamfilters.h"
|
||||||
#include "pdfdocument.h"
|
|
||||||
#include "pdfexception.h"
|
#include "pdfexception.h"
|
||||||
|
#include "pdfconstants.h"
|
||||||
#include "pdfparser.h"
|
#include "pdfparser.h"
|
||||||
|
|
||||||
#include <QtEndian>
|
#include <QtEndian>
|
||||||
@ -25,9 +25,9 @@
|
|||||||
namespace pdf
|
namespace pdf
|
||||||
{
|
{
|
||||||
|
|
||||||
QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||||
{
|
{
|
||||||
Q_UNUSED(document);
|
Q_UNUSED(objectFetcher);
|
||||||
Q_UNUSED(parameters);
|
Q_UNUSED(parameters);
|
||||||
|
|
||||||
const int indexOfEnd = data.indexOf('>');
|
const int indexOfEnd = data.indexOf('>');
|
||||||
@ -50,9 +50,9 @@ QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data, const PDFDocum
|
|||||||
return QByteArray::fromHex(QByteArray::fromRawData(data.constData(), size));
|
return QByteArray::fromHex(QByteArray::fromRawData(data.constData(), size));
|
||||||
}
|
}
|
||||||
|
|
||||||
QByteArray PDFAscii85DecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
QByteArray PDFAscii85DecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||||
{
|
{
|
||||||
Q_UNUSED(document);
|
Q_UNUSED(objectFetcher);
|
||||||
Q_UNUSED(parameters);
|
Q_UNUSED(parameters);
|
||||||
|
|
||||||
const unsigned char* dataBegin = reinterpret_cast<const unsigned char*>(data.constData());
|
const unsigned char* dataBegin = reinterpret_cast<const unsigned char*>(data.constData());
|
||||||
@ -333,19 +333,28 @@ uint32_t PDFLzwStreamDecoder::getCode()
|
|||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||||
{
|
{
|
||||||
uint32_t early = 1;
|
uint32_t early = 1;
|
||||||
|
|
||||||
const PDFObject& dereferencedParameters = document->getObject(parameters);
|
const PDFObject& dereferencedParameters = objectFetcher(parameters);
|
||||||
if (dereferencedParameters.isDictionary())
|
if (dereferencedParameters.isDictionary())
|
||||||
{
|
{
|
||||||
const PDFDictionary* dictionary = dereferencedParameters.getDictionary();
|
const PDFDictionary* dictionary = dereferencedParameters.getDictionary();
|
||||||
|
|
||||||
PDFDocumentDataLoaderDecorator loader(document);
|
PDFInteger predictor = 1;
|
||||||
early = loader.readInteger(dictionary->get("EarlyChange"), 1);
|
const PDFObject& predictorObject = objectFetcher(dictionary->get("Predictor"));
|
||||||
|
if (predictorObject.isInt())
|
||||||
|
{
|
||||||
|
predictor = predictorObject.getInteger();
|
||||||
|
}
|
||||||
|
|
||||||
|
const PDFObject& earlyChangeObject = objectFetcher(dictionary->get("EarlyChange"));
|
||||||
|
if (earlyChangeObject.isInt())
|
||||||
|
{
|
||||||
|
early = earlyChangeObject.getInteger();
|
||||||
|
}
|
||||||
|
|
||||||
PDFInteger predictor = loader.readInteger(dictionary->get("Predictor"), 1);
|
|
||||||
if (predictor != 1)
|
if (predictor != 1)
|
||||||
{
|
{
|
||||||
// TODO: Implement Predictor algorithm
|
// TODO: Implement Predictor algorithm
|
||||||
@ -357,15 +366,19 @@ QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data, const PDFDocument*
|
|||||||
return decoder.decompress();
|
return decoder.decompress();
|
||||||
}
|
}
|
||||||
|
|
||||||
QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||||
{
|
{
|
||||||
const PDFObject& dereferencedParameters = document->getObject(parameters);
|
const PDFObject& dereferencedParameters = objectFetcher(parameters);
|
||||||
if (dereferencedParameters.isDictionary())
|
if (dereferencedParameters.isDictionary())
|
||||||
{
|
{
|
||||||
const PDFDictionary* dictionary = dereferencedParameters.getDictionary();
|
const PDFDictionary* dictionary = dereferencedParameters.getDictionary();
|
||||||
|
|
||||||
PDFDocumentDataLoaderDecorator loader(document);
|
PDFInteger predictor = 1;
|
||||||
PDFInteger predictor = loader.readInteger(dictionary->get("Predictor"), 1);
|
const PDFObject& predictorObject = objectFetcher(dictionary->get("Predictor"));
|
||||||
|
if (predictorObject.isInt())
|
||||||
|
{
|
||||||
|
predictor = predictorObject.getInteger();
|
||||||
|
}
|
||||||
|
|
||||||
if (predictor != 1)
|
if (predictor != 1)
|
||||||
{
|
{
|
||||||
@ -385,9 +398,9 @@ QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data, const PDFDocument
|
|||||||
return qUncompress(dataToUncompress);
|
return qUncompress(dataToUncompress);
|
||||||
}
|
}
|
||||||
|
|
||||||
QByteArray PDFRunLengthDecodeFilter::apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const
|
QByteArray PDFRunLengthDecodeFilter::apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const
|
||||||
{
|
{
|
||||||
Q_UNUSED(document);
|
Q_UNUSED(objectFetcher);
|
||||||
Q_UNUSED(parameters);
|
Q_UNUSED(parameters);
|
||||||
|
|
||||||
QByteArray result;
|
QByteArray result;
|
||||||
@ -439,6 +452,101 @@ const PDFStreamFilter* PDFStreamFilterStorage::getFilter(const QByteArray& filte
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decodes the content of the stream by running it through the filters named
/// in the stream's dictionary.
///
/// The filter list is read from the PDF_STREAM_DICT_FILTER entry (falling back to
/// PDF_STREAM_DICT_FILE_FILTER), the per-filter parameters from
/// PDF_STREAM_DICT_DECODE_PARMS (falling back to PDF_STREAM_DICT_FDECODE_PARMS).
/// Filters are applied in the order in which they are listed, because the PDF
/// specification requires multiple filters to be specified in the order in which
/// they are to be applied; the i-th parameter object belongs to the i-th filter.
///
/// \param stream Stream containing the data
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
/// \returns Decoded data, or empty byte array, if the filter entry is neither
///          a name, an array of names, nor null.
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher)
{
    const PDFDictionary* dictionary = stream->getDictionary();

    // Retrieve filters
    PDFObject filters;
    if (dictionary->hasKey(PDF_STREAM_DICT_FILTER))
    {
        filters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FILTER));
    }
    else if (dictionary->hasKey(PDF_STREAM_DICT_FILE_FILTER))
    {
        filters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FILE_FILTER));
    }

    // Retrieve filter parameters
    PDFObject filterParameters;
    if (dictionary->hasKey(PDF_STREAM_DICT_DECODE_PARMS))
    {
        filterParameters = objectFetcher(dictionary->get(PDF_STREAM_DICT_DECODE_PARMS));
    }
    else if (dictionary->hasKey(PDF_STREAM_DICT_FDECODE_PARMS))
    {
        filterParameters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS));
    }

    std::vector<const PDFStreamFilter*> filterObjects;
    std::vector<PDFObject> filterParameterObjects;

    if (filters.isName())
    {
        filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
    }
    else if (filters.isArray())
    {
        const PDFArray* filterArray = filters.getArray();
        const size_t filterCount = filterArray->getCount();
        filterObjects.reserve(filterCount);

        for (size_t i = 0; i < filterCount; ++i)
        {
            const PDFObject& object = objectFetcher(filterArray->getItem(i));
            if (object.isName())
            {
                filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString()));
            }
            else
            {
                // Filter array must consist of names only
                return QByteArray();
            }
        }
    }
    else if (!filters.isNull())
    {
        // Filter entry is present, but it is neither a name nor an array
        return QByteArray();
    }

    if (filterParameters.isArray())
    {
        const PDFArray* filterParameterArray = filterParameters.getArray();
        const size_t filterParameterCount = filterParameterArray->getCount();
        filterParameterObjects.reserve(filterParameterCount);

        for (size_t i = 0; i < filterParameterCount; ++i)
        {
            const PDFObject& object = objectFetcher(filterParameterArray->getItem(i));
            filterParameterObjects.push_back(object);
        }
    }
    else
    {
        filterParameterObjects.push_back(filterParameters);
    }

    // Exactly one parameter object per filter: missing ones become default-constructed
    // objects, superfluous ones are dropped. The lists are intentionally NOT reversed -
    // the filter array is already in decoding order.
    filterParameterObjects.resize(filterObjects.size());

    QByteArray result = *stream->getContent();

    for (size_t i = 0, count = filterObjects.size(); i < count; ++i)
    {
        const PDFStreamFilter* streamFilter = filterObjects[i];
        const PDFObject& streamFilterParameters = filterParameterObjects[i];

        // Filters, which are not known to the storage (null pointer), are skipped
        if (streamFilter)
        {
            result = streamFilter->apply(result, objectFetcher, streamFilterParameters);
        }
    }

    return result;
}
|
||||||
|
|
||||||
|
/// Decodes the stream without any object fetching: the fetcher handed to the
/// two-argument overload returns every object exactly as it was passed in.
/// \param stream Stream containing the data
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream)
{
    const PDFObjectFetcher identityFetcher = [](const PDFObject& object) -> const PDFObject& { return object; };
    return getDecodedStream(stream, identityFetcher);
}
|
||||||
|
|
||||||
PDFStreamFilterStorage::PDFStreamFilterStorage()
|
PDFStreamFilterStorage::PDFStreamFilterStorage()
|
||||||
{
|
{
|
||||||
// Initialize map with the filters
|
// Initialize map with the filters
|
||||||
|
@ -23,12 +23,14 @@
|
|||||||
#include <QByteArray>
|
#include <QByteArray>
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
namespace pdf
|
namespace pdf
|
||||||
{
|
{
|
||||||
class PDFDocument;
|
|
||||||
class PDFStreamFilter;
|
class PDFStreamFilter;
|
||||||
|
|
||||||
|
using PDFObjectFetcher = std::function<const PDFObject&(const PDFObject&)>;
|
||||||
|
|
||||||
/// Storage for stream filters. Can retrieve stream filters by name. Using singleton
|
/// Storage for stream filters. Can retrieve stream filters by name. Using singleton
|
||||||
/// design pattern. Use static methods to retrieve filters.
|
/// design pattern. Use static methods to retrieve filters.
|
||||||
class PDFStreamFilterStorage
|
class PDFStreamFilterStorage
|
||||||
@ -39,6 +41,16 @@ public:
|
|||||||
/// \param filterName Name of the filter to be retrieved.
|
/// \param filterName Name of the filter to be retrieved.
|
||||||
static const PDFStreamFilter* getFilter(const QByteArray& filterName);
|
static const PDFStreamFilter* getFilter(const QByteArray& filterName);
|
||||||
|
|
||||||
|
/// Returns decoded data from the stream
|
||||||
|
/// \param stream Stream containing the data
|
||||||
|
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
|
||||||
|
static QByteArray getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher);
|
||||||
|
|
||||||
|
/// Returns decoded data from the stream, without object fetching
|
||||||
|
/// \param stream Stream containing the data
|
||||||
|
/// \note No object fetcher is used by this overload; objects in the stream dictionary are used as they are
|
||||||
|
static QByteArray getDecodedStream(const PDFStream* stream);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
explicit PDFStreamFilterStorage();
|
explicit PDFStreamFilterStorage();
|
||||||
|
|
||||||
@ -58,7 +70,19 @@ public:
|
|||||||
explicit PDFStreamFilter() = default;
|
explicit PDFStreamFilter() = default;
|
||||||
virtual ~PDFStreamFilter() = default;
|
virtual ~PDFStreamFilter() = default;
|
||||||
|
|
||||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const = 0;
|
/// Apply with object fetcher
|
||||||
|
/// \param data Stream data to be decoded
|
||||||
|
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
|
||||||
|
/// \param parameters Stream parameters
|
||||||
|
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const = 0;
|
||||||
|
|
||||||
|
/// Apply without object fetcher - it assumes no references exists in the streams dictionary
|
||||||
|
/// \param data Stream data to be decoded
|
||||||
|
/// \param parameters Stream parameters
|
||||||
|
inline QByteArray apply(const QByteArray& data, const PDFObject& parameters) const
|
||||||
|
{
|
||||||
|
return apply(data, [](const PDFObject& object) -> const PDFObject& { return object; }, parameters);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class PDFFORQTLIBSHARED_EXPORT PDFAsciiHexDecodeFilter : public PDFStreamFilter
|
class PDFFORQTLIBSHARED_EXPORT PDFAsciiHexDecodeFilter : public PDFStreamFilter
|
||||||
@ -67,7 +91,7 @@ public:
|
|||||||
explicit PDFAsciiHexDecodeFilter() = default;
|
explicit PDFAsciiHexDecodeFilter() = default;
|
||||||
virtual ~PDFAsciiHexDecodeFilter() override = default;
|
virtual ~PDFAsciiHexDecodeFilter() override = default;
|
||||||
|
|
||||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PDFFORQTLIBSHARED_EXPORT PDFAscii85DecodeFilter : public PDFStreamFilter
|
class PDFFORQTLIBSHARED_EXPORT PDFAscii85DecodeFilter : public PDFStreamFilter
|
||||||
@ -76,7 +100,7 @@ public:
|
|||||||
explicit PDFAscii85DecodeFilter() = default;
|
explicit PDFAscii85DecodeFilter() = default;
|
||||||
virtual ~PDFAscii85DecodeFilter() override = default;
|
virtual ~PDFAscii85DecodeFilter() override = default;
|
||||||
|
|
||||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PDFFORQTLIBSHARED_EXPORT PDFLzwDecodeFilter : public PDFStreamFilter
|
class PDFFORQTLIBSHARED_EXPORT PDFLzwDecodeFilter : public PDFStreamFilter
|
||||||
@ -85,7 +109,7 @@ public:
|
|||||||
explicit PDFLzwDecodeFilter() = default;
|
explicit PDFLzwDecodeFilter() = default;
|
||||||
virtual ~PDFLzwDecodeFilter() override = default;
|
virtual ~PDFLzwDecodeFilter() override = default;
|
||||||
|
|
||||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PDFFORQTLIBSHARED_EXPORT PDFFlateDecodeFilter : public PDFStreamFilter
|
class PDFFORQTLIBSHARED_EXPORT PDFFlateDecodeFilter : public PDFStreamFilter
|
||||||
@ -94,7 +118,7 @@ public:
|
|||||||
explicit PDFFlateDecodeFilter() = default;
|
explicit PDFFlateDecodeFilter() = default;
|
||||||
virtual ~PDFFlateDecodeFilter() override = default;
|
virtual ~PDFFlateDecodeFilter() override = default;
|
||||||
|
|
||||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PDFFORQTLIBSHARED_EXPORT PDFRunLengthDecodeFilter : public PDFStreamFilter
|
class PDFFORQTLIBSHARED_EXPORT PDFRunLengthDecodeFilter : public PDFStreamFilter
|
||||||
@ -103,7 +127,7 @@ public:
|
|||||||
explicit PDFRunLengthDecodeFilter() = default;
|
explicit PDFRunLengthDecodeFilter() = default;
|
||||||
virtual ~PDFRunLengthDecodeFilter() override = default;
|
virtual ~PDFRunLengthDecodeFilter() override = default;
|
||||||
|
|
||||||
virtual QByteArray apply(const QByteArray& data, const PDFDocument* document, const PDFObject& parameters) const override;
|
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#include "pdfconstants.h"
|
#include "pdfconstants.h"
|
||||||
#include "pdfexception.h"
|
#include "pdfexception.h"
|
||||||
#include "pdfparser.h"
|
#include "pdfparser.h"
|
||||||
|
#include "pdfstreamfilters.h"
|
||||||
|
|
||||||
#include <stack>
|
#include <stack>
|
||||||
|
|
||||||
@ -27,7 +28,7 @@ namespace pdf
|
|||||||
|
|
||||||
void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset)
|
void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset)
|
||||||
{
|
{
|
||||||
PDFParser parser(byteArray, context, PDFParser::None);
|
PDFParser parser(byteArray, context, PDFParser::AllowStreams);
|
||||||
|
|
||||||
m_entries.clear();
|
m_entries.clear();
|
||||||
|
|
||||||
@ -96,6 +97,11 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
|
|||||||
throw PDFParserException(tr("Bad format of reference table entry."));
|
throw PDFParserException(tr("Bad format of reference table entry."));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (static_cast<size_t>(objectNumber) >= m_entries.size())
|
||||||
|
{
|
||||||
|
throw PDFParserException(tr("Bad format of reference table entry."));
|
||||||
|
}
|
||||||
|
|
||||||
Entry entry;
|
Entry entry;
|
||||||
if (occupied)
|
if (occupied)
|
||||||
{
|
{
|
||||||
@ -137,13 +143,188 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
|
|||||||
workSet.push(previousOffset.getInteger());
|
workSet.push(previousOffset.getInteger());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictionary->hasKey(PDF_XREF_TRAILER_XREFSTM))
|
const PDFObject& xrefstmObject = dictionary->get(PDF_XREF_TRAILER_XREFSTM);
|
||||||
|
if (xrefstmObject.isInt())
|
||||||
{
|
{
|
||||||
throw PDFParserException(tr("Hybrid reference tables not supported."));
|
workSet.push(xrefstmObject.getInteger());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// Try to read cross-reference stream
|
||||||
|
PDFObject crossReferenceStreamObjectNumber = parser.getObject();
|
||||||
|
PDFObject crossReferenceStreamGeneration = parser.getObject();
|
||||||
|
|
||||||
|
if (!crossReferenceStreamObjectNumber.isInt() || !crossReferenceStreamGeneration.isInt())
|
||||||
|
{
|
||||||
|
throw PDFParserException(tr("Invalid format of reference table."));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!parser.fetchCommand(PDF_OBJECT_START_MARK))
|
||||||
|
{
|
||||||
|
throw PDFParserException(tr("Invalid format of reference table."));
|
||||||
|
}
|
||||||
|
|
||||||
|
PDFObject crossReferenceObject = parser.getObject();
|
||||||
|
|
||||||
|
if (!parser.fetchCommand(PDF_OBJECT_END_MARK))
|
||||||
|
{
|
||||||
|
throw PDFParserException(tr("Invalid format of reference table."));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (crossReferenceObject.isStream())
|
||||||
|
{
|
||||||
|
const PDFStream* crossReferenceStream = crossReferenceObject.getStream();
|
||||||
|
const PDFDictionary* crossReferenceStreamDictionary = crossReferenceStream->getDictionary();
|
||||||
|
const PDFObject typeObject = crossReferenceStreamDictionary->get("Type");
|
||||||
|
if (typeObject.isName() && typeObject.getString() == "XRef")
|
||||||
|
{
|
||||||
|
PDFObject sizeObject = crossReferenceStreamDictionary->get("Size");
|
||||||
|
if (!sizeObject.isInt() || sizeObject.getInteger() < 0)
|
||||||
|
{
|
||||||
|
throw PDFParserException(tr("Invalid format of cross-reference stream."));
|
||||||
|
}
|
||||||
|
|
||||||
|
const PDFInteger desiredSize = sizeObject.getInteger();
|
||||||
|
if (static_cast<PDFInteger>(m_entries.size()) < desiredSize)
|
||||||
|
{
|
||||||
|
m_entries.resize(desiredSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
PDFObject prevObject = crossReferenceStreamDictionary->get("Prev");
|
||||||
|
if (prevObject.isInt())
|
||||||
|
{
|
||||||
|
workSet.push(prevObject.getInteger());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do not overwrite trailer dictionary, if it was already loaded.
|
||||||
|
if (m_trailerDictionary.isNull())
|
||||||
|
{
|
||||||
|
m_trailerDictionary = crossReferenceObject;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto readIntegerArray = [crossReferenceStreamDictionary](const char* key, auto defaultValues) -> std::vector<PDFInteger>
|
||||||
|
{
|
||||||
|
std::vector<PDFInteger> result;
|
||||||
|
|
||||||
|
const PDFObject& object = crossReferenceStreamDictionary->get(key);
|
||||||
|
if (object.isArray())
|
||||||
|
{
|
||||||
|
const PDFArray* array = object.getArray();
|
||||||
|
result.reserve(array->getCount());
|
||||||
|
|
||||||
|
for (size_t i = 0, count = array->getCount(); i < count; ++i)
|
||||||
|
{
|
||||||
|
const PDFObject& itemObject = array->getItem(i);
|
||||||
|
if (itemObject.isInt())
|
||||||
|
{
|
||||||
|
result.push_back(itemObject.getInteger());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw PDFParserException(tr("Invalid format of cross-reference stream."));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result = defaultValues;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<PDFInteger> indexArray = readIntegerArray("Index", std::initializer_list<PDFInteger>{ PDFInteger(0), PDFInteger(desiredSize) });
|
||||||
|
std::vector<PDFInteger> wArray = readIntegerArray("W", std::vector<PDFInteger>());
|
||||||
|
|
||||||
|
if (wArray.size() != 3 || indexArray.empty() || (indexArray.size() % 2 != 0))
|
||||||
|
{
|
||||||
|
throw PDFParserException(tr("Invalid format of cross-reference stream."));
|
||||||
|
}
|
||||||
|
|
||||||
|
const int columnTypeBytes = wArray[0];
|
||||||
|
const int columnObjectNumberOrByteOffsetBytes = wArray[1];
|
||||||
|
const int columnGenerationNumberOrObjectIndexBytes = wArray[2];
|
||||||
|
const size_t blockCount = indexArray.size() / 2;
|
||||||
|
|
||||||
|
QByteArray data = PDFStreamFilterStorage::getDecodedStream(crossReferenceStream);
|
||||||
|
QDataStream dataStream(&data, QIODevice::ReadOnly);
|
||||||
|
dataStream.setByteOrder(QDataStream::BigEndian);
|
||||||
|
|
||||||
|
auto readNumber = [&dataStream](int bytes, PDFInteger defaultValue) -> PDFInteger
|
||||||
|
{
|
||||||
|
if (bytes)
|
||||||
|
{
|
||||||
|
uint64_t value = 0;
|
||||||
|
|
||||||
|
while (bytes--)
|
||||||
|
{
|
||||||
|
uint8_t byte = 0;
|
||||||
|
dataStream >> byte;
|
||||||
|
value = (value << 8) + byte;
|
||||||
|
|
||||||
|
// Check that the stream is OK (we did not read past the end of the stream,
|
||||||
|
// data aren't corrupted etc.)
|
||||||
|
if (dataStream.status() != QDataStream::Ok)
|
||||||
|
{
|
||||||
|
throw PDFParserException(tr("Invalid format of cross-reference stream - not enough data in the stream."));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return static_cast<PDFInteger>(value);
|
||||||
|
}
|
||||||
|
return defaultValue;
|
||||||
|
};
|
||||||
|
|
||||||
|
for (size_t i = 0; i < blockCount; ++i)
|
||||||
|
{
|
||||||
|
PDFInteger firstObjectNumber = indexArray[2 * i];
|
||||||
|
PDFInteger count = indexArray[2 * i + 1];
|
||||||
|
|
||||||
|
const PDFInteger lastObjectIndex = firstObjectNumber + count - 1;
|
||||||
|
const PDFInteger desiredSize = lastObjectIndex + 1;
|
||||||
|
|
||||||
|
if (static_cast<PDFInteger>(m_entries.size()) < desiredSize)
|
||||||
|
{
|
||||||
|
m_entries.resize(desiredSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (PDFInteger objectNumber = firstObjectNumber; objectNumber <= lastObjectIndex; ++ objectNumber)
|
||||||
|
{
|
||||||
|
int itemType = readNumber(columnTypeBytes, 1);
|
||||||
|
int itemObjectNumberOfObjectStreamOrByteOffset = readNumber(columnObjectNumberOrByteOffsetBytes, 0);
|
||||||
|
int itemGenerationNumberOrObjectIndex = readNumber(columnGenerationNumberOrObjectIndexBytes, 0);
|
||||||
|
|
||||||
|
switch (itemType)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
// Free object
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
{
|
||||||
|
Entry entry;
|
||||||
|
entry.reference = PDFObjectReference(objectNumber, itemGenerationNumberOrObjectIndex);
|
||||||
|
entry.offset = itemObjectNumberOfObjectStreamOrByteOffset;
|
||||||
|
entry.type = EntryType::Occupied;
|
||||||
|
|
||||||
|
if (m_entries[objectNumber].type == EntryType::Free)
|
||||||
|
{
|
||||||
|
m_entries[objectNumber] = std::move(entry);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 2:
|
||||||
|
default:
|
||||||
|
// According to the specification, treat this object as null object
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
throw PDFParserException(tr("Invalid format of reference table."));
|
throw PDFParserException(tr("Invalid format of reference table."));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user