From a9acfa31e3a6e7f491a553129d4b321b619687d9 Mon Sep 17 00:00:00 2001
From: Jakub Melka
Date: Sun, 20 Sep 2020 11:53:46 +0200
Subject: [PATCH] Bugfix - find inline image stream length

---
Reviewer notes (this section is ignored by git am):
 - PDFFlateDecodeFilter::getStreamDataLength is now marked 'override', so a
   signature mismatch with PDFStreamFilter becomes a compile error instead
   of silently falling back to the base implementation (always -1).
 - Conversions to zlib's uInt and from zlib's uLong are spelled out with
   static_cast instead of relying on implicit narrowing.
 - Short comments were added to the inflate loop; hunk headers and the
   diffstat were adjusted for the added lines. The blob ids on the 'index'
   lines of pdfstreamfilters.cpp/.h were not regenerated.
 - The copy under review had lost its line breaks, indentation and all text
   in angle brackets. Indentation, the Bytef/uInt template arguments, the
   scratch buffer size (1024) and the <QPainterPathStroker> context line are
   reconstructed - TODO confirm against the tree before applying.

 .../sources/pdfpagecontentprocessor.cpp       | 18 +++++-
 PdfForQtLib/sources/pdfstreamfilters.cpp      | 60 ++++++++++++++++++++
 PdfForQtLib/sources/pdfstreamfilters.h        | 17 ++++++
 3 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/PdfForQtLib/sources/pdfpagecontentprocessor.cpp b/PdfForQtLib/sources/pdfpagecontentprocessor.cpp
index 4f5f718..9e678a5 100644
--- a/PdfForQtLib/sources/pdfpagecontentprocessor.cpp
+++ b/PdfForQtLib/sources/pdfpagecontentprocessor.cpp
@@ -21,6 +21,7 @@
 #include "pdfimage.h"
 #include "pdfpattern.h"
 #include "pdfexecutionpolicy.h"
+#include "pdfstreamfilters.h"
 
 #include <QPainterPathStroker>
 
@@ -548,8 +549,21 @@ void PDFPageContentProcessor::processContent(const QByteArray& content)
                         }
                         else if (dictionary->hasKey("Filter"))
                         {
-                            // We will use EI operator position to determine stream length
-                            dataLength = operatorEIPosition - startDataPosition;
+                            dataLength = -1;
+
+                            // We will try to use stream filter hint
+                            PDFDocumentDataLoaderDecorator loader(m_document);
+                            QByteArray filterName = loader.readNameFromDictionary(dictionary, "Filter");
+                            if (!filterName.isEmpty())
+                            {
+                                dataLength = PDFStreamFilterStorage::getStreamDataLength(content, filterName, startDataPosition);
+                            }
+
+                            if (dataLength == -1)
+                            {
+                                // We will use EI operator position to determine stream length
+                                dataLength = operatorEIPosition - startDataPosition;
+                            }
                         }
                         else
                         {
diff --git a/PdfForQtLib/sources/pdfstreamfilters.cpp b/PdfForQtLib/sources/pdfstreamfilters.cpp
index 15f4995..20acefe 100644
--- a/PdfForQtLib/sources/pdfstreamfilters.cpp
+++ b/PdfForQtLib/sources/pdfstreamfilters.cpp
@@ -457,6 +457,48 @@ QByteArray PDFFlateDecodeFilter::recompress(const QByteArray& data)
     return result;
 }
 
+PDFInteger PDFFlateDecodeFilter::getStreamDataLength(const QByteArray& data, PDFInteger offset) const
+{
+    if (offset < 0 || offset >= data.size())
+    {
+        return -1;
+    }
+
+    z_stream stream = { };
+    stream.next_in = const_cast<Bytef*>(convertByteArrayToUcharPtr(data) + offset);
+    stream.avail_in = static_cast<uInt>(data.size() - offset);
+
+    // Scratch output buffer: decoded bytes are overwritten on every pass and
+    // never read, only the amount of consumed input matters here.
+    std::array<Bytef, 1024> outputBuffer = { };
+
+    int error = inflateInit(&stream);
+    if (error != Z_OK)
+    {
+        return -1;
+    }
+
+    do
+    {
+        stream.next_out = outputBuffer.data();
+        stream.avail_out = static_cast<uInt>(outputBuffer.size());
+
+        error = inflate(&stream, Z_NO_FLUSH);
+    } while (error == Z_OK);
+
+    // Number of compressed bytes zlib has consumed from the input
+    PDFInteger dataLength = static_cast<PDFInteger>(stream.total_in);
+    inflateEnd(&stream);
+
+    // Length is reported only if the deflate stream ended cleanly
+    if (error == Z_STREAM_END)
+    {
+        return dataLength;
+    }
+
+    return -1;
+}
+
 QByteArray PDFFlateDecodeFilter::uncompress(const QByteArray& data)
 {
     QByteArray result;
@@ -676,6 +718,16 @@ QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, con
     return getDecodedStream(stream, [](const PDFObject& object) -> const PDFObject& { return object; }, securityHandler);
 }
 
+PDFInteger PDFStreamFilterStorage::getStreamDataLength(const QByteArray& data, const QByteArray& filterName, PDFInteger offset)
+{
+    if (const PDFStreamFilter* filter = getFilter(filterName))
+    {
+        return filter->getStreamDataLength(data, offset);
+    }
+
+    return -1;
+}
+
 PDFStreamFilterStorage::PDFStreamFilterStorage()
 {
     // Initialize map with the filters
@@ -931,4 +983,12 @@ QByteArray PDFCryptFilter::apply(const QByteArray& data,
     return securityHandler->decryptByFilter(data, cryptFilterName, objectReference);
 }
 
+PDFInteger PDFStreamFilter::getStreamDataLength(const QByteArray& data, PDFInteger offset) const
+{
+    Q_UNUSED(data);
+    Q_UNUSED(offset);
+
+    return -1;
+}
+
 } // namespace pdf
diff --git a/PdfForQtLib/sources/pdfstreamfilters.h b/PdfForQtLib/sources/pdfstreamfilters.h
index 26d5879..bcbbe2b 100644
--- a/PdfForQtLib/sources/pdfstreamfilters.h
+++ b/PdfForQtLib/sources/pdfstreamfilters.h
@@ -54,6 +54,14 @@ public:
     /// \param securityHandler Security handler for Crypt filters
     static QByteArray getDecodedStream(const PDFStream* stream, const PDFSecurityHandler* securityHandler);
 
+    /// Tries to find stream data length using given filter. Stream will
+    /// start at given \p offset in \p data. If stream length cannot be determined,
+    /// then -1 is returned.
+    /// \param data Buffer data
+    /// \param filterName Filter name
+    /// \param offset Offset to buffer, at which stream data starts
+    static PDFInteger getStreamDataLength(const QByteArray& data, const QByteArray& filterName, PDFInteger offset);
+
     struct StreamFilters
     {
         bool valid = true;
@@ -148,6 +156,12 @@ public:
     {
         return apply(data, [](const PDFObject& object) -> const PDFObject& { return object; }, parameters, securityHandler);
     }
+
+    /// Tries to find stream data length. Stream will start at given \p offset in \p data.
+    /// If stream length cannot be determined, then -1 is returned.
+    /// \param data Buffer data
+    /// \param offset Offset to buffer, at which stream data starts
+    virtual PDFInteger getStreamDataLength(const QByteArray& data, PDFInteger offset) const;
 };
 
 class PDFFORQTLIBSHARED_EXPORT PDFAsciiHexDecodeFilter : public PDFStreamFilter
@@ -197,6 +211,9 @@ public:
                              const PDFObject& parameters,
                              const PDFSecurityHandler* securityHandler) const override;
 
+    /// Inflates the data from \p offset and returns count of consumed compressed bytes, or -1 on failure.
+    virtual PDFInteger getStreamDataLength(const QByteArray& data, PDFInteger offset) const override;
+
     /// Recompresses data. So, first, data are decompressed, and then
     /// recompressed again with maximal compress ratio possible.
     /// \param data Compressed data to be recompressed