2021-04-30 20:12:10 +02:00
|
|
|
// Copyright (C) 2018-2021 Jakub Melka
|
2018-12-29 18:22:13 +01:00
|
|
|
//
|
2020-12-20 19:03:58 +01:00
|
|
|
// This file is part of Pdf4Qt.
|
2018-12-29 18:22:13 +01:00
|
|
|
//
|
2020-12-20 19:03:58 +01:00
|
|
|
// Pdf4Qt is free software: you can redistribute it and/or modify
|
2018-12-29 18:22:13 +01:00
|
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
2021-04-30 20:12:10 +02:00
|
|
|
// with the written consent of the copyright owner, any later version.
|
2018-12-29 18:22:13 +01:00
|
|
|
//
|
2020-12-20 19:03:58 +01:00
|
|
|
// Pdf4Qt is distributed in the hope that it will be useful,
|
2018-12-29 18:22:13 +01:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Lesser General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
2020-12-20 19:03:58 +01:00
|
|
|
// along with Pdf4Qt. If not, see <https://www.gnu.org/licenses/>.
|
2018-12-29 18:22:13 +01:00
|
|
|
|
|
|
|
#ifndef PDFSTREAMFILTERS_H
|
|
|
|
#define PDFSTREAMFILTERS_H
|
|
|
|
|
|
|
|
#include "pdfobject.h"
|
|
|
|
|
|
|
|
#include <QByteArray>
|
|
|
|
|
2019-02-09 18:40:56 +01:00
|
|
|
#include <memory>
|
2019-06-28 18:11:05 +02:00
|
|
|
#include <functional>
|
2019-02-09 18:40:56 +01:00
|
|
|
|
2018-12-29 18:22:13 +01:00
|
|
|
namespace pdf
|
|
|
|
{
|
2019-02-09 18:40:56 +01:00
|
|
|
class PDFStreamFilter;
|
2019-08-13 15:48:01 +02:00
|
|
|
class PDFSecurityHandler;
|
2019-02-09 18:40:56 +01:00
|
|
|
|
2019-06-28 18:11:05 +02:00
|
|
|
/// Callable taking a constant reference to a PDFObject and returning a constant
/// reference to a PDFObject. Throughout this header it is documented as the function
/// which retrieves objects (for example, reads objects from reference).
using PDFObjectFetcher = std::function<const PDFObject&(const PDFObject&)>;
|
|
|
|
|
2019-02-09 18:40:56 +01:00
|
|
|
/// Storage for stream filters. Can retrieve stream filters by name. Using singleton
|
|
|
|
/// design pattern. Use static methods to retrieve filters.
|
|
|
|
class PDFStreamFilterStorage
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
/// Retrieves filter by filter name. If filter with that name doesn't exist,
|
|
|
|
/// then nullptr is returned. This function is thread safe.
|
|
|
|
/// \param filterName Name of the filter to be retrieved.
|
|
|
|
static const PDFStreamFilter* getFilter(const QByteArray& filterName);
|
|
|
|
|
2019-06-28 18:11:05 +02:00
|
|
|
/// Returns decoded data from the stream
|
|
|
|
/// \param stream Stream containing the data
|
|
|
|
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
|
2019-08-13 15:48:01 +02:00
|
|
|
/// \param securityHandler Security handler for Crypt filters
|
|
|
|
static QByteArray getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher, const PDFSecurityHandler* securityHandler);
|
2019-06-28 18:11:05 +02:00
|
|
|
|
|
|
|
/// Returns decoded data from the stream, without object fetching
|
|
|
|
/// \param stream Stream containing the data
|
|
|
|
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
|
2019-08-13 15:48:01 +02:00
|
|
|
/// \param securityHandler Security handler for Crypt filters
|
|
|
|
static QByteArray getDecodedStream(const PDFStream* stream, const PDFSecurityHandler* securityHandler);
|
2019-06-28 18:11:05 +02:00
|
|
|
|
2020-09-20 11:53:46 +02:00
|
|
|
/// Tries to find stream data length using given filter. Stream will
|
|
|
|
/// start at given \p offset in \p data. If stream length cannot be determined,
|
|
|
|
/// then -1 is returned.
|
|
|
|
/// \param data Buffer data
|
|
|
|
/// \param filterName Filter name
|
|
|
|
/// \param offset Offset to buffer, at which stream data starts
|
|
|
|
static PDFInteger getStreamDataLength(const QByteArray& data, const QByteArray& filterName, PDFInteger offset);
|
|
|
|
|
2020-06-06 16:30:06 +02:00
|
|
|
struct StreamFilters
|
|
|
|
{
|
|
|
|
bool valid = true;
|
|
|
|
std::vector<const PDFStreamFilter*> filterObjects;
|
|
|
|
std::vector<PDFObject> filterParameterObjects;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Returns stream filters along with it's parameters, for this stream
|
|
|
|
/// \param stream Stream containing data
|
|
|
|
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
|
|
|
|
static StreamFilters getStreamFilters(const PDFStream* stream, const PDFObjectFetcher& objectFetcher);
|
|
|
|
|
2019-02-09 18:40:56 +01:00
|
|
|
private:
|
|
|
|
explicit PDFStreamFilterStorage();
|
|
|
|
|
|
|
|
static const PDFStreamFilterStorage* getInstance();
|
|
|
|
|
|
|
|
/// Maps names to the instances of the stream filters
|
|
|
|
std::map<QByteArray, std::unique_ptr<PDFStreamFilter>> m_filters;
|
|
|
|
|
|
|
|
/// Filter stream names can be specified in simplified (shorter) form.
|
|
|
|
/// This map maps shorter form to the longer form.
|
|
|
|
std::map<QByteArray, QByteArray> m_abbreviations;
|
|
|
|
};
|
2018-12-29 18:22:13 +01:00
|
|
|
|
2019-07-01 12:29:57 +02:00
|
|
|
class PDFStreamPredictor
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
/// Create predictor from stream parameters. If error occurs, exception is thrown.
|
|
|
|
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
|
|
|
|
/// \param parameters Parameters of the predictor (must be an dictionary)
|
|
|
|
static PDFStreamPredictor createPredictor(const PDFObjectFetcher& objectFetcher, const PDFObject& parameters);
|
|
|
|
|
|
|
|
/// Applies the predictor to the data.
|
|
|
|
/// \param data Data to be decoded using predictor
|
|
|
|
QByteArray apply(const QByteArray& data) const;
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
enum Predictor
|
|
|
|
{
|
|
|
|
NoPredictor = 1,
|
|
|
|
TIFF = 2,
|
|
|
|
PNG_None = 10, ///< No prediction
|
|
|
|
PNG_Sub = 11, ///< Prediction based on previous byte
|
|
|
|
PNG_Up = 12, ///< Prediction based on byte above
|
|
|
|
PNG_Average = 13, ///< Prediction based on average of previous nad current byte
|
|
|
|
PNG_Paeth = 14, ///< Nonlinear function
|
|
|
|
};
|
|
|
|
|
|
|
|
inline explicit PDFStreamPredictor() = default;
|
|
|
|
|
|
|
|
inline explicit PDFStreamPredictor(Predictor predictor, int components, int bitsPerComponent, int columns) :
|
|
|
|
m_predictor(predictor),
|
|
|
|
m_components(components),
|
|
|
|
m_bitsPerComponent(bitsPerComponent),
|
|
|
|
m_columns(columns),
|
|
|
|
m_stride(0)
|
|
|
|
{
|
|
|
|
m_stride = (m_columns * m_components * m_bitsPerComponent + 7) / 8;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Applies PNG predictor
|
|
|
|
QByteArray applyPNGPredictor(const QByteArray& data) const;
|
|
|
|
|
|
|
|
/// Applies TIFF predictor
|
|
|
|
QByteArray applyTIFFPredictor(const QByteArray& data) const;
|
|
|
|
|
|
|
|
Predictor m_predictor = NoPredictor;
|
|
|
|
int m_components = 0;
|
|
|
|
int m_bitsPerComponent = 0;
|
|
|
|
int m_columns = 0;
|
|
|
|
int m_stride = 0;
|
|
|
|
};
|
|
|
|
|
2020-12-20 19:03:58 +01:00
|
|
|
class Pdf4QtLIBSHARED_EXPORT PDFStreamFilter
|
2018-12-29 18:22:13 +01:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
explicit PDFStreamFilter() = default;
|
|
|
|
virtual ~PDFStreamFilter() = default;
|
|
|
|
|
2019-06-28 18:11:05 +02:00
|
|
|
/// Apply with object fetcher
|
|
|
|
/// \param data Stream data to be decoded
|
|
|
|
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
|
|
|
|
/// \param parameters Stream parameters
|
2019-08-13 15:48:01 +02:00
|
|
|
virtual QByteArray apply(const QByteArray& data, const PDFObjectFetcher& objectFetcher, const PDFObject& parameters, const PDFSecurityHandler* securityHandler) const = 0;
|
2019-06-28 18:11:05 +02:00
|
|
|
|
|
|
|
/// Apply without object fetcher - it assumes no references exists in the streams dictionary
|
|
|
|
/// \param data Stream data to be decoded
|
|
|
|
/// \param parameters Stream parameters
|
2019-08-13 15:48:01 +02:00
|
|
|
inline QByteArray apply(const QByteArray& data, const PDFObject& parameters, const PDFSecurityHandler* securityHandler) const
|
2019-06-28 18:11:05 +02:00
|
|
|
{
|
2019-08-13 15:48:01 +02:00
|
|
|
return apply(data, [](const PDFObject& object) -> const PDFObject& { return object; }, parameters, securityHandler);
|
2019-06-28 18:11:05 +02:00
|
|
|
}
|
2020-09-20 11:53:46 +02:00
|
|
|
|
|
|
|
/// Tries to find stream data length. Stream will start at given \p offset in \p data.
|
|
|
|
/// If stream length cannot be determined, then -1 is returned.
|
|
|
|
/// \param data Buffer data
|
|
|
|
/// \param offset Offset to buffer, at which stream data starts
|
|
|
|
virtual PDFInteger getStreamDataLength(const QByteArray& data, PDFInteger offset) const;
|
2018-12-29 18:22:13 +01:00
|
|
|
};
|
|
|
|
|
2020-12-20 19:03:58 +01:00
|
|
|
/// Stream filter implementation. Judging by the class name, it handles the
/// ASCII hexadecimal encoding (the implementation is not part of this header).
class Pdf4QtLIBSHARED_EXPORT PDFAsciiHexDecodeFilter : public PDFStreamFilter
{
public:
    explicit PDFAsciiHexDecodeFilter() = default;
    ~PDFAsciiHexDecodeFilter() override = default;

    // Declaring apply in this class hides all inherited overloads of apply. The
    // using-declaration keeps the convenience overload without object fetcher,
    // declared in PDFStreamFilter, callable on objects of this type.
    using PDFStreamFilter::apply;

    /// Applies the filter to the data
    /// \param data Stream data to be decoded
    /// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
    /// \param parameters Stream parameters
    /// \param securityHandler Security handler
    QByteArray apply(const QByteArray& data,
                     const PDFObjectFetcher& objectFetcher,
                     const PDFObject& parameters,
                     const PDFSecurityHandler* securityHandler) const override;
};
|
|
|
|
|
2020-12-20 19:03:58 +01:00
|
|
|
/// Stream filter implementation. Judging by the class name, it handles the
/// ASCII base-85 encoding (the implementation is not part of this header).
class Pdf4QtLIBSHARED_EXPORT PDFAscii85DecodeFilter : public PDFStreamFilter
{
public:
    explicit PDFAscii85DecodeFilter() = default;
    ~PDFAscii85DecodeFilter() override = default;

    // Declaring apply in this class hides all inherited overloads of apply. The
    // using-declaration keeps the convenience overload without object fetcher,
    // declared in PDFStreamFilter, callable on objects of this type.
    using PDFStreamFilter::apply;

    /// Applies the filter to the data
    /// \param data Stream data to be decoded
    /// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
    /// \param parameters Stream parameters
    /// \param securityHandler Security handler
    QByteArray apply(const QByteArray& data,
                     const PDFObjectFetcher& objectFetcher,
                     const PDFObject& parameters,
                     const PDFSecurityHandler* securityHandler) const override;
};
|
|
|
|
|
2020-12-20 19:03:58 +01:00
|
|
|
/// Stream filter implementation. Judging by the class name, it handles the
/// LZW compression (the implementation is not part of this header).
class Pdf4QtLIBSHARED_EXPORT PDFLzwDecodeFilter : public PDFStreamFilter
{
public:
    explicit PDFLzwDecodeFilter() = default;
    ~PDFLzwDecodeFilter() override = default;

    // Declaring apply in this class hides all inherited overloads of apply. The
    // using-declaration keeps the convenience overload without object fetcher,
    // declared in PDFStreamFilter, callable on objects of this type.
    using PDFStreamFilter::apply;

    /// Applies the filter to the data
    /// \param data Stream data to be decoded
    /// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
    /// \param parameters Stream parameters
    /// \param securityHandler Security handler
    QByteArray apply(const QByteArray& data,
                     const PDFObjectFetcher& objectFetcher,
                     const PDFObject& parameters,
                     const PDFSecurityHandler* securityHandler) const override;
};
|
|
|
|
|
2020-12-20 19:03:58 +01:00
|
|
|
/// Stream filter implementation. Judging by the class name, it handles the
/// flate (deflate) compression (the implementation is not part of this header).
class Pdf4QtLIBSHARED_EXPORT PDFFlateDecodeFilter : public PDFStreamFilter
{
public:
    explicit PDFFlateDecodeFilter() = default;
    ~PDFFlateDecodeFilter() override = default;

    // Declaring apply in this class hides all inherited overloads of apply. The
    // using-declaration keeps the convenience overload without object fetcher,
    // declared in PDFStreamFilter, callable on objects of this type.
    using PDFStreamFilter::apply;

    /// Applies the filter to the data
    /// \param data Stream data to be decoded
    /// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
    /// \param parameters Stream parameters
    /// \param securityHandler Security handler
    QByteArray apply(const QByteArray& data,
                     const PDFObjectFetcher& objectFetcher,
                     const PDFObject& parameters,
                     const PDFSecurityHandler* securityHandler) const override;

    /// Overrides the stream data length detection of the base class
    /// \param data Buffer data
    /// \param offset Offset to buffer, at which stream data starts
    PDFInteger getStreamDataLength(const QByteArray& data, PDFInteger offset) const override;

    /// Recompresses data. So, first, data are decompressed, and then
    /// recompressed again with maximal compress ratio possible.
    /// \param data Compressed data to be recompressed
    static QByteArray recompress(const QByteArray& data);

private:
    /// Uncompresses the data (presumably the decompression step shared by
    /// this class - confirm against the implementation file)
    /// \param data Data to be uncompressed
    static QByteArray uncompress(const QByteArray& data);
};
|
|
|
|
|
2020-12-20 19:03:58 +01:00
|
|
|
/// Stream filter implementation. Judging by the class name, it handles the
/// run-length encoding (the implementation is not part of this header).
class Pdf4QtLIBSHARED_EXPORT PDFRunLengthDecodeFilter : public PDFStreamFilter
{
public:
    explicit PDFRunLengthDecodeFilter() = default;
    ~PDFRunLengthDecodeFilter() override = default;

    // Declaring apply in this class hides all inherited overloads of apply. The
    // using-declaration keeps the convenience overload without object fetcher,
    // declared in PDFStreamFilter, callable on objects of this type.
    using PDFStreamFilter::apply;

    /// Applies the filter to the data
    /// \param data Stream data to be decoded
    /// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
    /// \param parameters Stream parameters
    /// \param securityHandler Security handler
    QByteArray apply(const QByteArray& data,
                     const PDFObjectFetcher& objectFetcher,
                     const PDFObject& parameters,
                     const PDFSecurityHandler* securityHandler) const override;
};
|
|
|
|
|
2020-12-20 19:03:58 +01:00
|
|
|
/// Stream filter implementation. Judging by the class name, it handles the
/// Crypt filter (the implementation is not part of this header).
class Pdf4QtLIBSHARED_EXPORT PDFCryptFilter : public PDFStreamFilter
{
public:
    explicit PDFCryptFilter() = default;
    ~PDFCryptFilter() override = default;

    // Declaring apply in this class hides all inherited overloads of apply. The
    // using-declaration keeps the convenience overload without object fetcher,
    // declared in PDFStreamFilter, callable on objects of this type.
    using PDFStreamFilter::apply;

    /// Applies the filter to the data
    /// \param data Stream data to be decoded
    /// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
    /// \param parameters Stream parameters
    /// \param securityHandler Security handler
    QByteArray apply(const QByteArray& data,
                     const PDFObjectFetcher& objectFetcher,
                     const PDFObject& parameters,
                     const PDFSecurityHandler* securityHandler) const override;
};
|
|
|
|
|
|
|
|
} // namespace pdf
|
|
|
|
|
|
|
|
#endif // PDFSTREAMFILTERS_H
|