2020-01-18 11:38:54 +01:00
|
|
|
// Copyright (C) 2018-2020 Jakub Melka
|
2018-11-17 16:48:30 +01:00
|
|
|
//
|
|
|
|
// This file is part of PdfForQt.
|
|
|
|
//
|
|
|
|
// PdfForQt is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// PdfForQt is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Lesser General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
//    along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef PDFDOCUMENT_H
|
|
|
|
#define PDFDOCUMENT_H
|
|
|
|
|
|
|
|
#include "pdfglobal.h"
|
2018-11-25 14:48:08 +01:00
|
|
|
#include "pdfobject.h"
|
2018-12-24 17:09:23 +01:00
|
|
|
#include "pdfcatalog.h"
|
2019-08-04 18:26:15 +02:00
|
|
|
#include "pdfsecurityhandler.h"
|
2018-11-17 16:48:30 +01:00
|
|
|
|
2018-12-02 17:53:19 +01:00
|
|
|
#include <QtCore>
|
2019-08-25 18:16:37 +02:00
|
|
|
#include <QMatrix>
|
2018-12-02 17:53:19 +01:00
|
|
|
#include <QDateTime>
|
|
|
|
|
2020-04-18 19:01:49 +02:00
|
|
|
#include <optional>
|
|
|
|
|
2018-11-17 16:48:30 +01:00
|
|
|
namespace pdf
|
|
|
|
{
|
2018-12-24 17:09:23 +01:00
|
|
|
class PDFDocument;
|
2020-03-19 18:17:08 +01:00
|
|
|
class PDFDocumentBuilder;
|
2018-11-17 16:48:30 +01:00
|
|
|
|
2018-11-25 14:48:08 +01:00
|
|
|
/// Storage for objects. This class is not thread safe for writing (calling non-const functions). Caller must ensure
|
|
|
|
/// locking, if this object is used from multiple threads. Calling const functions should be thread safe.
|
|
|
|
class PDFObjectStorage
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
constexpr inline PDFObjectStorage() = default;
|
|
|
|
|
|
|
|
constexpr inline PDFObjectStorage(const PDFObjectStorage&) = default;
|
|
|
|
constexpr inline PDFObjectStorage(PDFObjectStorage&&) = default;
|
|
|
|
|
|
|
|
constexpr inline PDFObjectStorage& operator=(const PDFObjectStorage&) = default;
|
|
|
|
constexpr inline PDFObjectStorage& operator=(PDFObjectStorage&&) = default;
|
|
|
|
|
2020-05-02 18:04:25 +02:00
|
|
|
bool operator==(const PDFObjectStorage& other) const;
|
|
|
|
bool operator!=(const PDFObjectStorage& other) const { return !(*this == other); }
|
|
|
|
|
2018-11-25 14:48:08 +01:00
|
|
|
struct Entry
|
|
|
|
{
|
|
|
|
constexpr inline explicit Entry() = default;
|
|
|
|
inline explicit Entry(PDFInteger generation, PDFObject object) : generation(generation), object(std::move(object)) { }
|
|
|
|
|
2020-05-02 18:04:25 +02:00
|
|
|
inline bool operator==(const Entry& other) const { return generation == other.generation && object == other.object; }
|
|
|
|
inline bool operator!=(const Entry& other) const { return !(*this == other); }
|
|
|
|
|
2018-11-25 14:48:08 +01:00
|
|
|
PDFInteger generation = 0;
|
|
|
|
PDFObject object;
|
|
|
|
};
|
|
|
|
|
|
|
|
using PDFObjects = std::vector<Entry>;
|
|
|
|
|
2019-08-04 18:26:15 +02:00
|
|
|
explicit PDFObjectStorage(PDFObjects&& objects, PDFObject&& trailerDictionary, PDFSecurityHandlerPointer&& securityHandler) :
|
2018-12-01 11:36:07 +01:00
|
|
|
m_objects(std::move(objects)),
|
2019-08-04 18:26:15 +02:00
|
|
|
m_trailerDictionary(std::move(trailerDictionary)),
|
|
|
|
m_securityHandler(std::move(securityHandler))
|
2018-12-01 11:36:07 +01:00
|
|
|
{
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2018-12-02 17:53:19 +01:00
|
|
|
/// Returns object from the object storage. If invalid reference is passed,
|
|
|
|
/// then null object is returned (no exception is thrown).
|
|
|
|
const PDFObject& getObject(PDFObjectReference reference) const;
|
|
|
|
|
2020-03-29 18:53:04 +02:00
|
|
|
/// If object is reference, the dereference attempt is performed
|
|
|
|
/// and object is returned. If it is not a reference, then self
|
|
|
|
/// is returned. If dereference attempt fails, then null object
|
|
|
|
/// is returned (no exception is thrown).
|
|
|
|
const PDFObject& getObject(const PDFObject& object) const;
|
|
|
|
|
|
|
|
/// Returns dictionary from an object. If object is not a dictionary,
|
|
|
|
/// then nullptr is returned (no exception is thrown).
|
|
|
|
const PDFDictionary* getDictionaryFromObject(const PDFObject& object) const;
|
|
|
|
|
|
|
|
/// Returns object by reference. If dereference attempt fails, then null object
|
|
|
|
/// is returned (no exception is thrown).
|
|
|
|
const PDFObject& getObjectByReference(PDFObjectReference reference) const;
|
|
|
|
|
2018-12-01 11:36:07 +01:00
|
|
|
/// Returns array of objects stored in this storage
|
|
|
|
const PDFObjects& getObjects() const { return m_objects; }
|
|
|
|
|
|
|
|
/// Returns trailer dictionary
|
|
|
|
const PDFObject& getTrailerDictionary() const { return m_trailerDictionary; }
|
|
|
|
|
2019-08-04 18:26:15 +02:00
|
|
|
/// Returns security handler associated with these objects
|
|
|
|
const PDFSecurityHandler* getSecurityHandler() const { return m_securityHandler.data(); }
|
|
|
|
|
2020-03-22 15:30:34 +01:00
|
|
|
/// Sets security handler associated with these objects
|
|
|
|
void setSecurityHandler(PDFSecurityHandlerPointer handler) { m_securityHandler = qMove(handler); }
|
|
|
|
|
2020-03-19 18:17:08 +01:00
|
|
|
/// Adds a new object to the object list. This function
|
|
|
|
/// is not thread safe, do not call it from multiple threads.
|
|
|
|
/// \param object Object to be added
|
|
|
|
/// \returns Reference to new object
|
|
|
|
PDFObjectReference addObject(PDFObject object);
|
|
|
|
|
2020-03-21 16:36:27 +01:00
|
|
|
/// Sets object to object storage. Reference must exist.
|
|
|
|
/// \param reference Reference to object
|
|
|
|
/// \param object New value of object
|
|
|
|
void setObject(PDFObjectReference reference, PDFObject object);
|
|
|
|
|
2020-03-21 18:18:08 +01:00
|
|
|
/// Updates trailer dictionary. Preserves items which are not in a new
|
|
|
|
/// dictionary \p trailerDictionary. It merges new dictionary to the
|
|
|
|
/// old one.
|
|
|
|
/// \param trailerDictionary New trailer dictionary
|
|
|
|
void updateTrailerDictionary(PDFObject trailerDictionary);
|
|
|
|
|
2020-03-29 18:53:04 +02:00
|
|
|
/// Returns the decoded stream. If stream data cannot be decoded,
|
|
|
|
/// then empty byte array is returned.
|
|
|
|
/// \param stream Stream to be decoded
|
|
|
|
QByteArray getDecodedStream(const PDFStream* stream) const;
|
|
|
|
|
2018-11-25 14:48:08 +01:00
|
|
|
private:
|
2018-12-01 11:36:07 +01:00
|
|
|
PDFObjects m_objects;
|
|
|
|
PDFObject m_trailerDictionary;
|
2019-08-04 18:26:15 +02:00
|
|
|
PDFSecurityHandlerPointer m_securityHandler;
|
2018-11-25 14:48:08 +01:00
|
|
|
};
|
|
|
|
|
2018-12-24 17:09:23 +01:00
|
|
|
/// Loads data from the object contained in the PDF document, such as integers,
|
|
|
|
/// bools, ... This object has two sets of functions - first one with default values,
|
|
|
|
/// then if object with valid data is not found, default value is used, and second one,
|
|
|
|
/// without default value, if valid data are not found, then exception is thrown.
|
|
|
|
/// This class uses Decorator design pattern.
|
|
|
|
class PDFDocumentDataLoaderDecorator
|
|
|
|
{
|
|
|
|
public:
|
2020-03-29 18:53:04 +02:00
|
|
|
explicit PDFDocumentDataLoaderDecorator(const PDFDocument* document);
|
|
|
|
inline explicit PDFDocumentDataLoaderDecorator(const PDFObjectStorage* storage) : m_storage(storage) { }
|
2018-12-24 17:09:23 +01:00
|
|
|
inline ~PDFDocumentDataLoaderDecorator() = default;
|
|
|
|
|
2019-03-30 18:45:30 +01:00
|
|
|
/// Reads a name from the object, if it is possible. If object is not a name,
|
|
|
|
/// then empty byte array is returned.
|
|
|
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
2020-04-18 19:01:49 +02:00
|
|
|
QByteArray readName(const PDFObject& object) const;
|
2019-03-30 18:45:30 +01:00
|
|
|
|
2019-03-31 18:08:36 +02:00
|
|
|
/// Reads a string from the object, if it is possible. If object is not a string,
|
|
|
|
/// then empty byte array is returned.
|
|
|
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
2020-04-18 19:01:49 +02:00
|
|
|
QByteArray readString(const PDFObject& object) const;
|
2019-03-31 18:08:36 +02:00
|
|
|
|
2018-12-24 17:09:23 +01:00
|
|
|
/// Reads an integer from the object, if it is possible.
|
|
|
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
|
|
|
/// \param defaultValue Default value
|
|
|
|
PDFInteger readInteger(const PDFObject& object, PDFInteger defaultValue) const;
|
|
|
|
|
2019-02-16 18:26:16 +01:00
|
|
|
/// Reads a real number from the object, if it is possible. If integer appears as object,
|
|
|
|
/// then it is converted to real number.
|
|
|
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
|
|
|
/// \param defaultValue Default value
|
2019-02-23 15:44:14 +01:00
|
|
|
PDFReal readNumber(const PDFObject& object, PDFReal defaultValue) const;
|
2019-02-16 18:26:16 +01:00
|
|
|
|
2019-03-25 18:44:45 +01:00
|
|
|
/// Reads a boolean from the object, if it is possible.
|
|
|
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
|
|
|
/// \param defaultValue Default value
|
|
|
|
bool readBoolean(const PDFObject& object, bool defaultValue) const;
|
|
|
|
|
2018-12-24 17:09:23 +01:00
|
|
|
/// Reads a text string from the object, if it is possible.
|
|
|
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
|
|
|
/// \param defaultValue Default value
|
|
|
|
QString readTextString(const PDFObject& object, const QString& defaultValue) const;
|
|
|
|
|
2018-12-26 18:00:17 +01:00
|
|
|
/// Reads a rectangle from the object, if it is possible.
|
|
|
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
|
|
|
/// \param defaultValue Default value
|
|
|
|
QRectF readRectangle(const PDFObject& object, const QRectF& defaultValue) const;
|
|
|
|
|
2018-12-24 17:09:23 +01:00
|
|
|
/// Reads enum from name object, if it is possible.
|
|
|
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
|
|
|
/// \param begin Begin of the enum search array
|
|
|
|
/// \param end End of the enum search array
|
|
|
|
/// \param default value Default value
|
|
|
|
template<typename Enum, typename Iterator>
|
|
|
|
Enum readEnumByName(const PDFObject& object, Iterator begin, Iterator end, Enum defaultValue) const
|
|
|
|
{
|
2020-03-29 18:53:04 +02:00
|
|
|
const PDFObject& dereferencedObject = m_storage->getObject(object);
|
2019-11-23 19:02:24 +01:00
|
|
|
if (dereferencedObject.isName() || dereferencedObject.isString())
|
2018-12-24 17:09:23 +01:00
|
|
|
{
|
|
|
|
QByteArray name = dereferencedObject.getString();
|
|
|
|
|
|
|
|
for (Iterator it = begin; it != end; ++it)
|
|
|
|
{
|
|
|
|
if (name == (*it).first)
|
|
|
|
{
|
|
|
|
return (*it).second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return defaultValue;
|
|
|
|
}
|
|
|
|
|
2019-02-16 18:26:16 +01:00
|
|
|
/// Tries to read array of real values. Reads as much values as possible.
|
|
|
|
/// If array size differs, then nothing happens.
|
|
|
|
/// \param object Array of integers
|
|
|
|
/// \param first First iterator
|
|
|
|
/// \param second Second iterator
|
|
|
|
template<typename T>
|
|
|
|
void readNumberArray(const PDFObject& object, T first, T last)
|
|
|
|
{
|
2020-03-29 18:53:04 +02:00
|
|
|
const PDFObject& dereferencedObject = m_storage->getObject(object);
|
2019-02-16 18:26:16 +01:00
|
|
|
if (dereferencedObject.isArray())
|
|
|
|
{
|
|
|
|
const PDFArray* array = dereferencedObject.getArray();
|
|
|
|
|
|
|
|
size_t distance = std::distance(first, last);
|
|
|
|
if (array->getCount() == distance)
|
|
|
|
{
|
|
|
|
T it = first;
|
|
|
|
for (size_t i = 0; i < distance; ++i)
|
|
|
|
{
|
|
|
|
*it = readNumber(array->getItem(i), *it);
|
|
|
|
++it;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tries to read array of real values from dictionary. Reads as much values as possible.
|
|
|
|
/// If array size differs, or entry dictionary doesn't exist, then nothing happens.
|
|
|
|
/// \param dictionary Dictionary with desired values
|
|
|
|
/// \param key Entry key
|
|
|
|
/// \param first First iterator
|
|
|
|
/// \param second Second iterator
|
|
|
|
template<typename T>
|
|
|
|
void readNumberArrayFromDictionary(const PDFDictionary* dictionary, const char* key, T first, T last)
|
|
|
|
{
|
|
|
|
if (dictionary->hasKey(key))
|
|
|
|
{
|
|
|
|
readNumberArray(dictionary->get(key), first, last);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-25 18:16:37 +02:00
|
|
|
/// Tries to read matrix from the dictionary. If matrix entry is not present, default value is returned.
|
|
|
|
/// If it is present and invalid, exception is thrown.
|
2020-04-18 19:01:49 +02:00
|
|
|
QMatrix readMatrixFromDictionary(const PDFDictionary* dictionary, const char* key, QMatrix defaultValue) const;
|
2019-08-25 18:16:37 +02:00
|
|
|
|
2019-03-07 19:57:03 +01:00
|
|
|
/// Tries to read array of real values from dictionary. If entry dictionary doesn't exist,
|
2019-09-08 17:17:12 +02:00
|
|
|
/// or error occurs, default value is returned.
|
2020-04-18 19:01:49 +02:00
|
|
|
std::vector<PDFReal> readNumberArrayFromDictionary(const PDFDictionary* dictionary, const char* key, std::vector<PDFReal> defaultValue = std::vector<PDFReal>()) const;
|
2019-03-07 19:57:03 +01:00
|
|
|
|
|
|
|
/// Tries to read array of integer values from dictionary. If entry dictionary doesn't exist,
|
2019-06-23 18:35:32 +02:00
|
|
|
/// or error occurs, empty array is returned.
|
2020-04-18 19:01:49 +02:00
|
|
|
std::vector<PDFInteger> readIntegerArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const;
|
2019-03-07 19:57:03 +01:00
|
|
|
|
2019-02-16 18:26:16 +01:00
|
|
|
/// Reads number from dictionary. If dictionary entry doesn't exist, or error occurs, default value is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
/// \param defaultValue Default value
|
|
|
|
PDFReal readNumberFromDictionary(const PDFDictionary* dictionary, const char* key, PDFReal defaultValue) const;
|
|
|
|
|
2019-09-01 14:42:32 +02:00
|
|
|
/// Reads number from dictionary. If dictionary entry doesn't exist, or error occurs, default value is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
/// \param defaultValue Default value
|
|
|
|
PDFReal readNumberFromDictionary(const PDFDictionary* dictionary, const QByteArray& key, PDFReal defaultValue) const;
|
|
|
|
|
2019-02-16 18:26:16 +01:00
|
|
|
/// Reads integer from dictionary. If dictionary entry doesn't exist, or error occurs, default value is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
/// \param defaultValue Default value
|
|
|
|
PDFInteger readIntegerFromDictionary(const PDFDictionary* dictionary, const char* key, PDFInteger defaultValue) const;
|
|
|
|
|
2019-06-23 18:35:32 +02:00
|
|
|
/// Reads a text string from the dictionary, if it is possible.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
/// \param defaultValue Default value
|
|
|
|
QString readTextStringFromDictionary(const PDFDictionary* dictionary, const char* key, const QString& defaultValue) const;
|
|
|
|
|
|
|
|
/// Tries to read array of references from dictionary. If entry dictionary doesn't exist,
|
|
|
|
/// or error occurs, empty array is returned.
|
2020-04-18 19:01:49 +02:00
|
|
|
std::vector<PDFObjectReference> readReferenceArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const;
|
2019-06-23 18:35:32 +02:00
|
|
|
|
2019-02-23 15:44:14 +01:00
|
|
|
/// Reads number array from dictionary. Reads all values. If some value is not
|
2019-09-08 17:17:12 +02:00
|
|
|
/// real number (or integer number), default value is returned. Default value is also returned,
|
2019-02-23 15:44:14 +01:00
|
|
|
/// if \p object is invalid.
|
|
|
|
/// \param object Object containing array of numbers
|
2019-09-08 17:17:12 +02:00
|
|
|
std::vector<PDFReal> readNumberArray(const PDFObject& object, std::vector<PDFReal> defaultValue = std::vector<PDFReal>()) const;
|
2019-02-23 15:44:14 +01:00
|
|
|
|
2019-03-07 19:57:03 +01:00
|
|
|
/// Reads integer array from dictionary. Reads all values. If some value is not
|
|
|
|
/// integer number, empty array is returned. Empty array is also returned,
|
|
|
|
/// if \p object is invalid.
|
|
|
|
/// \param object Object containing array of numbers
|
|
|
|
std::vector<PDFInteger> readIntegerArray(const PDFObject& object) const;
|
|
|
|
|
2020-03-05 18:28:07 +01:00
|
|
|
/// Reads reference from dictionary. If error occurs, then invalid reference is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
PDFObjectReference readReferenceFromDictionary(const PDFDictionary* dictionary, const char* key) const;
|
|
|
|
|
|
|
|
/// Reads reference array. Reads all values. If error occurs,
|
2019-06-23 18:35:32 +02:00
|
|
|
/// then empty array is returned.
|
|
|
|
/// \param object Object containing array of references
|
|
|
|
std::vector<PDFObjectReference> readReferenceArray(const PDFObject& object) const;
|
|
|
|
|
|
|
|
/// Reads name array. Reads all values. If error occurs,
|
|
|
|
/// then empty array is returned.
|
|
|
|
/// \param object Object containing array of references
|
|
|
|
std::vector<QByteArray> readNameArray(const PDFObject& object) const;
|
|
|
|
|
2019-11-23 19:02:24 +01:00
|
|
|
/// Reads string array. Reads all values. If error occurs,
|
|
|
|
/// then empty array is returned.
|
|
|
|
/// \param object Object containing array of references
|
|
|
|
std::vector<QByteArray> readStringArray(const PDFObject& object) const;
|
|
|
|
|
2019-06-23 18:35:32 +02:00
|
|
|
/// Reads name array from dictionary. Reads all values. If error occurs,
|
|
|
|
/// then empty array is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
std::vector<QByteArray> readNameArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const;
|
|
|
|
|
2019-03-25 18:44:45 +01:00
|
|
|
/// Reads boolean from dictionary. If dictionary entry doesn't exist, or error occurs, default value is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
/// \param defaultValue Default value
|
|
|
|
bool readBooleanFromDictionary(const PDFDictionary* dictionary, const char* key, bool defaultValue) const;
|
|
|
|
|
2019-03-30 18:45:30 +01:00
|
|
|
/// Reads a name from dictionary. If dictionary entry doesn't exist, or error occurs, empty byte array is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
2020-04-18 19:01:49 +02:00
|
|
|
QByteArray readNameFromDictionary(const PDFDictionary* dictionary, const char* key) const;
|
2019-03-30 18:45:30 +01:00
|
|
|
|
2019-03-31 18:08:36 +02:00
|
|
|
/// Reads a string from dictionary. If dictionary entry doesn't exist, or error occurs, empty byte array is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
2020-04-18 19:01:49 +02:00
|
|
|
QByteArray readStringFromDictionary(const PDFDictionary* dictionary, const char* key) const;
|
2019-03-31 18:08:36 +02:00
|
|
|
|
2019-11-23 19:02:24 +01:00
|
|
|
/// Reads string array from dictionary. Reads all values. If error occurs,
|
|
|
|
/// then empty array is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
std::vector<QByteArray> readStringArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const;
|
|
|
|
|
2020-04-10 20:52:05 +02:00
|
|
|
/// Reads string list. If error occurs, empty list is returned.
|
|
|
|
QStringList readTextStringList(const PDFObject& object);
|
|
|
|
|
|
|
|
/// Reads list of object, using parse function defined in object
|
|
|
|
template<typename Object>
|
|
|
|
std::vector<Object> readObjectList(PDFObject object)
|
|
|
|
{
|
|
|
|
std::vector<Object> result;
|
|
|
|
object = m_storage->getObject(object);
|
|
|
|
if (object.isArray())
|
|
|
|
{
|
|
|
|
const PDFArray* array = object.getArray();
|
|
|
|
const size_t count = array->getCount();
|
|
|
|
result.reserve(count);
|
|
|
|
|
|
|
|
for (size_t i = 0; i < count; ++i)
|
|
|
|
{
|
|
|
|
result.emplace_back(Object::parse(m_storage, array->getItem(i)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2020-04-18 19:01:49 +02:00
|
|
|
/// Reads optional string from dictionary. If key is not in dictionary,
|
|
|
|
/// then empty optional is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
std::optional<QByteArray> readOptionalStringFromDictionary(const PDFDictionary* dictionary, const char* key) const;
|
|
|
|
|
|
|
|
/// Reads optionalinteger from dictionary. If dictionary entry doesn't exist, or error occurs, empty optional is returned.
|
|
|
|
/// \param dictionary Dictionary containing desired data
|
|
|
|
/// \param key Entry key
|
|
|
|
std::optional<PDFInteger> readOptionalIntegerFromDictionary(const PDFDictionary* dictionary, const char* key) const;
|
|
|
|
|
2018-12-24 17:09:23 +01:00
|
|
|
private:
|
2020-03-29 18:53:04 +02:00
|
|
|
const PDFObjectStorage* m_storage;
|
2018-12-24 17:09:23 +01:00
|
|
|
};
|
|
|
|
|
2018-12-01 11:36:07 +01:00
|
|
|
/// PDF document main class.
|
2019-12-03 19:24:43 +01:00
|
|
|
class PDFFORQTLIBSHARED_EXPORT PDFDocument
|
2018-11-17 16:48:30 +01:00
|
|
|
{
|
2018-12-02 17:53:19 +01:00
|
|
|
Q_DECLARE_TR_FUNCTIONS(pdf::PDFDocument)
|
|
|
|
|
2018-11-17 16:48:30 +01:00
|
|
|
public:
|
|
|
|
explicit PDFDocument() = default;
|
2018-11-25 14:48:08 +01:00
|
|
|
|
2020-05-02 18:04:25 +02:00
|
|
|
bool operator==(const PDFDocument& other) const;
|
|
|
|
bool operator!=(const PDFDocument& other) const { return !(*this == other); }
|
|
|
|
|
2018-12-01 11:36:07 +01:00
|
|
|
const PDFObjectStorage& getStorage() const { return m_pdfObjectStorage; }
|
|
|
|
|
2019-12-21 15:02:11 +01:00
|
|
|
/// Info about the document. Title, Author, Keywords... It also stores "extra"
|
|
|
|
/// values, which are in info dictionary. They can be either strings, or date
|
|
|
|
/// time (QString or QDateTime).
|
2018-12-02 17:53:19 +01:00
|
|
|
struct Info
|
|
|
|
{
|
|
|
|
/// Indicates, that document was modified that it includes trapping information.
|
|
|
|
/// See PDF Reference 1.7, Section 10.10.5 "Trapping Support".
|
|
|
|
enum class Trapped
|
|
|
|
{
|
|
|
|
True, ///< Fully trapped
|
|
|
|
False, ///< Not yet trapped
|
|
|
|
Unknown ///< Either unknown, or it has been trapped partly, not fully
|
|
|
|
};
|
|
|
|
|
|
|
|
QString title;
|
|
|
|
QString author;
|
|
|
|
QString subject;
|
|
|
|
QString keywords;
|
|
|
|
QString creator;
|
|
|
|
QString producer;
|
|
|
|
QDateTime creationDate;
|
|
|
|
QDateTime modifiedDate;
|
|
|
|
Trapped trapped = Trapped::Unknown;
|
2019-12-21 15:02:11 +01:00
|
|
|
PDFVersion version;
|
|
|
|
std::map<QByteArray, QVariant> extra;
|
2018-12-02 17:53:19 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/// Returns info about the document (title, author, etc.)
|
|
|
|
const Info* getInfo() const { return &m_info; }
|
|
|
|
|
2018-12-14 19:41:12 +01:00
|
|
|
/// If object is reference, the dereference attempt is performed
|
|
|
|
/// and object is returned. If it is not a reference, then self
|
|
|
|
/// is returned. If dereference attempt fails, then null object
|
|
|
|
/// is returned (no exception is thrown).
|
|
|
|
const PDFObject& getObject(const PDFObject& object) const;
|
|
|
|
|
2019-11-23 19:02:24 +01:00
|
|
|
/// Returns dictionary from an object. If object is not a dictionary,
|
|
|
|
/// then nullptr is returned (no exception is thrown).
|
|
|
|
const PDFDictionary* getDictionaryFromObject(const PDFObject& object) const;
|
|
|
|
|
2019-11-17 17:41:07 +01:00
|
|
|
/// Returns object by reference. If dereference attempt fails, then null object
|
|
|
|
/// is returned (no exception is thrown).
|
|
|
|
const PDFObject& getObjectByReference(PDFObjectReference reference) const;
|
|
|
|
|
2019-01-20 17:55:06 +01:00
|
|
|
/// Returns the document catalog
|
|
|
|
const PDFCatalog* getCatalog() const { return &m_catalog; }
|
|
|
|
|
2019-02-09 18:40:56 +01:00
|
|
|
/// Returns the decoded stream. If stream data cannot be decoded,
|
|
|
|
/// then empty byte array is returned.
|
|
|
|
/// \param stream Stream to be decoded
|
|
|
|
QByteArray getDecodedStream(const PDFStream* stream) const;
|
|
|
|
|
2019-06-28 18:11:05 +02:00
|
|
|
/// Returns the trailer dictionary
|
|
|
|
const PDFDictionary* getTrailerDictionary() const;
|
|
|
|
|
2019-12-21 15:02:11 +01:00
|
|
|
/// Returns version of the PDF document. Version can be taken from catalog,
|
|
|
|
/// or from PDF file header. Version from catalog has precedence over version from
|
|
|
|
/// header.
|
|
|
|
QByteArray getVersion() const;
|
|
|
|
|
2018-11-25 14:48:08 +01:00
|
|
|
private:
|
2018-12-01 11:36:07 +01:00
|
|
|
friend class PDFDocumentReader;
|
2020-03-19 18:17:08 +01:00
|
|
|
friend class PDFDocumentBuilder;
|
2018-12-01 11:36:07 +01:00
|
|
|
|
2019-12-21 15:02:11 +01:00
|
|
|
explicit PDFDocument(PDFObjectStorage&& storage, PDFVersion version) :
|
2018-12-01 11:36:07 +01:00
|
|
|
m_pdfObjectStorage(std::move(storage))
|
|
|
|
{
|
2018-12-02 17:53:19 +01:00
|
|
|
init();
|
2019-12-21 15:02:11 +01:00
|
|
|
|
|
|
|
m_info.version = version;
|
2018-12-01 11:36:07 +01:00
|
|
|
}
|
|
|
|
|
2018-12-02 17:53:19 +01:00
|
|
|
/// Initialize data based on object in the storage.
|
|
|
|
/// Can throw exception if error is detected.
|
|
|
|
void init();
|
|
|
|
|
|
|
|
/// Initialize the document info from the trailer dictionary.
|
|
|
|
/// If document info is not present, then default document
|
|
|
|
/// info is used. If error is detected, exception is thrown.
|
|
|
|
void initInfo();
|
|
|
|
|
2018-11-25 14:48:08 +01:00
|
|
|
/// Storage of objects
|
|
|
|
PDFObjectStorage m_pdfObjectStorage;
|
2018-12-02 17:53:19 +01:00
|
|
|
|
|
|
|
/// Info about the PDF document
|
|
|
|
Info m_info;
|
2018-12-24 17:09:23 +01:00
|
|
|
|
|
|
|
/// Catalog object
|
|
|
|
PDFCatalog m_catalog;
|
2018-11-17 16:48:30 +01:00
|
|
|
};
|
|
|
|
|
2020-04-25 18:15:12 +02:00
|
|
|
/// Shared pointer to the PDF document
using PDFDocumentPointer = QSharedPointer<PDFDocument>;
|
|
|
|
|
2020-04-25 14:21:06 +02:00
|
|
|
/// Helper class for document updates (for example, add/delete annotations,
|
|
|
|
/// fill form fields, do some other minor operations) and also for major
|
|
|
|
/// updates (document reset). It also contains modification flags, which are
|
|
|
|
/// hints for update operations (for example, if only annotations are changed,
|
|
|
|
/// then rebuilding outline tree is not needed). At least one of the flags
|
|
|
|
/// must be set. Reset flag is conservative, so it should be set, when document
|
|
|
|
/// changes are not known.
|
|
|
|
class PDFModifiedDocument
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
|
|
|
|
enum ModificationFlag
|
|
|
|
{
|
|
|
|
None = 0x0000, ///< No flag
|
|
|
|
Reset = 0x0001, ///< Whole document content is changed (for example, new document is being set)
|
|
|
|
Annotation = 0x0002, ///< Annotations changed
|
|
|
|
FormField = 0x0004, ///< Form field content changed
|
|
|
|
};
|
|
|
|
|
|
|
|
Q_DECLARE_FLAGS(ModificationFlags, ModificationFlag)
|
|
|
|
|
|
|
|
explicit inline PDFModifiedDocument() = default;
|
|
|
|
explicit inline PDFModifiedDocument(PDFDocument* document, PDFOptionalContentActivity* optionalContentActivity) :
|
|
|
|
m_document(document),
|
|
|
|
m_optionalContentActivity(optionalContentActivity),
|
|
|
|
m_flags(Reset)
|
|
|
|
{
|
2020-05-03 18:52:22 +02:00
|
|
|
Q_ASSERT(m_document);
|
|
|
|
}
|
|
|
|
|
|
|
|
explicit inline PDFModifiedDocument(PDFDocumentPointer document, PDFOptionalContentActivity* optionalContentActivity) :
|
|
|
|
m_documentPointer(qMove(document)),
|
|
|
|
m_document(m_documentPointer.data()),
|
|
|
|
m_optionalContentActivity(optionalContentActivity),
|
|
|
|
m_flags(Reset)
|
|
|
|
{
|
|
|
|
Q_ASSERT(m_document);
|
2020-04-25 14:21:06 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
explicit inline PDFModifiedDocument(PDFDocument* document, PDFOptionalContentActivity* optionalContentActivity, ModificationFlags flags) :
|
|
|
|
m_document(document),
|
|
|
|
m_optionalContentActivity(optionalContentActivity),
|
|
|
|
m_flags(flags)
|
|
|
|
{
|
2020-05-03 18:52:22 +02:00
|
|
|
Q_ASSERT(m_document);
|
|
|
|
}
|
|
|
|
|
|
|
|
explicit inline PDFModifiedDocument(PDFDocumentPointer document, PDFOptionalContentActivity* optionalContentActivity, ModificationFlags flags) :
|
|
|
|
m_documentPointer(qMove(document)),
|
|
|
|
m_document(m_documentPointer.data()),
|
|
|
|
m_optionalContentActivity(optionalContentActivity),
|
|
|
|
m_flags(flags)
|
|
|
|
{
|
|
|
|
Q_ASSERT(m_document);
|
2020-04-25 14:21:06 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
PDFDocument* getDocument() const { return m_document; }
|
|
|
|
PDFOptionalContentActivity* getOptionalContentActivity() const { return m_optionalContentActivity; }
|
|
|
|
void setOptionalContentActivity(PDFOptionalContentActivity* optionalContentActivity) { m_optionalContentActivity = optionalContentActivity; }
|
2020-05-03 18:52:22 +02:00
|
|
|
ModificationFlags getFlags() const { return m_flags; }
|
2020-04-25 14:21:06 +02:00
|
|
|
|
|
|
|
bool hasReset() const { return m_flags.testFlag(Reset); }
|
|
|
|
bool hasFlag(ModificationFlag flag) const { return m_flags.testFlag(flag); }
|
|
|
|
|
|
|
|
operator PDFDocument*() const { return m_document; }
|
2020-05-03 18:52:22 +02:00
|
|
|
operator PDFDocumentPointer() const { return m_documentPointer; }
|
2020-04-25 14:21:06 +02:00
|
|
|
|
|
|
|
private:
|
2020-05-03 18:52:22 +02:00
|
|
|
PDFDocumentPointer m_documentPointer;
|
2020-04-25 14:21:06 +02:00
|
|
|
PDFDocument* m_document = nullptr;
|
|
|
|
PDFOptionalContentActivity* m_optionalContentActivity = nullptr;
|
|
|
|
ModificationFlags m_flags = Reset;
|
|
|
|
};
|
|
|
|
|
2018-12-24 17:09:23 +01:00
|
|
|
// Implementation
|
|
|
|
|
2018-12-02 17:53:19 +01:00
|
|
|
inline
|
|
|
|
const PDFObject& PDFDocument::getObject(const PDFObject& object) const
|
|
|
|
{
|
|
|
|
if (object.isReference())
|
|
|
|
{
|
|
|
|
// Try to dereference the object
|
|
|
|
return m_pdfObjectStorage.getObject(object.getReference());
|
|
|
|
}
|
|
|
|
|
|
|
|
return object;
|
|
|
|
}
|
|
|
|
|
2019-11-23 19:02:24 +01:00
|
|
|
inline
|
|
|
|
const PDFDictionary* PDFDocument::getDictionaryFromObject(const PDFObject& object) const
|
|
|
|
{
|
|
|
|
const PDFObject& dereferencedObject = getObject(object);
|
|
|
|
if (dereferencedObject.isDictionary())
|
|
|
|
{
|
|
|
|
return dereferencedObject.getDictionary();
|
|
|
|
}
|
|
|
|
else if (dereferencedObject.isStream())
|
|
|
|
{
|
|
|
|
return dereferencedObject.getStream()->getDictionary();
|
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2019-11-17 17:41:07 +01:00
|
|
|
inline
|
|
|
|
const PDFObject& PDFDocument::getObjectByReference(PDFObjectReference reference) const
|
|
|
|
{
|
|
|
|
return m_pdfObjectStorage.getObject(reference);
|
|
|
|
}
|
|
|
|
|
2020-03-29 18:53:04 +02:00
|
|
|
inline
|
|
|
|
const PDFObject& PDFObjectStorage::getObject(const PDFObject& object) const
|
|
|
|
{
|
|
|
|
if (object.isReference())
|
|
|
|
{
|
|
|
|
// Try to dereference the object
|
|
|
|
return getObject(object.getReference());
|
|
|
|
}
|
|
|
|
|
|
|
|
return object;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline
|
|
|
|
const PDFDictionary* PDFObjectStorage::getDictionaryFromObject(const PDFObject& object) const
|
|
|
|
{
|
|
|
|
const PDFObject& dereferencedObject = getObject(object);
|
|
|
|
if (dereferencedObject.isDictionary())
|
|
|
|
{
|
|
|
|
return dereferencedObject.getDictionary();
|
|
|
|
}
|
|
|
|
else if (dereferencedObject.isStream())
|
|
|
|
{
|
|
|
|
return dereferencedObject.getStream()->getDictionary();
|
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline
|
|
|
|
const PDFObject& PDFObjectStorage::getObjectByReference(PDFObjectReference reference) const
|
|
|
|
{
|
|
|
|
return getObject(reference);
|
|
|
|
}
|
|
|
|
|
2018-11-17 16:48:30 +01:00
|
|
|
} // namespace pdf
|
|
|
|
|
|
|
|
#endif // PDFDOCUMENT_H
|