// File: PDF4QT/PdfForQtLib/sources/pdfobject.h
//
// Copyright (C) 2018-2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFOBJECT_H
#define PDFOBJECT_H
#include "pdfglobal.h"

#include <QByteArray>

#include <algorithm>
#include <array>
#include <cstdint>
#include <initializer_list>
#include <memory>
#include <utility>
#include <variant>
#include <vector>

namespace pdf
{
// Forward declarations of types that are used by pointer or by name in the
// declarations below, before (or without) their full definition being visible.
class PDFAbstractVisitor;
class PDFArray;
class PDFDictionary;
class PDFStream;
class PDFString;

/// This class represents a content of the PDF object. It can be
/// array of objects, dictionary, content stream data, or string data.
/// It is an abstract interface; concrete content types derive from it.
class PDFObjectContent
{
public:
    constexpr PDFObjectContent() = default;
    virtual ~PDFObjectContent() = default;

    /// Equals operator. Returns true, if content of this object is
    /// equal to the content of the other object.
    /// \param other Content to be compared with this content
    virtual bool equals(const PDFObjectContent* other) const = 0;

    /// Optimizes memory consumption of this object
    virtual void optimize() = 0;
};
/// This class represents inplace string in the PDF object. To avoid too much
/// memory allocation, we store small strings inplace as small objects, so
/// we do not use memory allocator, so this doesn't cause performance downgrade.
/// Very often, PDF document consists of large number of names and strings
/// objects, which will fit into this category.
struct PDFInplaceString
{
static constexpr const int MAX_STRING_SIZE = sizeof(PDFObjectReference) - 1;
constexpr PDFInplaceString() = default;
inline PDFInplaceString(const QByteArray& data)
{
Q_ASSERT(data.size() <= MAX_STRING_SIZE);
size = static_cast<uint8_t>(data.size());
std::copy(data.cbegin(), data.cend(), string.data());
}
inline bool operator==(const PDFInplaceString& other) const
{
if (size != other.size)
{
return false;
}
for (uint8_t i = 0; i < size; ++i)
{
if (string[i] != other.string[i])
{
return false;
}
}
return true;
}
inline bool operator !=(const PDFInplaceString& other) const
{
return !(*this == other);
}
QByteArray getString() const
{
return (size > 0) ? QByteArray(string.data(), size) : QByteArray();
}
uint8_t size = 0;
std::array<char, MAX_STRING_SIZE> string = { };
};
/// Reference to one of the string implementations: an in-place string or
/// a string held as PDFString. Both pointers are null by default and the
/// struct does not release them.
struct PDFStringRef
{
    const PDFInplaceString* inplaceString{nullptr};
    const PDFString* memoryString{nullptr};

    /// Returns the referenced string data (implemented out of line)
    QByteArray getString() const;
};
2018-11-17 16:48:30 +01:00
class PDFFORQTLIBSHARED_EXPORT PDFObject
{
public:
2020-03-08 17:06:43 +01:00
enum class Type : uint8_t
2018-11-17 16:48:30 +01:00
{
// Simple PDF objects
Null,
Bool,
Int,
Real,
String,
Name,
// Complex PDF objects
Array,
Dictionary,
Stream,
Reference
};
2018-12-01 11:36:07 +01:00
static std::vector<Type> getTypes() { return { Type::Null, Type::Bool, Type::Int, Type::Real, Type::String, Type::Name, Type::Array, Type::Dictionary, Type::Stream, Type::Reference }; }
2018-11-17 16:48:30 +01:00
typedef std::shared_ptr<PDFObjectContent> PDFObjectContentPointer;
// Default constructor should be constexpr
constexpr inline PDFObject() :
m_type(Type::Null),
m_data()
{
}
// Default destructor should be OK
inline ~PDFObject() = default;
// Enforce default copy constructor and default move constructor
constexpr inline PDFObject(const PDFObject&) = default;
constexpr inline PDFObject(PDFObject&&) = default;
// Enforce default copy assignment operator and move assignment operator
constexpr inline PDFObject& operator=(const PDFObject&) = default;
constexpr inline PDFObject& operator=(PDFObject&&) = default;
2020-03-21 16:36:27 +01:00
inline Type getType() const { return m_type; }
2018-11-17 16:48:30 +01:00
// Test operators
inline bool isNull() const { return m_type == Type::Null; }
inline bool isBool() const { return m_type == Type::Bool; }
inline bool isInt() const { return m_type == Type::Int; }
inline bool isReal() const { return m_type == Type::Real; }
inline bool isString() const { return m_type == Type::String; }
inline bool isName() const { return m_type == Type::Name; }
inline bool isArray() const { return m_type == Type::Array; }
inline bool isDictionary() const { return m_type == Type::Dictionary; }
inline bool isStream() const { return m_type == Type::Stream; }
inline bool isReference() const { return m_type == Type::Reference; }
2018-12-01 11:36:07 +01:00
inline bool getBool() const { return std::get<bool>(m_data); }
2018-11-17 16:48:30 +01:00
inline PDFInteger getInteger() const { return std::get<PDFInteger>(m_data); }
2018-12-01 11:36:07 +01:00
inline PDFReal getReal() const { return std::get<PDFReal>(m_data); }
2018-11-17 16:48:30 +01:00
QByteArray getString() const;
2018-11-21 19:30:15 +01:00
const PDFDictionary* getDictionary() const;
2018-11-25 14:48:08 +01:00
PDFObjectReference getReference() const { return std::get<PDFObjectReference>(m_data); }
PDFStringRef getStringObject() const;
2018-12-01 11:36:07 +01:00
const PDFStream* getStream() const;
const PDFArray* getArray() const;
2018-11-17 16:48:30 +01:00
bool operator==(const PDFObject& other) const;
bool operator!=(const PDFObject& other) const { return !(*this == other); }
2018-12-01 11:36:07 +01:00
/// Accepts the visitor
void accept(PDFAbstractVisitor* visitor) const;
2018-11-17 16:48:30 +01:00
/// Creates a null object
static inline PDFObject createNull() { return PDFObject(); }
/// Creates a boolean object
static inline PDFObject createBool(bool value) { return PDFObject(Type::Bool, value); }
/// Creates an integer object
static inline PDFObject createInteger(PDFInteger value) { return PDFObject(Type::Int, value); }
/// Creates an object with real number
static inline PDFObject createReal(PDFReal value) { return PDFObject(Type::Real, value); }
/// Creates a reference object
static inline PDFObject createReference(const PDFObjectReference& reference) { return PDFObject(Type::Reference, reference); }
/// Creates an array object
2018-12-01 12:36:25 +01:00
static inline PDFObject createArray(PDFObjectContentPointer&& value) { value->optimize(); return PDFObject(Type::Array, std::move(value)); }
2018-11-17 16:48:30 +01:00
/// Creates a dictionary object
2018-12-01 12:36:25 +01:00
static inline PDFObject createDictionary(PDFObjectContentPointer&& value) { value->optimize(); return PDFObject(Type::Dictionary, std::move(value)); }
2018-11-17 16:48:30 +01:00
/// Creates a stream object
2018-12-01 12:36:25 +01:00
static inline PDFObject createStream(PDFObjectContentPointer&& value) { value->optimize(); return PDFObject(Type::Stream, std::move(value)); }
2018-11-17 16:48:30 +01:00
/// Creates a name object
static PDFObject createName(QByteArray name);
/// Creates a string object
static PDFObject createString(QByteArray name);
/// Creates a name object
static PDFObject createName(PDFStringRef name);
/// Creates a string object
static PDFObject createString(PDFStringRef name);
2018-11-17 16:48:30 +01:00
private:
template<typename T>
constexpr inline PDFObject(Type type, T&& value) :
m_data(std::forward<T>(value)),
m_type(type)
2018-11-17 16:48:30 +01:00
{
}
std::variant<typename std::monostate, bool, PDFInteger, PDFReal, PDFObjectReference, PDFObjectContentPointer, PDFInplaceString> m_data;
2018-11-17 16:48:30 +01:00
Type m_type;
};
/// Represents raw string in the PDF file. No conversions are performed, this is
2019-07-02 16:20:12 +02:00
/// reason, that we do not use QString, but QByteArray instead.
2018-11-17 16:48:30 +01:00
class PDFString : public PDFObjectContent
{
public:
inline explicit PDFString() = default;
inline explicit PDFString(QByteArray&& value) :
m_string(std::move(value))
{
}
virtual ~PDFString() override = default;
virtual bool equals(const PDFObjectContent* other) const override;
2018-12-01 11:36:07 +01:00
const QByteArray& getString() const { return m_string; }
2018-11-17 16:48:30 +01:00
void setString(const QByteArray &getString);
2018-12-01 12:36:25 +01:00
/// Optimizes the string for memory consumption
virtual void optimize() override;
2018-11-17 16:48:30 +01:00
private:
QByteArray m_string;
};
/// Represents an array of objects in the PDF file.
class PDFArray : public PDFObjectContent
{
public:
inline constexpr PDFArray() = default;
2019-08-13 11:45:36 +02:00
inline PDFArray(std::vector<PDFObject>&& objects) : m_objects(qMove(objects)) { }
2018-11-17 16:48:30 +01:00
virtual ~PDFArray() override = default;
virtual bool equals(const PDFObjectContent* other) const override;
/// Returns item at the specified index. If index is invalid,
/// then it throws an exception.
const PDFObject& getItem(size_t index) const { return m_objects.at(index); }
/// Returns size of the array (number of elements)
size_t getCount() const { return m_objects.size(); }
2018-12-01 11:36:07 +01:00
/// Returns capacity of the array (theoretical number of elements before reallocation)
size_t getCapacity() const { return m_objects.capacity(); }
2018-11-17 16:48:30 +01:00
/// Appends object to the end of object list
void appendItem(PDFObject object);
2018-12-01 12:36:25 +01:00
/// Optimizes the array for memory consumption
virtual void optimize() override;
2018-11-17 16:48:30 +01:00
private:
std::vector<PDFObject> m_objects;
};
/// Represents a dictionary of objects in the PDF file. Dictionary is
/// an array of pairs key-value, where key is name object and value is any
/// PDF object. For this reason, we use QByteArray for key. We do not use
/// map, because dictionaries are usually small.
2019-12-21 18:10:54 +01:00
class PDFFORQTLIBSHARED_EXPORT PDFDictionary : public PDFObjectContent
2018-11-17 16:48:30 +01:00
{
2019-08-13 11:45:36 +02:00
public:
2018-11-17 16:48:30 +01:00
using DictionaryEntry = std::pair<QByteArray, PDFObject>;
inline constexpr PDFDictionary() = default;
2019-08-13 11:45:36 +02:00
inline PDFDictionary(std::vector<DictionaryEntry>&& dictionary) : m_dictionary(qMove(dictionary)) { }
2018-11-17 16:48:30 +01:00
virtual ~PDFDictionary() override = default;
virtual bool equals(const PDFObjectContent* other) const override;
/// Returns object for the key. If key is not found in the dictionary,
/// then valid reference to the null object is returned.
/// \param key Key
const PDFObject& get(const QByteArray& key) const;
/// Returns object for the key. If key is not found in the dictionary,
/// then valid reference to the null object is returned.
/// \param key Key
const PDFObject& get(const char* key) const;
/// Returns true, if dictionary contains a particular key
/// \param key Key to be found in the dictionary
bool hasKey(const QByteArray& key) const { return find(key) != m_dictionary.cend(); }
/// Returns true, if dictionary contains a particular key
/// \param key Key to be found in the dictionary
bool hasKey(const char* key) const { return find(key) != m_dictionary.cend(); }
/// Adds a new entry to the dictionary.
/// \param key Key
/// \param value Value
void addEntry(QByteArray&& key, PDFObject&& value) { m_dictionary.emplace_back(std::move(key), std::move(value)); }
2020-03-21 16:36:27 +01:00
/// Sets entry value. If entry with given key doesn't exist,
/// then it is created.
/// \param key Key
/// \param value Value
void setEntry(const QByteArray& key, PDFObject&& value);
2018-12-01 11:36:07 +01:00
/// Returns count of items in the dictionary
size_t getCount() const { return m_dictionary.size(); }
/// Returns capacity of items in the dictionary
size_t getCapacity() const { return m_dictionary.capacity(); }
/// Returns n-th key of the dictionary
/// \param index Zero-based index of key in the dictionary
const QByteArray& getKey(size_t index) const { return m_dictionary[index].first; }
/// Returns n-th value of the dictionary
/// \param index Zero-based index of value in the dictionary
const PDFObject& getValue(size_t index) const { return m_dictionary[index].second; }
2020-03-21 16:36:27 +01:00
/// Removes null objects from dictionary
void removeNullObjects();
2018-12-01 12:36:25 +01:00
/// Optimizes the dictionary for memory consumption
virtual void optimize() override;
2018-11-17 16:48:30 +01:00
private:
/// Finds an item in the dictionary array, if the item is not in the dictionary,
/// then end iterator is returned.
/// \param key Key to be found
std::vector<DictionaryEntry>::const_iterator find(const QByteArray& key) const;
2020-03-21 16:36:27 +01:00
/// Finds an item in the dictionary array, if the item is not in the dictionary,
/// then end iterator is returned.
/// \param key Key to be found
std::vector<DictionaryEntry>::iterator find(const QByteArray& key);
2018-11-17 16:48:30 +01:00
/// Finds an item in the dictionary array, if the item is not in the dictionary,
/// then end iterator is returned.
/// \param key Key to be found
std::vector<DictionaryEntry>::const_iterator find(const char* key) const;
std::vector<DictionaryEntry> m_dictionary;
};
/// Represents a stream object in the PDF file. Stream consists of dictionary
/// and stream content - byte array.
class PDFStream : public PDFObjectContent
{
public:
inline explicit constexpr PDFStream() = default;
inline explicit PDFStream(PDFDictionary&& dictionary, QByteArray&& content) :
m_dictionary(std::move(dictionary)),
m_content(std::move(content))
{
}
virtual ~PDFStream() override = default;
virtual bool equals(const PDFObjectContent* other) const override;
/// Returns dictionary for this content stream
const PDFDictionary* getDictionary() const { return &m_dictionary; }
2018-12-01 12:36:25 +01:00
/// Optimizes the stream for memory consumption
virtual void optimize() override { m_dictionary.optimize(); m_content.shrink_to_fit(); }
/// Returns content of the stream
const QByteArray* getContent() const { return &m_content; }
2018-11-17 16:48:30 +01:00
private:
PDFDictionary m_dictionary;
QByteArray m_content;
};
2020-03-21 16:36:27 +01:00
class PDFObjectManipulator
{
public:
explicit PDFObjectManipulator() = delete;
enum MergeFlag
{
2020-06-05 19:16:45 +02:00
NoFlag = 0x0000,
2020-03-21 16:36:27 +01:00
RemoveNullObjects = 0x0001, ///< Remove null object from dictionaries
ConcatenateArrays = 0x0002, ///< Concatenate arrays instead of replace
};
Q_DECLARE_FLAGS(MergeFlags, MergeFlag)
/// Merges two objects. If object type is different, then object from right is used.
/// If both objects are dictionaries, then their content is merged, object \p right
/// has precedence over object \p left. If both objects are arrays, and concatenating
/// flag is turned on, then they are concatenated instead of replacing left array
/// by right array. If remove null objects flag is turend on, then null objects
/// are removed from dictionaries.
/// \param left Left, 'slave' object
/// \param right Right 'master' object, has priority over left
/// \param flags Merge flags
static PDFObject merge(PDFObject left, PDFObject right, MergeFlags flags);
/// Remove null objects from all dictionaries
/// \param object Object
static PDFObject removeNullObjects(PDFObject object);
};
2018-11-17 16:48:30 +01:00
} // namespace pdf
#endif // PDFOBJECT_H