// Copyright (C) 2018-2023 Jakub Melka // // This file is part of PDF4QT. // // PDF4QT is free software: you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation, either version 3 of the License, or // with the written consent of the copyright owner, any later version. // // PDF4QT is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with PDF4QT. If not, see . #include "pdfdocument.h" #include "pdfencoding.h" #include "pdfexception.h" #include "pdfstreamfilters.h" #include "pdfconstants.h" #include "pdfdbgheap.h" namespace pdf { static constexpr const char* PDF_DOCUMENT_INFO_ENTRY = "Info"; QByteArray PDFObjectStorage::getDecodedStream(const PDFStream* stream) const { return PDFStreamFilterStorage::getDecodedStream(stream, std::bind(QOverload::of(&PDFObjectStorage::getObject), this, std::placeholders::_1), getSecurityHandler()); } PDFDocument::~PDFDocument() { } bool PDFDocument::operator==(const PDFDocument& other) const { // Document is considered equal, if storage is equal return m_pdfObjectStorage == other.m_pdfObjectStorage; } QByteArray PDFDocument::getIdPart(size_t index) const { QByteArray id; const PDFObject& idArrayObject = getTrailerDictionary()->get("ID"); if (idArrayObject.isArray()) { const PDFArray* idArray = idArrayObject.getArray(); if (idArray->getCount() > index) { const PDFObject& idArrayItem = idArray->getItem(index); if (idArrayItem.isString()) { id = idArrayItem.getString(); } } } return id; } QByteArray PDFDocument::getDecodedStream(const PDFStream* stream) const { return m_pdfObjectStorage.getDecodedStream(stream); } const PDFDictionary* PDFDocument::getTrailerDictionary() const { const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary(); // Trailer object should be dictionary/stream here. It is verified in the document reader. Q_ASSERT(trailerDictionary.isDictionary() || trailerDictionary.isStream()); if (trailerDictionary.isDictionary()) { return trailerDictionary.getDictionary(); } else if (trailerDictionary.isStream()) { return trailerDictionary.getStream()->getDictionary(); } return nullptr; } QByteArray PDFDocument::getVersion() const { QByteArray result = m_catalog.getVersion(); if (result.isEmpty() && m_info.version.isValid()) { result = QString("%1.%2").arg(m_info.version.major).arg(m_info.version.minor).toLatin1(); } return result; } void PDFDocument::init() { initInfo(); const PDFDictionary* dictionary = getTrailerDictionary(); Q_ASSERT(dictionary); m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this); } void PDFDocument::initInfo() { // Trailer object should be dictionary here. It is verified in the document reader. const PDFDictionary* dictionary = getTrailerDictionary(); Q_ASSERT(dictionary); if (dictionary->hasKey(PDF_DOCUMENT_INFO_ENTRY)) { m_info = PDFDocumentInfo::parse(dictionary->get(PDF_DOCUMENT_INFO_ENTRY), &m_pdfObjectStorage); } } bool PDFObjectStorage::operator==(const PDFObjectStorage& other) const { // We compare just content. Security handler just defines encryption behavior. return m_objects == other.m_objects && m_trailerDictionary == other.m_trailerDictionary; } const PDFObject& PDFObjectStorage::getObject(PDFObjectReference reference) const { if (reference.objectNumber >= 0 && reference.objectNumber < static_cast(m_objects.size()) && m_objects[reference.objectNumber].generation == reference.generation) { return m_objects[reference.objectNumber].object; } else { static const PDFObject dummy; return dummy; } } PDFObjectReference PDFObjectStorage::addObject(PDFObject object) { PDFObjectReference reference(m_objects.size(), 0); m_objects.emplace_back(0, qMove(object)); return reference; } void PDFObjectStorage::setObject(PDFObjectReference reference, PDFObject object) { m_objects[reference.objectNumber] = Entry(reference.generation, qMove(object)); } void PDFObjectStorage::updateTrailerDictionary(PDFObject trailerDictionary) { m_trailerDictionary = PDFObjectManipulator::merge(m_trailerDictionary, trailerDictionary, PDFObjectManipulator::RemoveNullObjects); } PDFDocumentDataLoaderDecorator::PDFDocumentDataLoaderDecorator(const PDFDocument* document) : m_storage(&document->getStorage()) { } QByteArray PDFDocumentDataLoaderDecorator::readName(const PDFObject& object) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isName()) { return dereferencedObject.getString(); } return QByteArray(); } QByteArray PDFDocumentDataLoaderDecorator::readString(const PDFObject& object) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isString()) { return dereferencedObject.getString(); } return QByteArray(); } PDFInteger PDFDocumentDataLoaderDecorator::readInteger(const PDFObject& object, PDFInteger defaultValue) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isInt()) { return dereferencedObject.getInteger(); } return defaultValue; } PDFReal PDFDocumentDataLoaderDecorator::readNumber(const PDFObject& object, PDFReal defaultValue) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isReal()) { return dereferencedObject.getReal(); } else if (dereferencedObject.isInt()) { return dereferencedObject.getInteger(); } return defaultValue; } bool PDFDocumentDataLoaderDecorator::readBoolean(const PDFObject& object, bool defaultValue) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isBool()) { return dereferencedObject.getBool(); } return defaultValue; } QString PDFDocumentDataLoaderDecorator::readTextString(const PDFObject& object, const QString& defaultValue) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isString()) { return PDFEncoding::convertTextString(dereferencedObject.getString()); } return defaultValue; } QRectF PDFDocumentDataLoaderDecorator::readRectangle(const PDFObject& object, const QRectF& defaultValue) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isArray()) { const PDFArray* array = dereferencedObject.getArray(); if (array->getCount() == 4) { std::array items; for (size_t i = 0; i < 4; ++i) { const PDFObject& currentObject = m_storage->getObject(array->getItem(i)); if (currentObject.isReal()) { items[i] = currentObject.getReal(); } else if (currentObject.isInt()) { items[i] = currentObject.getInteger(); } else { return defaultValue; } } const PDFReal xMin = qMin(items[0], items[2]); const PDFReal xMax = qMax(items[0], items[2]); const PDFReal yMin = qMin(items[1], items[3]); const PDFReal yMax = qMax(items[1], items[3]); return QRectF(xMin, yMin, xMax - xMin, yMax - yMin); } } return defaultValue; } QTransform PDFDocumentDataLoaderDecorator::readMatrixFromDictionary(const PDFDictionary* dictionary, const char* key, QTransform defaultValue) const { if (dictionary->hasKey(key)) { std::vector matrixNumbers = readNumberArrayFromDictionary(dictionary, key); if (matrixNumbers.size() != 6) { throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid number of matrix elements. Expected 6, actual %1.").arg(matrixNumbers.size())); } return QTransform(matrixNumbers[0], matrixNumbers[1], matrixNumbers[2], matrixNumbers[3], matrixNumbers[4], matrixNumbers[5]); } return defaultValue; } std::vector PDFDocumentDataLoaderDecorator::readNumberArrayFromDictionary(const PDFDictionary* dictionary, const char* key, std::vector defaultValue) const { if (dictionary->hasKey(key)) { return readNumberArray(dictionary->get(key), defaultValue); } return defaultValue; } std::vector PDFDocumentDataLoaderDecorator::readIntegerArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const { if (dictionary->hasKey(key)) { return readIntegerArray(dictionary->get(key)); } return std::vector(); } PDFReal PDFDocumentDataLoaderDecorator::readNumberFromDictionary(const PDFDictionary* dictionary, const char* key, PDFReal defaultValue) const { if (dictionary->hasKey(key)) { return readNumber(dictionary->get(key), defaultValue); } return defaultValue; } PDFReal PDFDocumentDataLoaderDecorator::readNumberFromDictionary(const PDFDictionary* dictionary, const QByteArray& key, PDFReal defaultValue) const { if (dictionary->hasKey(key)) { return readNumber(dictionary->get(key), defaultValue); } return defaultValue; } PDFInteger PDFDocumentDataLoaderDecorator::readIntegerFromDictionary(const PDFDictionary* dictionary, const char* key, PDFInteger defaultValue) const { if (dictionary->hasKey(key)) { return readInteger(dictionary->get(key), defaultValue); } return defaultValue; } QString PDFDocumentDataLoaderDecorator::readTextStringFromDictionary(const PDFDictionary* dictionary, const char* key, const QString& defaultValue) const { if (dictionary->hasKey(key)) { return readTextString(dictionary->get(key), defaultValue); } return defaultValue; } std::vector PDFDocumentDataLoaderDecorator::readReferenceArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const { if (dictionary->hasKey(key)) { return readReferenceArray(dictionary->get(key)); } return std::vector(); } std::vector PDFDocumentDataLoaderDecorator::readNumberArray(const PDFObject& object, std::vector defaultValue) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isArray()) { const PDFArray* array = dereferencedObject.getArray(); std::vector result; const size_t count = array->getCount(); result.reserve(count); for (size_t i = 0; i < count; ++i) { const PDFReal number = readNumber(array->getItem(i), std::numeric_limits::quiet_NaN()); if (std::isnan(number)) { return defaultValue; } result.push_back(number); } // We assume, that RVO (return value optimization) will work return result; } return defaultValue; } std::vector PDFDocumentDataLoaderDecorator::readIntegerArray(const PDFObject& object) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isArray()) { const PDFArray* array = dereferencedObject.getArray(); std::vector result; const size_t count = array->getCount(); result.reserve(count); for (size_t i = 0; i < count; ++i) { // This value is not representable in the current PDF parser. So we assume we // can't get this value. constexpr const PDFInteger INVALID_VALUE = std::numeric_limits::max(); const PDFInteger number = readInteger(array->getItem(i), INVALID_VALUE); if (number == INVALID_VALUE) { return std::vector(); } result.push_back(number); } // We assume, that RVO (return value optimization) will work return result; } return std::vector(); } PDFObjectReference PDFDocumentDataLoaderDecorator::readReference(const PDFObject& object) const { if (object.isReference()) { return object.getReference(); } return PDFObjectReference(); } PDFObjectReference PDFDocumentDataLoaderDecorator::readReferenceFromDictionary(const PDFDictionary* dictionary, const char* key) const { const PDFObject& object = dictionary->get(key); if (object.isReference()) { return object.getReference(); } return PDFObjectReference(); } std::vector PDFDocumentDataLoaderDecorator::readReferenceArray(const PDFObject& object) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isArray()) { const PDFArray* array = dereferencedObject.getArray(); std::vector result; const size_t count = array->getCount(); result.reserve(count); for (size_t i = 0; i < count; ++i) { const PDFObject& referenceObject = array->getItem(i); if (referenceObject.isReference()) { result.push_back(referenceObject.getReference()); } else { result.clear(); break; } } // We assume, that RVO (return value optimization) will work return result; } return std::vector(); } std::vector PDFDocumentDataLoaderDecorator::readNameArray(const PDFObject& object) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isArray()) { const PDFArray* array = dereferencedObject.getArray(); std::vector result; const size_t count = array->getCount(); result.reserve(count); for (size_t i = 0; i < count; ++i) { const PDFObject& nameObject = array->getItem(i); if (nameObject.isName()) { result.push_back(nameObject.getString()); } else { result.clear(); break; } } // We assume, that RVO (return value optimization) will work return result; } return std::vector(); } std::vector PDFDocumentDataLoaderDecorator::readNameArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const { if (dictionary->hasKey(key)) { return readNameArray(dictionary->get(key)); } return std::vector(); } bool PDFDocumentDataLoaderDecorator::readBooleanFromDictionary(const PDFDictionary* dictionary, const char* key, bool defaultValue) const { if (dictionary->hasKey(key)) { return readBoolean(dictionary->get(key), defaultValue); } return defaultValue; } QByteArray PDFDocumentDataLoaderDecorator::readNameFromDictionary(const PDFDictionary* dictionary, const char* key) const { if (dictionary->hasKey(key)) { return readName(dictionary->get(key)); } return QByteArray(); } QByteArray PDFDocumentDataLoaderDecorator::readStringFromDictionary(const PDFDictionary* dictionary, const char* key) const { if (dictionary->hasKey(key)) { return readString(dictionary->get(key)); } return QByteArray(); } std::vector PDFDocumentDataLoaderDecorator::readStringArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const { if (dictionary->hasKey(key)) { return readStringArray(dictionary->get(key)); } return std::vector(); } QStringList PDFDocumentDataLoaderDecorator::readTextStringList(const PDFObject& object) { QStringList result; const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isArray()) { const PDFArray* array = dereferencedObject.getArray(); const size_t count = array->getCount(); result.reserve(int(count)); for (size_t i = 0; i < count; ++i) { result << readTextString(array->getItem(i), QString()); } } return result; } QColor PDFDocumentDataLoaderDecorator::readRGBColorFromDictionary(const PDFDictionary* dictionary, const char* key, QColor defaultColor) { std::vector colors = readNumberArrayFromDictionary(dictionary, key); if (colors.size() == 3) { const PDFReal red = qBound(0.0, colors[0], 1.0); const PDFReal green = qBound(0.0, colors[1], 1.0); const PDFReal blue = qBound(0.0, colors[2], 1.0); return QColor::fromRgbF(red, green, blue); } return defaultColor; } std::optional PDFDocumentDataLoaderDecorator::readOptionalStringFromDictionary(const PDFDictionary* dictionary, const char* key) const { if (dictionary->hasKey(key)) { return readStringFromDictionary(dictionary, key); } return std::nullopt; } std::optional PDFDocumentDataLoaderDecorator::readOptionalIntegerFromDictionary(const PDFDictionary* dictionary, const char* key) const { if (dictionary->hasKey(key)) { PDFInteger integer = readIntegerFromDictionary(dictionary, key, std::numeric_limits::max()); if (integer != std::numeric_limits::max()) { return integer; } } return std::nullopt; } std::vector PDFDocumentDataLoaderDecorator::readStringArray(const PDFObject& object) const { const PDFObject& dereferencedObject = m_storage->getObject(object); if (dereferencedObject.isArray()) { const PDFArray* array = dereferencedObject.getArray(); std::vector result; const size_t count = array->getCount(); result.reserve(count); for (size_t i = 0; i < count; ++i) { const PDFObject& stringObject = array->getItem(i); if (stringObject.isString()) { result.push_back(stringObject.getString()); } else { result.clear(); break; } } // We assume, that RVO (return value optimization) will work return result; } return std::vector(); } const PDFObject& PDFObjectStorage::getObject(const PDFObject& object) const { if (object.isReference()) { // Try to dereference the object return getObject(object.getReference()); } return object; } } // namespace pdf