Optional content - first part

This commit is contained in:
Jakub Melka 2019-06-23 18:35:32 +02:00
parent a429052002
commit 48f4a24923
9 changed files with 454 additions and 4 deletions

View File

@ -37,6 +37,7 @@ DESTDIR = $$OUT_PWD/..
SOURCES += \
sources/pdfobject.cpp \
sources/pdfoptionalcontent.cpp \
sources/pdfparser.cpp \
sources/pdfdocument.cpp \
sources/pdfdocumentreader.cpp \
@ -61,6 +62,7 @@ SOURCES += \
HEADERS += \
sources/pdfobject.h \
sources/pdfoptionalcontent.h \
sources/pdfparser.h \
sources/pdfglobal.h \
sources/pdfconstants.h \

View File

@ -56,6 +56,12 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document);
catalogObject.m_pages = PDFPage::parse(document, catalogDictionary->get("Pages"));
catalogObject.m_pageLabels = PDFNumberTreeLoader<PDFPageLabel>::parse(document, catalogDictionary->get("PageLabels"));
if (catalogDictionary->hasKey("OCProperties"))
{
catalogObject.m_optionalContentProperties = PDFOptionalContentProperties::create(document, catalogDictionary->get("OCProperties"));
}
return catalogObject;
}

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018 Jakub Melka
// Copyright (C) 2018-2019 Jakub Melka
//
// This file is part of PdfForQt.
//
@ -20,6 +20,7 @@
#include "pdfobject.h"
#include "pdfpage.h"
#include "pdfoptionalcontent.h"
#include <QtCore>
@ -202,6 +203,9 @@ public:
/// Returns the page
const PDFPage* getPage(size_t index) const { return &m_pages.at(index); }
/// Return optional content properties
const PDFOptionalContentProperties* getOptionalContentProperties() const { return &m_optionalContentProperties; }
/// Parses catalog from catalog dictionary. If object cannot be parsed, or error occurs,
/// then exception is thrown.
static PDFCatalog parse(const PDFObject& catalog, const PDFDocument* document);
@ -210,6 +214,7 @@ private:
PDFViewerPreferences m_viewerPreferences;
std::vector<PDFPage> m_pages;
std::vector<PDFPageLabel> m_pageLabels;
PDFOptionalContentProperties m_optionalContentProperties;
};
} // namespace pdf

View File

@ -411,6 +411,26 @@ PDFInteger PDFDocumentDataLoaderDecorator::readIntegerFromDictionary(const PDFDi
return defaultValue;
}
/// Reads the text string stored under \p key in \p dictionary.
/// \param dictionary Dictionary containing desired data
/// \param key Entry key
/// \param defaultValue Value returned when the entry is missing; it is also
///        forwarded to readTextString as its fallback
QString PDFDocumentDataLoaderDecorator::readTextStringFromDictionary(const PDFDictionary* dictionary, const char* key, const QString& defaultValue) const
{
    // No such entry - nothing to decode, hand back the caller's fallback.
    if (!dictionary->hasKey(key))
    {
        return defaultValue;
    }

    return readTextString(dictionary->get(key), defaultValue);
}
/// Reads the array of references stored under \p key in \p dictionary.
/// An empty array is returned when the entry is missing; otherwise the result
/// of readReferenceArray for the entry's value is returned.
/// \param dictionary Dictionary containing desired data
/// \param key Entry key
std::vector<PDFObjectReference> PDFDocumentDataLoaderDecorator::readReferenceArrayFromDictionary(const PDFDictionary* dictionary, const char* key)
{
    // Missing entry is not an error, it simply yields no references.
    if (!dictionary->hasKey(key))
    {
        return std::vector<PDFObjectReference>();
    }

    return readReferenceArray(dictionary->get(key));
}
std::vector<PDFReal> PDFDocumentDataLoaderDecorator::readNumberArray(const PDFObject& object) const
{
const PDFObject& dereferencedObject = m_document->getObject(object);
@ -470,6 +490,80 @@ std::vector<PDFInteger> PDFDocumentDataLoaderDecorator::readIntegerArray(const P
return std::vector<PDFInteger>();
}
/// Reads an array of references from \p object (the object is first resolved
/// through the document). All items are read; if the resolved object is not an
/// array, or any item is not a reference, an empty array is returned.
/// \param object Object containing array of references
std::vector<PDFObjectReference> PDFDocumentDataLoaderDecorator::readReferenceArray(const PDFObject& object) const
{
    const PDFObject& dereferencedObject = m_document->getObject(object);
    if (!dereferencedObject.isArray())
    {
        return std::vector<PDFObjectReference>();
    }

    const PDFArray* array = dereferencedObject.getArray();
    const size_t count = array->getCount();

    std::vector<PDFObjectReference> result;
    result.reserve(count);

    for (size_t i = 0; i < count; ++i)
    {
        const PDFObject& referenceObject = array->getItem(i);
        if (!referenceObject.isReference())
        {
            // A single invalid item invalidates the whole array.
            return std::vector<PDFObjectReference>();
        }

        result.push_back(referenceObject.getReference());
    }

    // A local returned by name is implicitly treated as an rvalue, so it is
    // moved (or elided) even with multiple return points; an explicit
    // std::move here would only inhibit copy elision.
    return result;
}
/// Reads an array of names from \p object (the object is first resolved
/// through the document). All items are read; if the resolved object is not an
/// array, or any item is not a name, an empty array is returned.
/// \param object Object containing array of names
std::vector<QByteArray> PDFDocumentDataLoaderDecorator::readNameArray(const PDFObject& object) const
{
    const PDFObject& dereferencedObject = m_document->getObject(object);
    if (!dereferencedObject.isArray())
    {
        return std::vector<QByteArray>();
    }

    const PDFArray* array = dereferencedObject.getArray();
    const size_t count = array->getCount();

    std::vector<QByteArray> result;
    result.reserve(count);

    for (size_t i = 0; i < count; ++i)
    {
        const PDFObject& nameObject = array->getItem(i);
        if (!nameObject.isName())
        {
            // A single invalid item invalidates the whole array.
            return std::vector<QByteArray>();
        }

        result.push_back(nameObject.getString());
    }

    // A local returned by name is implicitly treated as an rvalue, so it is
    // moved (or elided) even with multiple return points; an explicit
    // std::move here would only inhibit copy elision.
    return result;
}
/// Reads the array of names stored under \p key in \p dictionary.
/// An empty array is returned when the entry is missing; otherwise the result
/// of readNameArray for the entry's value is returned.
/// \param dictionary Dictionary containing desired data
/// \param key Entry key
std::vector<QByteArray> PDFDocumentDataLoaderDecorator::readNameArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const
{
    // Missing entry is not an error, it simply yields no names.
    if (!dictionary->hasKey(key))
    {
        return std::vector<QByteArray>();
    }

    return readNameArray(dictionary->get(key));
}
bool PDFDocumentDataLoaderDecorator::readBooleanFromDictionary(const PDFDictionary* dictionary, const char* key, bool defaultValue) const
{
if (dictionary->hasKey(key))

View File

@ -190,11 +190,11 @@ public:
}
/// Tries to read array of real values from dictionary. If entry dictionary doesn't exist,
/// or error occurs, empty record is returned.
/// or error occurs, empty array is returned.
std::vector<PDFReal> readNumberArrayFromDictionary(const PDFDictionary* dictionary, const char* key);
/// Tries to read array of integer values from dictionary. If entry dictionary doesn't exist,
/// or error occurs, empty record is returned.
/// or error occurs, empty array is returned.
std::vector<PDFInteger> readIntegerArrayFromDictionary(const PDFDictionary* dictionary, const char* key);
/// Reads number from dictionary. If dictionary entry doesn't exist, or error occurs, default value is returned.
@ -209,6 +209,16 @@ public:
/// \param defaultValue Default value
PDFInteger readIntegerFromDictionary(const PDFDictionary* dictionary, const char* key, PDFInteger defaultValue) const;
/// Reads a text string from the dictionary, if it is possible.
/// \param dictionary Dictionary containing desired data
/// \param key Entry key
/// \param defaultValue Default value
QString readTextStringFromDictionary(const PDFDictionary* dictionary, const char* key, const QString& defaultValue) const;
/// Tries to read array of references from dictionary. If entry dictionary doesn't exist,
/// or error occurs, empty array is returned.
std::vector<PDFObjectReference> readReferenceArrayFromDictionary(const PDFDictionary* dictionary, const char* key);
/// Reads number array from dictionary. Reads all values. If some value is not
/// real number (or integer number), empty array is returned. Empty array is also returned,
/// if \p object is invalid.
@ -221,6 +231,22 @@ public:
/// \param object Object containing array of numbers
std::vector<PDFInteger> readIntegerArray(const PDFObject& object) const;
/// Reads reference array from the object (the object is resolved through the document first).
/// Reads all values. If the object is not an array, or some item is not a reference,
/// then empty array is returned.
/// \param object Object containing array of references
std::vector<PDFObjectReference> readReferenceArray(const PDFObject& object) const;
/// Reads name array from the object (the object is resolved through the document first).
/// Reads all values. If the object is not an array, or some item is not a name,
/// then empty array is returned.
/// \param object Object containing array of names
std::vector<QByteArray> readNameArray(const PDFObject& object) const;
/// Reads name array from dictionary. Reads all values. If error occurs,
/// then empty array is returned.
/// \param dictionary Dictionary containing desired data
/// \param key Entry key
std::vector<QByteArray> readNameArrayFromDictionary(const PDFDictionary* dictionary, const char* key) const;
/// Reads boolean from dictionary. If dictionary entry doesn't exist, or error occurs, default value is returned.
/// \param dictionary Dictionary containing desired data
/// \param key Entry key

View File

@ -0,0 +1,181 @@
// Copyright (C) 2019 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdfoptionalcontent.h"
#include "pdfdocument.h"
#include "pdfexception.h"
namespace pdf
{
/// Builds the optional content properties from \p object (resolved through
/// \p document). A null object yields default (empty) properties.
/// \param document Document
/// \param object Object containing the optional content properties dictionary
/// \throws PDFParserException if the resolved object is neither a dictionary nor null,
///         or if its "Configs" entry is neither an array nor null
PDFOptionalContentProperties PDFOptionalContentProperties::create(const PDFDocument* document, const PDFObject& object)
{
    PDFOptionalContentProperties result;

    const PDFObject& propertiesObject = document->getObject(object);
    if (!propertiesObject.isDictionary())
    {
        // Null means "no optional content"; anything else is malformed.
        if (!propertiesObject.isNull())
        {
            throw PDFParserException(PDFTranslationContext::tr("Invalid optional content properties."));
        }

        return result;
    }

    const PDFDictionary* propertiesDictionary = propertiesObject.getDictionary();
    PDFDocumentDataLoaderDecorator loader(document);

    // All optional content groups of the document
    result.m_allOptionalContentGroups = loader.readReferenceArrayFromDictionary(propertiesDictionary, "OCGs");

    // Default configuration
    if (propertiesDictionary->hasKey("D"))
    {
        result.m_defaultConfiguration = PDFOptionalContentConfiguration::create(document, propertiesDictionary->get("D"));
    }

    // Alternate configurations
    if (propertiesDictionary->hasKey("Configs"))
    {
        const PDFObject& configurationsObject = document->getObject(propertiesDictionary->get("Configs"));
        if (configurationsObject.isArray())
        {
            const PDFArray* configurationsArray = configurationsObject.getArray();
            const size_t configurationCount = configurationsArray->getCount();

            result.m_configurations.reserve(configurationCount);
            for (size_t index = 0; index < configurationCount; ++index)
            {
                result.m_configurations.emplace_back(PDFOptionalContentConfiguration::create(document, configurationsArray->getItem(index)));
            }
        }
        else if (!configurationsObject.isNull())
        {
            throw PDFParserException(PDFTranslationContext::tr("Invalid optional content properties."));
        }
    }

    return result;
}
/// Builds an optional content configuration from \p object (resolved through
/// \p document). If the resolved object is not a dictionary, a default-constructed
/// configuration is returned.
/// \param document Document
/// \param object Object containing the optional content configuration dictionary
/// \throws PDFParserException if "Intent" is neither a name, an array nor null,
///         or if "AS", "Order" or "RBGroups" is neither an array nor null
PDFOptionalContentConfiguration PDFOptionalContentConfiguration::create(const PDFDocument* document, const PDFObject& object)
{
    PDFOptionalContentConfiguration configuration;

    const PDFObject& dereferencedObject = document->getObject(object);
    if (dereferencedObject.isDictionary())
    {
        const PDFDictionary* dictionary = dereferencedObject.getDictionary();
        PDFDocumentDataLoaderDecorator loader(document);

        configuration.m_name = loader.readTextStringFromDictionary(dictionary, "Name", QString());
        configuration.m_creator = loader.readTextStringFromDictionary(dictionary, "Creator", QString());

        constexpr const std::array<std::pair<const char*, BaseState>, 3> baseStateEnumValues = {
            std::pair<const char*, BaseState>{ "ON", BaseState::ON },
            std::pair<const char*, BaseState>{ "OFF", BaseState::OFF },
            std::pair<const char*, BaseState>{ "Unchanged", BaseState::Unchanged }
        };
        configuration.m_baseState = loader.readEnumByName(dictionary->get("BaseState"), baseStateEnumValues.cbegin(), baseStateEnumValues.cend(), BaseState::ON);

        configuration.m_OnArray = loader.readReferenceArrayFromDictionary(dictionary, "ON");
        configuration.m_OffArray = loader.readReferenceArrayFromDictionary(dictionary, "OFF");

        // Intent is either a single name, or an array of names
        if (dictionary->hasKey("Intent"))
        {
            const PDFObject& nameOrNames = document->getObject(dictionary->get("Intent"));

            if (nameOrNames.isName())
            {
                configuration.m_intents = { loader.readName(nameOrNames) };
            }
            else if (nameOrNames.isArray())
            {
                configuration.m_intents = loader.readNameArray(nameOrNames);
            }
            else if (!nameOrNames.isNull())
            {
                throw PDFParserException(PDFTranslationContext::tr("Invalid optional content configuration."));
            }
        }

        // Usage application dictionaries
        if (dictionary->hasKey("AS"))
        {
            const PDFObject& asArrayObject = document->getObject(dictionary->get("AS"));

            if (asArrayObject.isArray())
            {
                const PDFArray* asArray = asArrayObject.getArray();
                configuration.m_usageApplications.reserve(asArray->getCount());

                for (size_t i = 0, count = asArray->getCount(); i < count; ++i)
                {
                    configuration.m_usageApplications.emplace_back(createUsageApplication(document, asArray->getItem(i)));
                }
            }
            else if (!asArrayObject.isNull())
            {
                throw PDFParserException(PDFTranslationContext::tr("Invalid optional content configuration."));
            }
        }

        // The order array is stored as is (resolved); only its type is validated here
        configuration.m_order = document->getObject(dictionary->get("Order"));
        if (!configuration.m_order.isArray() && !configuration.m_order.isNull())
        {
            throw PDFParserException(PDFTranslationContext::tr("Invalid optional content configuration."));
        }

        // The array size must match the number of initializers. A larger size would
        // silently append a value-initialized entry (null name pointer) to the range
        // handed over to readEnumByName.
        constexpr const std::array<std::pair<const char*, ListMode>, 2> listModeEnumValues = {
            std::pair<const char*, ListMode>{ "AllPages", ListMode::AllPages },
            std::pair<const char*, ListMode>{ "VisiblePages", ListMode::VisiblePages }
        };
        configuration.m_listMode = loader.readEnumByName(dictionary->get("ListMode"), listModeEnumValues.cbegin(), listModeEnumValues.cend(), ListMode::AllPages);

        // Radio button groups - array of arrays of references
        if (dictionary->hasKey("RBGroups"))
        {
            const PDFObject& rbGroupsObject = document->getObject(dictionary->get("RBGroups"));

            if (rbGroupsObject.isArray())
            {
                const PDFArray* rbGroupsArray = rbGroupsObject.getArray();
                configuration.m_radioButtonGroups.reserve(rbGroupsArray->getCount());

                for (size_t i = 0, count = rbGroupsArray->getCount(); i < count; ++i)
                {
                    configuration.m_radioButtonGroups.emplace_back(loader.readReferenceArray(rbGroupsArray->getItem(i)));
                }
            }
            else if (!rbGroupsObject.isNull())
            {
                throw PDFParserException(PDFTranslationContext::tr("Invalid optional content configuration."));
            }
        }

        configuration.m_locked = loader.readReferenceArrayFromDictionary(dictionary, "Locked");
    }

    return configuration;
}
/// Builds a usage application record from \p object (resolved through \p document).
/// If the resolved object is not a dictionary, a default-constructed (empty)
/// record is returned.
/// \param document Document
/// \param object Object containing usage application
PDFOptionalContentConfiguration::UsageApplication PDFOptionalContentConfiguration::createUsageApplication(const PDFDocument* document, const PDFObject& object)
{
    UsageApplication usageApplication;

    const PDFObject& usageObject = document->getObject(object);
    if (!usageObject.isDictionary())
    {
        return usageApplication;
    }

    PDFDocumentDataLoaderDecorator loader(document);
    const PDFDictionary* usageDictionary = usageObject.getDictionary();

    usageApplication.event = loader.readNameFromDictionary(usageDictionary, "Event");
    usageApplication.optionalContengGroups = loader.readReferenceArrayFromDictionary(usageDictionary, "OCGs");
    usageApplication.categories = loader.readNameArrayFromDictionary(usageDictionary, "Category");

    return usageApplication;
}
} // namespace pdf

View File

@ -0,0 +1,103 @@
// Copyright (C) 2019 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFOPTIONALCONTENT_H
#define PDFOPTIONALCONTENT_H
#include "pdfobject.h"
namespace pdf
{
class PDFDocument;
/// Optional content configuration. One instance is created for the default
/// configuration (entry "D") and one for each item of the "Configs" array of
/// the document's optional content properties.
class PDFOptionalContentConfiguration
{
public:
/// Base state, read from the "BaseState" entry by name ("ON", "OFF", "Unchanged").
/// ON is used when the entry can't be read.
enum class BaseState
{
ON,
OFF,
Unchanged
};
/// List mode, read from the "ListMode" entry by name ("AllPages", "VisiblePages").
/// AllPages is used when the entry can't be read.
enum class ListMode
{
AllPages,
VisiblePages
};
/// Single item of the "AS" array of the configuration dictionary.
struct UsageApplication
{
/// Name read from the "Event" entry
QByteArray event;
/// References read from the "OCGs" entry.
/// NOTE(review): the member name looks like a typo of 'optionalContentGroups';
/// renaming it requires updating every user of this struct at the same time.
std::vector<PDFObjectReference> optionalContengGroups;
/// Names read from the "Category" entry
std::vector<QByteArray> categories;
};
/// Creates new optional content configuration from the object. If the object is not
/// a dictionary, a default configuration is returned. If some entry of the dictionary
/// ("Intent", "AS", "Order", "RBGroups") has an invalid type, then exception is thrown.
/// \param document Document
/// \param object Object containing documents optional content configuration
static PDFOptionalContentConfiguration create(const PDFDocument* document, const PDFObject& object);
private:
/// Creates usage application. If the object is not a dictionary, an empty
/// usage application is returned.
/// \param document Document
/// \param object Object containing usage application
static UsageApplication createUsageApplication(const PDFDocument* document, const PDFObject& object);
/// Text string from the "Name" entry
QString m_name;
/// Text string from the "Creator" entry
QString m_creator;
/// Value of the "BaseState" entry
BaseState m_baseState = BaseState::ON;
/// References from the "ON" entry
std::vector<PDFObjectReference> m_OnArray;
/// References from the "OFF" entry
std::vector<PDFObjectReference> m_OffArray;
/// Names from the "Intent" entry (which is either a single name, or an array of names)
std::vector<QByteArray> m_intents;
/// Items of the "AS" array
std::vector<UsageApplication> m_usageApplications;
/// Resolved "Order" entry; it is either an array, or a null object
PDFObject m_order;
/// Value of the "ListMode" entry
ListMode m_listMode = ListMode::AllPages;
/// Items of the "RBGroups" array, each item is an array of references
std::vector<std::vector<PDFObjectReference>> m_radioButtonGroups;
/// References from the "Locked" entry
std::vector<PDFObjectReference> m_locked;
};
/// Object containing properties of the optional content of the PDF document. It contains
/// all optional content groups of the document ("OCGs" entry), the default configuration
/// ("D" entry) and the other configurations ("Configs" entry).
class PDFOptionalContentProperties
{
public:
/// Constructs empty (invalid) properties - no groups, default configuration, no other configurations
explicit PDFOptionalContentProperties() = default;
/// Returns, if object is valid - at least one optional content group exists
bool isValid() const { return !m_allOptionalContentGroups.empty(); }
/// Creates new optional content properties from the object. If the object is null, then
/// empty properties are returned. If the object is neither a dictionary nor null, or the
/// "Configs" entry is neither an array nor null, then exception is thrown.
/// \param document Document
/// \param object Object containing documents optional content properties
static PDFOptionalContentProperties create(const PDFDocument* document, const PDFObject& object);
private:
/// References from the "OCGs" entry
std::vector<PDFObjectReference> m_allOptionalContentGroups;
/// Configuration created from the "D" entry (default-constructed, if the entry is missing)
PDFOptionalContentConfiguration m_defaultConfiguration;
/// Configurations created from the items of the "Configs" array
std::vector<PDFOptionalContentConfiguration> m_configurations;
};
} // namespace pdf
#endif // PDFOPTIONALCONTENT_H

View File

@ -182,7 +182,8 @@ PDFPageContentProcessor::PDFPageContentProcessor(const PDFPage* page, const PDFD
m_fontDictionary(nullptr),
m_xobjectDictionary(nullptr),
m_extendedGraphicStateDictionary(nullptr),
m_textBeginEndState(0)
m_textBeginEndState(0),
m_compatibilityBeginEndState(0)
{
Q_ASSERT(page);
Q_ASSERT(document);
@ -790,6 +791,18 @@ void PDFPageContentProcessor::processCommand(const QByteArray& command)
break;
}
case Operator::CompatibilityBegin:
{
operatorCompatibilityBegin();
break;
}
case Operator::CompatibilityEnd:
{
operatorCompatibilityEnd();
break;
}
case Operator::Invalid:
{
m_errorList.append(PDFRenderError(RenderErrorType::Error, PDFTranslationContext::tr("Unknown operator '%1'.").arg(QString::fromLatin1(command))));
@ -1876,6 +1889,19 @@ void PDFPageContentProcessor::operatorPaintXObject(PDFPageContentProcessor::PDFO
}
}
/// BX operator - enters one more level of the compatibility section by
/// raising the begin/end nesting counter.
void PDFPageContentProcessor::operatorCompatibilityBegin()
{
    m_compatibilityBeginEndState += 1;
}
/// EX operator - leaves one level of the compatibility section by lowering
/// the begin/end nesting counter.
/// \throws PDFRendererException if there is no open compatibility section
///         (EX without matching BX)
void PDFPageContentProcessor::operatorCompatibilityEnd()
{
    // Validate before decrementing, so the counter never becomes negative.
    // Otherwise, if the caller reports the error and keeps processing the
    // content stream, a later well-formed BX/EX pair would be miscounted.
    if (m_compatibilityBeginEndState <= 0)
    {
        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Compatibility operator begin/end mismatch."));
    }

    --m_compatibilityBeginEndState;
}
void PDFPageContentProcessor::drawText(const TextSequence& textSequence)
{
if (textSequence.items.empty())

View File

@ -593,6 +593,10 @@ private:
// XObject: Do
void operatorPaintXObject(PDFOperandName name); ///< Do, paint the X Object (image, form, ...)
// Compatibility: BX, EX
void operatorCompatibilityBegin(); ///< BX, Compatibility mode begin (unrecognized operators are ignored)
void operatorCompatibilityEnd(); ///< EX, Compatibility mode end
// Draws the text using the text sequence
void drawText(const TextSequence& textSequence);
@ -639,6 +643,9 @@ private:
/// Nesting level of the begin/end of text object
int m_textBeginEndState;
/// Compatibility level (if positive, then unrecognized operators are ignored)
int m_compatibilityBeginEndState;
/// Actually realized physical font
PDFCachedItem<PDFRealizedFontPointer> m_realizedFont;