From 7a7b1d7b403c1432eef0483e3db0a8c5a3bf9368 Mon Sep 17 00:00:00 2001 From: Jakub Melka Date: Mon, 24 Dec 2018 17:09:23 +0100 Subject: [PATCH] Load page labels --- PdfForQt.pro.user | 2 +- PdfForQtLib/PdfForQtLib.pro | 3 +- PdfForQtLib/sources/pdfcatalog.cpp | 31 +++++++ PdfForQtLib/sources/pdfcatalog.h | 47 ++++++++++ PdfForQtLib/sources/pdfdocument.cpp | 32 +++++++ PdfForQtLib/sources/pdfdocument.h | 57 ++++++++++++ PdfForQtLib/sources/pdfnumbertreeloader.h | 100 ++++++++++++++++++++++ 7 files changed, 270 insertions(+), 2 deletions(-) create mode 100644 PdfForQtLib/sources/pdfnumbertreeloader.h diff --git a/PdfForQt.pro.user b/PdfForQt.pro.user index 4ad65d7..6ba5cfb 100644 --- a/PdfForQt.pro.user +++ b/PdfForQt.pro.user @@ -1,6 +1,6 @@ - + EnvironmentId diff --git a/PdfForQtLib/PdfForQtLib.pro b/PdfForQtLib/PdfForQtLib.pro index e3ab1af..7101d42 100644 --- a/PdfForQtLib/PdfForQtLib.pro +++ b/PdfForQtLib/PdfForQtLib.pro @@ -56,7 +56,8 @@ HEADERS += \ sources/pdfflatmap.h \ sources/pdfvisitor.h \ sources/pdfencoding.h \ - sources/pdfcatalog.h + sources/pdfcatalog.h \ + sources/pdfnumbertreeloader.h unix { target.path = /usr/lib diff --git a/PdfForQtLib/sources/pdfcatalog.cpp b/PdfForQtLib/sources/pdfcatalog.cpp index 4114c63..1a9de96 100644 --- a/PdfForQtLib/sources/pdfcatalog.cpp +++ b/PdfForQtLib/sources/pdfcatalog.cpp @@ -18,6 +18,7 @@ #include "pdfcatalog.h" #include "pdfparser.h" #include "pdfdocument.h" +#include "pdfnumbertreeloader.h" namespace pdf { @@ -48,8 +49,12 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume throw PDFParserException(PDFTranslationContext::tr("Catalog must be a dictionary.")); } + const PDFDictionary* catalogDictionary = catalog.getDictionary(); + Q_ASSERT(catalogDictionary); + PDFCatalog catalogObject; catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document); + catalogObject.m_pageLabels = PDFNumberTreeLoader::parse(document, catalogDictionary->get("PageLabels")); return 
catalogObject; } @@ -315,4 +320,30 @@ PDFViewerPreferences PDFViewerPreferences::parse(const PDFObject& catalogDiction return result; } +PDFPageLabel PDFPageLabel::parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object) +{ + const PDFObject& dereferencedObject = document->getObject(object); + if (dereferencedObject.isDictionary()) + { + std::array, 5> numberingStyles = { std::pair{ "D", NumberingStyle::DecimalArabic}, + std::pair{ "R", NumberingStyle::UppercaseRoman }, + std::pair{ "r", NumberingStyle::LowercaseRoman }, + std::pair{ "A", NumberingStyle::UppercaseLetters}, + std::pair{ "a", NumberingStyle::LowercaseLetters} }; + + const PDFDictionary* dictionary = dereferencedObject.getDictionary(); + const PDFDocumentDataLoaderDecorator loader(document); + const NumberingStyle numberingStyle = loader.readEnumByName(dictionary->get("S"), numberingStyles.cbegin(), numberingStyles.cend(), NumberingStyle::None); + const QString prefix = loader.readTextString(dictionary->get("P"), QString()); + const PDFInteger startNumber = loader.readInteger(dictionary->get("St"), 1); + return PDFPageLabel(numberingStyle, prefix, pageIndex, startNumber); + } + else + { + throw PDFParserException(PDFTranslationContext::tr("Expected page label dictionary.")); + } + + return PDFPageLabel(); +} + } // namespace pdf diff --git a/PdfForQtLib/sources/pdfcatalog.h b/PdfForQtLib/sources/pdfcatalog.h index d3c0de5..ff6b272 100644 --- a/PdfForQtLib/sources/pdfcatalog.h +++ b/PdfForQtLib/sources/pdfcatalog.h @@ -54,6 +54,52 @@ enum class PageMode UseAttachments, ///< Attachments window is selected and visible }; +/// Represents page numbering definition object +class PDFPageLabel +{ +public: + + enum class NumberingStyle + { + None, ///< This means, only prefix is used, no numbering + DecimalArabic, + UppercaseRoman, + LowercaseRoman, + UppercaseLetters, + LowercaseLetters + }; + + explicit inline PDFPageLabel() : + m_numberingType(NumberingStyle::None), + m_prefix(), + 
m_pageIndex(0), + m_startNumber(0) + { + + } + + explicit inline PDFPageLabel(NumberingStyle numberingType, const QString& prefix, PDFInteger pageIndex, PDFInteger startNumber) : + m_numberingType(numberingType), + m_prefix(prefix), + m_pageIndex(pageIndex), + m_startNumber(startNumber) + { + + } + + /// Comparison operator, works only with page indices (because they should be unique) + bool operator<(const PDFPageLabel& other) const { return m_pageIndex < other.m_pageIndex; } + + /// Parses page label object from PDF object, according to PDF Reference 1.7, Table 8.10 + static PDFPageLabel parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object); + +private: + NumberingStyle m_numberingType; + QString m_prefix; + PDFInteger m_pageIndex; + PDFInteger m_startNumber; +}; + class PDFViewerPreferences { public: @@ -155,6 +201,7 @@ public: private: PDFViewerPreferences m_viewerPreferences; + std::vector m_pageLabels; }; } // namespace pdf diff --git a/PdfForQtLib/sources/pdfdocument.cpp b/PdfForQtLib/sources/pdfdocument.cpp index fe14da6..c204309 100644 --- a/PdfForQtLib/sources/pdfdocument.cpp +++ b/PdfForQtLib/sources/pdfdocument.cpp @@ -41,6 +41,16 @@ static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN = "Unknown" void PDFDocument::init() { initInfo(); + + const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary(); + + // Trailer object should be dictionary here. It is verified in the document reader. 
+ Q_ASSERT(trailerDictionary.isDictionary()); + + const PDFDictionary* dictionary = trailerDictionary.getDictionary(); + Q_ASSERT(dictionary); + + m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this); } void PDFDocument::initInfo() @@ -160,4 +170,26 @@ const PDFObject& PDFObjectStorage::getObject(PDFObjectReference reference) const } } +PDFInteger PDFDocumentDataLoaderDecorator::readInteger(const PDFObject& object, PDFInteger defaultValue) const +{ + const PDFObject& dereferencedObject = m_document->getObject(object); + if (dereferencedObject.isInt()) + { + return dereferencedObject.getInteger(); + } + + return defaultValue; +} + +QString PDFDocumentDataLoaderDecorator::readTextString(const PDFObject& object, const QString& defaultValue) const +{ + const PDFObject& dereferencedObject = m_document->getObject(object); + if (dereferencedObject.isString()) + { + return PDFEncoding::convertTextString(dereferencedObject.getString()); + } + + return defaultValue; +} + } // namespace pdf diff --git a/PdfForQtLib/sources/pdfdocument.h b/PdfForQtLib/sources/pdfdocument.h index 7a52c5f..0ac2062 100644 --- a/PdfForQtLib/sources/pdfdocument.h +++ b/PdfForQtLib/sources/pdfdocument.h @@ -21,12 +21,14 @@ #include "pdfglobal.h" #include "pdfobject.h" +#include "pdfcatalog.h" #include #include namespace pdf { +class PDFDocument; /// Storage for objects. This class is not thread safe for writing (calling non-const functions). Caller must ensure /// locking, if this object is used from multiple threads. Calling const functions should be thread safe. @@ -74,6 +76,56 @@ private: PDFObject m_trailerDictionary; }; +/// Loads data from the object contained in the PDF document, such as integers, +/// bools, ... This object has two sets of functions: the first set takes a default +/// value, which is returned if an object with valid data is not found; the second set +/// takes no default value and throws an exception if valid data are not found.
+/// This class uses Decorator design pattern. +class PDFDocumentDataLoaderDecorator +{ +public: + inline explicit PDFDocumentDataLoaderDecorator(const PDFDocument* document) : m_document(document) { } + inline ~PDFDocumentDataLoaderDecorator() = default; + + /// Reads an integer from the object, if it is possible. + /// \param object Object, can be an indirect reference to object (it is dereferenced) + /// \param defaultValue Default value + PDFInteger readInteger(const PDFObject& object, PDFInteger defaultValue) const; + + /// Reads a text string from the object, if it is possible. + /// \param object Object, can be an indirect reference to object (it is dereferenced) + /// \param defaultValue Default value + QString readTextString(const PDFObject& object, const QString& defaultValue) const; + + /// Reads enum from name object, if it is possible. + /// \param object Object, can be an indirect reference to object (it is dereferenced) + /// \param begin Begin of the enum search array + /// \param end End of the enum search array + /// \param defaultValue Default value + template + Enum readEnumByName(const PDFObject& object, Iterator begin, Iterator end, Enum defaultValue) const + { + const PDFObject& dereferencedObject = m_document->getObject(object); + if (dereferencedObject.isName()) + { + QByteArray name = dereferencedObject.getString(); + + for (Iterator it = begin; it != end; ++it) + { + if (name == (*it).first) + { + return (*it).second; + } + } + } + + return defaultValue; + } + +private: + const PDFDocument* m_document; +}; + /// PDF document main class.
class PDFDocument { @@ -139,8 +191,13 @@ private: /// Info about the PDF document Info m_info; + + /// Catalog object + PDFCatalog m_catalog; }; +// Implementation + inline const PDFObject& PDFDocument::getObject(const PDFObject& object) const { diff --git a/PdfForQtLib/sources/pdfnumbertreeloader.h b/PdfForQtLib/sources/pdfnumbertreeloader.h new file mode 100644 index 0000000..0619513 --- /dev/null +++ b/PdfForQtLib/sources/pdfnumbertreeloader.h @@ -0,0 +1,100 @@ +// Copyright (C) 2018 Jakub Melka +// +// This file is part of PdfForQt. +// +// PdfForQt is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// PdfForQt is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>. + + +#ifndef PDFNUMBERTREELOADER_H +#define PDFNUMBERTREELOADER_H + +#include "pdfdocument.h" + +#include + +namespace pdf +{ + +/// This class can load a number tree into the array +template +class PDFNumberTreeLoader +{ +public: + explicit PDFNumberTreeLoader() = delete; + + using Objects = std::vector; + + /// Parses the number tree and loads its items into the array. Some errors are ignored, + /// e.g. when kid is null. Type must contain methods to load object array. + static Objects parse(const PDFDocument* document, const PDFObject& root) + { + Objects result; + + // First, try to load items from the tree into the array + parseImpl(result, document, root); + + // Array may not be sorted. Sort it using comparison operator for Type.
+ std::stable_sort(result.begin(), result.end()); + + return result; + } + +private: + static void parseImpl(Objects& objects, const PDFDocument* document, const PDFObject& root) + { + const PDFObject& dereferencedRoot = document->getObject(root); + if (dereferencedRoot.isDictionary()) + { + const PDFDictionary* dictionary = dereferencedRoot.getDictionary(); + + // First, load the objects into the array + const PDFObject& numberedItems = document->getObject(dictionary->get("Nums")); + if (numberedItems.isArray()) + { + const PDFArray* numberedItemsArray = numberedItems.getArray(); + const size_t count = numberedItemsArray->getCount() / 2; + objects.reserve(objects.size() + count); + for (size_t i = 0; i < count; ++i) + { + const size_t numberIndex = 2 * i; + const size_t valueIndex = 2 * i + 1; + + const PDFObject& number = document->getObject(numberedItemsArray->getItem(numberIndex)); + if (!number.isInt()) + { + continue; + } + + objects.emplace_back(Type::parse(number.getInteger(), document, numberedItemsArray->getItem(valueIndex))); + } + } + + // Then, follow the kids + const PDFObject& kids = document->getObject(dictionary->get("Kids")); + if (kids.isArray()) + { + const PDFArray* kidsArray = kids.getArray(); + const size_t count = kidsArray->getCount(); + for (size_t i = 0; i < count; ++i) + { + parseImpl(objects, document, kidsArray->getItem(i)); + } + } + } + } +}; + +} // namespace pdf + +#endif // PDFNUMBERTREELOADER_H