Load page labels

This commit is contained in:
Jakub Melka 2018-12-24 17:09:23 +01:00
parent 038548c391
commit 7a7b1d7b40
7 changed files with 270 additions and 2 deletions

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE QtCreatorProject>
<!-- Written by QtCreator 4.7.2, 2018-12-14T16:56:03. -->
<!-- Written by QtCreator 4.7.2, 2018-12-23T18:52:37. -->
<qtcreator>
<data>
<variable>EnvironmentId</variable>

View File

@ -56,7 +56,8 @@ HEADERS += \
sources/pdfflatmap.h \
sources/pdfvisitor.h \
sources/pdfencoding.h \
sources/pdfcatalog.h
sources/pdfcatalog.h \
sources/pdfnumbertreeloader.h
unix {
target.path = /usr/lib

View File

@ -18,6 +18,7 @@
#include "pdfcatalog.h"
#include "pdfparser.h"
#include "pdfdocument.h"
#include "pdfnumbertreeloader.h"
namespace pdf
{
@ -48,8 +49,12 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
throw PDFParserException(PDFTranslationContext::tr("Catalog must be a dictionary."));
}
const PDFDictionary* catalogDictionary = catalog.getDictionary();
Q_ASSERT(catalogDictionary);
PDFCatalog catalogObject;
catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document);
catalogObject.m_pageLabels = PDFNumberTreeLoader<PDFPageLabel>::parse(document, catalogDictionary->get("PageLabels"));
return catalogObject;
}
@ -315,4 +320,30 @@ PDFViewerPreferences PDFViewerPreferences::parse(const PDFObject& catalogDiction
return result;
}
/// Builds a page label from a page label dictionary (PDF Reference 1.7, Table 8.10).
/// \param pageIndex Integer key under which the label was stored; it is kept as the label's page index
/// \param document Document used to resolve \p object and the dictionary entries
/// \param object Page label dictionary (passed through PDFDocument::getObject before use)
/// \returns Label built from the entries "S" (numbering style, NumberingStyle::None if the name is
///          missing or unknown), "P" (prefix, empty if missing) and "St" (start number, 1 if missing)
/// \throws PDFParserException if the resolved object is not a dictionary
PDFPageLabel PDFPageLabel::parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object)
{
    const PDFObject& dereferencedObject = document->getObject(object);
    if (!dereferencedObject.isDictionary())
    {
        throw PDFParserException(PDFTranslationContext::tr("Expected page label dictionary."));
    }

    // Mapping between the name stored under "S" and the numbering style it selects
    using StyleEntry = std::pair<const char*, NumberingStyle>;
    const std::array<StyleEntry, 5> numberingStyles = { StyleEntry{ "D", NumberingStyle::DecimalArabic },
                                                        StyleEntry{ "R", NumberingStyle::UppercaseRoman },
                                                        StyleEntry{ "r", NumberingStyle::LowercaseRoman },
                                                        StyleEntry{ "A", NumberingStyle::UppercaseLetters },
                                                        StyleEntry{ "a", NumberingStyle::LowercaseLetters } };

    const PDFDictionary* dictionary = dereferencedObject.getDictionary();
    const PDFDocumentDataLoaderDecorator loader(document);

    // Every entry is optional; the loader falls back to the supplied default value
    const NumberingStyle numberingStyle = loader.readEnumByName(dictionary->get("S"), numberingStyles.cbegin(), numberingStyles.cend(), NumberingStyle::None);
    const QString prefix = loader.readTextString(dictionary->get("P"), QString());
    const PDFInteger startNumber = loader.readInteger(dictionary->get("St"), 1);

    return PDFPageLabel(numberingStyle, prefix, pageIndex, startNumber);
}
} // namespace pdf

View File

@ -54,6 +54,52 @@ enum class PageMode
UseAttachments, ///< Attachments window is selected and visible
};
/// Represents page numbering definition object
class PDFPageLabel
{
public:
enum class NumberingStyle
{
None, ///< This means, only prefix is used, no numbering
DecimalArabic,
UppercaseRoman,
LowercaseRoman,
UppercaseLetters,
LowercaseLetters
};
explicit inline PDFPageLabel() :
m_numberingType(NumberingStyle::None),
m_prefix(),
m_pageIndex(0),
m_startNumber(0)
{
}
explicit inline PDFPageLabel(NumberingStyle numberingType, const QString& prefix, PDFInteger pageIndex, PDFInteger startNumber) :
m_numberingType(numberingType),
m_prefix(prefix),
m_pageIndex(pageIndex),
m_startNumber(startNumber)
{
}
/// Comparison operator, works only with page indices (because they should be unique)
bool operator<(const PDFPageLabel& other) const { return m_pageIndex < other.m_pageIndex; }
/// Parses page label object from PDF object, according to PDF Reference 1.7, Table 8.10
static PDFPageLabel parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object);
private:
NumberingStyle m_numberingType;
QString m_prefix;
PDFInteger m_pageIndex;
PDFInteger m_startNumber;
};
class PDFViewerPreferences
{
public:
@ -155,6 +201,7 @@ public:
private:
PDFViewerPreferences m_viewerPreferences;
std::vector<PDFPageLabel> m_pageLabels;
};
} // namespace pdf

View File

@ -41,6 +41,16 @@ static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN = "Unknown"
void PDFDocument::init()
{
initInfo();
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
// Trailer object should be dictionary here. It is verified in the document reader.
Q_ASSERT(trailerDictionary.isDictionary());
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
Q_ASSERT(dictionary);
m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this);
}
void PDFDocument::initInfo()
@ -160,4 +170,26 @@ const PDFObject& PDFObjectStorage::getObject(PDFObjectReference reference) const
}
}
/// Resolves \p object through the document and returns its integer value;
/// \p defaultValue is returned when the resolved object is not an integer.
PDFInteger PDFDocumentDataLoaderDecorator::readInteger(const PDFObject& object, PDFInteger defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    return resolved.isInt() ? resolved.getInteger() : defaultValue;
}
/// Resolves \p object through the document and converts its string content
/// with PDFEncoding::convertTextString; \p defaultValue is returned when the
/// resolved object is not a string.
QString PDFDocumentDataLoaderDecorator::readTextString(const PDFObject& object, const QString& defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isString())
    {
        return defaultValue;
    }

    return PDFEncoding::convertTextString(resolved.getString());
}
} // namespace pdf

View File

@ -21,12 +21,14 @@
#include "pdfglobal.h"
#include "pdfobject.h"
#include "pdfcatalog.h"
#include <QtCore>
#include <QDateTime>
namespace pdf
{
class PDFDocument;
/// Storage for objects. This class is not thread safe for writing (calling non-const functions). Caller must ensure
/// locking, if this object is used from multiple threads. Calling const functions should be thread safe.
@ -74,6 +76,56 @@ private:
PDFObject m_trailerDictionary;
};
/// Loads data from the object contained in the PDF document, such as integers,
/// bools, ... This object has two sets of functions - first one with default values,
/// then if object with valid data is not found, default value is used, and second one,
/// without default value, if valid data are not found, then exception is thrown.
/// This class uses Decorator design pattern.
class PDFDocumentDataLoaderDecorator
{
public:
inline explicit PDFDocumentDataLoaderDecorator(const PDFDocument* document) : m_document(document) { }
inline ~PDFDocumentDataLoaderDecorator() = default;
/// Reads an integer from the object, if it is possible.
/// \param object Object, can be an indirect reference to object (it is dereferenced)
/// \param defaultValue Default value
PDFInteger readInteger(const PDFObject& object, PDFInteger defaultValue) const;
/// Reads a text string from the object, if it is possible.
/// \param object Object, can be an indirect reference to object (it is dereferenced)
/// \param defaultValue Default value
QString readTextString(const PDFObject& object, const QString& defaultValue) const;
/// Reads enum from name object, if it is possible.
/// \param object Object, can be an indirect reference to object (it is dereferenced)
/// \param begin Begin of the enum search array
/// \param end End of the enum search array
/// \param default value Default value
template<typename Enum, typename Iterator>
Enum readEnumByName(const PDFObject& object, Iterator begin, Iterator end, Enum defaultValue) const
{
const PDFObject& dereferencedObject = m_document->getObject(object);
if (dereferencedObject.isName())
{
QByteArray name = dereferencedObject.getString();
for (Iterator it = begin; it != end; ++it)
{
if (name == (*it).first)
{
return (*it).second;
}
}
}
return defaultValue;
}
private:
const PDFDocument* m_document;
};
/// PDF document main class.
class PDFDocument
{
@ -139,8 +191,13 @@ private:
/// Info about the PDF document
Info m_info;
/// Catalog object
PDFCatalog m_catalog;
};
// Implementation
inline
const PDFObject& PDFDocument::getObject(const PDFObject& object) const
{

View File

@ -0,0 +1,100 @@
// Copyright (C) 2018 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFNUMBERTREELOADER_H
#define PDFNUMBERTREELOADER_H
#include "pdfdocument.h"
#include <algorithm>
#include <vector>
namespace pdf
{
/// Loads a number tree (a dictionary holding an array "Nums" of alternating
/// integer keys and values and/or an array "Kids" of child nodes) into a flat
/// array sorted by Type's operator<.
///
/// Requirements on Type:
///  - static function `parse(number, document, object)` creating Type from the
///    integer key of an item, the document and the (not yet dereferenced) value
///  - `operator<`, which is used to sort the loaded items
template<typename Type>
class PDFNumberTreeLoader
{
public:
    explicit PDFNumberTreeLoader() = delete;

    using Objects = std::vector<Type>;

    /// Parses the number tree and loads its items into the array. Malformed parts
    /// of the tree are skipped instead of being reported: nodes which are not
    /// dictionaries, items whose key is not an integer, a trailing key without
    /// value, and nodes nested deeper than MAX_TREE_DEPTH. Exceptions thrown by
    /// Type::parse are not caught.
    /// \param document Document used to dereference the objects
    /// \param root Root node of the number tree (can be an indirect reference)
    /// \returns Loaded items, stable-sorted using operator< of Type
    static Objects parse(const PDFDocument* document, const PDFObject& root)
    {
        Objects result;

        // First, try to load items from the tree into the array
        parseImpl(result, document, root, 0);

        // Array may not be sorted. Sort it using comparison operator for Type.
        std::stable_sort(result.begin(), result.end());

        return result;
    }

private:
    /// Deepest level of "Kids" nesting which is still followed (root is level 0).
    /// The tree comes from the file, so nothing prevents a malformed document from
    /// listing an ancestor among the kids; without a limit such a cycle would
    /// recurse until the stack overflows.
    static constexpr size_t MAX_TREE_DEPTH = 32;

    /// Loads items of the node \p root and of all its descendants into \p objects.
    /// \param objects Array to which the loaded items are appended
    /// \param document Document used to dereference the objects
    /// \param root Node of the tree (can be an indirect reference)
    /// \param depth Nesting level of \p root, 0 for the root of the tree
    static void parseImpl(Objects& objects, const PDFDocument* document, const PDFObject& root, size_t depth)
    {
        if (depth > MAX_TREE_DEPTH)
        {
            return;
        }

        const PDFObject& dereferencedRoot = document->getObject(root);
        if (!dereferencedRoot.isDictionary())
        {
            return;
        }

        const PDFDictionary* dictionary = dereferencedRoot.getDictionary();

        // First, load the objects into the array
        const PDFObject& numberedItems = document->getObject(dictionary->get("Nums"));
        if (numberedItems.isArray())
        {
            const PDFArray* numberedItemsArray = numberedItems.getArray();

            // Items are stored as (key, value) pairs; a trailing key without value is dropped
            const size_t count = numberedItemsArray->getCount() / 2;

            // This function runs once per node. Reserving exactly size() + count each
            // time would reallocate (and move all items) for every node, so grow at
            // least geometrically.
            const size_t required = objects.size() + count;
            if (required > objects.capacity())
            {
                objects.reserve(std::max(required, 2 * objects.capacity()));
            }

            for (size_t i = 0; i < count; ++i)
            {
                const size_t numberIndex = 2 * i;
                const size_t valueIndex = 2 * i + 1;

                const PDFObject& number = document->getObject(numberedItemsArray->getItem(numberIndex));
                if (!number.isInt())
                {
                    // Key is not an integer, skip the whole pair
                    continue;
                }

                objects.emplace_back(Type::parse(number.getInteger(), document, numberedItemsArray->getItem(valueIndex)));
            }
        }

        // Then, follow the kids
        const PDFObject& kids = document->getObject(dictionary->get("Kids"));
        if (kids.isArray())
        {
            const PDFArray* kidsArray = kids.getArray();
            const size_t count = kidsArray->getCount();
            for (size_t i = 0; i < count; ++i)
            {
                parseImpl(objects, document, kidsArray->getItem(i), depth + 1);
            }
        }
    }
};
} // namespace pdf
#endif // PDFNUMBERTREELOADER_H