mirror of https://github.com/JakubMelka/PDF4QT.git
Load page labels
This commit is contained in:
parent
038548c391
commit
7a7b1d7b40
|
@ -1,6 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE QtCreatorProject>
|
||||
<!-- Written by QtCreator 4.7.2, 2018-12-14T16:56:03. -->
|
||||
<!-- Written by QtCreator 4.7.2, 2018-12-23T18:52:37. -->
|
||||
<qtcreator>
|
||||
<data>
|
||||
<variable>EnvironmentId</variable>
|
||||
|
|
|
@ -56,7 +56,8 @@ HEADERS += \
|
|||
sources/pdfflatmap.h \
|
||||
sources/pdfvisitor.h \
|
||||
sources/pdfencoding.h \
|
||||
sources/pdfcatalog.h
|
||||
sources/pdfcatalog.h \
|
||||
sources/pdfnumbertreeloader.h
|
||||
|
||||
unix {
|
||||
target.path = /usr/lib
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "pdfcatalog.h"
|
||||
#include "pdfparser.h"
|
||||
#include "pdfdocument.h"
|
||||
#include "pdfnumbertreeloader.h"
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
@ -48,8 +49,12 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
|
|||
throw PDFParserException(PDFTranslationContext::tr("Catalog must be a dictionary."));
|
||||
}
|
||||
|
||||
const PDFDictionary* catalogDictionary = catalog.getDictionary();
|
||||
Q_ASSERT(catalogDictionary);
|
||||
|
||||
PDFCatalog catalogObject;
|
||||
catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document);
|
||||
catalogObject.m_pageLabels = PDFNumberTreeLoader<PDFPageLabel>::parse(document, catalogDictionary->get("PageLabels"));
|
||||
return catalogObject;
|
||||
}
|
||||
|
||||
|
@ -315,4 +320,30 @@ PDFViewerPreferences PDFViewerPreferences::parse(const PDFObject& catalogDiction
|
|||
return result;
|
||||
}
|
||||
|
||||
/// Parses one page label dictionary (PDF Reference 1.7, Table 8.10).
/// \param pageIndex Value stored as the label's page index (the number tree loader passes the entry's key here)
/// \param document Document used to dereference indirect objects
/// \param object Page label dictionary, can be an indirect reference to it (it is dereferenced)
/// \returns Page label built from entries "S" (numbering style), "P" (prefix) and "St" (start number)
/// \throws PDFParserException if the dereferenced object is not a dictionary
PDFPageLabel PDFPageLabel::parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object)
{
    const PDFObject& dereferencedObject = document->getObject(object);
    if (!dereferencedObject.isDictionary())
    {
        throw PDFParserException(PDFTranslationContext::tr("Expected page label dictionary."));
    }

    // Mapping from the name stored in entry "S" to the numbering style. The table
    // is constant, so it is built once instead of on every call.
    using StyleEntry = std::pair<const char*, NumberingStyle>;
    static const std::array<StyleEntry, 5> numberingStyles = { StyleEntry{ "D", NumberingStyle::DecimalArabic },
                                                               StyleEntry{ "R", NumberingStyle::UppercaseRoman },
                                                               StyleEntry{ "r", NumberingStyle::LowercaseRoman },
                                                               StyleEntry{ "A", NumberingStyle::UppercaseLetters },
                                                               StyleEntry{ "a", NumberingStyle::LowercaseLetters } };

    const PDFDictionary* dictionary = dereferencedObject.getDictionary();
    const PDFDocumentDataLoaderDecorator loader(document);

    // Missing or malformed entries fall back to: no numbering, empty prefix, start number 1.
    const NumberingStyle numberingStyle = loader.readEnumByName(dictionary->get("S"), numberingStyles.cbegin(), numberingStyles.cend(), NumberingStyle::None);
    const QString prefix = loader.readTextString(dictionary->get("P"), QString());
    const PDFInteger startNumber = loader.readInteger(dictionary->get("St"), 1);
    return PDFPageLabel(numberingStyle, prefix, pageIndex, startNumber);
}
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -54,6 +54,52 @@ enum class PageMode
|
|||
UseAttachments, ///< Attachments window is selected and visible
|
||||
};
|
||||
|
||||
/// Represents page numbering definition object
|
||||
class PDFPageLabel
|
||||
{
|
||||
public:
|
||||
|
||||
enum class NumberingStyle
|
||||
{
|
||||
None, ///< This means, only prefix is used, no numbering
|
||||
DecimalArabic,
|
||||
UppercaseRoman,
|
||||
LowercaseRoman,
|
||||
UppercaseLetters,
|
||||
LowercaseLetters
|
||||
};
|
||||
|
||||
explicit inline PDFPageLabel() :
|
||||
m_numberingType(NumberingStyle::None),
|
||||
m_prefix(),
|
||||
m_pageIndex(0),
|
||||
m_startNumber(0)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
explicit inline PDFPageLabel(NumberingStyle numberingType, const QString& prefix, PDFInteger pageIndex, PDFInteger startNumber) :
|
||||
m_numberingType(numberingType),
|
||||
m_prefix(prefix),
|
||||
m_pageIndex(pageIndex),
|
||||
m_startNumber(startNumber)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/// Comparison operator, works only with page indices (because they should be unique)
|
||||
bool operator<(const PDFPageLabel& other) const { return m_pageIndex < other.m_pageIndex; }
|
||||
|
||||
/// Parses page label object from PDF object, according to PDF Reference 1.7, Table 8.10
|
||||
static PDFPageLabel parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object);
|
||||
|
||||
private:
|
||||
NumberingStyle m_numberingType;
|
||||
QString m_prefix;
|
||||
PDFInteger m_pageIndex;
|
||||
PDFInteger m_startNumber;
|
||||
};
|
||||
|
||||
class PDFViewerPreferences
|
||||
{
|
||||
public:
|
||||
|
@ -155,6 +201,7 @@ public:
|
|||
|
||||
private:
|
||||
PDFViewerPreferences m_viewerPreferences;
|
||||
std::vector<PDFPageLabel> m_pageLabels;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -41,6 +41,16 @@ static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN = "Unknown"
|
|||
void PDFDocument::init()
|
||||
{
|
||||
initInfo();
|
||||
|
||||
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
|
||||
|
||||
// Trailer object should be dictionary here. It is verified in the document reader.
|
||||
Q_ASSERT(trailerDictionary.isDictionary());
|
||||
|
||||
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
|
||||
Q_ASSERT(dictionary);
|
||||
|
||||
m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this);
|
||||
}
|
||||
|
||||
void PDFDocument::initInfo()
|
||||
|
@ -160,4 +170,26 @@ const PDFObject& PDFObjectStorage::getObject(PDFObjectReference reference) const
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the integer stored in \p object (after dereferencing it through the
/// document), or \p defaultValue if the dereferenced object is not an integer.
PDFInteger PDFDocumentDataLoaderDecorator::readInteger(const PDFObject& object, PDFInteger defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    return resolved.isInt() ? resolved.getInteger() : defaultValue;
}
|
||||
|
||||
/// Returns the text string stored in \p object (after dereferencing it through the
/// document), converted by PDFEncoding::convertTextString, or \p defaultValue if
/// the dereferenced object is not a string.
QString PDFDocumentDataLoaderDecorator::readTextString(const PDFObject& object, const QString& defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isString())
    {
        return defaultValue;
    }

    return PDFEncoding::convertTextString(resolved.getString());
}
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -21,12 +21,14 @@
|
|||
|
||||
#include "pdfglobal.h"
|
||||
#include "pdfobject.h"
|
||||
#include "pdfcatalog.h"
|
||||
|
||||
#include <QtCore>
|
||||
#include <QDateTime>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
class PDFDocument;
|
||||
|
||||
/// Storage for objects. This class is not thread safe for writing (calling non-const functions). Caller must ensure
|
||||
/// locking, if this object is used from multiple threads. Calling const functions should be thread safe.
|
||||
|
@ -74,6 +76,56 @@ private:
|
|||
PDFObject m_trailerDictionary;
|
||||
};
|
||||
|
||||
/// Loads data from the object contained in the PDF document, such as integers,
|
||||
/// bools, ... This object has two sets of functions - first one with default values,
|
||||
/// then if object with valid data is not found, default value is used, and second one,
|
||||
/// without default value, if valid data are not found, then exception is thrown.
|
||||
/// This class uses Decorator design pattern.
|
||||
class PDFDocumentDataLoaderDecorator
|
||||
{
|
||||
public:
|
||||
inline explicit PDFDocumentDataLoaderDecorator(const PDFDocument* document) : m_document(document) { }
|
||||
inline ~PDFDocumentDataLoaderDecorator() = default;
|
||||
|
||||
/// Reads an integer from the object, if it is possible.
|
||||
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
||||
/// \param defaultValue Default value
|
||||
PDFInteger readInteger(const PDFObject& object, PDFInteger defaultValue) const;
|
||||
|
||||
/// Reads a text string from the object, if it is possible.
|
||||
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
||||
/// \param defaultValue Default value
|
||||
QString readTextString(const PDFObject& object, const QString& defaultValue) const;
|
||||
|
||||
/// Reads enum from name object, if it is possible.
|
||||
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
||||
/// \param begin Begin of the enum search array
|
||||
/// \param end End of the enum search array
|
||||
/// \param default value Default value
|
||||
template<typename Enum, typename Iterator>
|
||||
Enum readEnumByName(const PDFObject& object, Iterator begin, Iterator end, Enum defaultValue) const
|
||||
{
|
||||
const PDFObject& dereferencedObject = m_document->getObject(object);
|
||||
if (dereferencedObject.isName())
|
||||
{
|
||||
QByteArray name = dereferencedObject.getString();
|
||||
|
||||
for (Iterator it = begin; it != end; ++it)
|
||||
{
|
||||
if (name == (*it).first)
|
||||
{
|
||||
return (*it).second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
private:
|
||||
const PDFDocument* m_document;
|
||||
};
|
||||
|
||||
/// PDF document main class.
|
||||
class PDFDocument
|
||||
{
|
||||
|
@ -139,8 +191,13 @@ private:
|
|||
|
||||
/// Info about the PDF document
|
||||
Info m_info;
|
||||
|
||||
/// Catalog object
|
||||
PDFCatalog m_catalog;
|
||||
};
|
||||
|
||||
// Implementation
|
||||
|
||||
inline
|
||||
const PDFObject& PDFDocument::getObject(const PDFObject& object) const
|
||||
{
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
// Copyright (C) 2018 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
#ifndef PDFNUMBERTREELOADER_H
|
||||
#define PDFNUMBERTREELOADER_H
|
||||
|
||||
#include "pdfdocument.h"

#include <algorithm>
#include <vector>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
/// This class can load a number tree into the array
|
||||
template<typename Type>
|
||||
class PDFNumberTreeLoader
|
||||
{
|
||||
public:
|
||||
explicit PDFNumberTreeLoader() = delete;
|
||||
|
||||
using Objects = std::vector<Type>;
|
||||
|
||||
/// Parses the number tree and loads its items into the array. Some errors are ignored,
|
||||
/// e.g. when kid is null. Type must contain methods to load object array.
|
||||
static Objects parse(const PDFDocument* document, const PDFObject& root)
|
||||
{
|
||||
Objects result;
|
||||
|
||||
// First, try to load items from the tree into the array
|
||||
parseImpl(result, document, root);
|
||||
|
||||
// Array may not be sorted. Sort it using comparison operator for Type.
|
||||
std::stable_sort(result.begin(), result.end());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
static void parseImpl(Objects& objects, const PDFDocument* document, const PDFObject& root)
|
||||
{
|
||||
const PDFObject& dereferencedRoot = document->getObject(root);
|
||||
if (dereferencedRoot.isDictionary())
|
||||
{
|
||||
const PDFDictionary* dictionary = dereferencedRoot.getDictionary();
|
||||
|
||||
// First, load the objects into the array
|
||||
const PDFObject& numberedItems = document->getObject(dictionary->get("Nums"));
|
||||
if (numberedItems.isArray())
|
||||
{
|
||||
const PDFArray* numberedItemsArray = numberedItems.getArray();
|
||||
const size_t count = numberedItemsArray->getCount() / 2;
|
||||
objects.reserve(objects.size() + count);
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
const size_t numberIndex = 2 * i;
|
||||
const size_t valueIndex = 2 * i + 1;
|
||||
|
||||
const PDFObject& number = document->getObject(numberedItemsArray->getItem(numberIndex));
|
||||
if (!number.isInt())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
objects.emplace_back(Type::parse(number.getInteger(), document, numberedItemsArray->getItem(valueIndex)));
|
||||
}
|
||||
}
|
||||
|
||||
// Then, follow the kids
|
||||
const PDFObject& kids = document->getObject(dictionary->get("Kids"));
|
||||
if (kids.isArray())
|
||||
{
|
||||
const PDFArray* kidsArray = kids.getArray();
|
||||
const size_t count = kidsArray->getCount();
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
parseImpl(objects, document, kidsArray->getItem(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFNUMBERTREELOADER_H
|
Loading…
Reference in New Issue