mirror of https://github.com/JakubMelka/PDF4QT.git
Load page labels
This commit is contained in:
parent
038548c391
commit
7a7b1d7b40
|
@ -1,6 +1,6 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<!DOCTYPE QtCreatorProject>
|
<!DOCTYPE QtCreatorProject>
|
||||||
<!-- Written by QtCreator 4.7.2, 2018-12-14T16:56:03. -->
|
<!-- Written by QtCreator 4.7.2, 2018-12-23T18:52:37. -->
|
||||||
<qtcreator>
|
<qtcreator>
|
||||||
<data>
|
<data>
|
||||||
<variable>EnvironmentId</variable>
|
<variable>EnvironmentId</variable>
|
||||||
|
|
|
@ -56,7 +56,8 @@ HEADERS += \
|
||||||
sources/pdfflatmap.h \
|
sources/pdfflatmap.h \
|
||||||
sources/pdfvisitor.h \
|
sources/pdfvisitor.h \
|
||||||
sources/pdfencoding.h \
|
sources/pdfencoding.h \
|
||||||
sources/pdfcatalog.h
|
sources/pdfcatalog.h \
|
||||||
|
sources/pdfnumbertreeloader.h
|
||||||
|
|
||||||
unix {
|
unix {
|
||||||
target.path = /usr/lib
|
target.path = /usr/lib
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#include "pdfcatalog.h"
|
#include "pdfcatalog.h"
|
||||||
#include "pdfparser.h"
|
#include "pdfparser.h"
|
||||||
#include "pdfdocument.h"
|
#include "pdfdocument.h"
|
||||||
|
#include "pdfnumbertreeloader.h"
|
||||||
|
|
||||||
namespace pdf
|
namespace pdf
|
||||||
{
|
{
|
||||||
|
@ -48,8 +49,12 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
|
||||||
throw PDFParserException(PDFTranslationContext::tr("Catalog must be a dictionary."));
|
throw PDFParserException(PDFTranslationContext::tr("Catalog must be a dictionary."));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const PDFDictionary* catalogDictionary = catalog.getDictionary();
|
||||||
|
Q_ASSERT(catalogDictionary);
|
||||||
|
|
||||||
PDFCatalog catalogObject;
|
PDFCatalog catalogObject;
|
||||||
catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document);
|
catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document);
|
||||||
|
catalogObject.m_pageLabels = PDFNumberTreeLoader<PDFPageLabel>::parse(document, catalogDictionary->get("PageLabels"));
|
||||||
return catalogObject;
|
return catalogObject;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -315,4 +320,30 @@ PDFViewerPreferences PDFViewerPreferences::parse(const PDFObject& catalogDiction
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses a page label dictionary (PDF Reference 1.7, Table 8.10).
/// \param pageIndex Key under which the label is stored in the number tree; it is stored as the page index of the label
/// \param document Document used to dereference indirect objects
/// \param object Page label dictionary, can be an indirect reference to it (it is dereferenced)
/// \returns Page label built from entries "S" (numbering style, default None), "P" (prefix,
///          default empty string) and "St" (start number, default 1)
/// \throws PDFParserException if the dereferenced object is not a dictionary
PDFPageLabel PDFPageLabel::parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object)
{
    const PDFObject& dereferencedObject = document->getObject(object);

    // Reject invalid input first. Every path below returns a value, so no fallback
    // return statement is needed at the end of the function.
    if (!dereferencedObject.isDictionary())
    {
        throw PDFParserException(PDFTranslationContext::tr("Expected page label dictionary."));
    }

    // Mapping between the names allowed in entry "S" and the numbering styles
    using StyleEntry = std::pair<const char*, NumberingStyle>;
    const std::array<StyleEntry, 5> numberingStyles = { StyleEntry{ "D", NumberingStyle::DecimalArabic },
                                                        StyleEntry{ "R", NumberingStyle::UppercaseRoman },
                                                        StyleEntry{ "r", NumberingStyle::LowercaseRoman },
                                                        StyleEntry{ "A", NumberingStyle::UppercaseLetters },
                                                        StyleEntry{ "a", NumberingStyle::LowercaseLetters } };

    const PDFDictionary* dictionary = dereferencedObject.getDictionary();
    const PDFDocumentDataLoaderDecorator loader(document);

    // Missing or invalid entries fall back to the default values passed to the loader
    const NumberingStyle numberingStyle = loader.readEnumByName(dictionary->get("S"), numberingStyles.cbegin(), numberingStyles.cend(), NumberingStyle::None);
    const QString prefix = loader.readTextString(dictionary->get("P"), QString());
    const PDFInteger startNumber = loader.readInteger(dictionary->get("St"), 1);

    return PDFPageLabel(numberingStyle, prefix, pageIndex, startNumber);
}
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
|
|
@ -54,6 +54,52 @@ enum class PageMode
|
||||||
UseAttachments, ///< Attachments window is selected and visible
|
UseAttachments, ///< Attachments window is selected and visible
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Represents page numbering definition object
|
||||||
|
class PDFPageLabel
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
enum class NumberingStyle
|
||||||
|
{
|
||||||
|
None, ///< This means, only prefix is used, no numbering
|
||||||
|
DecimalArabic,
|
||||||
|
UppercaseRoman,
|
||||||
|
LowercaseRoman,
|
||||||
|
UppercaseLetters,
|
||||||
|
LowercaseLetters
|
||||||
|
};
|
||||||
|
|
||||||
|
explicit inline PDFPageLabel() :
|
||||||
|
m_numberingType(NumberingStyle::None),
|
||||||
|
m_prefix(),
|
||||||
|
m_pageIndex(0),
|
||||||
|
m_startNumber(0)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit inline PDFPageLabel(NumberingStyle numberingType, const QString& prefix, PDFInteger pageIndex, PDFInteger startNumber) :
|
||||||
|
m_numberingType(numberingType),
|
||||||
|
m_prefix(prefix),
|
||||||
|
m_pageIndex(pageIndex),
|
||||||
|
m_startNumber(startNumber)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Comparison operator, works only with page indices (because they should be unique)
|
||||||
|
bool operator<(const PDFPageLabel& other) const { return m_pageIndex < other.m_pageIndex; }
|
||||||
|
|
||||||
|
/// Parses page label object from PDF object, according to PDF Reference 1.7, Table 8.10
|
||||||
|
static PDFPageLabel parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object);
|
||||||
|
|
||||||
|
private:
|
||||||
|
NumberingStyle m_numberingType;
|
||||||
|
QString m_prefix;
|
||||||
|
PDFInteger m_pageIndex;
|
||||||
|
PDFInteger m_startNumber;
|
||||||
|
};
|
||||||
|
|
||||||
class PDFViewerPreferences
|
class PDFViewerPreferences
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -155,6 +201,7 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
PDFViewerPreferences m_viewerPreferences;
|
PDFViewerPreferences m_viewerPreferences;
|
||||||
|
std::vector<PDFPageLabel> m_pageLabels;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
|
|
@ -41,6 +41,16 @@ static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN = "Unknown"
|
||||||
void PDFDocument::init()
|
void PDFDocument::init()
|
||||||
{
|
{
|
||||||
initInfo();
|
initInfo();
|
||||||
|
|
||||||
|
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
|
||||||
|
|
||||||
|
// Trailer object should be dictionary here. It is verified in the document reader.
|
||||||
|
Q_ASSERT(trailerDictionary.isDictionary());
|
||||||
|
|
||||||
|
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
|
||||||
|
Q_ASSERT(dictionary);
|
||||||
|
|
||||||
|
m_catalog = PDFCatalog::parse(getObject(dictionary->get("Root")), this);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PDFDocument::initInfo()
|
void PDFDocument::initInfo()
|
||||||
|
@ -160,4 +170,26 @@ const PDFObject& PDFObjectStorage::getObject(PDFObjectReference reference) const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the integer stored in the object (an indirect reference is dereferenced first),
/// or \p defaultValue, if the dereferenced object is not an integer.
PDFInteger PDFDocumentDataLoaderDecorator::readInteger(const PDFObject& object, PDFInteger defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isInt())
    {
        return defaultValue;
    }

    return resolved.getInteger();
}
|
||||||
|
|
||||||
|
/// Returns the text string stored in the object (an indirect reference is dereferenced first),
/// converted by PDFEncoding::convertTextString, or \p defaultValue, if the dereferenced
/// object is not a string.
QString PDFDocumentDataLoaderDecorator::readTextString(const PDFObject& object, const QString& defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isString())
    {
        return defaultValue;
    }

    return PDFEncoding::convertTextString(resolved.getString());
}
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
|
|
@ -21,12 +21,14 @@
|
||||||
|
|
||||||
#include "pdfglobal.h"
|
#include "pdfglobal.h"
|
||||||
#include "pdfobject.h"
|
#include "pdfobject.h"
|
||||||
|
#include "pdfcatalog.h"
|
||||||
|
|
||||||
#include <QtCore>
|
#include <QtCore>
|
||||||
#include <QDateTime>
|
#include <QDateTime>
|
||||||
|
|
||||||
namespace pdf
|
namespace pdf
|
||||||
{
|
{
|
||||||
|
class PDFDocument;
|
||||||
|
|
||||||
/// Storage for objects. This class is not thread safe for writing (calling non-const functions). Caller must ensure
|
/// Storage for objects. This class is not thread safe for writing (calling non-const functions). Caller must ensure
|
||||||
/// locking, if this object is used from multiple threads. Calling const functions should be thread safe.
|
/// locking, if this object is used from multiple threads. Calling const functions should be thread safe.
|
||||||
|
@ -74,6 +76,56 @@ private:
|
||||||
PDFObject m_trailerDictionary;
|
PDFObject m_trailerDictionary;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Loads data from the object contained in the PDF document, such as integers,
|
||||||
|
/// bools, ... This object has two sets of functions - first one with default values,
|
||||||
|
/// then if object with valid data is not found, default value is used, and second one,
|
||||||
|
/// without default value, if valid data are not found, then exception is thrown.
|
||||||
|
/// This class uses Decorator design pattern.
|
||||||
|
class PDFDocumentDataLoaderDecorator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
inline explicit PDFDocumentDataLoaderDecorator(const PDFDocument* document) : m_document(document) { }
|
||||||
|
inline ~PDFDocumentDataLoaderDecorator() = default;
|
||||||
|
|
||||||
|
/// Reads an integer from the object, if it is possible.
|
||||||
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
||||||
|
/// \param defaultValue Default value
|
||||||
|
PDFInteger readInteger(const PDFObject& object, PDFInteger defaultValue) const;
|
||||||
|
|
||||||
|
/// Reads a text string from the object, if it is possible.
|
||||||
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
||||||
|
/// \param defaultValue Default value
|
||||||
|
QString readTextString(const PDFObject& object, const QString& defaultValue) const;
|
||||||
|
|
||||||
|
/// Reads enum from name object, if it is possible.
|
||||||
|
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
||||||
|
/// \param begin Begin of the enum search array
|
||||||
|
/// \param end End of the enum search array
|
||||||
|
/// \param default value Default value
|
||||||
|
template<typename Enum, typename Iterator>
|
||||||
|
Enum readEnumByName(const PDFObject& object, Iterator begin, Iterator end, Enum defaultValue) const
|
||||||
|
{
|
||||||
|
const PDFObject& dereferencedObject = m_document->getObject(object);
|
||||||
|
if (dereferencedObject.isName())
|
||||||
|
{
|
||||||
|
QByteArray name = dereferencedObject.getString();
|
||||||
|
|
||||||
|
for (Iterator it = begin; it != end; ++it)
|
||||||
|
{
|
||||||
|
if (name == (*it).first)
|
||||||
|
{
|
||||||
|
return (*it).second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const PDFDocument* m_document;
|
||||||
|
};
|
||||||
|
|
||||||
/// PDF document main class.
|
/// PDF document main class.
|
||||||
class PDFDocument
|
class PDFDocument
|
||||||
{
|
{
|
||||||
|
@ -139,8 +191,13 @@ private:
|
||||||
|
|
||||||
/// Info about the PDF document
|
/// Info about the PDF document
|
||||||
Info m_info;
|
Info m_info;
|
||||||
|
|
||||||
|
/// Catalog object
|
||||||
|
PDFCatalog m_catalog;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Implementation
|
||||||
|
|
||||||
inline
|
inline
|
||||||
const PDFObject& PDFDocument::getObject(const PDFObject& object) const
|
const PDFObject& PDFDocument::getObject(const PDFObject& object) const
|
||||||
{
|
{
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
// Copyright (C) 2018 Jakub Melka
|
||||||
|
//
|
||||||
|
// This file is part of PdfForQt.
|
||||||
|
//
|
||||||
|
// PdfForQt is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// PdfForQt is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Lesser General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public License
|
||||||
|
//    along with PdfForQt.  If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef PDFNUMBERTREELOADER_H
|
||||||
|
#define PDFNUMBERTREELOADER_H
|
||||||
|
|
||||||
|
#include "pdfdocument.h"

#include <algorithm>
#include <set>
#include <vector>
|
||||||
|
|
||||||
|
namespace pdf
|
||||||
|
{
|
||||||
|
|
||||||
|
/// This class can load a number tree into the array
|
||||||
|
template<typename Type>
|
||||||
|
class PDFNumberTreeLoader
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit PDFNumberTreeLoader() = delete;
|
||||||
|
|
||||||
|
using Objects = std::vector<Type>;
|
||||||
|
|
||||||
|
/// Parses the number tree and loads its items into the array. Some errors are ignored,
|
||||||
|
/// e.g. when kid is null. Type must contain methods to load object array.
|
||||||
|
static Objects parse(const PDFDocument* document, const PDFObject& root)
|
||||||
|
{
|
||||||
|
Objects result;
|
||||||
|
|
||||||
|
// First, try to load items from the tree into the array
|
||||||
|
parseImpl(result, document, root);
|
||||||
|
|
||||||
|
// Array may not be sorted. Sort it using comparison operator for Type.
|
||||||
|
std::stable_sort(result.begin(), result.end());
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static void parseImpl(Objects& objects, const PDFDocument* document, const PDFObject& root)
|
||||||
|
{
|
||||||
|
const PDFObject& dereferencedRoot = document->getObject(root);
|
||||||
|
if (dereferencedRoot.isDictionary())
|
||||||
|
{
|
||||||
|
const PDFDictionary* dictionary = dereferencedRoot.getDictionary();
|
||||||
|
|
||||||
|
// First, load the objects into the array
|
||||||
|
const PDFObject& numberedItems = document->getObject(dictionary->get("Nums"));
|
||||||
|
if (numberedItems.isArray())
|
||||||
|
{
|
||||||
|
const PDFArray* numberedItemsArray = numberedItems.getArray();
|
||||||
|
const size_t count = numberedItemsArray->getCount() / 2;
|
||||||
|
objects.reserve(objects.size() + count);
|
||||||
|
for (size_t i = 0; i < count; ++i)
|
||||||
|
{
|
||||||
|
const size_t numberIndex = 2 * i;
|
||||||
|
const size_t valueIndex = 2 * i + 1;
|
||||||
|
|
||||||
|
const PDFObject& number = document->getObject(numberedItemsArray->getItem(numberIndex));
|
||||||
|
if (!number.isInt())
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
objects.emplace_back(Type::parse(number.getInteger(), document, numberedItemsArray->getItem(valueIndex)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Then, follow the kids
|
||||||
|
const PDFObject& kids = document->getObject(dictionary->get("Kids"));
|
||||||
|
if (kids.isArray())
|
||||||
|
{
|
||||||
|
const PDFArray* kidsArray = kids.getArray();
|
||||||
|
const size_t count = kidsArray->getCount();
|
||||||
|
for (size_t i = 0; i < count; ++i)
|
||||||
|
{
|
||||||
|
parseImpl(objects, document, kidsArray->getItem(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace pdf
|
||||||
|
|
||||||
|
#endif // PDFNUMBERTREELOADER_H
|
Loading…
Reference in New Issue