Reading pages

This commit is contained in:
Jakub Melka 2018-12-26 18:00:17 +01:00
parent 5db77f810a
commit 9239d663e6
7 changed files with 363 additions and 2 deletions

View File

@ -43,7 +43,8 @@ SOURCES += \
sources/pdfxreftable.cpp \
sources/pdfvisitor.cpp \
sources/pdfencoding.cpp \
sources/pdfcatalog.cpp
sources/pdfcatalog.cpp \
sources/pdfpage.cpp
HEADERS += \
sources/pdfobject.h \
@ -57,7 +58,8 @@ HEADERS += \
sources/pdfvisitor.h \
sources/pdfencoding.h \
sources/pdfcatalog.h \
sources/pdfnumbertreeloader.h
sources/pdfnumbertreeloader.h \
sources/pdfpage.h
unix {
target.path = /usr/lib

View File

@ -54,6 +54,7 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
PDFCatalog catalogObject;
catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document);
catalogObject.m_pages = PDFPage::parse(document, catalogDictionary->get("Pages"));
catalogObject.m_pageLabels = PDFNumberTreeLoader<PDFPageLabel>::parse(document, catalogDictionary->get("PageLabels"));
return catalogObject;
}

View File

@ -19,6 +19,7 @@
#define PDFCATALOG_H
#include "pdfobject.h"
#include "pdfpage.h"
#include <QtCore>
@ -201,6 +202,7 @@ public:
private:
PDFViewerPreferences m_viewerPreferences;
std::vector<PDFPage> m_pages;
std::vector<PDFPageLabel> m_pageLabels;
};

View File

@ -192,4 +192,42 @@ QString PDFDocumentDataLoaderDecorator::readTextString(const PDFObject& object,
return defaultValue;
}
QRectF PDFDocumentDataLoaderDecorator::readRectangle(const PDFObject& object, const QRectF& defaultValue) const
{
    // A rectangle is an array of exactly four numbers; anything else yields the default value.
    const PDFObject& rectangleObject = m_document->getObject(object);
    if (!rectangleObject.isArray())
    {
        return defaultValue;
    }

    const PDFArray* coordinatesArray = rectangleObject.getArray();
    if (coordinatesArray->getCount() != 4)
    {
        return defaultValue;
    }

    // Every item may itself be an indirect reference, and may be stored as a real or as an integer.
    std::array<PDFReal, 4> coordinates;
    for (size_t index = 0; index < 4; ++index)
    {
        const PDFObject& number = m_document->getObject(coordinatesArray->getItem(index));
        if (number.isReal())
        {
            coordinates[index] = number.getReal();
        }
        else if (number.isInt())
        {
            coordinates[index] = number.getInteger();
        }
        else
        {
            return defaultValue;
        }
    }

    // The two corners can be given in any order, so sort each axis before building the rectangle.
    const PDFReal left = qMin(coordinates[0], coordinates[2]);
    const PDFReal right = qMax(coordinates[0], coordinates[2]);
    const PDFReal bottom = qMin(coordinates[1], coordinates[3]);
    const PDFReal top = qMax(coordinates[1], coordinates[3]);
    return QRectF(left, bottom, right - left, top - bottom);
}
} // namespace pdf

View File

@ -97,6 +97,11 @@ public:
/// \param defaultValue Default value
QString readTextString(const PDFObject& object, const QString& defaultValue) const;
/// Reads a rectangle from the object, if it is possible. The object must be an array
/// of exactly four numbers (integers or reals, each of which can also be an indirect
/// reference). The numbers are treated as the x/y coordinates of two opposite corners
/// given in any order, so the returned rectangle has non-negative width and height.
/// \param object Object, can be an indirect reference to object (it is dereferenced)
/// \param defaultValue Default value, returned if the object is not such an array
QRectF readRectangle(const PDFObject& object, const QRectF& defaultValue) const;
/// Reads enum from name object, if it is possible.
/// \param object Object, can be an indirect reference to object (it is dereferenced)
/// \param begin Begin of the enum search array

View File

@ -0,0 +1,200 @@
// Copyright (C) 2018 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdfpage.h"
#include "pdfdocument.h"
#include "pdfparser.h"
namespace pdf
{
/// Parses the inheritable attributes of a single page tree node. The result starts as a copy
/// of \p templateAttributes (the attributes inherited from the parent) and every attribute
/// present in the node overrides the inherited one. If the object is not a dictionary,
/// the template attributes are returned unchanged.
/// \param templateAttributes Attributes inherited from the parent node
/// \param dictionary Page tree node, can be an indirect reference (it is dereferenced)
/// \param document Document owning this data
/// \throws PDFParserException if the rotation is not a multiple of 90 degrees
PDFPageInheritableAttributes PDFPageInheritableAttributes::parse(const PDFPageInheritableAttributes& templateAttributes,
                                                                 const PDFObject& dictionary,
                                                                 const PDFDocument* document)
{
    PDFPageInheritableAttributes result(templateAttributes);

    const PDFObject& dereferencedDictionary = document->getObject(dictionary);
    if (dereferencedDictionary.isDictionary())
    {
        PDFDocumentDataLoaderDecorator loader(document);
        const PDFDictionary* nodeDictionary = dereferencedDictionary.getDictionary();

        if (nodeDictionary->hasKey("MediaBox"))
        {
            result.m_mediaBox = loader.readRectangle(nodeDictionary->get("MediaBox"), result.getMediaBox());
        }
        if (nodeDictionary->hasKey("CropBox"))
        {
            result.m_cropBox = loader.readRectangle(nodeDictionary->get("CropBox"), result.getCropBox());
        }
        if (nodeDictionary->hasKey("Resources"))
        {
            result.m_resources = nodeDictionary->get("Resources");
        }
        if (nodeDictionary->hasKey("Rotate"))
        {
            // PDF specification says, that angle can be multiple of 90, so we can have here
            // for example, 450° (90° * 5), or even negative angles. We normalize the angle
            // to the interval [0, 360). The remainder of a negative number is negative (or zero)
            // in C++, so it must be shifted by a full circle, e.g. -90° becomes 270°.
            PDFInteger rotation = loader.readInteger(nodeDictionary->get("Rotate"), 0) % 360;
            if (rotation < 0)
            {
                rotation += 360;
            }

            switch (rotation)
            {
                case 0:
                {
                    result.m_pageRotation = PageRotation::None;
                    break;
                }
                case 90:
                {
                    result.m_pageRotation = PageRotation::Rotate90;
                    break;
                }
                case 180:
                {
                    result.m_pageRotation = PageRotation::Rotate180;
                    break;
                }
                case 270:
                {
                    result.m_pageRotation = PageRotation::Rotate270;
                    break;
                }
                default:
                {
                    // The angle is not a multiple of 90 degrees
                    throw PDFParserException(PDFTranslationContext::tr("Invalid page rotation."));
                }
            }
        }
    }

    return result;
}
PageRotation PDFPageInheritableAttributes::getPageRotation() const
{
    // A rotation which was never specified (neither here nor in any parent) means no rotation.
    return m_pageRotation.value_or(PageRotation::None);
}
std::vector<PDFPage> PDFPage::parse(const PDFDocument* document, const PDFObject& root)
{
std::vector<PDFPage> result;
std::set<PDFObjectReference> visited;
parseImpl(result, visited, PDFPageInheritableAttributes(), root, document);
return result;
}
void PDFPage::parseImpl(std::vector<PDFPage>& pages,
std::set<PDFObjectReference>& visitedReferences,
const PDFPageInheritableAttributes& templateAttributes,
const PDFObject& root,
const PDFDocument* document)
{
// Are we in internal node, or leaf (page object)?
const PDFObject& dereferenced = document->getObject(root);
if (dereferenced.isDictionary())
{
const PDFDictionary* dictionary = dereferenced.getDictionary();
const PDFObject& typeObject = document->getObject(dictionary->get("Type"));
if (typeObject.isName())
{
PDFPageInheritableAttributes currentInheritableAttributes = PDFPageInheritableAttributes::parse(templateAttributes, root, document);
QByteArray typeString = typeObject.getString();
if (typeString == "Pages")
{
const PDFObject& kids = document->getObject(dictionary->get("Kids"));
if (kids.isArray())
{
const PDFArray* kidsArray = kids.getArray();
const size_t count = kidsArray->getCount();
for (size_t i = 0; i < count; ++i)
{
const PDFObject& kid = kidsArray->getItem(i);
// Check reference
if (!kid.isReference())
{
throw PDFParserException(PDFTranslationContext::tr("Expected valid kids in page tree."));
}
// Check cycles
if (visitedReferences.count(kid.getReference()))
{
throw PDFParserException(PDFTranslationContext::tr("Detected cycles in page tree."));
}
visitedReferences.insert(kid.getReference());
parseImpl(pages, visitedReferences, currentInheritableAttributes, kid, document);
}
}
else
{
throw PDFParserException(PDFTranslationContext::tr("Expected valid kids in page tree."));
}
}
else if (typeString == "Page")
{
PDFPage page;
page.m_mediaBox = currentInheritableAttributes.getMediaBox();
page.m_cropBox = currentInheritableAttributes.getCropBox();
page.m_resources = currentInheritableAttributes.getResources();
page.m_pageRotation = currentInheritableAttributes.getPageRotation();
if (!page.m_cropBox.isValid())
{
page.m_cropBox = page.m_mediaBox;
}
PDFDocumentDataLoaderDecorator loader(document);
page.m_bleedBox = loader.readRectangle(dictionary->get("BleedBox"), page.getCropBox());
page.m_trimBox = loader.readRectangle(dictionary->get("TrimBox"), page.getCropBox());
page.m_artBox = loader.readRectangle(dictionary->get("ArtBox"), page.getCropBox());
page.m_contents = document->getObject(dictionary->get("Contents"));
pages.emplace_back(std::move(page));
}
else
{
throw PDFParserException(PDFTranslationContext::tr("Expected valid type item in page tree."));
}
}
else
{
throw PDFParserException(PDFTranslationContext::tr("Expected valid type item in page tree."));
}
}
else
{
throw PDFParserException(PDFTranslationContext::tr("Expected dictionary in page tree."));
}
}
} // namespace pdf

View File

@ -0,0 +1,113 @@
// Copyright (C) 2018 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFPAGE_H
#define PDFPAGE_H
#include "pdfobject.h"
#include <QRectF>
#include <set>
#include <optional>
namespace pdf
{
class PDFDocument;
/// This enum represents the number of degrees by which the page should be rotated
/// CLOCKWISE, when it is displayed or printed.
enum class PageRotation
{
None, ///< No rotation (0 degrees)
Rotate90, ///< Rotation by 90 degrees
Rotate180, ///< Rotation by 180 degrees
Rotate270 ///< Rotation by 270 degrees
};
/// This class represents the page attributes which are inheritable in the page tree
/// (media box, crop box, rotation and resources). It also allows merging with the attributes
/// of the parent: attributes parsed from a node override the attributes taken from the parent.
class PDFPageInheritableAttributes
{
public:
explicit inline PDFPageInheritableAttributes() = default;
/// Parses inheritable attributes from the page tree node. The result is a copy of the template
/// attributes, in which the attributes present in the dictionary are overwritten.
/// Throws PDFParserException, if the page rotation is invalid.
/// \param templateAttributes Template attributes
/// \param dictionary Dictionary, from which the data will be read
/// \param document Document owning this data
static PDFPageInheritableAttributes parse(const PDFPageInheritableAttributes& templateAttributes, const PDFObject& dictionary, const PDFDocument* document);
/// Returns the media box (default constructed rectangle, if no media box was parsed)
const QRectF& getMediaBox() const { return m_mediaBox; }
/// Returns the crop box (default constructed rectangle, if no crop box was parsed)
const QRectF& getCropBox() const { return m_cropBox; }
/// Returns the page rotation, or PageRotation::None, if no rotation was parsed
PageRotation getPageRotation() const;
/// Returns the resources object, exactly as it is stored in the dictionary (it is not dereferenced)
const PDFObject& getResources() const { return m_resources; }
private:
QRectF m_mediaBox;
QRectF m_cropBox;
std::optional<PageRotation> m_pageRotation; ///< Empty, if the rotation was not specified
PDFObject m_resources;
};
/// Object representing page in PDF document. Contains different page properties, such as
/// media box, crop box, rotation, etc. and also page content, resources.
class PDFPage
{
public:
explicit PDFPage() = default;
/// Parses the page tree and returns the pages in the order, in which they are found
/// while walking the tree. If error occurs, then exception is thrown.
/// \param document Document owning this tree
/// \param root Root object of page tree
static std::vector<PDFPage> parse(const PDFDocument* document, const PDFObject& root);
/// Returns the media box (inheritable attribute)
const QRectF& getMediaBox() const { return m_mediaBox; }
/// Returns the crop box (inheritable attribute). If the parsed crop box is not valid,
/// then it is equal to the media box.
const QRectF& getCropBox() const { return m_cropBox; }
/// Returns the bleed box (equal to the crop box, if it can't be read from the page)
const QRectF& getBleedBox() const { return m_bleedBox; }
/// Returns the trim box (equal to the crop box, if it can't be read from the page)
const QRectF& getTrimBox() const { return m_trimBox; }
/// Returns the art box (equal to the crop box, if it can't be read from the page)
const QRectF& getArtBox() const { return m_artBox; }
/// Returns the page rotation (inheritable attribute)
PageRotation getPageRotation() const { return m_pageRotation; }
/// Returns the resources object (inheritable attribute)
const PDFObject& getResources() const { return m_resources; }
/// Returns the dereferenced "Contents" entry of the page dictionary
const PDFObject& getContents() const { return m_contents; }
private:
/// Parses the page tree (implementation). If error occurs, then exception is thrown.
/// \param pages Page array. Pages are inserted into this array
/// \param visitedReferences Visited references (to check cycles in page tree and avoid hangup)
/// \param templateAttributes Template attributes (inheritable attributes defined in parent)
/// \param root Root object of page tree
/// \param document Document owning this tree
static void parseImpl(std::vector<PDFPage>& pages,
std::set<PDFObjectReference>& visitedReferences,
const PDFPageInheritableAttributes& templateAttributes,
const PDFObject& root,
const PDFDocument* document);
QRectF m_mediaBox;
QRectF m_cropBox;
QRectF m_bleedBox;
QRectF m_trimBox;
QRectF m_artBox;
PageRotation m_pageRotation = PageRotation::None;
PDFObject m_resources;
PDFObject m_contents;
};
} // namespace pdf
#endif // PDFPAGE_H