mirror of https://github.com/JakubMelka/PDF4QT.git
Reading pages
This commit is contained in:
parent
5db77f810a
commit
9239d663e6
|
@ -43,7 +43,8 @@ SOURCES += \
|
|||
sources/pdfxreftable.cpp \
|
||||
sources/pdfvisitor.cpp \
|
||||
sources/pdfencoding.cpp \
|
||||
sources/pdfcatalog.cpp
|
||||
sources/pdfcatalog.cpp \
|
||||
sources/pdfpage.cpp
|
||||
|
||||
HEADERS += \
|
||||
sources/pdfobject.h \
|
||||
|
@ -57,7 +58,8 @@ HEADERS += \
|
|||
sources/pdfvisitor.h \
|
||||
sources/pdfencoding.h \
|
||||
sources/pdfcatalog.h \
|
||||
sources/pdfnumbertreeloader.h
|
||||
sources/pdfnumbertreeloader.h \
|
||||
sources/pdfpage.h
|
||||
|
||||
unix {
|
||||
target.path = /usr/lib
|
||||
|
|
|
@ -54,6 +54,7 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
|
|||
|
||||
PDFCatalog catalogObject;
|
||||
catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document);
|
||||
catalogObject.m_pages = PDFPage::parse(document, catalogDictionary->get("Pages"));
|
||||
catalogObject.m_pageLabels = PDFNumberTreeLoader<PDFPageLabel>::parse(document, catalogDictionary->get("PageLabels"));
|
||||
return catalogObject;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#define PDFCATALOG_H
|
||||
|
||||
#include "pdfobject.h"
|
||||
#include "pdfpage.h"
|
||||
|
||||
#include <QtCore>
|
||||
|
||||
|
@ -201,6 +202,7 @@ public:
|
|||
|
||||
private:
|
||||
PDFViewerPreferences m_viewerPreferences;
|
||||
std::vector<PDFPage> m_pages;
|
||||
std::vector<PDFPageLabel> m_pageLabels;
|
||||
};
|
||||
|
||||
|
|
|
@ -192,4 +192,42 @@ QString PDFDocumentDataLoaderDecorator::readTextString(const PDFObject& object,
|
|||
return defaultValue;
|
||||
}
|
||||
|
||||
/// Reads a rectangle from the object, if it is possible. The object is resolved
/// through the document first, then it must be an array of exactly four numbers
/// (reals or integers, each resolved through the document as well). The two
/// corner points may be given in any order; the result is normalized so that
/// its width and height are never negative.
/// \param object Object, can be an indirect reference to object (it is dereferenced)
/// \param defaultValue Value returned when the object is not a four-number array
QRectF PDFDocumentDataLoaderDecorator::readRectangle(const PDFObject& object, const QRectF& defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isArray())
    {
        return defaultValue;
    }

    const PDFArray* coordinates = resolved.getArray();
    if (coordinates->getCount() != 4)
    {
        return defaultValue;
    }

    std::array<PDFReal, 4> values;
    for (size_t index = 0; index < 4; ++index)
    {
        const PDFObject& item = m_document->getObject(coordinates->getItem(index));
        if (item.isReal())
        {
            values[index] = item.getReal();
        }
        else if (item.isInt())
        {
            values[index] = item.getInteger();
        }
        else
        {
            // A single non-numeric entry invalidates the whole rectangle.
            return defaultValue;
        }
    }

    // Order the corner coordinates, because the array may list them in any order.
    const PDFReal lowX = qMin(values[0], values[2]);
    const PDFReal highX = qMax(values[0], values[2]);
    const PDFReal lowY = qMin(values[1], values[3]);
    const PDFReal highY = qMax(values[1], values[3]);

    return QRectF(lowX, lowY, highX - lowX, highY - lowY);
}
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -97,6 +97,11 @@ public:
|
|||
/// \param defaultValue Default value
|
||||
QString readTextString(const PDFObject& object, const QString& defaultValue) const;
|
||||
|
||||
/// Reads a rectangle from the object, if it is possible.
|
||||
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
||||
/// \param defaultValue Default value
|
||||
QRectF readRectangle(const PDFObject& object, const QRectF& defaultValue) const;
|
||||
|
||||
/// Reads enum from name object, if it is possible.
|
||||
/// \param object Object, can be an indirect reference to object (it is dereferenced)
|
||||
/// \param begin Begin of the enum search array
|
||||
|
|
|
@ -0,0 +1,200 @@
|
|||
// Copyright (C) 2018 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdfpage.h"
|
||||
#include "pdfdocument.h"
|
||||
#include "pdfparser.h"
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
/// Parses inheritable attributes from a page tree node and merges them with the
/// attributes inherited from the parent. Keys present in the node override the
/// template; keys which are absent keep the template value. If the object is not
/// a dictionary, an unchanged copy of the template is returned.
/// \param templateAttributes Attributes inherited from the parent node
/// \param dictionary Page tree node, can be an indirect reference (it is dereferenced)
/// \param document Document owning this data
/// \throws PDFParserException if the Rotate entry is not a multiple of 90 degrees
PDFPageInheritableAttributes PDFPageInheritableAttributes::parse(const PDFPageInheritableAttributes& templateAttributes,
                                                                 const PDFObject& dictionary,
                                                                 const PDFDocument* document)
{
    PDFPageInheritableAttributes result(templateAttributes);

    const PDFObject& dereferencedDictionary = document->getObject(dictionary);
    if (!dereferencedDictionary.isDictionary())
    {
        return result;
    }

    PDFDocumentDataLoaderDecorator loader(document);
    const PDFDictionary* nodeDictionary = dereferencedDictionary.getDictionary();

    // The rectangles fall back to the inherited value, if the entry is malformed.
    if (nodeDictionary->hasKey("MediaBox"))
    {
        result.m_mediaBox = loader.readRectangle(nodeDictionary->get("MediaBox"), result.getMediaBox());
    }
    if (nodeDictionary->hasKey("CropBox"))
    {
        result.m_cropBox = loader.readRectangle(nodeDictionary->get("CropBox"), result.getCropBox());
    }
    if (nodeDictionary->hasKey("Resources"))
    {
        // Stored as it is found in the dictionary (not dereferenced here).
        result.m_resources = nodeDictionary->get("Resources");
    }
    if (nodeDictionary->hasKey("Rotate"))
    {
        // PDF specification says, that angle can be multiple of 90, so we can have here
        // for example, 450° (90° * 5), or even negative angles. The remainder operator
        // truncates toward zero, so -90 % 360 is still -90; negative remainders must be
        // shifted by a full circle to land in the range [0, 360).
        PDFInteger rotation = loader.readInteger(nodeDictionary->get("Rotate"), 0) % 360;
        if (rotation < 0)
        {
            rotation += 360;
        }

        switch (rotation)
        {
            case 0:
            {
                result.m_pageRotation = PageRotation::None;
                break;
            }
            case 90:
            {
                result.m_pageRotation = PageRotation::Rotate90;
                break;
            }
            case 180:
            {
                result.m_pageRotation = PageRotation::Rotate180;
                break;
            }
            case 270:
            {
                result.m_pageRotation = PageRotation::Rotate270;
                break;
            }
            default:
            {
                throw PDFParserException(PDFTranslationContext::tr("Invalid page rotation."));
            }
        }
    }

    return result;
}
|
||||
|
||||
/// Returns the page rotation. If no rotation has been set (neither parsed from
/// the node, nor inherited), then PageRotation::None is returned.
PageRotation PDFPageInheritableAttributes::getPageRotation() const
{
    return m_pageRotation.value_or(PageRotation::None);
}
|
||||
|
||||
std::vector<PDFPage> PDFPage::parse(const PDFDocument* document, const PDFObject& root)
|
||||
{
|
||||
std::vector<PDFPage> result;
|
||||
std::set<PDFObjectReference> visited;
|
||||
parseImpl(result, visited, PDFPageInheritableAttributes(), root, document);
|
||||
return result;
|
||||
}
|
||||
|
||||
void PDFPage::parseImpl(std::vector<PDFPage>& pages,
|
||||
std::set<PDFObjectReference>& visitedReferences,
|
||||
const PDFPageInheritableAttributes& templateAttributes,
|
||||
const PDFObject& root,
|
||||
const PDFDocument* document)
|
||||
{
|
||||
// Are we in internal node, or leaf (page object)?
|
||||
const PDFObject& dereferenced = document->getObject(root);
|
||||
|
||||
if (dereferenced.isDictionary())
|
||||
{
|
||||
const PDFDictionary* dictionary = dereferenced.getDictionary();
|
||||
const PDFObject& typeObject = document->getObject(dictionary->get("Type"));
|
||||
if (typeObject.isName())
|
||||
{
|
||||
PDFPageInheritableAttributes currentInheritableAttributes = PDFPageInheritableAttributes::parse(templateAttributes, root, document);
|
||||
|
||||
QByteArray typeString = typeObject.getString();
|
||||
if (typeString == "Pages")
|
||||
{
|
||||
const PDFObject& kids = document->getObject(dictionary->get("Kids"));
|
||||
if (kids.isArray())
|
||||
{
|
||||
const PDFArray* kidsArray = kids.getArray();
|
||||
const size_t count = kidsArray->getCount();
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
const PDFObject& kid = kidsArray->getItem(i);
|
||||
|
||||
// Check reference
|
||||
if (!kid.isReference())
|
||||
{
|
||||
throw PDFParserException(PDFTranslationContext::tr("Expected valid kids in page tree."));
|
||||
}
|
||||
|
||||
// Check cycles
|
||||
if (visitedReferences.count(kid.getReference()))
|
||||
{
|
||||
throw PDFParserException(PDFTranslationContext::tr("Detected cycles in page tree."));
|
||||
}
|
||||
|
||||
visitedReferences.insert(kid.getReference());
|
||||
parseImpl(pages, visitedReferences, currentInheritableAttributes, kid, document);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw PDFParserException(PDFTranslationContext::tr("Expected valid kids in page tree."));
|
||||
}
|
||||
}
|
||||
else if (typeString == "Page")
|
||||
{
|
||||
PDFPage page;
|
||||
|
||||
page.m_mediaBox = currentInheritableAttributes.getMediaBox();
|
||||
page.m_cropBox = currentInheritableAttributes.getCropBox();
|
||||
page.m_resources = currentInheritableAttributes.getResources();
|
||||
page.m_pageRotation = currentInheritableAttributes.getPageRotation();
|
||||
|
||||
if (!page.m_cropBox.isValid())
|
||||
{
|
||||
page.m_cropBox = page.m_mediaBox;
|
||||
}
|
||||
|
||||
PDFDocumentDataLoaderDecorator loader(document);
|
||||
page.m_bleedBox = loader.readRectangle(dictionary->get("BleedBox"), page.getCropBox());
|
||||
page.m_trimBox = loader.readRectangle(dictionary->get("TrimBox"), page.getCropBox());
|
||||
page.m_artBox = loader.readRectangle(dictionary->get("ArtBox"), page.getCropBox());
|
||||
page.m_contents = document->getObject(dictionary->get("Contents"));
|
||||
|
||||
pages.emplace_back(std::move(page));
|
||||
}
|
||||
else
|
||||
{
|
||||
throw PDFParserException(PDFTranslationContext::tr("Expected valid type item in page tree."));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw PDFParserException(PDFTranslationContext::tr("Expected valid type item in page tree."));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw PDFParserException(PDFTranslationContext::tr("Expected dictionary in page tree."));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace pdf
|
|
@ -0,0 +1,113 @@
|
|||
// Copyright (C) 2018 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef PDFPAGE_H
|
||||
#define PDFPAGE_H
|
||||
|
||||
#include "pdfobject.h"
|
||||
|
||||
#include <QRectF>
|
||||
|
||||
#include <set>
|
||||
#include <optional>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
class PDFDocument;
|
||||
|
||||
/// This enum represents the number of degrees by which the page should be
/// rotated CLOCKWISE when it is displayed or printed.
enum class PageRotation
{
    None,       ///< No rotation (0 degrees)
    Rotate90,   ///< Rotation by 90 degrees
    Rotate180,  ///< Rotation by 180 degrees
    Rotate270   ///< Rotation by 270 degrees
};
|
||||
|
||||
/// This class represents attributes, which are inheritable. Also allows merging from
|
||||
/// parents.
|
||||
class PDFPageInheritableAttributes
|
||||
{
|
||||
public:
|
||||
explicit inline PDFPageInheritableAttributes() = default;
|
||||
|
||||
/// Parses inheritable attributes from the page tree node
|
||||
/// \param templateAttributes Template attributes
|
||||
/// \param dictionary Dictionary, from which the data will be read
|
||||
/// \param document Document owning this data
|
||||
static PDFPageInheritableAttributes parse(const PDFPageInheritableAttributes& templateAttributes, const PDFObject& dictionary, const PDFDocument* document);
|
||||
|
||||
const QRectF& getMediaBox() const { return m_mediaBox; }
|
||||
const QRectF& getCropBox() const { return m_cropBox; }
|
||||
PageRotation getPageRotation() const;
|
||||
const PDFObject& getResources() const { return m_resources; }
|
||||
|
||||
private:
|
||||
QRectF m_mediaBox;
|
||||
QRectF m_cropBox;
|
||||
std::optional<PageRotation> m_pageRotation;
|
||||
PDFObject m_resources;
|
||||
};
|
||||
|
||||
/// Object representing page in PDF document. Contains different page properties, such as
|
||||
/// media box, crop box, rotation, etc. and also page content, resources.
|
||||
class PDFPage
|
||||
{
|
||||
public:
|
||||
explicit PDFPage() = default;
|
||||
|
||||
/// Parses the page tree. If error occurs, then exception is thrown.
|
||||
/// \param document Document owning this tree
|
||||
/// \param root Root object of page tree
|
||||
static std::vector<PDFPage> parse(const PDFDocument* document, const PDFObject& root);
|
||||
|
||||
const QRectF& getMediaBox() const { return m_mediaBox; }
|
||||
const QRectF& getCropBox() const { return m_cropBox; }
|
||||
const QRectF& getBleedBox() const { return m_bleedBox; }
|
||||
const QRectF& getTrimBox() const { return m_trimBox; }
|
||||
const QRectF& getArtBox() const { return m_artBox; }
|
||||
PageRotation getPageRotation() const { return m_pageRotation; }
|
||||
const PDFObject& getResources() const { return m_resources; }
|
||||
const PDFObject& getContents() const { return m_contents; }
|
||||
|
||||
private:
|
||||
/// Parses the page tree (implementation). If error occurs, then exception is thrown.
|
||||
/// \param pages Page array. Pages are inserted into this array
|
||||
/// \param visitedReferences Visited references (to check cycles in page tree and avoid hangup)
|
||||
/// \param templateAttributes Template attributes (inheritable attributes defined in parent)
|
||||
/// \param root Root object of page tree
|
||||
/// \param document Document owning this tree
|
||||
static void parseImpl(std::vector<PDFPage>& pages,
|
||||
std::set<PDFObjectReference>& visitedReferences,
|
||||
const PDFPageInheritableAttributes& templateAttributes,
|
||||
const PDFObject& root,
|
||||
const PDFDocument* document);
|
||||
|
||||
QRectF m_mediaBox;
|
||||
QRectF m_cropBox;
|
||||
QRectF m_bleedBox;
|
||||
QRectF m_trimBox;
|
||||
QRectF m_artBox;
|
||||
PageRotation m_pageRotation = PageRotation::None;
|
||||
PDFObject m_resources;
|
||||
PDFObject m_contents;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFPAGE_H
|
Loading…
Reference in New Issue