diff --git a/Pdf4QtLib/sources/pdfencoding.cpp b/Pdf4QtLib/sources/pdfencoding.cpp
index d4242ff..4732259 100644
--- a/Pdf4QtLib/sources/pdfencoding.cpp
+++ b/Pdf4QtLib/sources/pdfencoding.cpp
@@ -2413,6 +2413,33 @@ QString PDFEncoding::convertSmartFromByteStringToUnicode(const QByteArray& strea
     return QString::fromLatin1(stream.toHex()).toUpper();
 }
 
+QString PDFEncoding::convertSmartFromByteStringToRepresentableQString(const QByteArray& stream)
+{
+    // A "D:" prefix marks a candidate date string; if it converts to
+    // a valid date/time, show that instead of the raw string.
+    if (stream.startsWith("D:"))
+    {
+        QDateTime dateTime = convertToDateTime(stream);
+        if (dateTime.isValid())
+        {
+            return dateTime.toString(Qt::TextDate);
+        }
+    }
+
+    bool isBinary = false;
+    QString text = convertSmartFromByteStringToUnicode(stream, &isBinary);
+
+    if (!isBinary)
+    {
+        return text;
+    }
+
+    // Percent-encoded output is plain ASCII. Convert it explicitly, the
+    // implicit QByteArray -> QString cast does not compile when the
+    // project is built with QT_NO_CAST_FROM_ASCII.
+    return QString::fromLatin1(stream.toPercentEncoding(" ", QByteArray(), '%'));
+}
+
 QString PDFEncoding::getEncodingCharacters(Encoding encoding)
 {
     QString string;
diff --git a/Pdf4QtLib/sources/pdfencoding.h b/Pdf4QtLib/sources/pdfencoding.h
index 4a7ad89..5779b12 100644
--- a/Pdf4QtLib/sources/pdfencoding.h
+++ b/Pdf4QtLib/sources/pdfencoding.h
@@ -117,10 +117,17 @@ public:
     /// Function checks if stream can be converted to unicode by heuristic
     /// way, it is not always reliable.
     /// \param stream Stream
-    /// \param isBinary If specified, it is set to true if conversion failed
+    /// \param[out] isBinary If specified, it is set to true if conversion failed
     /// \returns Unicode string or string converted to hexadecimal representation
     static QString convertSmartFromByteStringToUnicode(const QByteArray& stream, bool* isBinary);
 
+    /// Tries to convert stream to representable string. A stream starting
+    /// with "D:", which converts to a valid date, is returned as date/time text.
+    /// If the stream cannot be converted to text, percent-encoding is used.
+    /// \param stream Stream
+    /// \returns Unicode string or string converted to percentage representation
+    static QString convertSmartFromByteStringToRepresentableQString(const QByteArray& stream);
+
     /// Returns all characters of the given encoding
     /// \param encoding Encoding
     /// \returns All characters reprezentable by encoding.
diff --git a/Pdf4QtLib/sources/pdfobjectutils.cpp b/Pdf4QtLib/sources/pdfobjectutils.cpp
index 61a1f68..40345a8 100644
--- a/Pdf4QtLib/sources/pdfobjectutils.cpp
+++ b/Pdf4QtLib/sources/pdfobjectutils.cpp
@@ -15,7 +15,6 @@
 // You should have received a copy of the GNU Lesser General Public License
 // along with Pdf4Qt. If not, see .
 
-
 #include "pdfobjectutils.h"
 #include "pdfvisitor.h"
 
@@ -210,6 +209,16 @@ std::set PDFObjectUtils::getReferences(const std::vector
+std::set<PDFObjectReference> PDFObjectUtils::getDirectReferences(const PDFObject& object)
+{
+    std::set<PDFObjectReference> references;
+
+    PDFCollectReferencesVisitor collectReferencesVisitor(references);
+    object.accept(&collectReferencesVisitor);
+
+    return references;
+}
+
 PDFObject PDFObjectUtils::replaceReferences(const PDFObject& object, const std::map& referenceMapping)
 {
     PDFReplaceReferencesVisitor replaceReferencesVisitor(referenceMapping);
@@ -217,4 +226,194 @@ PDFObject PDFObjectUtils::replaceReferences(const PDFObject& object, const std::
     return replaceReferencesVisitor.getObject();
 }
 
+/// Classifies all objects of \p document. Old classification is cleared.
+/// Objects reachable from pages (page, annotations, contents, resources) are
+/// marked while walking the page list; actions are detected by "Type" entry.
+/// \param document Document; if nullptr, classification is only cleared
+void PDFObjectClassifier::classify(const PDFDocument* document)
+{
+    // Clear old classification, if it exists
+    m_classification.clear();
+    m_allTypesUsed = None;
+
+    if (!document)
+    {
+        return;
+    }
+
+    PDFDocumentDataLoaderDecorator loader(document);
+    const PDFObjectStorage& storage = document->getStorage();
+    const PDFObjectStorage::PDFObjects& objects = storage.getObjects();
+
+    // One slot per object number. The slot stores the whole reference
+    // (number and generation), hasObject() compares against it.
+    m_classification.resize(objects.size(), Classification());
+    for (size_t i = 0; i < objects.size(); ++i)
+    {
+        m_classification[i].reference = PDFObjectReference(i, objects[i].generation);
+    }
+
+    // First, iterate through pages of the document
+    const PDFCatalog* catalog = document->getCatalog();
+    const size_t pageCount = catalog->getPageCount();
+    for (size_t pageIndex = 0; pageIndex < pageCount; ++pageIndex)
+    {
+        const PDFPage* page = catalog->getPage(pageIndex);
+        if (!page)
+        {
+            continue;
+        }
+
+        // Handle page itself
+        if (hasObject(page->getPageReference()))
+        {
+            mark(page->getPageReference(), Page);
+        }
+
+        // Handle annotations
+        for (const PDFObjectReference& reference : page->getAnnotations())
+        {
+            if (hasObject(reference))
+            {
+                mark(reference, Annotation);
+            }
+        }
+
+        // Handle contents. In a malformed document the page object need not
+        // be a dictionary, so it is checked instead of only asserted.
+        const PDFObject pageObject = document->getObjectByReference(page->getPageReference());
+        if (!pageObject.isDictionary())
+        {
+            continue;
+        }
+
+        // NOTE(review): only a "Contents" entry which is a single reference is
+        // classified; other forms of the entry are left unclassified here.
+        const PDFDictionary* dictionary = pageObject.getDictionary();
+        const PDFObject& contentsObject = dictionary->get("Contents");
+        if (contentsObject.isReference() && hasObject(contentsObject.getReference()))
+        {
+            mark(contentsObject.getReference(), ContentStream);
+        }
+
+        // Handle resources
+        const PDFDictionary* resourcesDictionary = document->getDictionaryFromObject(dictionary->get("Resources"));
+        if (!resourcesDictionary)
+        {
+            continue;
+        }
+
+        markDictionary(document, resourcesDictionary->get("ExtGState"), GraphicState);
+        markDictionary(document, resourcesDictionary->get("ColorSpace"), ColorSpace);
+        markDictionary(document, resourcesDictionary->get("Pattern"), Pattern);
+        markDictionary(document, resourcesDictionary->get("Shading"), Shading);
+        markDictionary(document, resourcesDictionary->get("Font"), Font);
+
+        // XObjects are split into images and forms by their "Subtype" entry
+        const PDFDictionary* xobjectDictionary = document->getDictionaryFromObject(resourcesDictionary->get("XObject"));
+        if (!xobjectDictionary)
+        {
+            continue;
+        }
+
+        const size_t xobjectCount = xobjectDictionary->getCount();
+        for (size_t xobjectIndex = 0; xobjectIndex < xobjectCount; ++xobjectIndex)
+        {
+            const PDFObject& item = xobjectDictionary->getValue(xobjectIndex);
+            if (!item.isReference() || !hasObject(item.getReference()))
+            {
+                continue;
+            }
+
+            const PDFDictionary* xobjectItemDictionary = document->getDictionaryFromObject(item);
+            if (!xobjectItemDictionary)
+            {
+                continue;
+            }
+
+            const QByteArray subtype = loader.readNameFromDictionary(xobjectItemDictionary, "Subtype");
+            if (subtype == "Image")
+            {
+                mark(item.getReference(), Image);
+            }
+            else if (subtype == "Form")
+            {
+                mark(item.getReference(), Form);
+            }
+        }
+    }
+
+    // Second, detect actions by the "Type" entry and collect all used types
+    for (Classification& classification : m_classification)
+    {
+        if (const PDFDictionary* dictionary = document->getDictionaryFromObject(document->getObjectByReference(classification.reference)))
+        {
+            if (loader.readNameFromDictionary(dictionary, "Type") == "Action")
+            {
+                classification.types.setFlag(Action);
+            }
+        }
+
+        m_allTypesUsed |= classification.types;
+    }
+}
+
+/// Returns true, if \p reference matches an entry of the classification
+/// table built by classify() (same object number and generation).
+bool PDFObjectClassifier::hasObject(PDFObjectReference reference) const
+{
+    // The object number is used as an index, so anything outside of the
+    // table must be rejected before the table is accessed.
+    return reference.isValid() &&
+           reference.objectNumber >= 0 &&
+           reference.objectNumber < PDFInteger(m_classification.size()) &&
+           m_classification[reference.objectNumber].reference == reference;
+}
+
+/// Returns references of all objects marked with \p type
+std::vector<PDFObjectReference> PDFObjectClassifier::getObjectsByType(Type type) const
+{
+    std::vector<PDFObjectReference> result;
+
+    for (const Classification& classification : m_classification)
+    {
+        if (classification.types.testFlag(type))
+        {
+            result.push_back(classification.reference);
+        }
+    }
+
+    return result;
+}
+
+/// Adds \p type to the types of the object given by \p reference
+void PDFObjectClassifier::mark(PDFObjectReference reference, Type type)
+{
+    Q_ASSERT(hasObject(reference));
+
+    // Q_ASSERT is compiled out in release builds; never index the table
+    // with a reference, which is not part of it.
+    if (hasObject(reference))
+    {
+        m_classification[reference.objectNumber].types.setFlag(type, true);
+    }
+}
+
+/// Marks every known reference stored as a value of dictionary \p object with \p type
+void PDFObjectClassifier::markDictionary(const PDFDocument* document, PDFObject object, Type type)
+{
+    if (const PDFDictionary* dictionary = document->getDictionaryFromObject(object))
+    {
+        const size_t count = dictionary->getCount();
+        for (size_t i = 0; i < count; ++i)
+        {
+            const PDFObject& item = dictionary->getValue(i);
+            if (item.isReference() && hasObject(item.getReference()))
+            {
+                mark(item.getReference(), type);
+            }
+        }
+    }
+}
+
 } // namespace pdf
diff --git a/Pdf4QtLib/sources/pdfobjectutils.h b/Pdf4QtLib/sources/pdfobjectutils.h
index 5e9c166..9585186 100644
--- a/Pdf4QtLib/sources/pdfobjectutils.h
+++ b/Pdf4QtLib/sources/pdfobjectutils.h
@@
-20,23 +20,30 @@
 #include "pdfobject.h"
 
+#include
+
 #include
+#include
 
 namespace pdf
 {
 
 class PDFObjectStorage;
+class PDFDocument;
 
 /// Utilities for manipulation with objects
 class PDFObjectUtils
 {
 public:
-    /// Returns list of references referenced by \p objects. So, all references, which are present
+    /// Returns a list of references referenced by \p objects. So, all references, which are present
     /// in objects, appear in the result set, including objects, which are referenced by referenced
     /// objects (so, transitive closure above reference graph is returned).
     /// \param objects Objects
     /// \param storage Storage
     static std::set getReferences(const std::vector& objects, const PDFObjectStorage& storage);
 
+    /// Returns a list of references directly referenced from object. References itself are not followed.
+    static std::set<PDFObjectReference> getDirectReferences(const PDFObject& object);
+
     static PDFObject replaceReferences(const PDFObject& object, const std::map& referenceMapping);
 
 private:
@@ -97,6 +104,72 @@ private:
     bool m_locked;
 };
 
+/// Classifies objects according to their type. Some heuristic is used
+/// when object type is missing or document is not well-formed.
+class Pdf4QtLIBSHARED_EXPORT PDFObjectClassifier
+{
+public:
+
+    inline PDFObjectClassifier() = default;
+
+    /// Performs object classification on a document. Old classification
+    /// is being cleared.
+    /// \param document Document
+    void classify(const PDFDocument* document);
+
+    /// Object types. Each value is a single bit, so one object can
+    /// carry several types at once (see Types).
+    enum Type : uint32_t
+    {
+        None = 0x00000000,
+        Page = 0x00000001,
+        ContentStream = 0x00000002,
+        GraphicState = 0x00000004,
+        ColorSpace = 0x00000008,
+        Pattern = 0x00000010,
+        Shading = 0x00000020,
+        Image = 0x00000040,
+        Form = 0x00000080,
+        Font = 0x00000100,
+        Action = 0x00000200,
+        Annotation = 0x00000400
+    };
+
+    Q_DECLARE_FLAGS(Types, Type)
+
+    /// Returns true, if object with given reference exists
+    /// and was classified.
+    /// \param reference Reference
+    bool hasObject(PDFObjectReference reference) const;
+
+    /// Returns true, if any object with given type is present in a document
+    /// \param type Object type
+    bool hasType(Type type) const { return m_allTypesUsed.testFlag(type); }
+
+    /// Returns a list of objects with a given type
+    /// \param type Type
+    std::vector<PDFObjectReference> getObjectsByType(Type type) const;
+
+private:
+    /// Classification of a single object
+    struct Classification
+    {
+        PDFObjectReference reference;
+        Types types = None;
+    };
+
+    /// Marks object with a given type
+    void mark(PDFObjectReference reference, Type type);
+
+    /// Marks objects in dictionary with a given type
+    void markDictionary(const PDFDocument* document, PDFObject object, Type type);
+
+    /// Classification of all objects, indexed by object number
+    std::vector<Classification> m_classification;
+    /// Union of types of all classified objects
+    Types m_allTypesUsed = None;
+};
+
 } // namespace pdf
 
 #endif // PDFOBJECTUTILS_H
diff --git a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.cpp b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.cpp
index 4746e42..b255fe3 100644
--- a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.cpp
+++ b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.cpp
@@ -34,24 +34,49 @@ ObjectInspectorDialog::ObjectInspectorDialog(const pdf::PDFDocument* document, Q
 {
     ui->setupUi(this);
 
+    m_objectClassifier.classify(document);
+
     ui->modeComboBox->addItem(tr("Document"), int(PDFObjectInspectorTreeItemModel::Document));
     ui->modeComboBox->addItem(tr("Pages"), int(PDFObjectInspectorTreeItemModel::Page));
-    ui->modeComboBox->addItem(tr("Images"), int(PDFObjectInspectorTreeItemModel::Image));
+
+    // A per-type mode is offered only when the classifier has found
+    // at least one object of that type in the document.
+    auto addModeIfUsed = [this](pdf::PDFObjectClassifier::Type type, const QString& text, PDFObjectInspectorTreeItemModel::Mode mode)
+    {
+        if (m_objectClassifier.hasType(type))
+        {
+            ui->modeComboBox->addItem(text, int(mode));
+        }
+    };
+
+    addModeIfUsed(pdf::PDFObjectClassifier::ContentStream, tr("Content streams"), PDFObjectInspectorTreeItemModel::ContentStream);
+    addModeIfUsed(pdf::PDFObjectClassifier::GraphicState, tr("Graphic states"), PDFObjectInspectorTreeItemModel::GraphicState);
+    addModeIfUsed(pdf::PDFObjectClassifier::ColorSpace, tr("Color spaces"), PDFObjectInspectorTreeItemModel::ColorSpace);
+    addModeIfUsed(pdf::PDFObjectClassifier::Pattern, tr("Patterns"), PDFObjectInspectorTreeItemModel::Pattern);
+    addModeIfUsed(pdf::PDFObjectClassifier::Shading, tr("Shadings"), PDFObjectInspectorTreeItemModel::Shading);
+    addModeIfUsed(pdf::PDFObjectClassifier::Image, tr("Images"), PDFObjectInspectorTreeItemModel::Image);
+    addModeIfUsed(pdf::PDFObjectClassifier::Form, tr("Forms"), PDFObjectInspectorTreeItemModel::Form);
+    addModeIfUsed(pdf::PDFObjectClassifier::Font, tr("Fonts"), PDFObjectInspectorTreeItemModel::Font);
+    addModeIfUsed(pdf::PDFObjectClassifier::Action, tr("Actions"), PDFObjectInspectorTreeItemModel::Action);
+    addModeIfUsed(pdf::PDFObjectClassifier::Annotation, tr("Annotations"), PDFObjectInspectorTreeItemModel::Annotation);
+
     ui->modeComboBox->addItem(tr("Object List"), int(PDFObjectInspectorTreeItemModel::List));
 
     ui->modeComboBox->setCurrentIndex(ui->modeComboBox->findData(int(PDFObjectInspectorTreeItemModel::Document)));
     connect(ui->modeComboBox, QOverload::of(&QComboBox::currentIndexChanged), this, &ObjectInspectorDialog::onModeChanged);
 
-    m_model = new PDFObjectInspectorTreeItemModel(this);
+    m_model = new PDFObjectInspectorTreeItemModel(&m_objectClassifier, this);
     onModeChanged();
     m_model->setDocument(pdf::PDFModifiedDocument(const_cast(document), nullptr, pdf::PDFModifiedDocument::Reset));
 
     ui->objectTreeView->setRootIsDecorated(true);
     ui->objectTreeView->setModel(m_model);
 
-    QSplitter* splitter = new QSplitter(this);
-    splitter->addWidget(ui->objectTreeView);
-    splitter->addWidget(ui->tabWidget);
+    ui->splitter->setStretchFactor(0, 0);
+    ui->splitter->setStretchFactor(1, 1);
+    ui->splitter->setCollapsible(0, true);
+    ui->splitter->setCollapsible(1, true);
+    ui->splitter->setSizes(QList<int>() << pdf::PDFWidgetUtils::scaleDPI_x(this, 300) << pdf::PDFWidgetUtils::scaleDPI_x(this, 200));
 
     ui->objectTreeView->setMinimumWidth(pdf::PDFWidgetUtils::scaleDPI_x(this, 200));
     setMinimumSize(pdf::PDFWidgetUtils::scaleDPI(this, QSize(800, 600)));
diff --git a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.h b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.h
index 46d15cb..4e1b376 100644
--- a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.h
+++ b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.h
@@ -19,6 +19,7 @@
 #define OBJECTINSPECTORDIALOG_H
 
 #include "pdfdocument.h"
+#include "pdfobjectutils.h"
 
 #include
 
@@ -44,6 +45,7 @@ private:
 
     Ui::ObjectInspectorDialog* ui;
     const pdf::PDFDocument* m_document;
+    pdf::PDFObjectClassifier m_objectClassifier;
     PDFObjectInspectorTreeItemModel* m_model;
 };
 
diff --git a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.ui b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.ui
index a37e907..912bd42 100644
--- a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.ui
+++ b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/objectinspectordialog.ui
@@ -13,15 +13,47 @@ Object Inspector - - - - - false - + + + + + Qt::Horizontal + + + + Objects + + + + + + + + + false + + + + + + + + 0 + + + + Tab 1 + + + + + Tab 2 + + + - + Qt::Horizontal
@@ -31,23 +63,6 @@ - - - - - - - - Tab 1 - - - - - Tab 2 - - - -
diff --git a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/pdfobjectinspectortreeitemmodel.cpp b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/pdfobjectinspectortreeitemmodel.cpp
index
c3429da..e393c52 100644
--- a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/pdfobjectinspectortreeitemmodel.cpp
+++ b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/pdfobjectinspectortreeitemmodel.cpp
@@ -18,6 +18,7 @@
 #include "pdfobjectinspectortreeitemmodel.h"
 #include "pdfdocument.h"
 #include "pdfvisitor.h"
+#include "pdfencoding.h"
 
 #include
 
@@ -84,8 +85,9 @@ void PDFObjectInspectorTreeItem::setObject(const pdf::PDFObject& object)
     m_object = object;
 }
 
-PDFObjectInspectorTreeItemModel::PDFObjectInspectorTreeItemModel(QObject* parent) :
-    pdf::PDFTreeItemModel(parent)
+PDFObjectInspectorTreeItemModel::PDFObjectInspectorTreeItemModel(const pdf::PDFObjectClassifier* classifier, QObject* parent) :
+    pdf::PDFTreeItemModel(parent),
+    m_classifier(classifier)
 {
 
 }
@@ -100,6 +102,8 @@ QVariant PDFObjectInspectorTreeItemModel::headerData(int section, Qt::Orientatio
 
 int PDFObjectInspectorTreeItemModel::columnCount(const QModelIndex& parent) const
 {
+    Q_UNUSED(parent);
+
     return 1;
 }
 
@@ -119,7 +123,7 @@ QVariant PDFObjectInspectorTreeItemModel::data(const QModelIndex& index, int rol
     const PDFObjectInspectorTreeItem* parent = static_cast(index.parent().internalPointer());
 
     QStringList data;
-    if (item->getReference().isValid() && parent && !parent->getReference().isValid())
+    if (item->getReference().isValid() && (!parent || !parent->getReference().isValid()))
     {
         data << QString("%1 %2 R").arg(item->getReference().objectNumber).arg(item->getReference().generation);
     }
@@ -152,7 +156,7 @@ QVariant PDFObjectInspectorTreeItemModel::data(const QModelIndex& index, int rol
             break;
 
         case pdf::PDFObject::Type::String:
-            data << QString("\"%1\"").arg(QString::fromLatin1(object.getString().toPercentEncoding()));
+            data << QString("\"%1\"").arg(pdf::PDFEncoding::convertSmartFromByteStringToRepresentableQString(object.getString()));
             break;
 
         case pdf::PDFObject::Type::Name:
@@ -168,7 +172,7 @@ QVariant PDFObjectInspectorTreeItemModel::data(const QModelIndex& index, int rol
             break;
 
         case pdf::PDFObject::Type::Stream:
-            data << tr("Stream [%1 items, %2 data bytes]").arg(locale.toString(object.getStream()->getDictionary()->getCount())).arg(locale.toString(object.getStream()->getContent()->size()));
+            data << tr("Stream [%1 items, %2 data bytes]").arg(locale.toString(object.getStream()->getDictionary()->getCount()), locale.toString(object.getStream()->getContent()->size()));
             break;
 
         case pdf::PDFObject::Type::Reference:
@@ -193,6 +197,26 @@ void PDFObjectInspectorTreeItemModel::update()
 {
     std::set usedReferences;
 
+    // Rebuilds the tree so that it lists all objects, which the classifier
+    // has marked with the given type.
+    auto createObjectsFromClassifier = [this, &usedReferences](pdf::PDFObjectClassifier::Type type)
+    {
+        m_rootItem.reset(new PDFObjectInspectorTreeItem());
+
+        // NOTE(review): guard for a model used without a classifier or
+        // before a document is set; the tree then stays empty.
+        if (!m_classifier || !m_document)
+        {
+            return;
+        }
+
+        for (pdf::PDFObjectReference reference : m_classifier->getObjectsByType(type))
+        {
+            pdf::PDFObject object = m_document->getStorage().getObjectByReference(reference);
+            createObjectItem(getRootItem(), reference, object, true, usedReferences);
+        }
+    };
+
     switch (m_mode)
     {
         case pdfplugin::PDFObjectInspectorTreeItemModel::Document:
@@ -221,7 +245,44 @@ void PDFObjectInspectorTreeItemModel::update()
             break;
         }
 
-        case pdfplugin::PDFObjectInspectorTreeItemModel::Image:
+        case ContentStream:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::ContentStream);
+            break;
+
+        case GraphicState:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::GraphicState);
+            break;
+
+        case ColorSpace:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::ColorSpace);
+            break;
+
+        case Pattern:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::Pattern);
+            break;
+
+        case Shading:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::Shading);
+            break;
+
+        case Image:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::Image);
+            break;
+
+        case Form:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::Form);
+            break;
+
+        case Font:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::Font);
+            break;
+
+        case Action:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::Action);
+            break;
+
+        case Annotation:
+            createObjectsFromClassifier(pdf::PDFObjectClassifier::Annotation);
             break;
 
         case pdfplugin::PDFObjectInspectorTreeItemModel::List:
@@ -392,7 +453,7 @@ void PDFCreateObjectInspectorTreeItemFromObjectVisitor::visitReference(const pdf
 {
     Q_ASSERT(m_usedReferences);
 
-    if (!m_usedReferences->count(reference))
+    if (m_usedReferences->count(reference))
     {
         // Reference already followed
         return;
diff --git a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/pdfobjectinspectortreeitemmodel.h b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/pdfobjectinspectortreeitemmodel.h
index 383d6d1..d4023f1 100644
--- a/Pdf4QtViewerPlugins/ObjectInspectorPlugin/pdfobjectinspectortreeitemmodel.h
+++ b/Pdf4QtViewerPlugins/ObjectInspectorPlugin/pdfobjectinspectortreeitemmodel.h
@@ -19,6 +19,7 @@
 #define PDFOBJECTINSPECTORTREEITEMMODEL_H
 
 #include "pdfitemmodels.h"
+#include "pdfobjectutils.h"
 
 #include
 
@@ -37,11 +38,20 @@ public:
     {
         Document,
         Page,
+        ContentStream,
+        GraphicState,
+        ColorSpace,
+        Pattern,
+        Shading,
         Image,
+        Form,
+        Font,
+        Action,
+        Annotation,
         List
     };
 
-    explicit PDFObjectInspectorTreeItemModel(QObject* parent);
+    explicit PDFObjectInspectorTreeItemModel(const pdf::PDFObjectClassifier* classifier, QObject* parent);
 
     virtual QVariant headerData(int section, Qt::Orientation orientation, int role) const override;
     virtual int columnCount(const QModelIndex& parent) const override;
@@ -59,6 +69,7 @@ private:
 
     PDFObjectInspectorTreeItem* getRootItem();
 
+    const pdf::PDFObjectClassifier* m_classifier;
     Mode m_mode = List;
 };