Structure tree information tool

This commit is contained in:
Jakub Melka 2020-10-10 14:45:14 +02:00
parent e735c66d5c
commit b0831a84a7
7 changed files with 458 additions and 9 deletions

View File

@ -56,6 +56,10 @@ struct PDFStructureTreeAttributeDefinition
/// \param string String
static PDFStructureTreeAttribute::Owner getOwnerFromString(const QByteArray& string);
/// Returns string from owner. If owner is not valid, then invalid string is returned.
/// \param owner Owner
static QString getOwnerName(PDFStructureTreeAttribute::Owner owner);
PDFStructureTreeAttribute::Attribute type = PDFStructureTreeAttribute::Attribute::User;
const char* name = nullptr;
bool inheritable = false;
@ -248,6 +252,19 @@ PDFStructureTreeAttribute::Owner PDFStructureTreeAttributeDefinition::getOwnerFr
return PDFStructureTreeAttribute::Owner::Invalid;
}
/// Translates an attribute owner to its textual name by searching the
/// table of owner definitions. The first entry whose owner matches wins.
/// \param owner Owner to be translated
/// \returns Owner name, or a default-constructed string, if the owner
///          is not present in the table
QString PDFStructureTreeAttributeDefinition::getOwnerName(PDFStructureTreeAttribute::Owner owner)
{
    QString ownerName;

    for (const auto& definition : s_ownerDefinitions)
    {
        if (definition.second != owner)
        {
            // Not the owner we are looking for, try the next entry
            continue;
        }

        ownerName = QString::fromLatin1(definition.first);
        break;
    }

    return ownerName;
}
PDFStructureTreeAttribute::PDFStructureTreeAttribute() :
m_definition(&s_attributeDefinitions.front()),
m_owner(Owner::Invalid),
@ -278,6 +295,22 @@ PDFStructureTreeAttribute::Attribute PDFStructureTreeAttribute::getType() const
return m_definition->type;
}
/// Returns the textual name of the attribute type. User attributes
/// report their user property name, all other attributes report
/// the name stored in the attribute definition.
/// \param storage Storage, passed on to the user property name lookup
QString PDFStructureTreeAttribute::getTypeName(const PDFObjectStorage* storage) const
{
    if (!isUser())
    {
        // Non-user attribute, the name comes from the definition
        Q_ASSERT(m_definition);
        return QString::fromLatin1(m_definition->name);
    }

    return getUserPropertyName(storage);
}
/// Returns the textual name of the owner of this attribute. The lookup
/// is delegated to PDFStructureTreeAttributeDefinition::getOwnerName.
QString PDFStructureTreeAttribute::getOwnerName() const
{
    const Owner owner = getOwner();
    return PDFStructureTreeAttributeDefinition::getOwnerName(owner);
}
bool PDFStructureTreeAttribute::isInheritable() const
{
Q_ASSERT(m_definition);
@ -873,4 +906,33 @@ PDFStructureItemPointer PDFStructureObjectReference::parseObjectReference(const
return pointer;
}
/// Default handler for the structure tree root. It does nothing with
/// the tree itself, it only passes this visitor to each child of the tree.
/// \param structureTree Visited structure tree
void PDFStructureTreeAbstractVisitor::visitStructureTree(const PDFStructureTree* structureTree)
{
acceptChildren(structureTree);
}
/// Default handler for a structure element. It does nothing with the
/// element itself, it only passes this visitor to each child of the element.
/// \param structureElement Visited structure element
void PDFStructureTreeAbstractVisitor::visitStructureElement(const PDFStructureElement* structureElement)
{
acceptChildren(structureElement);
}
/// Default handler for a marked content reference. It does nothing with
/// the reference itself, it only passes this visitor to each child of the item.
/// \param structureMarkedContentReference Visited marked content reference
void PDFStructureTreeAbstractVisitor::visitStructureMarkedContentReference(const PDFStructureMarkedContentReference* structureMarkedContentReference)
{
acceptChildren(structureMarkedContentReference);
}
/// Default handler for an object reference. It does nothing with the
/// reference itself, it only passes this visitor to each child of the item.
/// \param structureObjectReference Visited object reference
void PDFStructureTreeAbstractVisitor::visitStructureObjectReference(const PDFStructureObjectReference* structureObjectReference)
{
acceptChildren(structureObjectReference);
}
void PDFStructureTreeAbstractVisitor::acceptChildren(const PDFStructureItem* item)
{
const size_t childCount = item->getChildCount();
for (size_t i = 0; i < childCount; ++i)
{
item->getChild(i)->accept(this);
}
}
} // namespace pdf

View File

@ -28,6 +28,27 @@ namespace pdf
class PDFObjectStorage;
struct PDFStructureTreeAttributeDefinition;
class PDFStructureItem;
class PDFStructureTree;
class PDFStructureElement;
class PDFStructureMarkedContentReference;
class PDFStructureObjectReference;
/// Base class for visitors of the structure tree. There is one virtual
/// visit function for each kind of structure item. Derived classes override
/// the functions they are interested in, and can use acceptChildren()
/// to continue the traversal into the children of the visited item.
class PDFFORQTLIBSHARED_EXPORT PDFStructureTreeAbstractVisitor
{
public:
inline PDFStructureTreeAbstractVisitor() = default;
virtual ~PDFStructureTreeAbstractVisitor() = default;
/// Visits the structure tree (root item)
/// \param structureTree Visited structure tree
virtual void visitStructureTree(const PDFStructureTree* structureTree);
/// Visits a structure element
/// \param structureElement Visited structure element
virtual void visitStructureElement(const PDFStructureElement* structureElement);
/// Visits a marked content reference
/// \param structureMarkedContentReference Visited marked content reference
virtual void visitStructureMarkedContentReference(const PDFStructureMarkedContentReference* structureMarkedContentReference);
/// Visits an object reference
/// \param structureObjectReference Visited object reference
virtual void visitStructureObjectReference(const PDFStructureObjectReference* structureObjectReference);
protected:
/// Passes this visitor to all children of the given item
/// \param item Item, whose children are to be visited
void acceptChildren(const PDFStructureItem* item);
};
class PDFFORQTLIBSHARED_EXPORT PDFStructureTreeAttribute
{
public:
@ -137,9 +158,15 @@ public:
/// Returns attribute type
Attribute getType() const;
/// Returns attribute type name
QString getTypeName(const PDFObjectStorage* storage) const;
/// Returns attribute owner
Owner getOwner() const { return m_owner; }
/// Returns owner name
QString getOwnerName() const;
/// Returns true, if attribute is inheritable
bool isInheritable() const;
@ -159,13 +186,16 @@ public:
/// cannot be determined, empty object is returned.
PDFObject getDefaultValue() const;
/// Returns true, if attribute is user defined
bool isUser() const { return getType() == Attribute::User; }
/// Returns user property name. This function should be called only for
/// user properties. If error occurs, then empty string is returned.
/// \param storage Storage (for resolving of indirect objects)
QString getUserPropertyName(const PDFObjectStorage* storage) const;
/// Returns user property value. This function should be called only for
/// user properties. If error occurs, then empty string is returned.
/// user properties. If error occurs, then empty object is returned.
/// \param storage Storage (for resolving of indirect objects)
PDFObject getUserPropertyValue(const PDFObjectStorage* storage) const;
@ -220,7 +250,7 @@ class PDFStructureMarkedContentReference;
using PDFStructureItemPointer = QSharedPointer<PDFStructureItem>;
/// Root class for all structure tree items
class PDFStructureItem
class PDFFORQTLIBSHARED_EXPORT PDFStructureItem
{
public:
explicit inline PDFStructureItem(PDFStructureItem* parent, PDFStructureTree* root) :
@ -273,6 +303,8 @@ public:
virtual PDFStructureObjectReference* asStructureObjectReference() { return nullptr; }
virtual const PDFStructureObjectReference* asStructureObjectReference() const { return nullptr; }
/// Accepts the visitor. Each concrete item class implements this function
/// by calling the visit function of the visitor, which corresponds to the item's type.
/// \param visitor Visitor
virtual void accept(PDFStructureTreeAbstractVisitor* visitor) const = 0;
const PDFStructureItem* getParent() const { return m_parent; }
PDFStructureItem* getParent() { return m_parent; }
const PDFStructureTree* getTree() const { return m_root; }
@ -312,7 +344,7 @@ protected:
};
/// Structure tree namespace
class PDFStructureTreeNamespace
class PDFFORQTLIBSHARED_EXPORT PDFStructureTreeNamespace
{
public:
explicit inline PDFStructureTreeNamespace() = default;
@ -334,7 +366,7 @@ private:
using PDFStructureTreeNamespaces = std::vector<PDFStructureTreeNamespace>;
/// Structure tree, contains structure element hierarchy
class PDFStructureTree : public PDFStructureItem
class PDFFORQTLIBSHARED_EXPORT PDFStructureTree : public PDFStructureItem
{
public:
explicit inline PDFStructureTree() : PDFStructureItem(nullptr, this) { }
@ -342,6 +374,8 @@ public:
virtual PDFStructureTree* asStructureTree() override { return this; }
virtual const PDFStructureTree* asStructureTree() const override { return this; }
/// Accepts the visitor by calling its visitStructureTree function
/// \param visitor Visitor
virtual void accept(PDFStructureTreeAbstractVisitor* visitor) const override { visitor->visitStructureTree(this); }
/// Returns parents from parent tree for given entry. If entry
/// is not found, then empty vector is returned.
/// \param id Id
@ -366,6 +400,9 @@ public:
/// Returns a list of associated files
const std::vector<PDFFileSpecification>& getAssociatedFiles() const { return m_associatedFiles; }
/// Returns true, if structure tree is valid. The tree is considered
/// to be valid, if it has at least one child item.
bool isValid() const { return getChildCount() > 0; }
/// Parses structure tree from the object. If error occurs, empty structure
/// tree is returned.
/// \param storage Storage
@ -397,7 +434,7 @@ private:
};
/// Structure element
class PDFStructureElement : public PDFStructureItem
class PDFFORQTLIBSHARED_EXPORT PDFStructureElement : public PDFStructureItem
{
public:
explicit inline PDFStructureElement(PDFStructureItem* parent, PDFStructureTree* root) :
@ -419,6 +456,7 @@ public:
virtual PDFStructureElement* asStructureElement() override { return this; }
virtual const PDFStructureElement* asStructureElement() const override { return this; }
/// Accepts the visitor by calling its visitStructureElement function
/// \param visitor Visitor
virtual void accept(PDFStructureTreeAbstractVisitor* visitor) const override { visitor->visitStructureElement(this); }
const QByteArray& getTypeName() const { return m_typeName; }
Type getStandardType() const { return m_standardType; }
@ -488,7 +526,7 @@ private:
};
/// Structure marked content reference
class PDFStructureMarkedContentReference : public PDFStructureItem
class PDFFORQTLIBSHARED_EXPORT PDFStructureMarkedContentReference : public PDFStructureItem
{
public:
explicit inline PDFStructureMarkedContentReference(PDFStructureItem* parent, PDFStructureTree* root) :
@ -499,6 +537,7 @@ public:
virtual PDFStructureMarkedContentReference* asStructureMarkedContentReference() override { return this; }
virtual const PDFStructureMarkedContentReference* asStructureMarkedContentReference() const override { return this; }
/// Accepts the visitor by calling its visitStructureMarkedContentReference function
/// \param visitor Visitor
virtual void accept(PDFStructureTreeAbstractVisitor* visitor) const override { visitor->visitStructureMarkedContentReference(this); }
const PDFObjectReference& getPageReference() const { return m_pageReference; }
const PDFObjectReference& getContentStreamReference() const { return m_contentStreamReference; }
@ -525,7 +564,7 @@ private:
};
/// Structure object reference
class PDFStructureObjectReference : public PDFStructureItem
class PDFFORQTLIBSHARED_EXPORT PDFStructureObjectReference : public PDFStructureItem
{
public:
explicit inline PDFStructureObjectReference(PDFStructureItem* parent, PDFStructureTree* root) :
@ -536,6 +575,7 @@ public:
virtual PDFStructureObjectReference* asStructureObjectReference() override { return this; }
virtual const PDFStructureObjectReference* asStructureObjectReference() const override { return this; }
/// Accepts the visitor by calling its visitStructureObjectReference function
/// \param visitor Visitor
virtual void accept(PDFStructureTreeAbstractVisitor* visitor) const override { visitor->visitStructureObjectReference(this); }
const PDFObjectReference& getPageReference() const { return m_pageReference; }
const PDFObjectReference& getObjectReference() const { return m_objectReference; }

View File

@ -48,6 +48,7 @@ SOURCES += \
pdftoolinfometadata.cpp \
pdftoolinfonameddestinations.cpp \
pdftoolinfopageboxes.cpp \
pdftoolinfostructuretree.cpp \
pdftoolverifysignatures.cpp \
pdftoolxml.cpp
@ -69,5 +70,6 @@ HEADERS += \
pdftoolinfometadata.h \
pdftoolinfonameddestinations.h \
pdftoolinfopageboxes.h \
pdftoolinfostructuretree.h \
pdftoolverifysignatures.h \
pdftoolxml.h

View File

@ -530,7 +530,17 @@ void PDFXmlOutputFormatterImpl::beginElement(PDFOutputFormatter::Element type, Q
case PDFOutputFormatter::Element::TableColumn:
case PDFOutputFormatter::Element::TableHeaderColumn:
{
m_streamWriter.writeTextElement(m_namespace, name, description);
if (reference != 0)
{
m_streamWriter.writeStartElement(m_namespace, name);
m_streamWriter.writeAttribute(m_namespace, "ref", QString::number(reference));
m_streamWriter.writeCharacters(description);
m_streamWriter.writeEndElement();
}
else
{
m_streamWriter.writeTextElement(m_namespace, name, description);
}
break;
}

View File

@ -78,7 +78,7 @@ public:
inline void endTableRow() { endElement(); }
inline void writeTableHeaderColumn(QString name, QString description, Qt::Alignment alignment = Qt::AlignLeft) { beginElement(Element::TableHeaderColumn, name, description, alignment); endElement(); }
inline void writeTableColumn(QString name, QString description, Qt::Alignment alignment = Qt::AlignLeft) { beginElement(Element::TableColumn, name, description, alignment); endElement(); }
inline void writeText(QString name, QString description) { beginElement(Element::Text, name, description); endElement(); }
inline void writeText(QString name, QString description, int reference = 0) { beginElement(Element::Text, name, description, Qt::AlignLeft, reference); endElement(); }
inline void beginHeader(QString name, QString description, int reference = 0) { beginElement(Element::Header, name, description, Qt::AlignLeft, reference); }
inline void endHeader() { endElement(); }

View File

@ -0,0 +1,299 @@
// Copyright (C) 2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdftoolinfostructuretree.h"
#include "pdfstructuretree.h"
#include "pdfencoding.h"
namespace pdftool
{
/// Visitor, which writes the content of the structure tree into the output
/// formatter. Structure tree and structure elements are written as headers
/// (with their children nested inside), marked content references and
/// object references are written as text items.
class PDFStructureTreePrintVisitor : public pdf::PDFStructureTreeAbstractVisitor
{
public:
/// Constructs the visitor. The pointers are only stored, they are
/// not owned by the visitor.
/// \param document Document (used for page index lookup and object resolving)
/// \param tree Structure tree
/// \param formatter Output formatter, which receives the output
explicit PDFStructureTreePrintVisitor(const pdf::PDFDocument* document,
const pdf::PDFStructureTree* tree,
PDFOutputFormatter* formatter) :
m_document(document),
m_tree(tree),
m_formatter(formatter)
{
}
virtual void visitStructureTree(const pdf::PDFStructureTree* structureTree) override;
virtual void visitStructureElement(const pdf::PDFStructureElement* structureElement) override;
virtual void visitStructureMarkedContentReference(const pdf::PDFStructureMarkedContentReference* structureMarkedContentReference) override;
virtual void visitStructureObjectReference(const pdf::PDFStructureObjectReference* structureObjectReference) override;
private:
const pdf::PDFDocument* m_document;
// NOTE(review): m_tree is stored, but no visit function in this file reads it
const pdf::PDFStructureTree* m_tree;
PDFOutputFormatter* m_formatter;
// Default-constructed locale, used for formatting of numbers
QLocale m_locale;
};
/// Writes the structure tree root as a "tree" header, which encloses
/// the output produced by all children of the tree.
/// \param structureTree Visited structure tree
void PDFStructureTreePrintVisitor::visitStructureTree(const pdf::PDFStructureTree* structureTree)
{
    const QString title = PDFToolTranslationContext::tr("Structure Tree");

    m_formatter->beginHeader("tree", title);
    acceptChildren(structureTree);
    m_formatter->endHeader();
}
void PDFStructureTreePrintVisitor::visitStructureElement(const pdf::PDFStructureElement* structureElement)
{
pdf::PDFInteger pageIndex = m_document->getCatalog()->getPageIndexFromPageReference(structureElement->getPageReference());
m_formatter->beginHeader("element", QString::fromLatin1(structureElement->getTypeName()), pageIndex);
const std::vector<pdf::PDFStructureTreeAttribute>& attributes = structureElement->getAttributes();
if (!attributes.empty())
{
m_formatter->beginTable("attributes", PDFToolTranslationContext::tr("Attributes"));
m_formatter->beginTableHeaderRow("header");
m_formatter->writeTableHeaderColumn("no", PDFToolTranslationContext::tr("No"));
m_formatter->writeTableHeaderColumn("type", PDFToolTranslationContext::tr("Type"));
m_formatter->writeTableHeaderColumn("owner", PDFToolTranslationContext::tr("Owner"));
m_formatter->writeTableHeaderColumn("revision", PDFToolTranslationContext::tr("Revision"));
m_formatter->writeTableHeaderColumn("hidden", PDFToolTranslationContext::tr("Hidden"));
m_formatter->writeTableHeaderColumn("value", PDFToolTranslationContext::tr("Value"));
m_formatter->endTableHeaderRow();
int ref = 0;
for (const pdf::PDFStructureTreeAttribute& attribute : attributes)
{
m_formatter->beginTableRow("attribute", ref);
m_formatter->writeTableColumn("no", m_locale.toString(ref + 1), Qt::AlignRight);
m_formatter->writeTableColumn("type", attribute.getTypeName(&m_document->getStorage()));
m_formatter->writeTableColumn("owner", attribute.getOwnerName());
if (attribute.getRevision() > 0)
{
m_formatter->writeTableColumn("revision", m_locale.toString(attribute.getRevision()));
}
else
{
m_formatter->writeTableColumn("revision", QString());
}
if (attribute.isUser())
{
m_formatter->writeTableColumn("hidden", attribute.getUserPropertyIsHidden(&m_document->getStorage()) ? PDFToolTranslationContext::tr("Yes") : PDFToolTranslationContext::tr("No"));
}
else
{
m_formatter->writeTableColumn("hidden", QString());
}
QString value;
pdf::PDFObject valueObject = attribute.getValue();
if (attribute.isUser())
{
value = attribute.getUserPropertyFormattedValue(&m_document->getStorage());
valueObject = attribute.getUserPropertyValue(&m_document->getStorage());
}
valueObject = m_document->getObject(valueObject);
if (value.isEmpty())
{
switch (valueObject.getType())
{
case pdf::PDFObject::Type::Null:
value = PDFToolTranslationContext::tr("[null]");
break;
case pdf::PDFObject::Type::Bool:
value = valueObject.getBool() ? PDFToolTranslationContext::tr("Yes") : PDFToolTranslationContext::tr("No");
break;
case pdf::PDFObject::Type::Int:
value = m_locale.toString(valueObject.getInteger());
break;
case pdf::PDFObject::Type::Real:
value = m_locale.toString(valueObject.getReal());
break;
case pdf::PDFObject::Type::String:
case pdf::PDFObject::Type::Name:
value = pdf::PDFEncoding::convertSmartFromByteStringToUnicode(valueObject.getString(), nullptr);
break;
case pdf::PDFObject::Type::Array:
case pdf::PDFObject::Type::Dictionary:
case pdf::PDFObject::Type::Stream:
case pdf::PDFObject::Type::Reference:
value = PDFToolTranslationContext::tr("[complex type]");
break;
default:
break;
}
}
m_formatter->writeTableColumn("value", value);
m_formatter->endTableRow();
++ref;
}
m_formatter->endTable();
}
bool hasText = false;
std::array<QString, pdf::PDFStructureElement::LastStringValue> stringValues;
for (int i = 0; i < pdf::PDFStructureElement::LastStringValue; ++i)
{
stringValues[i] = structureElement->getText(static_cast<pdf::PDFStructureElement::StringValue>(i));
hasText = hasText || !stringValues[i].isEmpty();
}
if (hasText)
{
m_formatter->beginTable("properties", PDFToolTranslationContext::tr("Properties"));
m_formatter->beginTableHeaderRow("header");
m_formatter->writeTableHeaderColumn("no", PDFToolTranslationContext::tr("No"));
m_formatter->writeTableHeaderColumn("property", PDFToolTranslationContext::tr("Property"));
m_formatter->writeTableHeaderColumn("value", PDFToolTranslationContext::tr("Value"));
m_formatter->endTableHeaderRow();
int ref = 1;
for (int i = 0; i < pdf::PDFStructureElement::LastStringValue; ++i)
{
if (stringValues[i].isEmpty())
{
continue;
}
QString propertyName;
switch (i)
{
case pdf::PDFStructureElement::Title:
propertyName = PDFToolTranslationContext::tr("Title");
break;
case pdf::PDFStructureElement::Language:
propertyName = PDFToolTranslationContext::tr("Language");
break;
case pdf::PDFStructureElement::AlternativeDescription:
propertyName = PDFToolTranslationContext::tr("Alternative description");
break;
case pdf::PDFStructureElement::ExpandedForm:
propertyName = PDFToolTranslationContext::tr("Expanded form");
break;
case pdf::PDFStructureElement::ActualText:
propertyName = PDFToolTranslationContext::tr("Actual text");
break;
case pdf::PDFStructureElement::Phoneme:
propertyName = PDFToolTranslationContext::tr("Phoneme");
break;
default:
Q_ASSERT(false);
break;
}
m_formatter->beginTableRow("property", i);
m_formatter->writeTableColumn("no", m_locale.toString(ref++), Qt::AlignRight);
m_formatter->writeTableColumn("property", propertyName);
m_formatter->writeTableColumn("value", stringValues[i]);
m_formatter->endTableRow();
}
m_formatter->endTable();
}
acceptChildren(structureElement);
m_formatter->endHeader();
}
/// Writes a marked content reference as a text item. The marked content
/// identifier is used both in the description and as the reference of the item.
/// \param structureMarkedContentReference Visited marked content reference
void PDFStructureTreePrintVisitor::visitStructureMarkedContentReference(const pdf::PDFStructureMarkedContentReference* structureMarkedContentReference)
{
    const pdf::PDFInteger markedContentId = structureMarkedContentReference->getMarkedContentIdentifier();
    const QString description = PDFToolTranslationContext::tr("Marked Content Reference %1").arg(markedContentId);

    // NOTE(review): the reference parameter of writeText defaults to 0, so
    // an identifier equal to 0 presumably cannot be told apart from
    // "no reference" by the output formatter - confirm.
    m_formatter->writeText("marked-content-reference", description, markedContentId);
}
/// Writes an object reference as a text item. The description contains
/// object number and generation of the referenced object, the object
/// number is also used as the reference of the item.
/// \param structureObjectReference Visited object reference
void PDFStructureTreePrintVisitor::visitStructureObjectReference(const pdf::PDFStructureObjectReference* structureObjectReference)
{
    const pdf::PDFObjectReference objectReference = structureObjectReference->getObjectReference();
    const QString description = PDFToolTranslationContext::tr("Structure Object Reference [%1 %2 R]").arg(objectReference.objectNumber).arg(objectReference.generation);

    m_formatter->writeText("structure-object-reference", description, objectReference.objectNumber);
}
// File-local instance of the application, created during static initialization.
// NOTE(review): presumably the base class constructor registers the instance
// among the available tool applications - confirm in PDFToolAbstractApplication.
static PDFToolInfoStructureTreeApplication s_infoStructureTreeApplication;
/// Returns one of the standard strings describing this application.
/// \param standardString Requested string (command, name or description)
/// \returns Requested string; for an unknown request asserts and returns empty string
QString PDFToolInfoStructureTreeApplication::getStandardString(StandardString standardString) const
{
    if (standardString == Command)
    {
        return "info-struct-tree";
    }

    if (standardString == Name)
    {
        return PDFToolTranslationContext::tr("Info (Structure tree)");
    }

    if (standardString == Description)
    {
        return PDFToolTranslationContext::tr("Examine structure tree in tagged document.");
    }

    // Unknown standard string requested
    Q_ASSERT(false);
    return QString();
}
/// Reads the document, parses its structure tree and writes the content
/// of the tree to the console in the requested output style.
/// \param options Tool options
/// \returns ErrorDocumentReading, if the document cannot be read, otherwise
///          ExitSuccess (also when the document has no structure tree - in
///          that case only an error message is written)
int PDFToolInfoStructureTreeApplication::execute(const PDFToolOptions& options)
{
    pdf::PDFDocument document;
    QByteArray sourceData;

    const bool isDocumentRead = readDocument(options, document, &sourceData);
    if (!isDocumentRead)
    {
        return ErrorDocumentReading;
    }

    pdf::PDFStructureTree structureTree = pdf::PDFStructureTree::parse(&document.getStorage(), document.getCatalog()->getStructureTreeRoot());

    if (!structureTree.isValid())
    {
        // Missing structure tree is reported, but it is not treated as a failure
        PDFConsole::writeError(PDFToolTranslationContext::tr("No structure tree found in document."), options.outputCodec);
        return ExitSuccess;
    }

    PDFOutputFormatter formatter(options.outputStyle, options.outputCodec);
    formatter.beginDocument("info-structure-tree", PDFToolTranslationContext::tr("Structure tree in document %1").arg(options.document));

    PDFStructureTreePrintVisitor visitor(&document, &structureTree, &formatter);
    structureTree.accept(&visitor);

    formatter.endDocument();
    PDFConsole::writeText(formatter.getString(), options.outputCodec);

    return ExitSuccess;
}
/// Returns option flags of this application, which are the ConsoleFormat
/// and OpenDocument flags combined.
// NOTE(review): presumably the flags select, which command line options
// are offered for this tool - confirm in PDFToolAbstractApplication.
PDFToolAbstractApplication::Options PDFToolInfoStructureTreeApplication::getOptionsFlags() const
{
return ConsoleFormat | OpenDocument;
}
} // namespace pdftool

View File

@ -0,0 +1,36 @@
// Copyright (C) 2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFTOOLINFOSTRUCTURETREE_H
#define PDFTOOLINFOSTRUCTURETREE_H
#include "pdftoolabstractapplication.h"
namespace pdftool
{
/// Tool application, which examines the structure tree of a tagged
/// document and writes its content to the console.
class PDFToolInfoStructureTreeApplication : public PDFToolAbstractApplication
{
public:
/// Returns standard string (command, name or description) of the application
virtual QString getStandardString(StandardString standardString) const override;
/// Executes the application with given options and returns the exit code
virtual int execute(const PDFToolOptions& options) override;
/// Returns option flags of the application
virtual Options getOptionsFlags() const override;
};
} // namespace pdftool
#endif // PDFTOOLINFOSTRUCTURETREE_H