mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
Structure tree information tool
This commit is contained in:
299
PdfTool/pdftoolinfostructuretree.cpp
Normal file
299
PdfTool/pdftoolinfostructuretree.cpp
Normal file
@ -0,0 +1,299 @@
|
||||
// Copyright (C) 2020 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdftoolinfostructuretree.h"
|
||||
#include "pdfstructuretree.h"
|
||||
#include "pdfencoding.h"
|
||||
|
||||
namespace pdftool
|
||||
{
|
||||
|
||||
class PDFStructureTreePrintVisitor : public pdf::PDFStructureTreeAbstractVisitor
|
||||
{
|
||||
public:
|
||||
explicit PDFStructureTreePrintVisitor(const pdf::PDFDocument* document,
|
||||
const pdf::PDFStructureTree* tree,
|
||||
PDFOutputFormatter* formatter) :
|
||||
m_document(document),
|
||||
m_tree(tree),
|
||||
m_formatter(formatter)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
virtual void visitStructureTree(const pdf::PDFStructureTree* structureTree) override;
|
||||
virtual void visitStructureElement(const pdf::PDFStructureElement* structureElement) override;
|
||||
virtual void visitStructureMarkedContentReference(const pdf::PDFStructureMarkedContentReference* structureMarkedContentReference) override;
|
||||
virtual void visitStructureObjectReference(const pdf::PDFStructureObjectReference* structureObjectReference) override;
|
||||
|
||||
private:
|
||||
const pdf::PDFDocument* m_document;
|
||||
const pdf::PDFStructureTree* m_tree;
|
||||
PDFOutputFormatter* m_formatter;
|
||||
QLocale m_locale;
|
||||
};
|
||||
|
||||
void PDFStructureTreePrintVisitor::visitStructureTree(const pdf::PDFStructureTree* structureTree)
|
||||
{
|
||||
m_formatter->beginHeader("tree", PDFToolTranslationContext::tr("Structure Tree"));
|
||||
acceptChildren(structureTree);
|
||||
m_formatter->endHeader();
|
||||
}
|
||||
|
||||
void PDFStructureTreePrintVisitor::visitStructureElement(const pdf::PDFStructureElement* structureElement)
|
||||
{
|
||||
pdf::PDFInteger pageIndex = m_document->getCatalog()->getPageIndexFromPageReference(structureElement->getPageReference());
|
||||
m_formatter->beginHeader("element", QString::fromLatin1(structureElement->getTypeName()), pageIndex);
|
||||
|
||||
const std::vector<pdf::PDFStructureTreeAttribute>& attributes = structureElement->getAttributes();
|
||||
if (!attributes.empty())
|
||||
{
|
||||
m_formatter->beginTable("attributes", PDFToolTranslationContext::tr("Attributes"));
|
||||
|
||||
m_formatter->beginTableHeaderRow("header");
|
||||
m_formatter->writeTableHeaderColumn("no", PDFToolTranslationContext::tr("No"));
|
||||
m_formatter->writeTableHeaderColumn("type", PDFToolTranslationContext::tr("Type"));
|
||||
m_formatter->writeTableHeaderColumn("owner", PDFToolTranslationContext::tr("Owner"));
|
||||
m_formatter->writeTableHeaderColumn("revision", PDFToolTranslationContext::tr("Revision"));
|
||||
m_formatter->writeTableHeaderColumn("hidden", PDFToolTranslationContext::tr("Hidden"));
|
||||
m_formatter->writeTableHeaderColumn("value", PDFToolTranslationContext::tr("Value"));
|
||||
m_formatter->endTableHeaderRow();
|
||||
|
||||
int ref = 0;
|
||||
for (const pdf::PDFStructureTreeAttribute& attribute : attributes)
|
||||
{
|
||||
m_formatter->beginTableRow("attribute", ref);
|
||||
|
||||
m_formatter->writeTableColumn("no", m_locale.toString(ref + 1), Qt::AlignRight);
|
||||
m_formatter->writeTableColumn("type", attribute.getTypeName(&m_document->getStorage()));
|
||||
m_formatter->writeTableColumn("owner", attribute.getOwnerName());
|
||||
|
||||
if (attribute.getRevision() > 0)
|
||||
{
|
||||
m_formatter->writeTableColumn("revision", m_locale.toString(attribute.getRevision()));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_formatter->writeTableColumn("revision", QString());
|
||||
}
|
||||
|
||||
if (attribute.isUser())
|
||||
{
|
||||
m_formatter->writeTableColumn("hidden", attribute.getUserPropertyIsHidden(&m_document->getStorage()) ? PDFToolTranslationContext::tr("Yes") : PDFToolTranslationContext::tr("No"));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_formatter->writeTableColumn("hidden", QString());
|
||||
}
|
||||
|
||||
QString value;
|
||||
pdf::PDFObject valueObject = attribute.getValue();
|
||||
if (attribute.isUser())
|
||||
{
|
||||
value = attribute.getUserPropertyFormattedValue(&m_document->getStorage());
|
||||
valueObject = attribute.getUserPropertyValue(&m_document->getStorage());
|
||||
}
|
||||
valueObject = m_document->getObject(valueObject);
|
||||
|
||||
if (value.isEmpty())
|
||||
{
|
||||
switch (valueObject.getType())
|
||||
{
|
||||
case pdf::PDFObject::Type::Null:
|
||||
value = PDFToolTranslationContext::tr("[null]");
|
||||
break;
|
||||
|
||||
case pdf::PDFObject::Type::Bool:
|
||||
value = valueObject.getBool() ? PDFToolTranslationContext::tr("Yes") : PDFToolTranslationContext::tr("No");
|
||||
break;
|
||||
|
||||
case pdf::PDFObject::Type::Int:
|
||||
value = m_locale.toString(valueObject.getInteger());
|
||||
break;
|
||||
|
||||
case pdf::PDFObject::Type::Real:
|
||||
value = m_locale.toString(valueObject.getReal());
|
||||
break;
|
||||
|
||||
case pdf::PDFObject::Type::String:
|
||||
case pdf::PDFObject::Type::Name:
|
||||
value = pdf::PDFEncoding::convertSmartFromByteStringToUnicode(valueObject.getString(), nullptr);
|
||||
break;
|
||||
|
||||
case pdf::PDFObject::Type::Array:
|
||||
case pdf::PDFObject::Type::Dictionary:
|
||||
case pdf::PDFObject::Type::Stream:
|
||||
case pdf::PDFObject::Type::Reference:
|
||||
value = PDFToolTranslationContext::tr("[complex type]");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
m_formatter->writeTableColumn("value", value);
|
||||
|
||||
m_formatter->endTableRow();
|
||||
|
||||
++ref;
|
||||
}
|
||||
|
||||
m_formatter->endTable();
|
||||
}
|
||||
|
||||
bool hasText = false;
|
||||
std::array<QString, pdf::PDFStructureElement::LastStringValue> stringValues;
|
||||
for (int i = 0; i < pdf::PDFStructureElement::LastStringValue; ++i)
|
||||
{
|
||||
stringValues[i] = structureElement->getText(static_cast<pdf::PDFStructureElement::StringValue>(i));
|
||||
hasText = hasText || !stringValues[i].isEmpty();
|
||||
}
|
||||
|
||||
if (hasText)
|
||||
{
|
||||
m_formatter->beginTable("properties", PDFToolTranslationContext::tr("Properties"));
|
||||
|
||||
m_formatter->beginTableHeaderRow("header");
|
||||
m_formatter->writeTableHeaderColumn("no", PDFToolTranslationContext::tr("No"));
|
||||
m_formatter->writeTableHeaderColumn("property", PDFToolTranslationContext::tr("Property"));
|
||||
m_formatter->writeTableHeaderColumn("value", PDFToolTranslationContext::tr("Value"));
|
||||
m_formatter->endTableHeaderRow();
|
||||
|
||||
int ref = 1;
|
||||
for (int i = 0; i < pdf::PDFStructureElement::LastStringValue; ++i)
|
||||
{
|
||||
if (stringValues[i].isEmpty())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
QString propertyName;
|
||||
switch (i)
|
||||
{
|
||||
case pdf::PDFStructureElement::Title:
|
||||
propertyName = PDFToolTranslationContext::tr("Title");
|
||||
break;
|
||||
|
||||
case pdf::PDFStructureElement::Language:
|
||||
propertyName = PDFToolTranslationContext::tr("Language");
|
||||
break;
|
||||
|
||||
case pdf::PDFStructureElement::AlternativeDescription:
|
||||
propertyName = PDFToolTranslationContext::tr("Alternative description");
|
||||
break;
|
||||
|
||||
case pdf::PDFStructureElement::ExpandedForm:
|
||||
propertyName = PDFToolTranslationContext::tr("Expanded form");
|
||||
break;
|
||||
|
||||
case pdf::PDFStructureElement::ActualText:
|
||||
propertyName = PDFToolTranslationContext::tr("Actual text");
|
||||
break;
|
||||
|
||||
case pdf::PDFStructureElement::Phoneme:
|
||||
propertyName = PDFToolTranslationContext::tr("Phoneme");
|
||||
break;
|
||||
|
||||
default:
|
||||
Q_ASSERT(false);
|
||||
break;
|
||||
}
|
||||
|
||||
m_formatter->beginTableRow("property", i);
|
||||
m_formatter->writeTableColumn("no", m_locale.toString(ref++), Qt::AlignRight);
|
||||
m_formatter->writeTableColumn("property", propertyName);
|
||||
m_formatter->writeTableColumn("value", stringValues[i]);
|
||||
m_formatter->endTableRow();
|
||||
}
|
||||
|
||||
m_formatter->endTable();
|
||||
}
|
||||
|
||||
acceptChildren(structureElement);
|
||||
m_formatter->endHeader();
|
||||
}
|
||||
|
||||
void PDFStructureTreePrintVisitor::visitStructureMarkedContentReference(const pdf::PDFStructureMarkedContentReference* structureMarkedContentReference)
|
||||
{
|
||||
const pdf::PDFInteger reference = structureMarkedContentReference->getMarkedContentIdentifier();
|
||||
m_formatter->writeText("marked-content-reference", PDFToolTranslationContext::tr("Marked Content Reference %1").arg(reference), reference);
|
||||
}
|
||||
|
||||
void PDFStructureTreePrintVisitor::visitStructureObjectReference(const pdf::PDFStructureObjectReference* structureObjectReference)
|
||||
{
|
||||
const pdf::PDFObjectReference reference = structureObjectReference->getObjectReference();
|
||||
m_formatter->writeText("structure-object-reference", PDFToolTranslationContext::tr("Structure Object Reference [%1 %2 R]").arg(reference.objectNumber).arg(reference.generation), reference.objectNumber);
|
||||
}
|
||||
|
||||
static PDFToolInfoStructureTreeApplication s_infoStructureTreeApplication;
|
||||
|
||||
QString PDFToolInfoStructureTreeApplication::getStandardString(StandardString standardString) const
|
||||
{
|
||||
switch (standardString)
|
||||
{
|
||||
case Command:
|
||||
return "info-struct-tree";
|
||||
|
||||
case Name:
|
||||
return PDFToolTranslationContext::tr("Info (Structure tree)");
|
||||
|
||||
case Description:
|
||||
return PDFToolTranslationContext::tr("Examine structure tree in tagged document.");
|
||||
|
||||
default:
|
||||
Q_ASSERT(false);
|
||||
break;
|
||||
}
|
||||
|
||||
return QString();
|
||||
}
|
||||
|
||||
int PDFToolInfoStructureTreeApplication::execute(const PDFToolOptions& options)
|
||||
{
|
||||
pdf::PDFDocument document;
|
||||
QByteArray sourceData;
|
||||
if (!readDocument(options, document, &sourceData))
|
||||
{
|
||||
return ErrorDocumentReading;
|
||||
}
|
||||
|
||||
pdf::PDFStructureTree structureTree = pdf::PDFStructureTree::parse(&document.getStorage(), document.getCatalog()->getStructureTreeRoot());
|
||||
if (structureTree.isValid())
|
||||
{
|
||||
PDFOutputFormatter formatter(options.outputStyle, options.outputCodec);
|
||||
formatter.beginDocument("info-structure-tree", PDFToolTranslationContext::tr("Structure tree in document %1").arg(options.document));
|
||||
|
||||
PDFStructureTreePrintVisitor visitor(&document, &structureTree, &formatter);
|
||||
structureTree.accept(&visitor);
|
||||
|
||||
formatter.endDocument();
|
||||
PDFConsole::writeText(formatter.getString(), options.outputCodec);
|
||||
}
|
||||
else
|
||||
{
|
||||
PDFConsole::writeError(PDFToolTranslationContext::tr("No structure tree found in document."), options.outputCodec);
|
||||
}
|
||||
|
||||
return ExitSuccess;
|
||||
}
|
||||
|
||||
PDFToolAbstractApplication::Options PDFToolInfoStructureTreeApplication::getOptionsFlags() const
|
||||
{
|
||||
return ConsoleFormat | OpenDocument;
|
||||
}
|
||||
|
||||
} // namespace pdftool
|
Reference in New Issue
Block a user