mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-03-15 10:50:18 +01:00
Structure tree parsing
This commit is contained in:
parent
0a62ad618d
commit
2daf7bccf4
@ -43,9 +43,12 @@ struct PDFStructureTreeAttributeDefinition
|
|||||||
/// always returns valid pointer. For unknown attribute, it returns
|
/// always returns valid pointer. For unknown attribute, it returns
|
||||||
/// user attribute definition.
|
/// user attribute definition.
|
||||||
/// \param name Attribute name
|
/// \param name Attribute name
|
||||||
const PDFStructureTreeAttributeDefinition* getDefinition(const QByteArray& name);
|
static const PDFStructureTreeAttributeDefinition* getDefinition(const QByteArray& name);
|
||||||
|
|
||||||
PDFStructureTreeAttribute::Owner getOwnerFromString(const QByteArray& string);
|
/// Returns owner from string. If owner is not valid, then invalid
|
||||||
|
/// owner is returned.
|
||||||
|
/// \param string String
|
||||||
|
static PDFStructureTreeAttribute::Owner getOwnerFromString(const QByteArray& string);
|
||||||
|
|
||||||
PDFStructureTreeAttribute::Attribute type = PDFStructureTreeAttribute::Attribute::User;
|
PDFStructureTreeAttribute::Attribute type = PDFStructureTreeAttribute::Attribute::User;
|
||||||
const char* name = nullptr;
|
const char* name = nullptr;
|
||||||
@ -136,6 +139,67 @@ static constexpr std::array<const PDFStructureTreeAttributeDefinition, PDFStruct
|
|||||||
PDFStructureTreeAttributeDefinition(PDFStructureTreeAttribute::Attribute::Subtype, "Subtype", false)
|
PDFStructureTreeAttributeDefinition(PDFStructureTreeAttribute::Attribute::Subtype, "Subtype", false)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Table pairing each named structure item type with its textual name.
/// PDFStructureItem::getTypeFromName scans this table linearly and
/// compares the requested name with the second member of each entry.
static constexpr std::pair<PDFStructureItem::Type, const char*> s_structureTreeItemTypes[] = {
    { PDFStructureItem::Document, "Document" },
    { PDFStructureItem::DocumentFragment, "DocumentFragment" },
    { PDFStructureItem::Part, "Part" },
    { PDFStructureItem::Div, "Div" },
    { PDFStructureItem::Aside, "Aside" },
    { PDFStructureItem::P, "P" },
    { PDFStructureItem::H1, "H1" },
    { PDFStructureItem::H2, "H2" },
    { PDFStructureItem::H3, "H3" },
    { PDFStructureItem::H4, "H4" },
    { PDFStructureItem::H5, "H5" },
    { PDFStructureItem::H6, "H6" },
    { PDFStructureItem::H7, "H7" },
    { PDFStructureItem::H, "H" },
    { PDFStructureItem::Title, "Title" },
    { PDFStructureItem::FENote, "FENote" },
    { PDFStructureItem::Sub, "Sub" },
    { PDFStructureItem::Lbl, "Lbl" },
    { PDFStructureItem::Span, "Span" },
    { PDFStructureItem::Em, "Em" },
    { PDFStructureItem::Strong, "Strong" },
    { PDFStructureItem::Link, "Link" },
    { PDFStructureItem::Annot, "Annot" },
    { PDFStructureItem::Form, "Form" },
    { PDFStructureItem::Ruby, "Ruby" },
    { PDFStructureItem::RB, "RB" },
    { PDFStructureItem::RT, "RT" },
    { PDFStructureItem::RP, "RP" },
    { PDFStructureItem::Warichu, "Warichu" },
    { PDFStructureItem::WR, "WR" },
    { PDFStructureItem::WP, "WP" },
    { PDFStructureItem::L, "L" },
    { PDFStructureItem::LI, "LI" },
    { PDFStructureItem::LBody, "LBody" },
    { PDFStructureItem::Table, "Table" },
    { PDFStructureItem::TR, "TR" },
    { PDFStructureItem::TH, "TH" },
    { PDFStructureItem::TD, "TD" },
    { PDFStructureItem::THead, "THead" },
    { PDFStructureItem::TBody, "TBody" },
    { PDFStructureItem::TFoot, "TFoot" },
    { PDFStructureItem::Caption, "Caption" },
    { PDFStructureItem::Figure, "Figure" },
    { PDFStructureItem::Formula, "Formula" },
    { PDFStructureItem::Artifact, "Artifact" },
    { PDFStructureItem::Sect, "Sect" },
    { PDFStructureItem::Art, "Art" },
    { PDFStructureItem::BlockQuote, "BlockQuote" },
    { PDFStructureItem::TOC, "TOC" },
    { PDFStructureItem::TOCI, "TOCI" },
    { PDFStructureItem::Index, "Index" },
    { PDFStructureItem::NonStruct, "NonStruct" },
    { PDFStructureItem::Private, "Private" },
    { PDFStructureItem::Quote, "Quote" },
    { PDFStructureItem::Note, "Note" },
    { PDFStructureItem::Reference, "Reference" },
    { PDFStructureItem::BibEntry, "BibEntry" },
    { PDFStructureItem::Code, "Code" }
};
|
||||||
|
|
||||||
const PDFStructureTreeAttributeDefinition* PDFStructureTreeAttributeDefinition::getDefinition(const QByteArray& name)
|
const PDFStructureTreeAttributeDefinition* PDFStructureTreeAttributeDefinition::getDefinition(const QByteArray& name)
|
||||||
{
|
{
|
||||||
for (const PDFStructureTreeAttributeDefinition& definition : s_attributeDefinitions)
|
for (const PDFStructureTreeAttributeDefinition& definition : s_attributeDefinitions)
|
||||||
@ -160,7 +224,7 @@ PDFStructureTreeAttribute::Owner PDFStructureTreeAttributeDefinition::getOwnerFr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return PDFStructureTreeAttribute::Owner::User;
|
return PDFStructureTreeAttribute::Owner::Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
PDFStructureTreeAttribute::PDFStructureTreeAttribute() :
|
PDFStructureTreeAttribute::PDFStructureTreeAttribute() :
|
||||||
@ -173,6 +237,20 @@ PDFStructureTreeAttribute::PDFStructureTreeAttribute() :
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Constructs an attribute from its individual parts. Every argument is
/// stored into the corresponding member; the value object is moved in.
/// \param definition Attribute definition
/// \param owner Attribute owner
/// \param revision Attribute revision number
/// \param namespaceReference Reference to the namespace of the attribute
/// \param value Attribute value
PDFStructureTreeAttribute::PDFStructureTreeAttribute(
        const PDFStructureTreeAttributeDefinition* definition,
        Owner owner,
        PDFInteger revision,
        PDFObjectReference namespaceReference,
        PDFObject value) :
    m_definition(definition), m_owner(owner), m_revision(revision),
    m_namespace(namespaceReference), m_value(qMove(value))
{
}
|
||||||
|
|
||||||
PDFStructureTreeAttribute::Attribute PDFStructureTreeAttribute::getType() const
|
PDFStructureTreeAttribute::Attribute PDFStructureTreeAttribute::getType() const
|
||||||
{
|
{
|
||||||
Q_ASSERT(m_definition);
|
Q_ASSERT(m_definition);
|
||||||
@ -228,6 +306,76 @@ bool PDFStructureTreeAttribute::getUserPropertyIsHidden(const PDFObjectStorage*
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PDFStructureTreeAttribute::parseAttributes(const PDFObjectStorage* storage, PDFObject object, std::vector<PDFStructureTreeAttribute>& attributes)
|
||||||
|
{
|
||||||
|
object = storage->getObject(object);
|
||||||
|
if (object.isDictionary())
|
||||||
|
{
|
||||||
|
parseAttributeDictionary(storage, object, attributes);
|
||||||
|
}
|
||||||
|
else if (object.isArray())
|
||||||
|
{
|
||||||
|
size_t startIndex = attributes.size();
|
||||||
|
|
||||||
|
for (PDFObject itemObject : *object.getArray())
|
||||||
|
{
|
||||||
|
itemObject = storage->getObject(itemObject);
|
||||||
|
if (itemObject.isInt())
|
||||||
|
{
|
||||||
|
// It is revision number
|
||||||
|
const PDFInteger revision = itemObject.getInteger();
|
||||||
|
for (; startIndex < attributes.size(); ++startIndex)
|
||||||
|
{
|
||||||
|
attributes[startIndex].setRevision(revision);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (itemObject.isDictionary())
|
||||||
|
{
|
||||||
|
// It is attribute
|
||||||
|
parseAttributeDictionary(storage, itemObject, attributes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void PDFStructureTreeAttribute::parseAttributeDictionary(const PDFObjectStorage* storage, PDFObject object, std::vector<PDFStructureTreeAttribute>& attributes)
|
||||||
|
{
|
||||||
|
Q_ASSERT(object.isDictionary());
|
||||||
|
const PDFDictionary* attributeDictionary = object.getDictionary();
|
||||||
|
|
||||||
|
PDFDocumentDataLoaderDecorator loader(storage);
|
||||||
|
const QByteArray ownerName = loader.readNameFromDictionary(attributeDictionary, "O");
|
||||||
|
const Owner owner = PDFStructureTreeAttributeDefinition::getOwnerFromString(ownerName);
|
||||||
|
if (owner == Owner::UserProperties)
|
||||||
|
{
|
||||||
|
// User properties
|
||||||
|
PDFObject userPropertiesArrayObject = storage->getObject(attributeDictionary->get("P"));
|
||||||
|
if (userPropertiesArrayObject.isArray())
|
||||||
|
{
|
||||||
|
const PDFArray* userPropertiesArray = userPropertiesArrayObject.getArray();
|
||||||
|
for (const PDFObject& userPropertyObject : *userPropertiesArray)
|
||||||
|
{
|
||||||
|
attributes.emplace_back(&s_attributeDefinitions.front(), owner, 0, PDFObjectReference(), userPropertyObject);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const PDFObjectReference namespaceReference = loader.readReferenceFromDictionary(attributeDictionary, "NS");
|
||||||
|
const size_t count = attributeDictionary->getCount();
|
||||||
|
for (size_t i = 0; i < count; ++i)
|
||||||
|
{
|
||||||
|
const PDFInplaceOrMemoryString& key = attributeDictionary->getKey(i);
|
||||||
|
if (key == "O" || key == "NS")
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes.emplace_back(PDFStructureTreeAttributeDefinition::getDefinition(key.getString()), owner, 0, namespaceReference, attributeDictionary->getValue(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<PDFObjectReference> PDFStructureTree::getParents(PDFInteger id) const
|
std::vector<PDFObjectReference> PDFStructureTree::getParents(PDFInteger id) const
|
||||||
{
|
{
|
||||||
std::vector<PDFObjectReference> result;
|
std::vector<PDFObjectReference> result;
|
||||||
@ -240,15 +388,39 @@ std::vector<PDFObjectReference> PDFStructureTree::getParents(PDFInteger id) cons
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Translates role to the structure item type. The role map of this
/// tree is searched first; if the role is not present in it, then
/// the role is looked up by getTypeFromName.
/// \param role Role
PDFStructureItem::Type PDFStructureTree::getTypeFromRole(const QByteArray& role) const
{
    const auto roleIt = m_roleMap.find(role);
    return (roleIt != m_roleMap.cend()) ? roleIt->second : getTypeFromName(role);
}
|
||||||
|
|
||||||
|
const std::vector<PDFStructureTreeAttribute>& PDFStructureTree::getClassAttributes(const QByteArray& className) const
|
||||||
|
{
|
||||||
|
auto it = m_classMap.find(className);
|
||||||
|
if (it != m_classMap.cend())
|
||||||
|
{
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const std::vector<PDFStructureTreeAttribute> dummy;
|
||||||
|
return dummy;
|
||||||
|
}
|
||||||
|
|
||||||
PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObject object)
|
PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObject object)
|
||||||
{
|
{
|
||||||
PDFStructureTree tree;
|
PDFStructureTree tree;
|
||||||
|
|
||||||
if (const PDFDictionary* dictionary = storage->getDictionaryFromObject(object))
|
if (const PDFDictionary* dictionary = storage->getDictionaryFromObject(object))
|
||||||
{
|
{
|
||||||
|
PDFDocumentDataLoaderDecorator loader(storage);
|
||||||
|
|
||||||
PDFMarkedObjectsContext context;
|
PDFMarkedObjectsContext context;
|
||||||
PDFObject kids = dictionary->get("K");
|
PDFObject kids = dictionary->get("K");
|
||||||
|
|
||||||
if (kids.isArray())
|
if (kids.isArray())
|
||||||
{
|
{
|
||||||
const PDFArray* kidsArray = kids.getArray();
|
const PDFArray* kidsArray = kids.getArray();
|
||||||
@ -320,9 +492,78 @@ PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObj
|
|||||||
}
|
}
|
||||||
std::stable_sort(tree.m_parentTreeEntries.begin(), tree.m_parentTreeEntries.end());
|
std::stable_sort(tree.m_parentTreeEntries.begin(), tree.m_parentTreeEntries.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tree.m_parentNextKey = loader.readIntegerFromDictionary(dictionary, "ParentTreeNextKey", 0);
|
||||||
|
|
||||||
|
if (const PDFDictionary* roleMapDictionary = storage->getDictionaryFromObject(dictionary->get("RoleMap")))
|
||||||
|
{
|
||||||
|
const size_t size = roleMapDictionary->getCount();
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
tree.m_roleMap[roleMapDictionary->getKey(i).getString()] = getTypeFromName(loader.readName(roleMapDictionary->getValue(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const PDFDictionary* classMapDictionary = storage->getDictionaryFromObject(dictionary->get("ClassMap")))
|
||||||
|
{
|
||||||
|
const size_t size = classMapDictionary->getCount();
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
PDFStructureTreeAttribute::parseAttributes(storage, classMapDictionary->getValue(i), tree.m_classMap[classMapDictionary->getKey(i).getString()]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dictionary->hasKey("Namespaces"))
|
||||||
|
{
|
||||||
|
tree.m_namespaces = loader.readObjectList<PDFStructureTreeNamespace>(dictionary->get("Namespaces"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dictionary->hasKey("PronunciationLexicon"))
|
||||||
|
{
|
||||||
|
tree.m_pronunciationLexicons = loader.readObjectList<PDFFileSpecification>(dictionary->get("PronunciationLexicon"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dictionary->hasKey("AF"))
|
||||||
|
{
|
||||||
|
tree.m_associatedFiles = loader.readObjectList<PDFFileSpecification>(dictionary->get("AF"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return tree;
|
return tree;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns structure item type for given name. The name is compared
/// with names in table s_structureTreeItemTypes and type of the first
/// matching entry is returned. If no entry matches, Invalid is returned.
/// \param name Name
PDFStructureItem::Type PDFStructureItem::getTypeFromName(const QByteArray& name)
{
    const auto tableEnd = std::end(s_structureTreeItemTypes);
    const auto match = std::find_if(std::begin(s_structureTreeItemTypes), tableEnd,
                                    [&name](const std::pair<PDFStructureItem::Type, const char*>& entry)
    {
        return name == entry.second;
    });

    return (match != tableEnd) ? match->first : Invalid;
}
|
||||||
|
|
||||||
|
/// Parses structure tree namespace from the object. If the object
/// is a reference, it is remembered as the self reference. When the
/// resolved object is not a dictionary, the remaining members are
/// left default-constructed.
/// \param storage Storage (for resolving of indirect objects)
/// \param object Namespace object
PDFStructureTreeNamespace PDFStructureTreeNamespace::parse(const PDFObjectStorage* storage, PDFObject object)
{
    PDFStructureTreeNamespace parsedNamespace;

    // The reference must be captured before the object is resolved
    if (object.isReference())
    {
        parsedNamespace.m_selfReference = object.getReference();
    }
    object = storage->getObject(object);

    const PDFDictionary* dictionary = storage->getDictionaryFromObject(object);
    if (!dictionary)
    {
        return parsedNamespace;
    }

    PDFDocumentDataLoaderDecorator loader(storage);
    parsedNamespace.m_namespace = loader.readTextStringFromDictionary(dictionary, "NS", QString());
    parsedNamespace.m_schema = PDFFileSpecification::parse(storage, dictionary->get("Schema"));
    parsedNamespace.m_roleMapNS = dictionary->get("RoleMapNS");

    return parsedNamespace;
}
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
#include "pdfobject.h"
|
#include "pdfobject.h"
|
||||||
#include "pdfobjectutils.h"
|
#include "pdfobjectutils.h"
|
||||||
|
#include "pdffile.h"
|
||||||
|
|
||||||
namespace pdf
|
namespace pdf
|
||||||
{
|
{
|
||||||
@ -30,14 +31,13 @@ struct PDFStructureTreeAttributeDefinition;
|
|||||||
class PDFFORQTLIBSHARED_EXPORT PDFStructureTreeAttribute
|
class PDFFORQTLIBSHARED_EXPORT PDFStructureTreeAttribute
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit PDFStructureTreeAttribute();
|
|
||||||
|
|
||||||
enum class Owner
|
enum class Owner
|
||||||
{
|
{
|
||||||
Invalid,
|
Invalid,
|
||||||
|
|
||||||
/// Defined for user owner
|
/// Defined for user owner
|
||||||
User,
|
UserProperties,
|
||||||
|
|
||||||
/// Defined for NSO (namespace owner)
|
/// Defined for NSO (namespace owner)
|
||||||
NSO,
|
NSO,
|
||||||
@ -62,6 +62,13 @@ public:
|
|||||||
ARIA_1_1,
|
ARIA_1_1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
explicit PDFStructureTreeAttribute();
|
||||||
|
explicit PDFStructureTreeAttribute(const PDFStructureTreeAttributeDefinition* definition,
|
||||||
|
Owner owner,
|
||||||
|
PDFInteger revision,
|
||||||
|
PDFObjectReference namespaceReference,
|
||||||
|
PDFObject value);
|
||||||
|
|
||||||
enum Attribute
|
enum Attribute
|
||||||
{
|
{
|
||||||
User,
|
User,
|
||||||
@ -138,6 +145,9 @@ public:
|
|||||||
/// Returns attribute revision number
|
/// Returns attribute revision number
|
||||||
PDFInteger getRevision() const { return m_revision; }
|
PDFInteger getRevision() const { return m_revision; }
|
||||||
|
|
||||||
|
/// Sets attribute revision number
|
||||||
|
void setRevision(PDFInteger revision) { m_revision = revision; }
|
||||||
|
|
||||||
/// Returns namespace for this attribute (or empty reference, if it doesn't exist)
|
/// Returns namespace for this attribute (or empty reference, if it doesn't exist)
|
||||||
PDFObjectReference getNamespace() const { return m_namespace; }
|
PDFObjectReference getNamespace() const { return m_namespace; }
|
||||||
|
|
||||||
@ -167,7 +177,21 @@ public:
|
|||||||
/// \param storage Storage (for resolving of indirect objects)
|
/// \param storage Storage (for resolving of indirect objects)
|
||||||
bool getUserPropertyIsHidden(const PDFObjectStorage* storage) const;
|
bool getUserPropertyIsHidden(const PDFObjectStorage* storage) const;
|
||||||
|
|
||||||
|
/// Parses attributes and adds them into \p attributes array. Invalid
|
||||||
|
/// attributes are not added. New attributes are appended to the end
|
||||||
|
/// of the array.
|
||||||
|
/// \param storage Storage
|
||||||
|
/// \param object Container of attributes
|
||||||
|
/// \param attributes[in,out] Attributes
|
||||||
|
static void parseAttributes(const PDFObjectStorage* storage, PDFObject object, std::vector<PDFStructureTreeAttribute>& attributes);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/// Parses single attribute dictionary and appends new attributes to the end of the list.
|
||||||
|
/// \param storage Storage
|
||||||
|
/// \param object Container of attributes
|
||||||
|
/// \param attributes[in,out] Attributes
|
||||||
|
static void parseAttributeDictionary(const PDFObjectStorage* storage, PDFObject object, std::vector<PDFStructureTreeAttribute>& attributes);
|
||||||
|
|
||||||
const PDFStructureTreeAttributeDefinition* m_definition = nullptr;
|
const PDFStructureTreeAttributeDefinition* m_definition = nullptr;
|
||||||
|
|
||||||
/// Attribute owner
|
/// Attribute owner
|
||||||
@ -200,8 +224,38 @@ public:
|
|||||||
{
|
{
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~PDFStructureItem() = default;
|
virtual ~PDFStructureItem() = default;
|
||||||
|
|
||||||
|
/// Type of the structure item. Enumerators are numbered consecutively
/// from Invalid; LastType follows the last named type.
enum Type
{
    Invalid,

    // Document level types - chapter 14.8.4.3 of PDF 2.0 specification
    Document,
    DocumentFragment,

    // Grouping types - chapter 14.8.4.4 of PDF 2.0 specification
    Part,
    Div,
    Aside,

    // Block level structure types - chapter 14.8.4.5 of PDF 2.0 specification
    P,
    H1, H2, H3, H4, H5, H6, H7,
    H,
    Title,
    FENote,

    // Subblock level structure types - chapter 14.8.4.6 of PDF 2.0 specification
    Sub,

    // Inline structure types - chapter 14.8.4.7 of PDF 2.0 specification
    Lbl, Span, Em, Strong, Link, Annot, Form,
    Ruby, RB, RT, RP,
    Warichu, WR, WP,

    // Other structure types - chapter 14.8.4.8 of PDF 2.0 specification
    L, LI, LBody,
    Table, TR, TH, TD, THead, TBody, TFoot,
    Caption, Figure, Formula, Artifact,

    // PDF 1.7 backward compatibility types
    Sect, Art, BlockQuote,
    TOC, TOCI, Index,
    NonStruct, Private,
    Quote, Note, Reference, BibEntry, Code,

    // Last type identifier
    LastType
};
|
||||||
|
|
||||||
virtual PDFStructureTree* asStructureTree() { return nullptr; }
|
virtual PDFStructureTree* asStructureTree() { return nullptr; }
|
||||||
virtual const PDFStructureTree* asStructureTree() const { return nullptr; }
|
virtual const PDFStructureTree* asStructureTree() const { return nullptr; }
|
||||||
|
|
||||||
@ -217,12 +271,38 @@ public:
|
|||||||
/// \param context Parsing context
|
/// \param context Parsing context
|
||||||
static PDFStructureItemPointer parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context);
|
static PDFStructureItemPointer parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context);
|
||||||
|
|
||||||
|
/// Get structure tree type from name
|
||||||
|
/// \param name Name
|
||||||
|
static Type getTypeFromName(const QByteArray& name);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
PDFStructureItem* m_parent;
|
PDFStructureItem* m_parent;
|
||||||
PDFStructureTree* m_root;
|
PDFStructureTree* m_root;
|
||||||
std::vector<PDFStructureItemPointer> m_children;
|
std::vector<PDFStructureItemPointer> m_children;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Structure tree namespace
|
||||||
|
class PDFStructureTreeNamespace
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit inline PDFStructureTreeNamespace() = default;
|
||||||
|
|
||||||
|
const PDFObjectReference& getSelfReference() const { return m_selfReference; }
|
||||||
|
const QString& getNamespace() const { return m_namespace; }
|
||||||
|
const PDFFileSpecification& getSchema() const { return m_schema; }
|
||||||
|
const PDFObject& getRoleMapNS() const { return m_roleMapNS; }
|
||||||
|
|
||||||
|
static PDFStructureTreeNamespace parse(const PDFObjectStorage* storage, PDFObject object);
|
||||||
|
|
||||||
|
private:
|
||||||
|
PDFObjectReference m_selfReference;
|
||||||
|
QString m_namespace;
|
||||||
|
PDFFileSpecification m_schema;
|
||||||
|
PDFObject m_roleMapNS;
|
||||||
|
};
|
||||||
|
|
||||||
|
using PDFStructureTreeNamespaces = std::vector<PDFStructureTreeNamespace>;
|
||||||
|
|
||||||
/// Structure tree, contains structure element hierarchy
|
/// Structure tree, contains structure element hierarchy
|
||||||
class PDFStructureTree : public PDFStructureItem
|
class PDFStructureTree : public PDFStructureItem
|
||||||
{
|
{
|
||||||
@ -237,6 +317,25 @@ public:
|
|||||||
/// \param id Id
|
/// \param id Id
|
||||||
std::vector<PDFObjectReference> getParents(PDFInteger id) const;
|
std::vector<PDFObjectReference> getParents(PDFInteger id) const;
|
||||||
|
|
||||||
|
/// Returns type from role. Role can be an entry in RoleMap dictionary,
|
||||||
|
/// or one of the standard roles.
|
||||||
|
/// \param role Role
|
||||||
|
Type getTypeFromRole(const QByteArray& role) const;
|
||||||
|
|
||||||
|
/// Returns class attributes for given class. If class is not found,
|
||||||
|
/// then empty attributes are returned.
|
||||||
|
/// \param className Class name
|
||||||
|
const std::vector<PDFStructureTreeAttribute>& getClassAttributes(const QByteArray& className) const;
|
||||||
|
|
||||||
|
/// Returns a list of namespaces
|
||||||
|
const PDFStructureTreeNamespaces& getNamespaces() const { return m_namespaces; }
|
||||||
|
|
||||||
|
/// Returns a list of pronunciation lexicons
|
||||||
|
const std::vector<PDFFileSpecification>& getPronunciationLexicons() const { return m_pronunciationLexicons; }
|
||||||
|
|
||||||
|
/// Returns a list of associated files
|
||||||
|
const std::vector<PDFFileSpecification>& getAssociatedFiles() const { return m_associatedFiles; }
|
||||||
|
|
||||||
/// Parses structure tree from the object. If error occurs, empty structure
|
/// Parses structure tree from the object. If error occurs, empty structure
|
||||||
/// tree is returned.
|
/// tree is returned.
|
||||||
/// \param storage Storage
|
/// \param storage Storage
|
||||||
@ -259,6 +358,12 @@ private:
|
|||||||
|
|
||||||
std::map<QByteArray, PDFObjectReference> m_idTreeMap;
|
std::map<QByteArray, PDFObjectReference> m_idTreeMap;
|
||||||
ParentTreeEntries m_parentTreeEntries;
|
ParentTreeEntries m_parentTreeEntries;
|
||||||
|
PDFInteger m_parentNextKey = 0;
|
||||||
|
std::map<QByteArray, Type> m_roleMap;
|
||||||
|
std::map<QByteArray, std::vector<PDFStructureTreeAttribute>> m_classMap;
|
||||||
|
PDFStructureTreeNamespaces m_namespaces;
|
||||||
|
std::vector<PDFFileSpecification> m_pronunciationLexicons;
|
||||||
|
std::vector<PDFFileSpecification> m_associatedFiles;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
Loading…
x
Reference in New Issue
Block a user