mirror of
				https://github.com/JakubMelka/PDF4QT.git
				synced 2025-06-05 21:59:17 +02:00 
			
		
		
		
	Structure tree parsing
This commit is contained in:
		| @@ -43,9 +43,12 @@ struct PDFStructureTreeAttributeDefinition | |||||||
|     /// always returns valid pointer. For unknown attribute, it returns |     /// always returns valid pointer. For unknown attribute, it returns | ||||||
|     /// user attribute definition. |     /// user attribute definition. | ||||||
|     /// \param name Attribute name |     /// \param name Attribute name | ||||||
|     const PDFStructureTreeAttributeDefinition* getDefinition(const QByteArray& name); |     static const PDFStructureTreeAttributeDefinition* getDefinition(const QByteArray& name); | ||||||
|  |  | ||||||
|     PDFStructureTreeAttribute::Owner getOwnerFromString(const QByteArray& string); |     /// Returns owner from string. If owner is not valid, then invalid | ||||||
|  |     /// owner is returned. | ||||||
|  |     /// \param string String | ||||||
|  |     static PDFStructureTreeAttribute::Owner getOwnerFromString(const QByteArray& string); | ||||||
|  |  | ||||||
|     PDFStructureTreeAttribute::Attribute type = PDFStructureTreeAttribute::Attribute::User; |     PDFStructureTreeAttribute::Attribute type = PDFStructureTreeAttribute::Attribute::User; | ||||||
|     const char* name = nullptr; |     const char* name = nullptr; | ||||||
| @@ -136,6 +139,67 @@ static constexpr std::array<const PDFStructureTreeAttributeDefinition, PDFStruct | |||||||
|     PDFStructureTreeAttributeDefinition(PDFStructureTreeAttribute::Attribute::Subtype, "Subtype", false) |     PDFStructureTreeAttributeDefinition(PDFStructureTreeAttribute::Attribute::Subtype, "Subtype", false) | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | static constexpr std::pair<PDFStructureItem::Type, const char*> s_structureTreeItemTypes[] = { | ||||||
|  |     std::make_pair(PDFStructureItem::Document, "Document"), | ||||||
|  |     std::make_pair(PDFStructureItem::DocumentFragment, "DocumentFragment"), | ||||||
|  |     std::make_pair(PDFStructureItem::Part, "Part"), | ||||||
|  |     std::make_pair(PDFStructureItem::Div, "Div"), | ||||||
|  |     std::make_pair(PDFStructureItem::Aside, "Aside"), | ||||||
|  |     std::make_pair(PDFStructureItem::P, "P"), | ||||||
|  |     std::make_pair(PDFStructureItem::H1, "H1"), | ||||||
|  |     std::make_pair(PDFStructureItem::H2, "H2"), | ||||||
|  |     std::make_pair(PDFStructureItem::H3, "H3"), | ||||||
|  |     std::make_pair(PDFStructureItem::H4, "H4"), | ||||||
|  |     std::make_pair(PDFStructureItem::H5, "H5"), | ||||||
|  |     std::make_pair(PDFStructureItem::H6, "H6"), | ||||||
|  |     std::make_pair(PDFStructureItem::H7, "H7"), | ||||||
|  |     std::make_pair(PDFStructureItem::H, "H"), | ||||||
|  |     std::make_pair(PDFStructureItem::Title, "Title"), | ||||||
|  |     std::make_pair(PDFStructureItem::FENote, "FENote"), | ||||||
|  |     std::make_pair(PDFStructureItem::Sub, "Sub"), | ||||||
|  |     std::make_pair(PDFStructureItem::Lbl, "Lbl"), | ||||||
|  |     std::make_pair(PDFStructureItem::Span, "Span"), | ||||||
|  |     std::make_pair(PDFStructureItem::Em, "Em"), | ||||||
|  |     std::make_pair(PDFStructureItem::Strong, "Strong"), | ||||||
|  |     std::make_pair(PDFStructureItem::Link, "Link"), | ||||||
|  |     std::make_pair(PDFStructureItem::Annot, "Annot"), | ||||||
|  |     std::make_pair(PDFStructureItem::Form, "Form"), | ||||||
|  |     std::make_pair(PDFStructureItem::Ruby, "Ruby"), | ||||||
|  |     std::make_pair(PDFStructureItem::RB, "RB"), | ||||||
|  |     std::make_pair(PDFStructureItem::RT, "RT"), | ||||||
|  |     std::make_pair(PDFStructureItem::RP, "RP"), | ||||||
|  |     std::make_pair(PDFStructureItem::Warichu, "Warichu"), | ||||||
|  |     std::make_pair(PDFStructureItem::WR, "WR"), | ||||||
|  |     std::make_pair(PDFStructureItem::WP, "WP"), | ||||||
|  |     std::make_pair(PDFStructureItem::L, "L"), | ||||||
|  |     std::make_pair(PDFStructureItem::LI, "LI"), | ||||||
|  |     std::make_pair(PDFStructureItem::LBody, "LBody"), | ||||||
|  |     std::make_pair(PDFStructureItem::Table, "Table"), | ||||||
|  |     std::make_pair(PDFStructureItem::TR, "TR"), | ||||||
|  |     std::make_pair(PDFStructureItem::TH, "TH"), | ||||||
|  |     std::make_pair(PDFStructureItem::TD, "TD"), | ||||||
|  |     std::make_pair(PDFStructureItem::THead, "THead"), | ||||||
|  |     std::make_pair(PDFStructureItem::TBody, "TBody"), | ||||||
|  |     std::make_pair(PDFStructureItem::TFoot, "TFoot"), | ||||||
|  |     std::make_pair(PDFStructureItem::Caption, "Caption"), | ||||||
|  |     std::make_pair(PDFStructureItem::Figure, "Figure"), | ||||||
|  |     std::make_pair(PDFStructureItem::Formula, "Formula"), | ||||||
|  |     std::make_pair(PDFStructureItem::Artifact, "Artifact"), | ||||||
|  |     std::make_pair(PDFStructureItem::Sect, "Sect"), | ||||||
|  |     std::make_pair(PDFStructureItem::Art, "Art"), | ||||||
|  |     std::make_pair(PDFStructureItem::BlockQuote, "BlockQuote"), | ||||||
|  |     std::make_pair(PDFStructureItem::TOC, "TOC"), | ||||||
|  |     std::make_pair(PDFStructureItem::TOCI, "TOCI"), | ||||||
|  |     std::make_pair(PDFStructureItem::Index, "Index"), | ||||||
|  |     std::make_pair(PDFStructureItem::NonStruct, "NonStruct"), | ||||||
|  |     std::make_pair(PDFStructureItem::Private, "Private"), | ||||||
|  |     std::make_pair(PDFStructureItem::Quote, "Quote"), | ||||||
|  |     std::make_pair(PDFStructureItem::Note, "Note"), | ||||||
|  |     std::make_pair(PDFStructureItem::Reference, "Reference"), | ||||||
|  |     std::make_pair(PDFStructureItem::BibEntry, "BibEntry"), | ||||||
|  |     std::make_pair(PDFStructureItem::Code, "Code") | ||||||
|  | }; | ||||||
|  |  | ||||||
| const PDFStructureTreeAttributeDefinition* PDFStructureTreeAttributeDefinition::getDefinition(const QByteArray& name) | const PDFStructureTreeAttributeDefinition* PDFStructureTreeAttributeDefinition::getDefinition(const QByteArray& name) | ||||||
| { | { | ||||||
|     for (const PDFStructureTreeAttributeDefinition& definition : s_attributeDefinitions) |     for (const PDFStructureTreeAttributeDefinition& definition : s_attributeDefinitions) | ||||||
| @@ -160,7 +224,7 @@ PDFStructureTreeAttribute::Owner PDFStructureTreeAttributeDefinition::getOwnerFr | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return PDFStructureTreeAttribute::Owner::User; |     return PDFStructureTreeAttribute::Owner::Invalid; | ||||||
| } | } | ||||||
|  |  | ||||||
| PDFStructureTreeAttribute::PDFStructureTreeAttribute() : | PDFStructureTreeAttribute::PDFStructureTreeAttribute() : | ||||||
| @@ -173,6 +237,20 @@ PDFStructureTreeAttribute::PDFStructureTreeAttribute() : | |||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  | PDFStructureTreeAttribute::PDFStructureTreeAttribute(const PDFStructureTreeAttributeDefinition* definition, | ||||||
|  |                                                      PDFStructureTreeAttribute::Owner owner, | ||||||
|  |                                                      PDFInteger revision, | ||||||
|  |                                                      PDFObjectReference namespaceReference, | ||||||
|  |                                                      PDFObject value) : | ||||||
|  |     m_definition(definition), | ||||||
|  |     m_owner(owner), | ||||||
|  |     m_revision(revision), | ||||||
|  |     m_namespace(namespaceReference), | ||||||
|  |     m_value(qMove(value)) | ||||||
|  | { | ||||||
|  |  | ||||||
|  | } | ||||||
|  |  | ||||||
| PDFStructureTreeAttribute::Attribute PDFStructureTreeAttribute::getType() const | PDFStructureTreeAttribute::Attribute PDFStructureTreeAttribute::getType() const | ||||||
| { | { | ||||||
|     Q_ASSERT(m_definition); |     Q_ASSERT(m_definition); | ||||||
| @@ -228,6 +306,76 @@ bool PDFStructureTreeAttribute::getUserPropertyIsHidden(const PDFObjectStorage* | |||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void PDFStructureTreeAttribute::parseAttributes(const PDFObjectStorage* storage, PDFObject object, std::vector<PDFStructureTreeAttribute>& attributes) | ||||||
|  | { | ||||||
|  |     object = storage->getObject(object); | ||||||
|  |     if (object.isDictionary()) | ||||||
|  |     { | ||||||
|  |         parseAttributeDictionary(storage, object, attributes); | ||||||
|  |     } | ||||||
|  |     else if (object.isArray()) | ||||||
|  |     { | ||||||
|  |         size_t startIndex = attributes.size(); | ||||||
|  |  | ||||||
|  |         for (PDFObject itemObject : *object.getArray()) | ||||||
|  |         { | ||||||
|  |             itemObject = storage->getObject(itemObject); | ||||||
|  |             if (itemObject.isInt()) | ||||||
|  |             { | ||||||
|  |                 // It is revision number | ||||||
|  |                 const PDFInteger revision = itemObject.getInteger(); | ||||||
|  |                 for (; startIndex < attributes.size(); ++startIndex) | ||||||
|  |                 { | ||||||
|  |                     attributes[startIndex].setRevision(revision); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             else if (itemObject.isDictionary()) | ||||||
|  |             { | ||||||
|  |                 // It is attribute | ||||||
|  |                 parseAttributeDictionary(storage, itemObject, attributes); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void PDFStructureTreeAttribute::parseAttributeDictionary(const PDFObjectStorage* storage, PDFObject object, std::vector<PDFStructureTreeAttribute>& attributes) | ||||||
|  | { | ||||||
|  |     Q_ASSERT(object.isDictionary()); | ||||||
|  |     const PDFDictionary* attributeDictionary = object.getDictionary(); | ||||||
|  |  | ||||||
|  |     PDFDocumentDataLoaderDecorator loader(storage); | ||||||
|  |     const QByteArray ownerName = loader.readNameFromDictionary(attributeDictionary, "O"); | ||||||
|  |     const Owner owner = PDFStructureTreeAttributeDefinition::getOwnerFromString(ownerName); | ||||||
|  |     if (owner == Owner::UserProperties) | ||||||
|  |     { | ||||||
|  |         // User properties | ||||||
|  |         PDFObject userPropertiesArrayObject = storage->getObject(attributeDictionary->get("P")); | ||||||
|  |         if (userPropertiesArrayObject.isArray()) | ||||||
|  |         { | ||||||
|  |             const PDFArray* userPropertiesArray = userPropertiesArrayObject.getArray(); | ||||||
|  |             for (const PDFObject& userPropertyObject : *userPropertiesArray) | ||||||
|  |             { | ||||||
|  |                 attributes.emplace_back(&s_attributeDefinitions.front(), owner, 0, PDFObjectReference(), userPropertyObject); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |         const PDFObjectReference namespaceReference = loader.readReferenceFromDictionary(attributeDictionary, "NS"); | ||||||
|  |         const size_t count = attributeDictionary->getCount(); | ||||||
|  |         for (size_t i = 0; i < count; ++i) | ||||||
|  |         { | ||||||
|  |             const PDFInplaceOrMemoryString& key = attributeDictionary->getKey(i); | ||||||
|  |             if (key == "O" || key == "NS") | ||||||
|  |             { | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             attributes.emplace_back(PDFStructureTreeAttributeDefinition::getDefinition(key.getString()), owner, 0, namespaceReference, attributeDictionary->getValue(i)); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| std::vector<PDFObjectReference> PDFStructureTree::getParents(PDFInteger id) const | std::vector<PDFObjectReference> PDFStructureTree::getParents(PDFInteger id) const | ||||||
| { | { | ||||||
|     std::vector<PDFObjectReference> result; |     std::vector<PDFObjectReference> result; | ||||||
| @@ -240,15 +388,39 @@ std::vector<PDFObjectReference> PDFStructureTree::getParents(PDFInteger id) cons | |||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | PDFStructureItem::Type PDFStructureTree::getTypeFromRole(const QByteArray& role) const | ||||||
|  | { | ||||||
|  |     auto it = m_roleMap.find(role); | ||||||
|  |     if (it != m_roleMap.cend()) | ||||||
|  |     { | ||||||
|  |         return it->second; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return getTypeFromName(role); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | const std::vector<PDFStructureTreeAttribute>& PDFStructureTree::getClassAttributes(const QByteArray& className) const | ||||||
|  | { | ||||||
|  |     auto it = m_classMap.find(className); | ||||||
|  |     if (it != m_classMap.cend()) | ||||||
|  |     { | ||||||
|  |         return it->second; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     static const std::vector<PDFStructureTreeAttribute> dummy; | ||||||
|  |     return dummy; | ||||||
|  | } | ||||||
|  |  | ||||||
| PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObject object) | PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObject object) | ||||||
| { | { | ||||||
|     PDFStructureTree tree; |     PDFStructureTree tree; | ||||||
|  |  | ||||||
|     if (const PDFDictionary* dictionary = storage->getDictionaryFromObject(object)) |     if (const PDFDictionary* dictionary = storage->getDictionaryFromObject(object)) | ||||||
|     { |     { | ||||||
|  |         PDFDocumentDataLoaderDecorator loader(storage); | ||||||
|  |  | ||||||
|         PDFMarkedObjectsContext context; |         PDFMarkedObjectsContext context; | ||||||
|         PDFObject kids = dictionary->get("K"); |         PDFObject kids = dictionary->get("K"); | ||||||
|  |  | ||||||
|         if (kids.isArray()) |         if (kids.isArray()) | ||||||
|         { |         { | ||||||
|             const PDFArray* kidsArray = kids.getArray(); |             const PDFArray* kidsArray = kids.getArray(); | ||||||
| @@ -320,9 +492,78 @@ PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObj | |||||||
|             } |             } | ||||||
|             std::stable_sort(tree.m_parentTreeEntries.begin(), tree.m_parentTreeEntries.end()); |             std::stable_sort(tree.m_parentTreeEntries.begin(), tree.m_parentTreeEntries.end()); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         tree.m_parentNextKey = loader.readIntegerFromDictionary(dictionary, "ParentTreeNextKey", 0); | ||||||
|  |  | ||||||
|  |         if (const PDFDictionary* roleMapDictionary = storage->getDictionaryFromObject(dictionary->get("RoleMap"))) | ||||||
|  |         { | ||||||
|  |             const size_t size = roleMapDictionary->getCount(); | ||||||
|  |             for (size_t i = 0; i < size; ++i) | ||||||
|  |             { | ||||||
|  |                 tree.m_roleMap[roleMapDictionary->getKey(i).getString()] = getTypeFromName(loader.readName(roleMapDictionary->getValue(i))); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if (const PDFDictionary* classMapDictionary = storage->getDictionaryFromObject(dictionary->get("ClassMap"))) | ||||||
|  |         { | ||||||
|  |             const size_t size = classMapDictionary->getCount(); | ||||||
|  |             for (size_t i = 0; i < size; ++i) | ||||||
|  |             { | ||||||
|  |                 PDFStructureTreeAttribute::parseAttributes(storage, classMapDictionary->getValue(i), tree.m_classMap[classMapDictionary->getKey(i).getString()]); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if (dictionary->hasKey("Namespaces")) | ||||||
|  |         { | ||||||
|  |             tree.m_namespaces = loader.readObjectList<PDFStructureTreeNamespace>(dictionary->get("Namespaces")); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if (dictionary->hasKey("PronunciationLexicon")) | ||||||
|  |         { | ||||||
|  |             tree.m_pronunciationLexicons = loader.readObjectList<PDFFileSpecification>(dictionary->get("PronunciationLexicon")); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if (dictionary->hasKey("AF")) | ||||||
|  |         { | ||||||
|  |             tree.m_associatedFiles = loader.readObjectList<PDFFileSpecification>(dictionary->get("AF")); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return tree; |     return tree; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | PDFStructureItem::Type PDFStructureItem::getTypeFromName(const QByteArray& name) | ||||||
|  | { | ||||||
|  |     for (const auto& item : s_structureTreeItemTypes) | ||||||
|  |     { | ||||||
|  |         if (name == item.second) | ||||||
|  |         { | ||||||
|  |             return item.first; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return Invalid; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | PDFStructureTreeNamespace PDFStructureTreeNamespace::parse(const PDFObjectStorage* storage, PDFObject object) | ||||||
|  | { | ||||||
|  |     PDFStructureTreeNamespace result; | ||||||
|  |  | ||||||
|  |     if (object.isReference()) | ||||||
|  |     { | ||||||
|  |         result.m_selfReference = object.getReference(); | ||||||
|  |     } | ||||||
|  |     object = storage->getObject(object); | ||||||
|  |  | ||||||
|  |     if (const PDFDictionary* dictionary = storage->getDictionaryFromObject(object)) | ||||||
|  |     { | ||||||
|  |         PDFDocumentDataLoaderDecorator loader(storage); | ||||||
|  |         result.m_namespace = loader.readTextStringFromDictionary(dictionary, "NS", QString()); | ||||||
|  |         result.m_schema = PDFFileSpecification::parse(storage, dictionary->get("Schema")); | ||||||
|  |         result.m_roleMapNS = dictionary->get("RoleMapNS"); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  |  | ||||||
| }   // namespace pdf | }   // namespace pdf | ||||||
|   | |||||||
| @@ -20,6 +20,7 @@ | |||||||
|  |  | ||||||
| #include "pdfobject.h" | #include "pdfobject.h" | ||||||
| #include "pdfobjectutils.h" | #include "pdfobjectutils.h" | ||||||
|  | #include "pdffile.h" | ||||||
|  |  | ||||||
| namespace pdf | namespace pdf | ||||||
| { | { | ||||||
| @@ -30,14 +31,13 @@ struct PDFStructureTreeAttributeDefinition; | |||||||
| class  PDFFORQTLIBSHARED_EXPORT PDFStructureTreeAttribute | class  PDFFORQTLIBSHARED_EXPORT PDFStructureTreeAttribute | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     explicit PDFStructureTreeAttribute(); |  | ||||||
|  |  | ||||||
|     enum class Owner |     enum class Owner | ||||||
|     { |     { | ||||||
|         Invalid, |         Invalid, | ||||||
|  |  | ||||||
|         /// Defined for user owner |         /// Defined for user owner | ||||||
|         User, |         UserProperties, | ||||||
|  |  | ||||||
|         /// Defined for NSO (namespace owner) |         /// Defined for NSO (namespace owner) | ||||||
|         NSO, |         NSO, | ||||||
| @@ -62,6 +62,13 @@ public: | |||||||
|         ARIA_1_1, |         ARIA_1_1, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|  |     explicit PDFStructureTreeAttribute(); | ||||||
|  |     explicit PDFStructureTreeAttribute(const PDFStructureTreeAttributeDefinition* definition, | ||||||
|  |                                        Owner owner, | ||||||
|  |                                        PDFInteger revision, | ||||||
|  |                                        PDFObjectReference namespaceReference, | ||||||
|  |                                        PDFObject value); | ||||||
|  |  | ||||||
|     enum Attribute |     enum Attribute | ||||||
|     { |     { | ||||||
|         User, |         User, | ||||||
| @@ -138,6 +145,9 @@ public: | |||||||
|     /// Returns attribute revision number |     /// Returns attribute revision number | ||||||
|     PDFInteger getRevision() const { return m_revision; } |     PDFInteger getRevision() const { return m_revision; } | ||||||
|  |  | ||||||
|  |     /// Sets attribute revision number | ||||||
|  |     void setRevision(PDFInteger revision) { m_revision = revision; } | ||||||
|  |  | ||||||
|     /// Returns namespace for this attribute (or empty reference, if it doesn't exist) |     /// Returns namespace for this attribute (or empty reference, if it doesn't exist) | ||||||
|     PDFObjectReference getNamespace() const { return m_namespace; } |     PDFObjectReference getNamespace() const { return m_namespace; } | ||||||
|  |  | ||||||
| @@ -167,7 +177,21 @@ public: | |||||||
|     /// \param storage Storage (for resolving of indirect objects) |     /// \param storage Storage (for resolving of indirect objects) | ||||||
|     bool getUserPropertyIsHidden(const PDFObjectStorage* storage) const; |     bool getUserPropertyIsHidden(const PDFObjectStorage* storage) const; | ||||||
|  |  | ||||||
|  |     /// Parses attributes and adds them into \p attributes array. Invalid | ||||||
|  |     /// attributes are not added. New attributes are appended to the end | ||||||
|  |     /// of the array. | ||||||
|  |     /// \param storage Storage | ||||||
|  |     /// \param object Container of attributes | ||||||
|  |     /// \param attributes[in,out] Attributes | ||||||
|  |     static void parseAttributes(const PDFObjectStorage* storage, PDFObject object, std::vector<PDFStructureTreeAttribute>& attributes); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|  |     /// Parses single attribute dictionary and appends new attributes to the end of the list. | ||||||
|  |     /// \param storage Storage | ||||||
|  |     /// \param object Container of attributes | ||||||
|  |     /// \param attributes[in,out] Attributes | ||||||
|  |     static void parseAttributeDictionary(const PDFObjectStorage* storage, PDFObject object, std::vector<PDFStructureTreeAttribute>& attributes); | ||||||
|  |  | ||||||
|     const PDFStructureTreeAttributeDefinition* m_definition = nullptr; |     const PDFStructureTreeAttributeDefinition* m_definition = nullptr; | ||||||
|  |  | ||||||
|     /// Attribute owner |     /// Attribute owner | ||||||
| @@ -200,8 +224,38 @@ public: | |||||||
|     { |     { | ||||||
|  |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     virtual ~PDFStructureItem() = default; |     virtual ~PDFStructureItem() = default; | ||||||
|  |  | ||||||
    /// Type of the structure item. Enumerators are grouped by the chapter
    /// of the specification which defines them. Invalid is the value
    /// returned by getTypeFromName for a name which is not recognized.
    enum Type
    {
        Invalid,

        // Document level types - chapter 14.8.4.3 of PDF 2.0 specification
        Document, DocumentFragment,

        // Grouping types - chapter 14.8.4.4 of PDF 2.0 specification
        Part, Div, Aside,

        // Block level structure types - chapter 14.8.4.5 of PDF 2.0 specification
        P, H1, H2, H3, H4, H5, H6, H7, H, Title, FENote,

        // Subblock level structure types - chapter 14.8.4.6 of PDF 2.0 specification
        Sub,

        // Inline structure types - chapter 14.8.4.7 of PDF 2.0 specification
        Lbl, Span, Em, Strong, Link, Annot, Form, Ruby, RB, RT, RP, Warichu, WR, WP,

        // Other structure types - chapter 14.8.4.8 of PDF 2.0 specification
        // NOTE(review): this comment previously repeated chapter 14.8.4.7
        // (inline types) - confirm the chapter number against the specification
        L, LI, LBody, Table, TR, TH, TD, THead, TBody, TFoot, Caption, Figure, Formula, Artifact,

        // PDF 1.7 backward compatibility types
        Sect, Art, BlockQuote, TOC, TOCI, Index, NonStruct, Private, Quote, Note, Reference, BibEntry, Code,

        // Last type identifier (keep it as the last enumerator)
        LastType,
    };
|  |  | ||||||
|     virtual PDFStructureTree* asStructureTree() { return nullptr; } |     virtual PDFStructureTree* asStructureTree() { return nullptr; } | ||||||
|     virtual const PDFStructureTree* asStructureTree() const { return nullptr; } |     virtual const PDFStructureTree* asStructureTree() const { return nullptr; } | ||||||
|  |  | ||||||
| @@ -217,12 +271,38 @@ public: | |||||||
|     /// \param context Parsing context |     /// \param context Parsing context | ||||||
|     static PDFStructureItemPointer parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context); |     static PDFStructureItemPointer parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context); | ||||||
|  |  | ||||||
|  |     /// Get structure tree type from name | ||||||
|  |     /// \param name Name | ||||||
|  |     static Type getTypeFromName(const QByteArray& name); | ||||||
|  |  | ||||||
| protected: | protected: | ||||||
|     PDFStructureItem* m_parent; |     PDFStructureItem* m_parent; | ||||||
|     PDFStructureTree* m_root; |     PDFStructureTree* m_root; | ||||||
|     std::vector<PDFStructureItemPointer> m_children; |     std::vector<PDFStructureItemPointer> m_children; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | /// Structure tree namespace | ||||||
|  | class PDFStructureTreeNamespace | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     explicit inline PDFStructureTreeNamespace() = default; | ||||||
|  |  | ||||||
|  |     const PDFObjectReference& getSelfReference() const { return m_selfReference; } | ||||||
|  |     const QString& getNamespace() const { return m_namespace; } | ||||||
|  |     const PDFFileSpecification& getSchema() const { return m_schema; } | ||||||
|  |     const PDFObject& getRoleMapNS() const { return m_roleMapNS; } | ||||||
|  |  | ||||||
|  |     static PDFStructureTreeNamespace parse(const PDFObjectStorage* storage, PDFObject object); | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     PDFObjectReference m_selfReference; | ||||||
|  |     QString m_namespace; | ||||||
|  |     PDFFileSpecification m_schema; | ||||||
|  |     PDFObject m_roleMapNS; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | using PDFStructureTreeNamespaces = std::vector<PDFStructureTreeNamespace>; | ||||||
|  |  | ||||||
| /// Structure tree, contains structure element hierarchy | /// Structure tree, contains structure element hierarchy | ||||||
| class PDFStructureTree : public PDFStructureItem | class PDFStructureTree : public PDFStructureItem | ||||||
| { | { | ||||||
| @@ -237,6 +317,25 @@ public: | |||||||
|     /// \param id Id |     /// \param id Id | ||||||
|     std::vector<PDFObjectReference> getParents(PDFInteger id) const; |     std::vector<PDFObjectReference> getParents(PDFInteger id) const; | ||||||
|  |  | ||||||
|  |     /// Returns type from role. Role can be an entry in RoleMap dictionary, | ||||||
|  |     /// or one of the standard roles. | ||||||
|  |     /// \param role Role | ||||||
|  |     Type getTypeFromRole(const QByteArray& role) const; | ||||||
|  |  | ||||||
|  |     /// Returns class attributes for given class. If class is not found, | ||||||
|  |     /// then empty attributes are returned. | ||||||
|  |     /// \param className Class name | ||||||
|  |     const std::vector<PDFStructureTreeAttribute>& getClassAttributes(const QByteArray& className) const; | ||||||
|  |  | ||||||
|  |     /// Returns a list of namespaces | ||||||
|  |     const PDFStructureTreeNamespaces& getNamespaces() const { return m_namespaces; } | ||||||
|  |  | ||||||
|  |     /// Returns a list of pronunciation lexicons | ||||||
|  |     const std::vector<PDFFileSpecification>& getPronunciationLexicons() const { return m_pronunciationLexicons; } | ||||||
|  |  | ||||||
|  |     /// Returns a list of associated files | ||||||
|  |     const std::vector<PDFFileSpecification>& getAssociatedFiles() const { return m_associatedFiles; } | ||||||
|  |  | ||||||
|     /// Parses structure tree from the object. If error occurs, empty structure |     /// Parses structure tree from the object. If error occurs, empty structure | ||||||
|     /// tree is returned. |     /// tree is returned. | ||||||
|     /// \param storage Storage |     /// \param storage Storage | ||||||
| @@ -259,6 +358,12 @@ private: | |||||||
|  |  | ||||||
|     std::map<QByteArray, PDFObjectReference> m_idTreeMap; |     std::map<QByteArray, PDFObjectReference> m_idTreeMap; | ||||||
|     ParentTreeEntries m_parentTreeEntries; |     ParentTreeEntries m_parentTreeEntries; | ||||||
|  |     PDFInteger m_parentNextKey = 0; | ||||||
|  |     std::map<QByteArray, Type> m_roleMap; | ||||||
|  |     std::map<QByteArray, std::vector<PDFStructureTreeAttribute>> m_classMap; | ||||||
|  |     PDFStructureTreeNamespaces m_namespaces; | ||||||
|  |     std::vector<PDFFileSpecification> m_pronunciationLexicons; | ||||||
|  |     std::vector<PDFFileSpecification> m_associatedFiles; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| }   // namespace pdf | }   // namespace pdf | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user