mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-04-24 23:18:51 +02:00
Structure tree parsing
This commit is contained in:
parent
a21f185e89
commit
0a62ad618d
@ -93,7 +93,7 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
|
|||||||
PDFCatalog catalogObject;
|
PDFCatalog catalogObject;
|
||||||
catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document);
|
catalogObject.m_viewerPreferences = PDFViewerPreferences::parse(catalog, document);
|
||||||
catalogObject.m_pages = PDFPage::parse(document, catalogDictionary->get("Pages"));
|
catalogObject.m_pages = PDFPage::parse(document, catalogDictionary->get("Pages"));
|
||||||
catalogObject.m_pageLabels = PDFNumberTreeLoader<PDFPageLabel>::parse(document, catalogDictionary->get("PageLabels"));
|
catalogObject.m_pageLabels = PDFNumberTreeLoader<PDFPageLabel>::parse(&document->getStorage(), catalogDictionary->get("PageLabels"));
|
||||||
|
|
||||||
if (catalogDictionary->hasKey("OCProperties"))
|
if (catalogDictionary->hasKey("OCProperties"))
|
||||||
{
|
{
|
||||||
@ -198,6 +198,15 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
|
|||||||
catalogObject.m_threads = loader.readObjectList<PDFArticleThread>(catalogDictionary->get("Threads"));
|
catalogObject.m_threads = loader.readObjectList<PDFArticleThread>(catalogDictionary->get("Threads"));
|
||||||
catalogObject.m_metadata = catalogDictionary->get("Metadata");
|
catalogObject.m_metadata = catalogDictionary->get("Metadata");
|
||||||
|
|
||||||
|
// Examine mark info dictionary
|
||||||
|
catalogObject.m_markInfoFlags = MarkInfo_None;
|
||||||
|
if (const PDFDictionary* markInfoDictionary = document->getDictionaryFromObject(catalogDictionary->get("MarkInfo")))
|
||||||
|
{
|
||||||
|
catalogObject.m_markInfoFlags.setFlag(MarkInfo_Marked, loader.readBooleanFromDictionary(markInfoDictionary, "Marked", false));
|
||||||
|
catalogObject.m_markInfoFlags.setFlag(MarkInfo_UserProperties, loader.readBooleanFromDictionary(markInfoDictionary, "UserProperties", false));
|
||||||
|
catalogObject.m_markInfoFlags.setFlag(MarkInfo_Suspects, loader.readBooleanFromDictionary(markInfoDictionary, "Suspects", false));
|
||||||
|
}
|
||||||
|
|
||||||
return catalogObject;
|
return catalogObject;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -463,9 +472,9 @@ PDFViewerPreferences PDFViewerPreferences::parse(const PDFObject& catalogDiction
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
PDFPageLabel PDFPageLabel::parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object)
|
PDFPageLabel PDFPageLabel::parse(PDFInteger pageIndex, const PDFObjectStorage* storage, const PDFObject& object)
|
||||||
{
|
{
|
||||||
const PDFObject& dereferencedObject = document->getObject(object);
|
const PDFObject& dereferencedObject = storage->getObject(object);
|
||||||
if (dereferencedObject.isDictionary())
|
if (dereferencedObject.isDictionary())
|
||||||
{
|
{
|
||||||
std::array<std::pair<const char*, NumberingStyle>, 5> numberingStyles = { std::pair<const char*, NumberingStyle>{ "D", NumberingStyle::DecimalArabic},
|
std::array<std::pair<const char*, NumberingStyle>, 5> numberingStyles = { std::pair<const char*, NumberingStyle>{ "D", NumberingStyle::DecimalArabic},
|
||||||
@ -475,7 +484,7 @@ PDFPageLabel PDFPageLabel::parse(PDFInteger pageIndex, const PDFDocument* docume
|
|||||||
std::pair<const char*, NumberingStyle>{ "a", NumberingStyle::LowercaseLetters} };
|
std::pair<const char*, NumberingStyle>{ "a", NumberingStyle::LowercaseLetters} };
|
||||||
|
|
||||||
const PDFDictionary* dictionary = dereferencedObject.getDictionary();
|
const PDFDictionary* dictionary = dereferencedObject.getDictionary();
|
||||||
const PDFDocumentDataLoaderDecorator loader(document);
|
const PDFDocumentDataLoaderDecorator loader(storage);
|
||||||
const NumberingStyle numberingStyle = loader.readEnumByName(dictionary->get("S"), numberingStyles.cbegin(), numberingStyles.cend(), NumberingStyle::None);
|
const NumberingStyle numberingStyle = loader.readEnumByName(dictionary->get("S"), numberingStyles.cbegin(), numberingStyles.cend(), NumberingStyle::None);
|
||||||
const QString prefix = loader.readTextString(dictionary->get("P"), QString());
|
const QString prefix = loader.readTextString(dictionary->get("P"), QString());
|
||||||
const PDFInteger startNumber = loader.readInteger(dictionary->get("St"), 1);
|
const PDFInteger startNumber = loader.readInteger(dictionary->get("St"), 1);
|
||||||
|
@ -95,7 +95,7 @@ public:
|
|||||||
bool operator<(const PDFPageLabel& other) const { return m_pageIndex < other.m_pageIndex; }
|
bool operator<(const PDFPageLabel& other) const { return m_pageIndex < other.m_pageIndex; }
|
||||||
|
|
||||||
/// Parses page label object from PDF object, according to PDF Reference 1.7, Table 8.10
|
/// Parses page label object from PDF object, according to PDF Reference 1.7, Table 8.10
|
||||||
static PDFPageLabel parse(PDFInteger pageIndex, const PDFDocument* document, const PDFObject& object);
|
static PDFPageLabel parse(PDFInteger pageIndex, const PDFObjectStorage* storage, const PDFObject& object);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
NumberingStyle m_numberingType;
|
NumberingStyle m_numberingType;
|
||||||
@ -380,6 +380,15 @@ public:
|
|||||||
const PDFAction* getDocumentAction(DocumentAction action) const { return m_documentActions.at(action).get(); }
|
const PDFAction* getDocumentAction(DocumentAction action) const { return m_documentActions.at(action).get(); }
|
||||||
const PDFObject& getMetadata() const { return m_metadata; }
|
const PDFObject& getMetadata() const { return m_metadata; }
|
||||||
|
|
||||||
|
/// Is document marked to have structure tree conforming to tagged document convention?
|
||||||
|
bool isLogicalStructureMarked() const { return m_markInfoFlags.testFlag(MarkInfo_Marked); }
|
||||||
|
|
||||||
|
/// Is document marked to have structure tree with user attributes?
|
||||||
|
bool isLogicalStructureUserPropertiesUsed() const { return m_markInfoFlags.testFlag(MarkInfo_UserProperties); }
|
||||||
|
|
||||||
|
/// Is document marked to have structure tree not completely conforming to standard?
|
||||||
|
bool isLogicalStructureSuspects() const { return m_markInfoFlags.testFlag(MarkInfo_Suspects); }
|
||||||
|
|
||||||
/// Returns destination using the key. If destination with the key is not found,
|
/// Returns destination using the key. If destination with the key is not found,
|
||||||
/// then nullptr is returned.
|
/// then nullptr is returned.
|
||||||
/// \param key Destination key
|
/// \param key Destination key
|
||||||
@ -391,6 +400,16 @@ public:
|
|||||||
static PDFCatalog parse(const PDFObject& catalog, const PDFDocument* document);
|
static PDFCatalog parse(const PDFObject& catalog, const PDFDocument* document);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
/// Bit flags describing the content of the catalog's mark info dictionary.
/// Each flag occupies its own bit, so the flags can be combined.
enum MarkInfoFlag : uint8_t
{
    MarkInfo_None           = 0,        ///< No flag is set
    MarkInfo_Marked         = 1 << 0,   ///< Document conforms to tagged PDF convention
    MarkInfo_UserProperties = 1 << 1,   ///< Structure tree contains user properties
    MarkInfo_Suspects       = 1 << 2,   ///< Suspects
};
|
||||||
|
Q_DECLARE_FLAGS(MarkInfoFlags, MarkInfoFlag)
|
||||||
|
|
||||||
QByteArray m_version;
|
QByteArray m_version;
|
||||||
PDFViewerPreferences m_viewerPreferences;
|
PDFViewerPreferences m_viewerPreferences;
|
||||||
std::vector<PDFPage> m_pages;
|
std::vector<PDFPage> m_pages;
|
||||||
@ -407,6 +426,7 @@ private:
|
|||||||
PDFDocumentSecurityStore m_documentSecurityStore;
|
PDFDocumentSecurityStore m_documentSecurityStore;
|
||||||
std::vector<PDFArticleThread> m_threads;
|
std::vector<PDFArticleThread> m_threads;
|
||||||
PDFObject m_metadata;
|
PDFObject m_metadata;
|
||||||
|
MarkInfoFlags m_markInfoFlags = MarkInfo_None;
|
||||||
|
|
||||||
// Maps from Names dictionary
|
// Maps from Names dictionary
|
||||||
std::map<QByteArray, PDFDestination> m_destinations;
|
std::map<QByteArray, PDFDestination> m_destinations;
|
||||||
|
@ -37,12 +37,12 @@ public:
|
|||||||
|
|
||||||
/// Parses the number tree and loads its items into the array. Some errors are ignored,
|
/// Parses the number tree and loads its items into the array. Some errors are ignored,
|
||||||
/// e.g. when kid is null. Type must contain methods to load object array.
|
/// e.g. when kid is null. Type must contain methods to load object array.
|
||||||
static Objects parse(const PDFDocument* document, const PDFObject& root)
|
static Objects parse(const PDFObjectStorage* storage, const PDFObject& root)
|
||||||
{
|
{
|
||||||
Objects result;
|
Objects result;
|
||||||
|
|
||||||
// First, try to load items from the tree into the array
|
// First, try to load items from the tree into the array
|
||||||
parseImpl(result, document, root);
|
parseImpl(result, storage, root);
|
||||||
|
|
||||||
// Array may not be sorted. Sort it using comparison operator for Type.
|
// Array may not be sorted. Sort it using comparison operator for Type.
|
||||||
std::stable_sort(result.begin(), result.end());
|
std::stable_sort(result.begin(), result.end());
|
||||||
@ -51,12 +51,12 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static void parseImpl(Objects& objects, const PDFDocument* document, const PDFObject& root)
|
static void parseImpl(Objects& objects, const PDFObjectStorage* storage, const PDFObject& root)
|
||||||
{
|
{
|
||||||
if (const PDFDictionary* dictionary = document->getDictionaryFromObject(root))
|
if (const PDFDictionary* dictionary = storage->getDictionaryFromObject(root))
|
||||||
{
|
{
|
||||||
// First, load the objects into the array
|
// First, load the objects into the array
|
||||||
const PDFObject& numberedItems = document->getObject(dictionary->get("Nums"));
|
const PDFObject& numberedItems = storage->getObject(dictionary->get("Nums"));
|
||||||
if (numberedItems.isArray())
|
if (numberedItems.isArray())
|
||||||
{
|
{
|
||||||
const PDFArray* numberedItemsArray = numberedItems.getArray();
|
const PDFArray* numberedItemsArray = numberedItems.getArray();
|
||||||
@ -67,25 +67,25 @@ private:
|
|||||||
const size_t numberIndex = 2 * i;
|
const size_t numberIndex = 2 * i;
|
||||||
const size_t valueIndex = 2 * i + 1;
|
const size_t valueIndex = 2 * i + 1;
|
||||||
|
|
||||||
const PDFObject& number = document->getObject(numberedItemsArray->getItem(numberIndex));
|
const PDFObject& number = storage->getObject(numberedItemsArray->getItem(numberIndex));
|
||||||
if (!number.isInt())
|
if (!number.isInt())
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
objects.emplace_back(Type::parse(number.getInteger(), document, numberedItemsArray->getItem(valueIndex)));
|
objects.emplace_back(Type::parse(number.getInteger(), storage, numberedItemsArray->getItem(valueIndex)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then, follow the kids
|
// Then, follow the kids
|
||||||
const PDFObject& kids = document->getObject(dictionary->get("Kids"));
|
const PDFObject& kids = storage->getObject(dictionary->get("Kids"));
|
||||||
if (kids.isArray())
|
if (kids.isArray())
|
||||||
{
|
{
|
||||||
const PDFArray* kidsArray = kids.getArray();
|
const PDFArray* kidsArray = kids.getArray();
|
||||||
const size_t count = kidsArray->getCount();
|
const size_t count = kidsArray->getCount();
|
||||||
for (size_t i = 0; i < count; ++i)
|
for (size_t i = 0; i < count; ++i)
|
||||||
{
|
{
|
||||||
parseImpl(objects, document, kidsArray->getItem(i));
|
parseImpl(objects, storage, kidsArray->getItem(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -332,6 +332,12 @@ public:
|
|||||||
/// Optimizes the array for memory consumption
|
/// Optimizes the array for memory consumption
|
||||||
virtual void optimize() override;
|
virtual void optimize() override;
|
||||||
|
|
||||||
|
auto begin() { return m_objects.begin(); }
|
||||||
|
auto end() { return m_objects.end(); }
|
||||||
|
|
||||||
|
auto begin() const { return m_objects.begin(); }
|
||||||
|
auto end() const { return m_objects.end(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<PDFObject> m_objects;
|
std::vector<PDFObject> m_objects;
|
||||||
};
|
};
|
||||||
|
@ -43,6 +43,54 @@ private:
|
|||||||
PDFObjectUtils() = delete;
|
PDFObjectUtils() = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Storage, which can mark objects (for example, when we want to mark already visited objects
|
||||||
|
/// during parsing some complex structure, such as tree)
|
||||||
|
class PDFMarkedObjectsContext
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
inline explicit PDFMarkedObjectsContext() = default;
|
||||||
|
|
||||||
|
inline bool isMarked(PDFObjectReference reference) const { return m_markedReferences.count(reference); }
|
||||||
|
inline void mark(PDFObjectReference reference) { m_markedReferences.insert(reference); }
|
||||||
|
inline void unmark(PDFObjectReference reference) { m_markedReferences.erase(reference); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::set<PDFObjectReference> m_markedReferences;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Class for marking/unmarking objects functioning as guard. If not already marked, then
|
||||||
|
/// during existence of this guard, object is marked and then it is unmarked. If it is
|
||||||
|
/// already marked, then nothing happens and locked flag is set to false.
|
||||||
|
class PDFMarkedObjectsLock
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit inline PDFMarkedObjectsLock(PDFMarkedObjectsContext* context, PDFObjectReference reference) :
|
||||||
|
m_context(context),
|
||||||
|
m_reference(reference),
|
||||||
|
m_locked(!context->isMarked(reference))
|
||||||
|
{
|
||||||
|
if (m_locked)
|
||||||
|
{
|
||||||
|
context->mark(reference);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ~PDFMarkedObjectsLock()
|
||||||
|
{
|
||||||
|
if (m_locked)
|
||||||
|
{
|
||||||
|
m_context->unmark(m_reference);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit operator bool() const { return m_locked; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
PDFMarkedObjectsContext* m_context;
|
||||||
|
PDFObjectReference m_reference;
|
||||||
|
bool m_locked;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
|
||||||
#endif // PDFOBJECTUTILS_H
|
#endif // PDFOBJECTUTILS_H
|
||||||
|
@ -16,6 +16,9 @@
|
|||||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
#include "pdfstructuretree.h"
|
#include "pdfstructuretree.h"
|
||||||
|
#include "pdfdocument.h"
|
||||||
|
#include "pdfnametreeloader.h"
|
||||||
|
#include "pdfnumbertreeloader.h"
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
|
||||||
@ -160,4 +163,166 @@ PDFStructureTreeAttribute::Owner PDFStructureTreeAttributeDefinition::getOwnerFr
|
|||||||
return PDFStructureTreeAttribute::Owner::User;
|
return PDFStructureTreeAttribute::Owner::User;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PDFStructureTreeAttribute::PDFStructureTreeAttribute() :
|
||||||
|
m_definition(&s_attributeDefinitions.front()),
|
||||||
|
m_owner(Owner::Invalid),
|
||||||
|
m_revision(0),
|
||||||
|
m_namespace(),
|
||||||
|
m_value()
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns attribute type, as it is stored in the attribute definition
PDFStructureTreeAttribute::Attribute PDFStructureTreeAttribute::getType() const
{
    const PDFStructureTreeAttributeDefinition* definition = m_definition;
    Q_ASSERT(definition);

    return definition->type;
}
|
||||||
|
|
||||||
|
bool PDFStructureTreeAttribute::isInheritable() const
|
||||||
|
{
|
||||||
|
Q_ASSERT(m_definition);
|
||||||
|
return m_definition->inheritable;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads text string under key "N" from the dictionary stored as attribute value.
/// If attribute value can't be resolved to a dictionary, empty string is returned.
/// \param storage Storage (for resolving of indirect objects)
QString PDFStructureTreeAttribute::getUserPropertyName(const PDFObjectStorage* storage) const
{
    const PDFDictionary* propertyDictionary = storage->getDictionaryFromObject(m_value);
    if (!propertyDictionary)
    {
        return QString();
    }

    PDFDocumentDataLoaderDecorator loader(storage);
    return loader.readTextStringFromDictionary(propertyDictionary, "N", QString());
}
|
||||||
|
|
||||||
|
/// Returns object under key "V" from the dictionary stored as attribute value.
/// If attribute value can't be resolved to a dictionary, default-constructed
/// object is returned.
/// \param storage Storage (for resolving of indirect objects)
PDFObject PDFStructureTreeAttribute::getUserPropertyValue(const PDFObjectStorage* storage) const
{
    const PDFDictionary* propertyDictionary = storage->getDictionaryFromObject(m_value);
    if (!propertyDictionary)
    {
        return PDFObject();
    }

    return propertyDictionary->get("V");
}
|
||||||
|
|
||||||
|
/// Reads text string under key "F" from the dictionary stored as attribute value.
/// If attribute value can't be resolved to a dictionary, empty string is returned.
/// \param storage Storage (for resolving of indirect objects)
QString PDFStructureTreeAttribute::getUserPropertyFormattedValue(const PDFObjectStorage* storage) const
{
    const PDFDictionary* propertyDictionary = storage->getDictionaryFromObject(m_value);
    if (!propertyDictionary)
    {
        return QString();
    }

    PDFDocumentDataLoaderDecorator loader(storage);
    return loader.readTextStringFromDictionary(propertyDictionary, "F", QString());
}
|
||||||
|
|
||||||
|
bool PDFStructureTreeAttribute::getUserPropertyIsHidden(const PDFObjectStorage* storage) const
|
||||||
|
{
|
||||||
|
if (const PDFDictionary* value = storage->getDictionaryFromObject(m_value))
|
||||||
|
{
|
||||||
|
PDFDocumentDataLoaderDecorator loader(storage);
|
||||||
|
return loader.readBooleanFromDictionary(value, "H", false);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collects references of all parent tree entries, which have the given id.
/// Entries are searched by binary search, so they must be sorted by id.
/// If no entry has the given id, empty vector is returned.
/// \param id Id
std::vector<PDFObjectReference> PDFStructureTree::getParents(PDFInteger id) const
{
    Q_ASSERT(std::is_sorted(m_parentTreeEntries.cbegin(), m_parentTreeEntries.cend()));

    // Entries are compared by id only, so the reference of the search key is irrelevant
    const ParentTreeEntry searchKey{ id, PDFObjectReference() };
    const auto range = std::equal_range(m_parentTreeEntries.cbegin(), m_parentTreeEntries.cend(), searchKey);

    std::vector<PDFObjectReference> parents;
    parents.reserve(std::distance(range.first, range.second));
    for (auto it = range.first; it != range.second; ++it)
    {
        parents.push_back(it->reference);
    }

    return parents;
}
|
||||||
|
|
||||||
|
/// Parses structure tree from the structure tree root object. Three entries
/// of the root dictionary are read:
///  - "K": kids, either an array of structure items, or a single structure item,
///  - "IDTree": name tree, which maps ids to object references,
///  - "ParentTree": number tree, where each value is a reference or an array of references.
/// If the object can't be resolved to a dictionary, empty structure tree is returned.
/// \param storage Storage (for resolving of indirect objects)
/// \param object Structure tree root object
PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObject object)
{
    // NOTE(review): the tree is returned by value, while the constructor stores "this"
    // as the root pointer - confirm, that callers never observe a stale root pointer.
    PDFStructureTree tree;

    if (const PDFDictionary* dictionary = storage->getDictionaryFromObject(object))
    {
        PDFMarkedObjectsContext context;
        PDFObject kids = dictionary->get("K");

        if (kids.isArray())
        {
            const PDFArray* kidsArray = kids.getArray();
            for (const PDFObject& kid : *kidsArray)
            {
                PDFStructureItemPointer item = PDFStructureItem::parse(storage, kid, &context);
                if (item)
                {
                    tree.m_children.emplace_back(qMove(item));
                }
            }
        }
        else
        {
            // "K" holds a single kid. The kid itself must be parsed here, not the
            // structure tree root object (which was passed here formerly).
            PDFStructureItemPointer item = PDFStructureItem::parse(storage, kids, &context);
            if (item)
            {
                tree.m_children.emplace_back(qMove(item));
            }
        }

        if (dictionary->hasKey("IDTree"))
        {
            // Only direct references are accepted as values, anything else yields default-constructed reference
            auto loadReference = [](const PDFObjectStorage*, const PDFObject& value)
            {
                return value.isReference() ? value.getReference() : PDFObjectReference();
            };
            tree.m_idTreeMap = PDFNameTreeLoader<PDFObjectReference>::parse(storage, dictionary->get("IDTree"), loadReference);
        }

        if (dictionary->hasKey("ParentTree"))
        {
            /// Helper item type for the number tree loader. Single entry of the parent
            /// tree can hold one reference, or an array of references.
            struct ParentTreeParseEntry
            {
                PDFInteger id = 0;
                std::vector<PDFObjectReference> references;

                bool operator<(const ParentTreeParseEntry& other) const
                {
                    return id < other.id;
                }

                static ParentTreeParseEntry parse(PDFInteger id, const PDFObjectStorage*, const PDFObject& object)
                {
                    if (object.isReference())
                    {
                        return ParentTreeParseEntry{ id, { object.getReference() } };
                    }
                    else if (object.isArray())
                    {
                        // Items of the array, which are not references, are skipped
                        std::vector<PDFObjectReference> references;
                        for (const PDFObject& arrayItem : *object.getArray())
                        {
                            if (arrayItem.isReference())
                            {
                                references.emplace_back(arrayItem.getReference());
                            }
                        }

                        return ParentTreeParseEntry{ id, qMove(references) };
                    }

                    return ParentTreeParseEntry{ id, { } };
                }
            };

            // Flatten the entries: one (id, reference) pair for each reference
            auto entries = PDFNumberTreeLoader<ParentTreeParseEntry>::parse(storage, dictionary->get("ParentTree"));
            for (const auto& entry : entries)
            {
                for (const PDFObjectReference& reference : entry.references)
                {
                    tree.m_parentTreeEntries.emplace_back(ParentTreeEntry{entry.id, reference});
                }
            }

            // Entries must be sorted by id, because they are searched by binary search
            std::stable_sort(tree.m_parentTreeEntries.begin(), tree.m_parentTreeEntries.end());
        }
    }

    return tree;
}
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
@ -19,21 +19,29 @@
|
|||||||
#define PDFSTRUCTURETREE_H
|
#define PDFSTRUCTURETREE_H
|
||||||
|
|
||||||
#include "pdfobject.h"
|
#include "pdfobject.h"
|
||||||
|
#include "pdfobjectutils.h"
|
||||||
|
|
||||||
namespace pdf
|
namespace pdf
|
||||||
{
|
{
|
||||||
|
|
||||||
|
class PDFObjectStorage;
|
||||||
struct PDFStructureTreeAttributeDefinition;
|
struct PDFStructureTreeAttributeDefinition;
|
||||||
|
|
||||||
class PDFStructureTreeAttribute
|
class PDFFORQTLIBSHARED_EXPORT PDFStructureTreeAttribute
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
explicit PDFStructureTreeAttribute();
|
||||||
|
|
||||||
enum class Owner
|
enum class Owner
|
||||||
{
|
{
|
||||||
|
Invalid,
|
||||||
|
|
||||||
/// Defined for user owner
|
/// Defined for user owner
|
||||||
User,
|
User,
|
||||||
|
|
||||||
|
/// Defined for NSO (namespace owner)
|
||||||
|
NSO,
|
||||||
|
|
||||||
Layout,
|
Layout,
|
||||||
List,
|
List,
|
||||||
PrintField,
|
PrintField,
|
||||||
@ -118,13 +126,139 @@ public:
|
|||||||
LastAttribute
|
LastAttribute
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Returns attribute type
|
||||||
|
Attribute getType() const;
|
||||||
|
|
||||||
|
/// Returns attribute owner
|
||||||
|
Owner getOwner() const { return m_owner; }
|
||||||
|
|
||||||
|
/// Returns true, if attribute is inheritable
|
||||||
|
bool isInheritable() const;
|
||||||
|
|
||||||
|
/// Returns attribute revision number
|
||||||
|
PDFInteger getRevision() const { return m_revision; }
|
||||||
|
|
||||||
|
/// Returns namespace for this attribute (or empty reference, if it doesn't exist)
|
||||||
|
PDFObjectReference getNamespace() const { return m_namespace; }
|
||||||
|
|
||||||
|
/// Returns attribute value
|
||||||
|
const PDFObject& getValue() const { return m_value; }
|
||||||
|
|
||||||
|
/// Returns default attribute value
|
||||||
|
PDFObject getDefaultValue() const;
|
||||||
|
|
||||||
|
/// Returns user property name. This function should be called only for
|
||||||
|
/// user properties. If error occurs, then empty string is returned.
|
||||||
|
/// \param storage Storage (for resolving of indirect objects)
|
||||||
|
QString getUserPropertyName(const PDFObjectStorage* storage) const;
|
||||||
|
|
||||||
|
/// Returns user property value. This function should be called only for
|
||||||
|
/// user properties. If error occurs, then default-constructed object is returned.
|
||||||
|
/// \param storage Storage (for resolving of indirect objects)
|
||||||
|
PDFObject getUserPropertyValue(const PDFObjectStorage* storage) const;
|
||||||
|
|
||||||
|
/// Returns user property formatted value. This function should be called only for
|
||||||
|
/// user properties. If error occurs, then empty string is returned.
|
||||||
|
/// \param storage Storage (for resolving of indirect objects)
|
||||||
|
QString getUserPropertyFormattedValue(const PDFObjectStorage* storage) const;
|
||||||
|
|
||||||
|
/// Returns true, if user property is hidden. This function should be called only for
|
||||||
|
/// user properties. If error occurs, then false is returned.
|
||||||
|
/// \param storage Storage (for resolving of indirect objects)
|
||||||
|
bool getUserPropertyIsHidden(const PDFObjectStorage* storage) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
const PDFStructureTreeAttributeDefinition* m_definition = nullptr;
|
||||||
|
|
||||||
|
/// Attribute owner
|
||||||
|
Owner m_owner = Owner::Invalid;
|
||||||
|
|
||||||
|
/// Revision number
|
||||||
|
PDFInteger m_revision = 0;
|
||||||
|
|
||||||
|
/// Namespace
|
||||||
|
PDFObjectReference m_namespace;
|
||||||
|
|
||||||
|
/// Value of attribute. In case of standard attribute, attribute
|
||||||
|
/// value is directly stored here. In case of user attribute,
|
||||||
|
/// then user attribute dictionary is stored here.
|
||||||
|
PDFObject m_value;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PDFStructureTree
|
class PDFStructureTree;
|
||||||
|
class PDFStructureItem;
|
||||||
|
|
||||||
|
using PDFStructureItemPointer = QSharedPointer<PDFStructureItem>;
|
||||||
|
|
||||||
|
/// Root class for all structure tree items
|
||||||
|
class PDFStructureItem
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
PDFStructureTree();
|
explicit inline PDFStructureItem(PDFStructureItem* parent, PDFStructureTree* root) :
|
||||||
|
m_parent(parent),
|
||||||
|
m_root(root)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
virtual ~PDFStructureItem() = default;
|
||||||
|
|
||||||
|
virtual PDFStructureTree* asStructureTree() { return nullptr; }
|
||||||
|
virtual const PDFStructureTree* asStructureTree() const { return nullptr; }
|
||||||
|
|
||||||
|
const PDFStructureItem* getParent() const { return m_parent; }
|
||||||
|
const PDFStructureTree* getTree() const { return m_root; }
|
||||||
|
std::size_t getChildCount() const { return m_children.size(); }
|
||||||
|
const PDFStructureItem* getChild(size_t i) const { return m_children.at(i).get(); }
|
||||||
|
|
||||||
|
/// Parses structure tree item from the object. If error occurs,
|
||||||
|
/// null pointer is returned.
|
||||||
|
/// \param storage Storage
|
||||||
|
/// \param object Structure tree item object
|
||||||
|
/// \param context Parsing context
|
||||||
|
static PDFStructureItemPointer parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
PDFStructureItem* m_parent;
|
||||||
|
PDFStructureTree* m_root;
|
||||||
|
std::vector<PDFStructureItemPointer> m_children;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Structure tree, contains structure element hierarchy
|
||||||
|
class PDFStructureTree : public PDFStructureItem
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit inline PDFStructureTree() : PDFStructureItem(nullptr, this) { }
|
||||||
|
|
||||||
|
virtual PDFStructureTree* asStructureTree() override { return this; }
|
||||||
|
virtual const PDFStructureTree* asStructureTree() const override { return this; }
|
||||||
|
|
||||||
|
/// Returns parents from parent tree for given entry. If entry
|
||||||
|
/// is not found, then empty vector is returned.
|
||||||
|
/// \param id Id
|
||||||
|
std::vector<PDFObjectReference> getParents(PDFInteger id) const;
|
||||||
|
|
||||||
|
/// Parses structure tree from the object. If error occurs, empty structure
|
||||||
|
/// tree is returned.
|
||||||
|
/// \param storage Storage
|
||||||
|
/// \param object Structure tree root object
|
||||||
|
static PDFStructureTree parse(const PDFObjectStorage* storage, PDFObject object);
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
struct ParentTreeEntry
|
||||||
|
{
|
||||||
|
PDFInteger id = 0;
|
||||||
|
PDFObjectReference reference;
|
||||||
|
|
||||||
|
bool operator<(const ParentTreeEntry& other) const
|
||||||
|
{
|
||||||
|
return id < other.id;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
using ParentTreeEntries = std::vector<ParentTreeEntry>;
|
||||||
|
|
||||||
|
std::map<QByteArray, PDFObjectReference> m_idTreeMap;
|
||||||
|
ParentTreeEntries m_parentTreeEntries;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
Loading…
x
Reference in New Issue
Block a user