Structure tree parsing finalization

This commit is contained in:
Jakub Melka 2020-07-24 19:47:21 +02:00
parent 707f68fa19
commit 5d0b485d4e
2 changed files with 166 additions and 0 deletions

View File

@@ -512,6 +512,30 @@ PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObj
return tree;
}
/// Parses a structure tree item from the object. The "Type" entry of the
/// item's dictionary selects the parser: "MCR" is treated as a marked content
/// reference, "OBJR" as an object reference, and every other value as
/// a structure element. If the object is not a dictionary, nullptr is returned.
/// \param storage Storage
/// \param object Structure tree item object
/// \param context Visited items context
PDFStructureItemPointer PDFStructureItem::parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context)
{
    const PDFDictionary* itemDictionary = storage->getDictionaryFromObject(object);
    if (!itemDictionary)
    {
        return nullptr;
    }

    PDFDocumentDataLoaderDecorator loader(storage);
    const QByteArray itemTypeName = loader.readNameFromDictionary(itemDictionary, "Type");

    // NOTE(review): the reference classes declare parseMarkedContentReference()
    // and parseObjectReference() (both taking parent and root), but no
    // three-argument parse() is visible on them. If they only inherit
    // PDFStructureItem::parse, the calls below would re-enter this function
    // with the same arguments - confirm the intended targets.
    if (itemTypeName == "MCR")
    {
        return PDFStructureMarkedContentReference::parse(storage, object, context);
    }

    if (itemTypeName == "OBJR")
    {
        return PDFStructureObjectReference::parse(storage, object, context);
    }

    return PDFStructureElement::parse(storage, object, context);
}
PDFStructureItem::Type PDFStructureItem::getTypeFromName(const QByteArray& name)
{
for (const auto& item : s_structureTreeItemTypes)
@@ -655,4 +679,68 @@ PDFStructureItemPointer PDFStructureElement::parseElement(const PDFObjectStorage
return pointer;
}
/// Creates a marked content reference item from the object. The item stores
/// the "Pg", "Stm" and "StmOwn" references and the "MCID" integer (0 if it
/// cannot be read) of the object's dictionary, plus the object's own reference
/// when the object is a reference. An empty pointer is returned if the
/// PDFMarkedObjectsLock for the object evaluates to false (presumably the
/// object is already being visited - confirm) or the object is not a dictionary.
/// \param storage Storage
/// \param object Structure marked content reference
/// \param context Visited items context
/// \param parent Parent structure tree item
/// \param root Structure tree root, must not be null
PDFStructureItemPointer PDFStructureMarkedContentReference::parseMarkedContentReference(const PDFObjectStorage* storage,
                                                                                         PDFObject object,
                                                                                         PDFMarkedObjectsContext* context,
                                                                                         PDFStructureItem* parent,
                                                                                         PDFStructureTree* root)
{
    Q_ASSERT(root);

    PDFStructureItemPointer result;

    if (auto guard = PDFMarkedObjectsLock(context, object))
    {
        const PDFDictionary* dictionary = storage->getDictionaryFromObject(object);
        if (dictionary)
        {
            PDFStructureMarkedContentReference* reference = new PDFStructureMarkedContentReference(parent, root);
            result.reset(reference);

            PDFDocumentDataLoaderDecorator loader(storage);
            reference->m_pageReference = loader.readReferenceFromDictionary(dictionary, "Pg");
            reference->m_contentStreamReference = loader.readReferenceFromDictionary(dictionary, "Stm");
            reference->m_contentStreamOwnerReference = loader.readReferenceFromDictionary(dictionary, "StmOwn");
            reference->m_markedContentIdentifier = loader.readIntegerFromDictionary(dictionary, "MCID", 0);

            // Only indirect objects have a reference which can be remembered
            if (object.isReference())
            {
                reference->m_selfReference = object.getReference();
            }
        }
    }

    return result;
}
/// Creates an object reference item from the object. The item stores the "Pg"
/// and "Obj" references of the object's dictionary, plus the object's own
/// reference when the object is a reference. An empty pointer is returned if
/// the PDFMarkedObjectsLock for the object evaluates to false (presumably the
/// object is already being visited - confirm) or the object is not a dictionary.
/// \param storage Storage
/// \param object Structure object reference
/// \param context Visited items context
/// \param parent Parent structure tree item
/// \param root Structure tree root, must not be null
PDFStructureItemPointer PDFStructureObjectReference::parseObjectReference(const PDFObjectStorage* storage,
                                                                           PDFObject object,
                                                                           PDFMarkedObjectsContext* context,
                                                                           PDFStructureItem* parent,
                                                                           PDFStructureTree* root)
{
    Q_ASSERT(root);

    PDFStructureItemPointer result;

    if (auto guard = PDFMarkedObjectsLock(context, object))
    {
        const PDFDictionary* dictionary = storage->getDictionaryFromObject(object);
        if (dictionary)
        {
            PDFStructureObjectReference* reference = new PDFStructureObjectReference(parent, root);
            result.reset(reference);

            PDFDocumentDataLoaderDecorator loader(storage);
            reference->m_pageReference = loader.readReferenceFromDictionary(dictionary, "Pg");
            reference->m_objectReference = loader.readReferenceFromDictionary(dictionary, "Obj");

            // Only indirect objects have a reference which can be remembered
            if (object.isReference())
            {
                reference->m_selfReference = object.getReference();
            }
        }
    }

    return result;
}
} // namespace pdf

View File

@@ -212,6 +212,8 @@ private:
// Forward declarations of the structure tree classes
class PDFStructureTree;
class PDFStructureItem;
class PDFStructureElement;
class PDFStructureObjectReference;
class PDFStructureMarkedContentReference;
/// Shared pointer to a structure tree item
using PDFStructureItemPointer = QSharedPointer<PDFStructureItem>;
@@ -263,6 +265,12 @@ public:
/// Returns this item as a structure element. This base implementation returns
/// nullptr; presumably PDFStructureElement overrides it to return itself
/// (the override is not visible here - confirm).
virtual PDFStructureElement* asStructureElement() { return nullptr; }
virtual const PDFStructureElement* asStructureElement() const { return nullptr; }
/// Returns this item as a structure marked content reference. This base
/// implementation returns nullptr; PDFStructureMarkedContentReference
/// overrides it to return itself.
virtual PDFStructureMarkedContentReference* asStructureMarkedContentReference() { return nullptr; }
virtual const PDFStructureMarkedContentReference* asStructureMarkedContentReference() const { return nullptr; }
/// Returns this item as a structure object reference. This base
/// implementation returns nullptr; PDFStructureObjectReference
/// overrides it to return itself.
virtual PDFStructureObjectReference* asStructureObjectReference() { return nullptr; }
virtual const PDFStructureObjectReference* asStructureObjectReference() const { return nullptr; }
/// Returns the parent item stored in this item
const PDFStructureItem* getParent() const { return m_parent; }
/// Returns the structure tree root stored in this item
const PDFStructureTree* getTree() const { return m_root; }
/// Returns the reference stored as this item's own reference (the parsers
/// set it only when the parsed object is a reference)
PDFObjectReference getSelfReference() const { return m_selfReference; }
@@ -445,6 +453,76 @@ private:
QByteArray m_phoneticAlphabet;
};
/// Structure marked content reference
class PDFStructureMarkedContentReference : public PDFStructureItem
{
public:
explicit inline PDFStructureMarkedContentReference(PDFStructureItem* parent, PDFStructureTree* root) :
PDFStructureItem(parent, root)
{
}
virtual PDFStructureMarkedContentReference* asStructureMarkedContentReference() override { return this; }
virtual const PDFStructureMarkedContentReference* asStructureMarkedContentReference() const override { return this; }
const PDFObjectReference& getPageReference() const { return m_pageReference; }
const PDFObjectReference& getContentStreamReference() const { return m_contentStreamReference; }
const PDFObjectReference& getContentStreamOwnerReference() const { return m_contentStreamOwnerReference; }
PDFInteger getMarkedContentIdentifier() const { return m_markedContentIdentifier; }
/// Parses structure marked content reference from the object. If error occurs, nullptr is returned.
/// \param storage Storage
/// \param object Structure marked content reference
/// \param context Visited items context
/// \param parent Parent structure tree item
/// \param root Structure tree root
static PDFStructureItemPointer parseMarkedContentReference(const PDFObjectStorage* storage,
PDFObject object,
PDFMarkedObjectsContext* context,
PDFStructureItem* parent,
PDFStructureTree* root);
private:
PDFObjectReference m_pageReference;
PDFObjectReference m_contentStreamReference;
PDFObjectReference m_contentStreamOwnerReference;
PDFInteger m_markedContentIdentifier = 0;
};
/// Structure object reference
class PDFStructureObjectReference : public PDFStructureItem
{
public:
explicit inline PDFStructureObjectReference(PDFStructureItem* parent, PDFStructureTree* root) :
PDFStructureItem(parent, root)
{
}
virtual PDFStructureObjectReference* asStructureObjectReference() override { return this; }
virtual const PDFStructureObjectReference* asStructureObjectReference() const override { return this; }
const PDFObjectReference& getPageReference() const { return m_pageReference; }
const PDFObjectReference& getObjectReference() const { return m_objectReference; }
/// Parses structure object reference from the object. If error occurs, nullptr is returned.
/// \param storage Storage
/// \param object Structure marked content reference
/// \param context Visited items context
/// \param parent Parent structure tree item
/// \param root Structure tree root
static PDFStructureItemPointer parseObjectReference(const PDFObjectStorage* storage,
PDFObject object,
PDFMarkedObjectsContext* context,
PDFStructureItem* parent,
PDFStructureTree* root);
private:
PDFObjectReference m_pageReference;
PDFObjectReference m_objectReference;
};
} // namespace pdf
#endif // PDFSTRUCTURETREE_H