mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
Parsing structure element
This commit is contained in:
@ -67,17 +67,23 @@ public:
|
||||
/// Creates a guard which tries to mark the object given by \p reference
/// in the context. The guard is considered acquired when the reference
/// is invalid, or when the reference was not marked in the context yet.
/// \param context Context holding the marked objects
/// \param reference Reference of the object to be marked
explicit inline PDFMarkedObjectsLock(PDFMarkedObjectsContext* context, PDFObjectReference reference) :
    m_context(context),
    m_reference(reference),
    // The context is not queried at all for an invalid reference (short-circuit evaluation)
    m_locked(!reference.isValid() || !context->isMarked(reference))
{
    // Only valid references are recorded in the context
    if (m_locked && reference.isValid())
    {
        context->mark(reference);
    }
}
|
||||
|
||||
/// Creates a guard from an arbitrary object. If \p object is a reference,
/// that reference is forwarded to the reference-based constructor, otherwise
/// a default-constructed reference is forwarded.
/// \param context Context holding the marked objects
/// \param object Object, which is possibly a reference
explicit inline PDFMarkedObjectsLock(PDFMarkedObjectsContext* context, const PDFObject& object) :
    PDFMarkedObjectsLock(context, !object.isReference() ? PDFObjectReference() : object.getReference())
{
}
|
||||
|
||||
inline ~PDFMarkedObjectsLock()
|
||||
{
|
||||
if (m_locked)
|
||||
if (m_locked && m_reference.isValid())
|
||||
{
|
||||
m_context->unmark(m_reference);
|
||||
}
|
||||
|
@ -420,27 +420,7 @@ PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObj
|
||||
PDFDocumentDataLoaderDecorator loader(storage);
|
||||
|
||||
PDFMarkedObjectsContext context;
|
||||
PDFObject kids = dictionary->get("K");
|
||||
if (kids.isArray())
|
||||
{
|
||||
const PDFArray* kidsArray = kids.getArray();
|
||||
for (const PDFObject& object : *kidsArray)
|
||||
{
|
||||
PDFStructureItemPointer item = PDFStructureItem::parse(storage, object, &context);
|
||||
if (item)
|
||||
{
|
||||
tree.m_children.emplace_back(qMove(item));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PDFStructureItemPointer item = PDFStructureItem::parse(storage, object, &context);
|
||||
if (item)
|
||||
{
|
||||
tree.m_children.emplace_back(qMove(item));
|
||||
}
|
||||
}
|
||||
parseKids(storage, &tree, dictionary, &context);
|
||||
|
||||
if (dictionary->hasKey("IDTree"))
|
||||
{
|
||||
@ -545,6 +525,31 @@ PDFStructureItem::Type PDFStructureItem::getTypeFromName(const QByteArray& name)
|
||||
return Invalid;
|
||||
}
|
||||
|
||||
void PDFStructureItem::parseKids(const PDFObjectStorage* storage, PDFStructureItem* parentItem, const PDFDictionary* dictionary, PDFMarkedObjectsContext* context)
|
||||
{
|
||||
PDFObject kids = dictionary->get("K");
|
||||
if (kids.isArray())
|
||||
{
|
||||
const PDFArray* kidsArray = kids.getArray();
|
||||
for (const PDFObject& object : *kidsArray)
|
||||
{
|
||||
PDFStructureItemPointer item = PDFStructureItem::parse(storage, object, context);
|
||||
if (item)
|
||||
{
|
||||
parentItem->m_children.emplace_back(qMove(item));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!kids.isNull())
|
||||
{
|
||||
PDFStructureItemPointer item = PDFStructureItem::parse(storage, kids, context);
|
||||
if (item)
|
||||
{
|
||||
parentItem->m_children.emplace_back(qMove(item));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PDFStructureTreeNamespace PDFStructureTreeNamespace::parse(const PDFObjectStorage* storage, PDFObject object)
|
||||
{
|
||||
PDFStructureTreeNamespace result;
|
||||
@ -566,4 +571,88 @@ PDFStructureTreeNamespace PDFStructureTreeNamespace::parse(const PDFObjectStorag
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Parses structure element from the object. An empty pointer is returned
/// if the object can't be locked in the context, or if no dictionary
/// can be obtained from the object.
/// \param storage Storage
/// \param object Structure element object
/// \param context Visited elements context
/// \param parent Parent structure tree item
/// \param root Structure tree root
PDFStructureItemPointer PDFStructureElement::parseElement(const PDFObjectStorage* storage,
                                                          PDFObject object,
                                                          PDFMarkedObjectsContext* context,
                                                          PDFStructureItem* parent,
                                                          PDFStructureTree* root)
{
    Q_ASSERT(root);

    PDFStructureItemPointer result;

    // The guard must stay alive until the kids are parsed
    PDFMarkedObjectsLock guard(context, object);
    if (!guard)
    {
        return result;
    }

    const PDFDictionary* dictionary = storage->getDictionaryFromObject(object);
    if (!dictionary)
    {
        return result;
    }

    PDFStructureElement* element = new PDFStructureElement(parent, root);
    result.reset(element);

    if (object.isReference())
    {
        element->m_selfReference = object.getReference();
    }

    PDFDocumentDataLoaderDecorator loader(storage);

    // Basic identification of the element
    element->m_typeName = loader.readNameFromDictionary(dictionary, "S");
    element->m_standardType = root->getTypeFromRole(element->m_typeName);
    element->m_id = loader.readStringFromDictionary(dictionary, "ID");
    element->m_references = loader.readReferenceArrayFromDictionary(dictionary, "Ref");
    element->m_pageReference = loader.readReferenceFromDictionary(dictionary, "Pg");

    // Attributes: class attributes come first, then the attributes
    // from entry "A"; the whole list is reversed at the end.
    std::vector<PDFStructureTreeAttribute> collectedAttributes;

    auto appendClassAttributes = [&collectedAttributes, root](const QByteArray& className)
    {
        const std::vector<PDFStructureTreeAttribute>& classAttributes = root->getClassAttributes(className);
        collectedAttributes.insert(collectedAttributes.end(), classAttributes.begin(), classAttributes.end());
    };

    PDFObject classEntry = storage->getObject(dictionary->get("C"));
    if (classEntry.isName())
    {
        // Single class name
        appendClassAttributes(classEntry.getString());
    }
    else if (classEntry.isArray())
    {
        // Index of the first attribute, which has not been assigned a revision yet
        size_t firstWithoutRevision = collectedAttributes.size();

        for (PDFObject entry : *classEntry.getArray())
        {
            entry = storage->getObject(entry);

            if (entry.isInt())
            {
                // Revision number - it is applied to all attributes appended
                // since the previous revision number
                const PDFInteger revision = entry.getInteger();
                while (firstWithoutRevision < collectedAttributes.size())
                {
                    collectedAttributes[firstWithoutRevision].setRevision(revision);
                    ++firstWithoutRevision;
                }
            }
            else if (entry.isName())
            {
                // Class name
                appendClassAttributes(entry.getString());
            }
        }
    }

    PDFStructureTreeAttribute::parseAttributes(storage, dictionary->get("A"), collectedAttributes);
    std::reverse(collectedAttributes.begin(), collectedAttributes.end());
    element->m_attributes = qMove(collectedAttributes);

    element->m_revision = loader.readIntegerFromDictionary(dictionary, "R", 0);

    // Text entries
    element->m_texts[Title] = loader.readTextStringFromDictionary(dictionary, "T", QString());
    element->m_texts[Language] = loader.readTextStringFromDictionary(dictionary, "Lang", QString());
    element->m_texts[AlternativeDescription] = loader.readTextStringFromDictionary(dictionary, "Alt", QString());
    element->m_texts[ExpandedForm] = loader.readTextStringFromDictionary(dictionary, "E", QString());
    element->m_texts[ActualText] = loader.readTextStringFromDictionary(dictionary, "ActualText", QString());
    element->m_texts[Phoneme] = loader.readTextStringFromDictionary(dictionary, "Phoneme", QString());

    element->m_associatedFiles = loader.readObjectList<PDFFileSpecification>(dictionary->get("AF"));
    element->m_namespace = loader.readReferenceFromDictionary(dictionary, "NS");
    element->m_phoneticAlphabet = loader.readNameFromDictionary(dictionary, "PhoneticAlphabet");

    // Children are parsed last, while the guard is still held
    parseKids(storage, element, dictionary, context);

    return result;
}
|
||||
|
||||
} // namespace pdf
|
||||
|
@ -211,6 +211,7 @@ private:
|
||||
|
||||
class PDFStructureTree;
|
||||
class PDFStructureItem;
|
||||
class PDFStructureElement;
|
||||
|
||||
using PDFStructureItemPointer = QSharedPointer<PDFStructureItem>;
|
||||
|
||||
@ -259,8 +260,12 @@ public:
|
||||
virtual PDFStructureTree* asStructureTree() { return nullptr; }
|
||||
virtual const PDFStructureTree* asStructureTree() const { return nullptr; }
|
||||
|
||||
virtual PDFStructureElement* asStructureElement() { return nullptr; }
|
||||
virtual const PDFStructureElement* asStructureElement() const { return nullptr; }
|
||||
|
||||
const PDFStructureItem* getParent() const { return m_parent; }
|
||||
const PDFStructureTree* getTree() const { return m_root; }
|
||||
PDFObjectReference getSelfReference() const { return m_selfReference; }
|
||||
std::size_t getChildCount() const { return m_children.size(); }
|
||||
const PDFStructureItem* getChild(size_t i) const { return m_children.at(i).get(); }
|
||||
|
||||
@ -276,8 +281,20 @@ public:
|
||||
static Type getTypeFromName(const QByteArray& name);
|
||||
|
||||
protected:
|
||||
/// Parses kids of the item. Invalid items aren't added
|
||||
/// to the kid list.
|
||||
/// \param storage Storage
|
||||
/// \param parentItem Parent item, where children are inserted
|
||||
/// \param dictionary Dictionary
|
||||
/// \param context Context
|
||||
static void parseKids(const PDFObjectStorage* storage,
|
||||
PDFStructureItem* parentItem,
|
||||
const PDFDictionary* dictionary,
|
||||
PDFMarkedObjectsContext* context);
|
||||
|
||||
PDFStructureItem* m_parent;
|
||||
PDFStructureTree* m_root;
|
||||
PDFObjectReference m_selfReference;
|
||||
std::vector<PDFStructureItemPointer> m_children;
|
||||
};
|
||||
|
||||
@ -366,6 +383,68 @@ private:
|
||||
std::vector<PDFFileSpecification> m_associatedFiles;
|
||||
};
|
||||
|
||||
/// Structure element
|
||||
class PDFStructureElement : public PDFStructureItem
|
||||
{
|
||||
public:
|
||||
explicit inline PDFStructureElement(PDFStructureItem* parent, PDFStructureTree* root) :
|
||||
PDFStructureItem(parent, root)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
enum StringValue
|
||||
{
|
||||
Title,
|
||||
Language,
|
||||
AlternativeDescription,
|
||||
ExpandedForm,
|
||||
ActualText,
|
||||
Phoneme,
|
||||
LastStringValue
|
||||
};
|
||||
|
||||
virtual PDFStructureElement* asStructureElement() override { return this; }
|
||||
virtual const PDFStructureElement* asStructureElement() const override { return this; }
|
||||
|
||||
const QByteArray& getTypeName() const { return m_typeName; }
|
||||
Type getStandardType() const { return m_standardType; }
|
||||
const QByteArray& getId() const { return m_id; }
|
||||
const std::vector<PDFObjectReference>& getReferences() const { return m_references; }
|
||||
const PDFObjectReference& getPageReference() const { return m_pageReference; }
|
||||
const std::vector<PDFStructureTreeAttribute>& getAttributes() const { return m_attributes; }
|
||||
PDFInteger getRevision() const { return m_revision; }
|
||||
const QString& getText(StringValue stringValue) const { return m_texts.at(stringValue); }
|
||||
const std::vector<PDFFileSpecification>& getAssociatedFiles() const { return m_associatedFiles; }
|
||||
const PDFObjectReference& getNamespace() const { return m_namespace; }
|
||||
const QByteArray& getPhoneticAlphabet() const { return m_phoneticAlphabet; }
|
||||
|
||||
/// Parses structure element from the object. If error occurs, nullptr is returned.
|
||||
/// \param storage Storage
|
||||
/// \param object Structure element object
|
||||
/// \param context Visited elements context
|
||||
/// \param parent Parent structure tree item
|
||||
/// \param root Structure tree root
|
||||
static PDFStructureItemPointer parseElement(const PDFObjectStorage* storage,
|
||||
PDFObject object,
|
||||
PDFMarkedObjectsContext* context,
|
||||
PDFStructureItem* parent,
|
||||
PDFStructureTree* root);
|
||||
|
||||
private:
|
||||
QByteArray m_typeName;
|
||||
Type m_standardType;
|
||||
QByteArray m_id;
|
||||
std::vector<PDFObjectReference> m_references;
|
||||
PDFObjectReference m_pageReference;
|
||||
std::vector<PDFStructureTreeAttribute> m_attributes;
|
||||
PDFInteger m_revision = 0;
|
||||
std::array<QString, LastStringValue> m_texts;
|
||||
std::vector<PDFFileSpecification> m_associatedFiles;
|
||||
PDFObjectReference m_namespace;
|
||||
QByteArray m_phoneticAlphabet;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFSTRUCTURETREE_H
|
||||
|
Reference in New Issue
Block a user