mirror of https://github.com/JakubMelka/PDF4QT.git
Catalog update
This commit is contained in:
parent
5d0b485d4e
commit
e734946ae9
|
@ -207,6 +207,11 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
|
|||
catalogObject.m_markInfoFlags.setFlag(MarkInfo_Suspects, loader.readBooleanFromDictionary(markInfoDictionary, "Suspects", false));
|
||||
}
|
||||
|
||||
catalogObject.m_structureTreeRoot = catalogDictionary->get("StructTreeRoot");
|
||||
catalogObject.m_language = loader.readTextStringFromDictionary(catalogDictionary, "Lang", QString());
|
||||
catalogObject.m_webCaptureInfo = PDFWebCaptureInfo::parse(catalogDictionary->get("SpiderInfo"), &document->getStorage());
|
||||
catalogObject.m_outputIntents = loader.readObjectList<PDFOutputIntent>(catalogDictionary->get("OutputIntents"));
|
||||
|
||||
return catalogObject;
|
||||
}
|
||||
|
||||
|
@ -766,4 +771,57 @@ PDFArticleThread PDFArticleThread::parse(const PDFObjectStorage* storage, const
|
|||
return result;
|
||||
}
|
||||
|
||||
/// Builds web capture info from the given object. If the object does not
/// resolve to a dictionary, a default constructed (empty) info is returned.
/// \param object Object holding the web capture ("SpiderInfo") dictionary
/// \param storage Storage used to resolve the object and to read its entries
PDFWebCaptureInfo PDFWebCaptureInfo::parse(const PDFObject& object, const PDFObjectStorage* storage)
{
    PDFWebCaptureInfo info;

    const PDFDictionary* spiderDictionary = storage->getDictionaryFromObject(object);
    if (!spiderDictionary)
    {
        // Nothing to read, hand back the empty info
        return info;
    }

    PDFDocumentDataLoaderDecorator loader(storage);

    // NOTE(review): the web capture information dictionary in ISO 32000 appears
    // to define "V" as a number; confirm that reading it as a name is intended.
    info.m_version = loader.readNameFromDictionary(spiderDictionary, "V");
    info.m_commands = loader.readReferenceArrayFromDictionary(spiderDictionary, "C");

    return info;
}
|
||||
|
||||
/// Builds output intent from the given object. If the object does not
/// resolve to a dictionary, a default constructed (empty) intent is returned.
/// \param storage Storage used to resolve the object and to read its entries
/// \param object Object holding the output intent dictionary
PDFOutputIntent PDFOutputIntent::parse(const PDFObjectStorage* storage, const PDFObject& object)
{
    PDFOutputIntent intent;

    const PDFDictionary* intentDictionary = storage->getDictionaryFromObject(object);
    if (!intentDictionary)
    {
        // Nothing to read, hand back the empty intent
        return intent;
    }

    PDFDocumentDataLoaderDecorator loader(storage);

    // Name and text entries
    intent.m_subtype = loader.readNameFromDictionary(intentDictionary, "S");
    intent.m_outputCondition = loader.readTextStringFromDictionary(intentDictionary, "OutputCondition", QString());
    intent.m_outputConditionIdentifier = loader.readTextStringFromDictionary(intentDictionary, "OutputConditionIdentifier", QString());
    intent.m_registryName = loader.readTextStringFromDictionary(intentDictionary, "RegistryName", QString());
    intent.m_info = loader.readTextStringFromDictionary(intentDictionary, "Info", QString());

    // Entries stored as raw objects; only the profile reference is parsed further
    intent.m_destOutputProfile = intentDictionary->get("DestOutputProfile");
    intent.m_destOutputProfileRef = PDFOutputIntentICCProfileInfo::parse(intentDictionary->get("DestOutputProfileRef"), storage);
    intent.m_mixingHints = intentDictionary->get("MixingHints");
    intent.m_spectralData = intentDictionary->get("SpectralData");

    return intent;
}
|
||||
|
||||
/// Builds ICC profile info from the given object. If the object does not
/// resolve to a dictionary, a default constructed (empty) info is returned.
/// \param object Object holding the ICC profile info dictionary
/// \param storage Storage used to resolve the object and to read its entries
PDFOutputIntentICCProfileInfo PDFOutputIntentICCProfileInfo::parse(const PDFObject& object, const PDFObjectStorage* storage)
{
    PDFOutputIntentICCProfileInfo profileInfo;

    const PDFDictionary* profileDictionary = storage->getDictionaryFromObject(object);
    if (!profileDictionary)
    {
        // Nothing to read, hand back the empty info
        return profileInfo;
    }

    PDFDocumentDataLoaderDecorator loader(storage);

    profileInfo.m_checkSum = loader.readStringFromDictionary(profileDictionary, "CheckSum");
    profileInfo.m_colorants = loader.readNameArrayFromDictionary(profileDictionary, "ColorantTable");
    profileInfo.m_iccVersion = loader.readStringFromDictionary(profileDictionary, "ICCVersion");
    profileInfo.m_signature = loader.readStringFromDictionary(profileDictionary, "ProfileCS");
    profileInfo.m_profileName = loader.readTextStringFromDictionary(profileDictionary, "ProfileName", QString());

    // "URLs" entry is kept as a raw object, it is not interpreted here
    profileInfo.m_urls = profileDictionary->get("URLs");

    return profileInfo;
}
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -322,6 +322,87 @@ private:
|
|||
Extensions m_extensions;
|
||||
};
|
||||
|
||||
/// Web capture info
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFWebCaptureInfo
|
||||
{
|
||||
public:
|
||||
explicit PDFWebCaptureInfo() = default;
|
||||
|
||||
const QByteArray& getVersion() const { return m_version; }
|
||||
const std::vector<PDFObjectReference>& getCommands() const { return m_commands; }
|
||||
|
||||
/// Parses web capture info from catalog dictionary. If object cannot be parsed, or error occurs,
|
||||
/// then empty object is returned, no exception is thrown.
|
||||
/// \param object Spider info dictionary
|
||||
/// \param storage Storage
|
||||
static PDFWebCaptureInfo parse(const PDFObject& object, const PDFObjectStorage* storage);
|
||||
|
||||
private:
|
||||
QByteArray m_version;
|
||||
std::vector<PDFObjectReference> m_commands;
|
||||
};
|
||||
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFOutputIntentICCProfileInfo
|
||||
{
|
||||
public:
|
||||
explicit PDFOutputIntentICCProfileInfo() = default;
|
||||
|
||||
const QByteArray& getChecksum() const { return m_checkSum; }
|
||||
const std::vector<QByteArray>& getColorants() const { return m_colorants; }
|
||||
const QByteArray& getIccVersion() const { return m_iccVersion; }
|
||||
const QByteArray& getSignature() const { return m_signature; }
|
||||
const QString& getProfileName() const { return m_profileName; }
|
||||
const PDFObject& getUrls() const { return m_urls; }
|
||||
|
||||
/// Parses icc profile info from object. If object cannot be parsed, or error occurs,
|
||||
/// then empty object is returned, no exception is thrown.
|
||||
/// \param object Output intent dictionary
|
||||
/// \param storage Storage
|
||||
static PDFOutputIntentICCProfileInfo parse(const PDFObject& object, const PDFObjectStorage* storage);
|
||||
|
||||
private:
|
||||
QByteArray m_checkSum;
|
||||
std::vector<QByteArray> m_colorants;
|
||||
QByteArray m_iccVersion;
|
||||
QByteArray m_signature;
|
||||
QString m_profileName;
|
||||
PDFObject m_urls;
|
||||
};
|
||||
|
||||
/// Output intent
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFOutputIntent
|
||||
{
|
||||
public:
|
||||
explicit PDFOutputIntent() = default;
|
||||
|
||||
const QByteArray& getSubtype() const { return m_subtype; }
|
||||
const QString& getOutputCondition() const { return m_outputCondition; }
|
||||
const QString& getOutputConditionIdentifier() const { return m_outputConditionIdentifier; }
|
||||
const QString& getRegistryName() const { return m_registryName; }
|
||||
const QString& getInfo() const { return m_info; }
|
||||
const PDFObject& getOutputProfile() const { return m_destOutputProfile; }
|
||||
const PDFOutputIntentICCProfileInfo& getOutputProfileInfo() const { return m_destOutputProfileRef; }
|
||||
const PDFObject& getMixingHints() const { return m_mixingHints; }
|
||||
const PDFObject& getSpectralData() const { return m_spectralData; }
|
||||
|
||||
/// Parses output intent from object. If object cannot be parsed, or error occurs,
|
||||
/// then empty object is returned, no exception is thrown.
|
||||
/// \param object Output intent dictionary
|
||||
/// \param storage Storage
|
||||
static PDFOutputIntent parse(const PDFObjectStorage* storage, const PDFObject& object);
|
||||
|
||||
private:
|
||||
QByteArray m_subtype;
|
||||
QString m_outputCondition;
|
||||
QString m_outputConditionIdentifier;
|
||||
QString m_registryName;
|
||||
QString m_info;
|
||||
PDFObject m_destOutputProfile;
|
||||
PDFOutputIntentICCProfileInfo m_destOutputProfileRef;
|
||||
PDFObject m_mixingHints;
|
||||
PDFObject m_spectralData;
|
||||
};
|
||||
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFCatalog
|
||||
{
|
||||
public:
|
||||
|
@ -379,6 +460,10 @@ public:
|
|||
const std::vector<PDFArticleThread>& getArticleThreads() const { return m_threads; }
|
||||
const PDFAction* getDocumentAction(DocumentAction action) const { return m_documentActions.at(action).get(); }
|
||||
const PDFObject& getMetadata() const { return m_metadata; }
|
||||
const PDFObject& getStructureTreeRoot() const { return m_structureTreeRoot; }
|
||||
const QString& getLanguage() const { return m_language; }
|
||||
const PDFWebCaptureInfo& getWebCaptureInfo() const { return m_webCaptureInfo; }
|
||||
const std::vector<PDFOutputIntent>& getOutputIntents() const { return m_outputIntents; }
|
||||
|
||||
/// Is document marked to have structure tree conforming to tagged document convention?
|
||||
bool isLogicalStructureMarked() const { return m_markInfoFlags.testFlag(MarkInfo_Marked); }
|
||||
|
@ -422,11 +507,15 @@ private:
|
|||
PageMode m_pageMode = PageMode::UseNone;
|
||||
QByteArray m_baseURI;
|
||||
PDFObject m_formObject;
|
||||
PDFObject m_structureTreeRoot;
|
||||
PDFDeveloperExtensions m_extensions;
|
||||
PDFDocumentSecurityStore m_documentSecurityStore;
|
||||
std::vector<PDFArticleThread> m_threads;
|
||||
PDFObject m_metadata;
|
||||
MarkInfoFlags m_markInfoFlags = MarkInfo_None;
|
||||
QString m_language;
|
||||
PDFWebCaptureInfo m_webCaptureInfo;
|
||||
std::vector<PDFOutputIntent> m_outputIntents;
|
||||
|
||||
// Maps from Names dictionary
|
||||
std::map<QByteArray, PDFDestination> m_destinations;
|
||||
|
|
|
@ -45,6 +45,12 @@ struct PDFStructureTreeAttributeDefinition
|
|||
/// \param name Attribute name
|
||||
static const PDFStructureTreeAttributeDefinition* getDefinition(const QByteArray& name);
|
||||
|
||||
/// Returns attribute definition for given attribute type. This function
|
||||
/// always returns valid pointer. For unknown attribute, it returns
|
||||
/// user attribute definition.
|
||||
/// \param type Attribute type
|
||||
static const PDFStructureTreeAttributeDefinition* getDefinition(PDFStructureTreeAttribute::Attribute type);
|
||||
|
||||
/// Returns owner from string. If owner is not valid, then invalid
|
||||
/// owner is returned.
|
||||
/// \param string String
|
||||
|
@ -214,6 +220,20 @@ const PDFStructureTreeAttributeDefinition* PDFStructureTreeAttributeDefinition::
|
|||
return &s_attributeDefinitions.front();
|
||||
}
|
||||
|
||||
/// Looks up attribute definition by attribute type. The first definition
/// with matching type wins; if none matches, the first entry of the
/// definition table (expected to be the user attribute) is returned,
/// so the result is never nullptr.
/// \param type Attribute type
const PDFStructureTreeAttributeDefinition* PDFStructureTreeAttributeDefinition::getDefinition(PDFStructureTreeAttribute::Attribute type)
{
    const PDFStructureTreeAttributeDefinition* found = nullptr;

    for (const PDFStructureTreeAttributeDefinition& candidate : s_attributeDefinitions)
    {
        if (candidate.type == type)
        {
            found = &candidate;
            break;
        }
    }

    if (!found)
    {
        // Fallback relies on the user attribute being the first table entry
        Q_ASSERT(s_attributeDefinitions.front().type == PDFStructureTreeAttribute::Attribute::User);
        found = &s_attributeDefinitions.front();
    }

    return found;
}
|
||||
|
||||
PDFStructureTreeAttribute::Owner PDFStructureTreeAttributeDefinition::getOwnerFromString(const QByteArray& string)
|
||||
{
|
||||
for (const auto& item : s_ownerDefinitions)
|
||||
|
@ -512,7 +532,7 @@ PDFStructureTree PDFStructureTree::parse(const PDFObjectStorage* storage, PDFObj
|
|||
return tree;
|
||||
}
|
||||
|
||||
PDFStructureItemPointer PDFStructureItem::parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context)
|
||||
PDFStructureItemPointer PDFStructureItem::parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context, PDFStructureItem* parent)
|
||||
{
|
||||
if (const PDFDictionary* dictionary = storage->getDictionaryFromObject(object))
|
||||
{
|
||||
|
@ -521,15 +541,15 @@ PDFStructureItemPointer PDFStructureItem::parse(const PDFObjectStorage* storage,
|
|||
|
||||
if (typeName == "MCR")
|
||||
{
|
||||
return PDFStructureMarkedContentReference::parse(storage, object, context);
|
||||
return PDFStructureMarkedContentReference::parseMarkedContentReference(storage, object, context, parent, parent->getTree());
|
||||
}
|
||||
else if (typeName == "OBJR")
|
||||
{
|
||||
return PDFStructureObjectReference::parse(storage, object, context);
|
||||
return PDFStructureObjectReference::parseObjectReference(storage, object, context, parent, parent->getTree());
|
||||
}
|
||||
else
|
||||
{
|
||||
return PDFStructureElement::parse(storage, object, context);
|
||||
return PDFStructureElement::parseElement(storage, object, context, parent, parent->getTree());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -557,7 +577,7 @@ void PDFStructureItem::parseKids(const PDFObjectStorage* storage, PDFStructureIt
|
|||
const PDFArray* kidsArray = kids.getArray();
|
||||
for (const PDFObject& object : *kidsArray)
|
||||
{
|
||||
PDFStructureItemPointer item = PDFStructureItem::parse(storage, object, context);
|
||||
PDFStructureItemPointer item = PDFStructureItem::parse(storage, object, context, parentItem);
|
||||
if (item)
|
||||
{
|
||||
parentItem->m_children.emplace_back(qMove(item));
|
||||
|
@ -566,7 +586,7 @@ void PDFStructureItem::parseKids(const PDFObjectStorage* storage, PDFStructureIt
|
|||
}
|
||||
else if (!kids.isNull())
|
||||
{
|
||||
PDFStructureItemPointer item = PDFStructureItem::parse(storage, kids, context);
|
||||
PDFStructureItemPointer item = PDFStructureItem::parse(storage, kids, context, parentItem);
|
||||
if (item)
|
||||
{
|
||||
parentItem->m_children.emplace_back(qMove(item));
|
||||
|
@ -595,6 +615,56 @@ PDFStructureTreeNamespace PDFStructureTreeNamespace::parse(const PDFObjectStorag
|
|||
return result;
|
||||
}
|
||||
|
||||
/// Searches for attribute of given type. First, attribute with requested
/// owner is searched; if it is not found and a specific owner was requested,
/// the search is repeated accepting any owner. Returns nullptr, if nothing
/// is found.
/// \param attribute Attribute
/// \param owner Owner
/// \param policy Revision number policy
const PDFStructureTreeAttribute* PDFStructureElement::findAttribute(Attribute attribute,
                                                                    AttributeOwner owner,
                                                                    RevisionPolicy policy) const
{
    const PDFStructureTreeAttributeDefinition* definition = PDFStructureTreeAttributeDefinition::getDefinition(attribute);

    const PDFStructureTreeAttribute* found = findAttributeImpl(attribute, owner, policy, definition);

    // Invalid owner acts as a wildcard, so the second pass makes sense
    // only when the first one was restricted to a specific owner
    const bool canRetryWithAnyOwner = (owner != AttributeOwner::Invalid);
    if (!found && canRetryWithAnyOwner)
    {
        found = findAttributeImpl(attribute, AttributeOwner::Invalid, policy, definition);
    }

    return found;
}
|
||||
|
||||
/// Searches for attribute in this element and, for inheritable attributes,
/// in the chain of parent structure elements. User attributes are never
/// searched. Invalid owner matches any owner; revision is compared with
/// the revision of the element being examined, unless policy says to ignore it.
/// \param attribute Attribute
/// \param owner Owner
/// \param policy Revision number policy
/// \param definition Definition of the searched attribute
const PDFStructureTreeAttribute* PDFStructureElement::findAttributeImpl(Attribute attribute,
                                                                        AttributeOwner owner,
                                                                        RevisionPolicy policy,
                                                                        const PDFStructureTreeAttributeDefinition* definition) const
{
    // We do not search for user properties
    if (attribute == Attribute::User)
    {
        return nullptr;
    }

    // Walk from this element towards the root, one structure element at a time
    const PDFStructureElement* element = this;
    while (element)
    {
        for (const PDFStructureTreeAttribute& candidate : element->m_attributes)
        {
            if (candidate.getType() != attribute)
            {
                continue;
            }

            if (candidate.getOwner() != owner && owner != AttributeOwner::Invalid)
            {
                continue;
            }

            if (candidate.getRevision() != element->m_revision && policy != RevisionPolicy::Ignore)
            {
                continue;
            }

            return &candidate;
        }

        // Only inheritable attributes are looked up in the parent
        if (!definition->inheritable || !element->m_parent)
        {
            break;
        }

        // Stops the walk, if parent is not a structure element
        element = element->m_parent->asStructureElement();
    }

    return nullptr;
}
|
||||
|
||||
PDFStructureItemPointer PDFStructureElement::parseElement(const PDFObjectStorage* storage,
|
||||
PDFObject object,
|
||||
PDFMarkedObjectsContext* context,
|
||||
|
|
|
@ -272,7 +272,9 @@ public:
|
|||
virtual const PDFStructureObjectReference* asStructureObjectReference() const { return nullptr; }
|
||||
|
||||
const PDFStructureItem* getParent() const { return m_parent; }
|
||||
PDFStructureItem* getParent() { return m_parent; }
|
||||
const PDFStructureTree* getTree() const { return m_root; }
|
||||
PDFStructureTree* getTree() { return m_root; }
|
||||
PDFObjectReference getSelfReference() const { return m_selfReference; }
|
||||
std::size_t getChildCount() const { return m_children.size(); }
|
||||
const PDFStructureItem* getChild(size_t i) const { return m_children.at(i).get(); }
|
||||
|
@ -282,7 +284,8 @@ public:
|
|||
/// \param storage Storage
|
||||
/// \param object Structure tree item object
|
||||
/// \param context Parsing context
|
||||
static PDFStructureItemPointer parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context);
|
||||
/// \param parent Parent item
|
||||
static PDFStructureItemPointer parse(const PDFObjectStorage* storage, PDFObject object, PDFMarkedObjectsContext* context, PDFStructureItem* parent);
|
||||
|
||||
/// Get structure tree type from name
|
||||
/// \param name Name
|
||||
|
@ -427,6 +430,23 @@ public:
|
|||
const PDFObjectReference& getNamespace() const { return m_namespace; }
|
||||
const QByteArray& getPhoneticAlphabet() const { return m_phoneticAlphabet; }
|
||||
|
||||
enum class RevisionPolicy
|
||||
{
|
||||
Ignore,
|
||||
Match
|
||||
};
|
||||
|
||||
using Attribute = PDFStructureTreeAttribute::Attribute;
|
||||
using AttributeOwner = PDFStructureTreeAttribute::Owner;
|
||||
|
||||
/// Finds attribute matching given owner and revision policy. If attribute with given
|
||||
/// owner is not found, then any matching attribute is returned. If none is found,
|
||||
/// then nullptr is returned.
|
||||
/// \param attribute Attribute
|
||||
/// \param owner Owner
|
||||
/// \param policy Revision number policy
|
||||
const PDFStructureTreeAttribute* findAttribute(Attribute attribute, AttributeOwner owner, RevisionPolicy policy) const;
|
||||
|
||||
/// Parses structure element from the object. If error occurs, nullptr is returned.
|
||||
/// \param storage Storage
|
||||
/// \param object Structure element object
|
||||
|
@ -440,6 +460,18 @@ public:
|
|||
PDFStructureTree* root);
|
||||
|
||||
private:
|
||||
/// Finds attribute matching given owner and revision policy. If attribute with given
|
||||
/// owner is not found, then any matching attribute is returned. If none is found,
|
||||
/// then nullptr is returned.
|
||||
/// \param attribute Attribute
|
||||
/// \param owner Owner
|
||||
/// \param policy Revision number policy
|
||||
/// \param definition Definition
|
||||
const PDFStructureTreeAttribute* findAttributeImpl(Attribute attribute,
|
||||
AttributeOwner owner,
|
||||
RevisionPolicy policy,
|
||||
const PDFStructureTreeAttributeDefinition* definition) const;
|
||||
|
||||
QByteArray m_typeName;
|
||||
Type m_standardType;
|
||||
QByteArray m_id;
|
||||
|
|
Loading…
Reference in New Issue