Object classifier

This commit is contained in:
Jakub Melka
2021-06-11 19:01:18 +02:00
parent 2745c7828c
commit eb5f904842
9 changed files with 428 additions and 40 deletions

View File

@ -20,23 +20,30 @@
#include "pdfobject.h"
#include <QtCore>
#include <set>
#include <vector>
namespace pdf
{
class PDFObjectStorage;
class PDFDocument;
/// Utilities for manipulation with objects
class PDFObjectUtils
{
public:
/// Returns list of references referenced by \p objects. So, all references, which are present
/// Returns a list of references referenced by \p objects. So, all references, which are present
/// in objects, appear in the result set, including objects, which are referenced by referenced
/// objects (so, transitive closure above reference graph is returned).
/// \param objects Objects
/// \param storage Storage
static std::set<PDFObjectReference> getReferences(const std::vector<PDFObject>& objects, const PDFObjectStorage& storage);
/// Returns a list of references directly referenced from object. References itself are not followed.
static std::set<PDFObjectReference> getDirectReferences(const PDFObject& object);
static PDFObject replaceReferences(const PDFObject& object, const std::map<PDFObjectReference, PDFObjectReference>& referenceMapping);
private:
@ -97,6 +104,67 @@ private:
bool m_locked;
};
/// Classifies objects according to their type. Some heuristic is used
/// when object type is missing or document is not well-formed.
class Pdf4QtLIBSHARED_EXPORT PDFObjectClassifier
{
public:
inline PDFObjectClassifier() = default;
/// Performs object classification on a document. Old classification
/// is being cleared.
/// \param document Document
void classify(const PDFDocument* document);
enum Type : uint32_t
{
None = 0x00000000,
Page = 0x00000001,
ContentStream = 0x00000002,
GraphicState = 0x00000004,
ColorSpace = 0x00000008,
Pattern = 0x00000010,
Shading = 0x00000020,
Image = 0x00000040,
Form = 0x00000080,
Font = 0x00000100,
Action = 0x00000200,
Annotation = 0x00000400
};
Q_DECLARE_FLAGS(Types, Type)
/// Returns true, if object with given reference exists
/// and was classified.
/// \param reference Reference
bool hasObject(PDFObjectReference reference) const;
/// Returns true, if any object with given type is present in a document
/// \param type Object type
bool hasType(Type type) const { return m_allTypesUsed.testFlag(type); }
/// Returns a list of objects with a given type
/// \param type Type
std::vector<PDFObjectReference> getObjectsByType(Type type) const;
private:
struct Classification
{
PDFObjectReference reference;
Types types = None;
};
/// Marks object with a given type
void mark(PDFObjectReference reference, Type type);
/// Marks objects in dictionary with a given type
void markDictionary(const PDFDocument* document, PDFObject object, Type type);
std::vector<Classification> m_classification;
Types m_allTypesUsed;
};
} // namespace pdf
#endif // PDFOBJECTUTILS_H