mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
Tool for gathering info about document
This commit is contained in:
@@ -54,6 +54,7 @@ SOURCES += \
|
||||
sources/pdffile.cpp \
|
||||
sources/pdfform.cpp \
|
||||
sources/pdfitemmodels.cpp \
|
||||
sources/pdfjavascriptscanner.cpp \
|
||||
sources/pdfjbig2decoder.cpp \
|
||||
sources/pdfmultimedia.cpp \
|
||||
sources/pdfobject.cpp \
|
||||
@@ -108,6 +109,7 @@ HEADERS += \
|
||||
sources/pdffile.h \
|
||||
sources/pdfform.h \
|
||||
sources/pdfitemmodels.h \
|
||||
sources/pdfjavascriptscanner.h \
|
||||
sources/pdfjbig2decoder.h \
|
||||
sources/pdfmeshqualitysettings.h \
|
||||
sources/pdfmultimedia.h \
|
||||
|
@@ -487,7 +487,7 @@ public:
|
||||
const PDFRendition* getRendition() const { return m_rendition.has_value() ? &m_rendition.value() : nullptr; }
|
||||
PDFObjectReference getAnnotation() const { return m_annotation; }
|
||||
Operation getOperation() const { return m_operation; }
|
||||
const QString& getJavascript() const { return m_javascript; }
|
||||
const QString& getJavaScript() const { return m_javascript; }
|
||||
|
||||
private:
|
||||
std::optional<PDFRendition> m_rendition;
|
||||
|
@@ -1026,6 +1026,10 @@ PDFAnnotationAdditionalActions PDFAnnotationAdditionalActions::parse(const PDFOb
|
||||
result.m_actions[PageClosed] = PDFAction::parse(storage, dictionary->get("PC"));
|
||||
result.m_actions[PageShow] = PDFAction::parse(storage, dictionary->get("PV"));
|
||||
result.m_actions[PageHide] = PDFAction::parse(storage, dictionary->get("PI"));
|
||||
result.m_actions[FormFieldModified] = PDFAction::parse(storage, dictionary->get("K"));
|
||||
result.m_actions[FormFieldFormatted] = PDFAction::parse(storage, dictionary->get("F"));
|
||||
result.m_actions[FormFieldValidated] = PDFAction::parse(storage, dictionary->get("V"));
|
||||
result.m_actions[FormFieldCalculated] = PDFAction::parse(storage, dictionary->get("C"));
|
||||
}
|
||||
|
||||
result.m_actions[Default] = PDFAction::parse(storage, defaultAction);
|
||||
|
@@ -394,6 +394,10 @@ public:
|
||||
PageClosed,
|
||||
PageShow,
|
||||
PageHide,
|
||||
FormFieldModified,
|
||||
FormFieldFormatted,
|
||||
FormFieldValidated,
|
||||
FormFieldCalculated,
|
||||
Default,
|
||||
End
|
||||
};
|
||||
@@ -405,6 +409,9 @@ public:
|
||||
/// \param action Action type
|
||||
const PDFAction* getAction(Action action) const { return m_actions.at(action).get(); }
|
||||
|
||||
/// Returns array with all actions
|
||||
const std::array<PDFActionPtr, End>& getActions() const { return m_actions; }
|
||||
|
||||
/// Parses annotation additional actions from the object. If object is invalid, then
|
||||
/// empty additional actions is constructed.
|
||||
/// \param storage Object storage
|
||||
|
@@ -1120,4 +1120,17 @@ PDFDocumentRequirements::RequirementEntry PDFDocumentRequirements::RequirementEn
|
||||
return entry;
|
||||
}
|
||||
|
||||
/// Builds the page additional actions from the given object. When the object
/// does not resolve to a dictionary, all action slots stay empty.
/// \param storage Object storage used to resolve the object and parse the actions
/// \param object Additional actions object
/// \returns Parsed additional actions
PDFPageAdditionalActions PDFPageAdditionalActions::parse(const PDFObjectStorage* storage, PDFObject object)
{
    PDFPageAdditionalActions result;

    const PDFDictionary* dictionary = storage->getDictionaryFromObject(object);
    if (!dictionary)
    {
        // Nothing to parse, return the default constructed (empty) actions
        return result;
    }

    // Entry "O" fills the Open slot, entry "C" fills the Close slot
    result.m_actions[Open] = PDFAction::parse(storage, dictionary->get("O"));
    result.m_actions[Close] = PDFAction::parse(storage, dictionary->get("C"));

    return result;
}
|
||||
|
||||
} // namespace pdf
|
||||
|
@@ -526,6 +526,38 @@ private:
|
||||
std::vector<RequirementEntry> m_requirements;
|
||||
};
|
||||
|
||||
/// Storage for page additional actions
|
||||
class PDFPageAdditionalActions
|
||||
{
|
||||
public:
|
||||
|
||||
enum Action
|
||||
{
|
||||
Open,
|
||||
Close,
|
||||
End
|
||||
};
|
||||
|
||||
inline explicit PDFPageAdditionalActions() = default;
|
||||
|
||||
/// Returns action for given type. If action is invalid,
|
||||
/// or not present, nullptr is returned.
|
||||
/// \param action Action type
|
||||
const PDFAction* getAction(Action action) const { return m_actions.at(action).get(); }
|
||||
|
||||
/// Returns array with all actions
|
||||
const std::array<PDFActionPtr, End>& getActions() const { return m_actions; }
|
||||
|
||||
/// Parses page additional actions from the object. If object is invalid, then
|
||||
/// empty additional actions is constructed.
|
||||
/// \param storage Object storage
|
||||
/// \param object Additional actions object
|
||||
static PDFPageAdditionalActions parse(const PDFObjectStorage* storage, PDFObject object);
|
||||
|
||||
private:
|
||||
std::array<PDFActionPtr, End> m_actions;
|
||||
};
|
||||
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFCatalog
|
||||
{
|
||||
public:
|
||||
@@ -582,6 +614,7 @@ public:
|
||||
const PDFDocumentSecurityStore& getDocumentSecurityStore() const { return m_documentSecurityStore; }
|
||||
const std::vector<PDFArticleThread>& getArticleThreads() const { return m_threads; }
|
||||
const PDFAction* getDocumentAction(DocumentAction action) const { return m_documentActions.at(action).get(); }
|
||||
const auto& getDocumentActions() const { return m_documentActions; }
|
||||
const PDFObject& getMetadata() const { return m_metadata; }
|
||||
const PDFObject& getStructureTreeRoot() const { return m_structureTreeRoot; }
|
||||
const QString& getLanguage() const { return m_language; }
|
||||
@@ -661,6 +694,9 @@ public:
|
||||
/// \returns Rendition, or nullptr
|
||||
PDFObject getNamedRendition(const QByteArray& key) const;
|
||||
|
||||
/// Returns all named JavaScript actions
|
||||
const std::map<QByteArray, PDFActionPtr>& getNamedJavaScriptActions() const { return m_namedJavaScriptActions; }
|
||||
|
||||
/// Parses catalog from catalog dictionary. If object cannot be parsed, or error occurs,
|
||||
/// then exception is thrown.
|
||||
static PDFCatalog parse(const PDFObject& catalog, const PDFDocument* document);
|
||||
|
@@ -217,6 +217,9 @@ public:
|
||||
/// \param action Action type
|
||||
const PDFAction* getAction(PDFAnnotationAdditionalActions::Action action) const { return m_additionalActions.getAction(action); }
|
||||
|
||||
/// Returns container of actions
|
||||
const PDFAnnotationAdditionalActions& getActions() const { return m_additionalActions; }
|
||||
|
||||
/// Parses form field from the object reference. If some error occurs
|
||||
/// then null pointer is returned, no exception is thrown.
|
||||
/// \param storage Storage
|
||||
|
199
PdfForQtLib/sources/pdfjavascriptscanner.cpp
Normal file
199
PdfForQtLib/sources/pdfjavascriptscanner.cpp
Normal file
@@ -0,0 +1,199 @@
|
||||
// Copyright (C) 2020 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdfjavascriptscanner.h"
#include "pdfaction.h"
#include "pdfform.h"

#include <numeric>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
/// Creates a scanner operating on the given document. Only the pointer
/// is stored here; the document itself is not copied.
/// \param document Document to be scanned
PDFJavaScriptScanner::PDFJavaScriptScanner(const PDFDocument* document)
    : m_document(document)
{
}
|
||||
|
||||
/// Scans the document for JavaScript stored in actions. Depending on the options,
/// the document actions, named JavaScript actions, form field actions and page
/// (including link/screen/widget annotation) actions are examined, in this order.
/// Entries found outside of pages have page index -1.
/// \param pages Indices of pages to be scanned, used when ScanPage is set and
///        AllPages is not set. Indices outside of the page range are skipped.
/// \param options Flags selecting what is scanned, and whether scanning stops
///        after the first JavaScript is found (FindFirstOnly)
/// \returns Found JavaScript entries (at most one, if FindFirstOnly is set)
PDFJavaScriptScanner::Entries PDFJavaScriptScanner::scan(const std::vector<PDFInteger>& pages, Options options) const
{
    Entries result;

    // Returns true, if only the first occurrence is requested and it has already been found
    auto isFinished = [&result, options]()
    {
        return options.testFlag(FindFirstOnly) && !result.empty();
    };

    // Appends JavaScript of the action (and of all actions in its action list) to the result
    auto scanAction = [&result, &isFinished](PDFJavaScriptEntry::Type type, PDFInteger pageIndex, const PDFAction* action)
    {
        if (!action || isFinished())
        {
            return;
        }

        const std::vector<const PDFAction*> actions = action->getActionList();
        for (const PDFAction* a : actions)
        {
            switch (a->getType())
            {
                case ActionType::JavaScript:
                {
                    const PDFActionJavaScript* javascriptAction = dynamic_cast<const PDFActionJavaScript*>(a);
                    Q_ASSERT(javascriptAction);

                    result.emplace_back(type, pageIndex, javascriptAction->getJavaScript());
                    break;
                }

                case ActionType::Rendition:
                {
                    const PDFActionRendition* renditionAction = dynamic_cast<const PDFActionRendition*>(a);
                    Q_ASSERT(renditionAction);

                    // Rendition action is reported only if it carries some JavaScript
                    if (!renditionAction->getJavaScript().isEmpty())
                    {
                        result.emplace_back(type, pageIndex, renditionAction->getJavaScript());
                    }
                    break;
                }

                default:
                    break;
            }

            if (isFinished())
            {
                break;
            }
        }
    };

    // Scans every action of a container of action pointers
    auto scanContainer = [&scanAction, &isFinished](PDFJavaScriptEntry::Type type, PDFInteger pageIndex, const auto& container)
    {
        for (const PDFActionPtr& action : container)
        {
            if (isFinished())
            {
                break;
            }

            scanAction(type, pageIndex, action.get());
        }
    };

    const PDFCatalog* catalog = m_document->getCatalog();

    if (options.testFlag(ScanDocument) && !isFinished())
    {
        scanContainer(PDFJavaScriptEntry::Type::Document, -1, catalog->getDocumentActions());
    }

    if (options.testFlag(ScanNamed) && !isFinished())
    {
        for (const auto& actionItem : catalog->getNamedJavaScriptActions())
        {
            if (isFinished())
            {
                break;
            }

            scanAction(PDFJavaScriptEntry::Type::Named, -1, actionItem.second.get());
        }
    }

    if (options.testFlag(ScanForm) && !isFinished())
    {
        PDFForm form = PDFForm::parse(m_document, catalog->getFormObject());
        if (form.isAcroForm() || form.isXFAForm())
        {
            auto fillActions = [&scanContainer](const PDFFormField* formField)
            {
                scanContainer(PDFJavaScriptEntry::Type::Form, -1, formField->getActions().getActions());
            };
            form.apply(fillActions);
        }
    }

    if (options.testFlag(ScanPage) && !isFinished())
    {
        const PDFObjectStorage* storage = &m_document->getStorage();
        const PDFInteger pageCount = PDFInteger(catalog->getPageCount());

        // Either scan the pages requested by the caller (without copying them),
        // or generate indices of all pages in the document.
        std::vector<PDFInteger> allPages;
        const std::vector<PDFInteger>* scannedPages = &pages;
        if (options.testFlag(AllPages))
        {
            allPages.resize(catalog->getPageCount(), 0);
            std::iota(allPages.begin(), allPages.end(), 0);
            scannedPages = &allPages;
        }

        for (const PDFInteger pageIndex : *scannedPages)
        {
            // Silently ignore page indices, which do not exist in the document
            if (pageIndex < 0 || pageIndex >= pageCount)
            {
                continue;
            }

            if (isFinished())
            {
                break;
            }

            const auto* page = catalog->getPage(pageIndex);

            PDFPageAdditionalActions pageActions = PDFPageAdditionalActions::parse(storage, page->getAdditionalActions(storage));
            scanContainer(PDFJavaScriptEntry::Type::Page, pageIndex, pageActions.getActions());

            const std::vector<PDFObjectReference>& pageAnnotations = page->getAnnotations();
            for (PDFObjectReference annotationReference : pageAnnotations)
            {
                // Do not parse remaining annotations, if we are already done
                if (isFinished())
                {
                    break;
                }

                PDFAnnotationPtr annotationPtr = PDFAnnotation::parse(storage, annotationReference);
                if (!annotationPtr)
                {
                    continue;
                }

                switch (annotationPtr->getType())
                {
                    case AnnotationType::Link:
                    {
                        const PDFLinkAnnotation* linkAnnotation = dynamic_cast<const PDFLinkAnnotation*>(annotationPtr.get());
                        Q_ASSERT(linkAnnotation);

                        scanAction(PDFJavaScriptEntry::Type::Annotation, pageIndex, linkAnnotation->getAction());
                        break;
                    }

                    case AnnotationType::Screen:
                    {
                        const PDFScreenAnnotation* screenAnnotation = dynamic_cast<const PDFScreenAnnotation*>(annotationPtr.get());
                        Q_ASSERT(screenAnnotation);

                        scanAction(PDFJavaScriptEntry::Type::Annotation, pageIndex, screenAnnotation->getAction());
                        scanContainer(PDFJavaScriptEntry::Type::Annotation, pageIndex, screenAnnotation->getAdditionalActions().getActions());
                        break;
                    }

                    case AnnotationType::Widget:
                    {
                        const PDFWidgetAnnotation* widgetAnnotation = dynamic_cast<const PDFWidgetAnnotation*>(annotationPtr.get());
                        Q_ASSERT(widgetAnnotation);

                        scanAction(PDFJavaScriptEntry::Type::Annotation, pageIndex, widgetAnnotation->getAction());
                        scanContainer(PDFJavaScriptEntry::Type::Annotation, pageIndex, widgetAnnotation->getAdditionalActions().getActions());
                        break;
                    }

                    default:
                        break;
                }
            }
        }
    }

    return result;
}
|
||||
|
||||
bool PDFJavaScriptScanner::hasJavaScript() const
|
||||
{
|
||||
return !scan({ }, Options(AllPages | FindFirstOnly | ScanDocument | ScanNamed | ScanForm | ScanPage)).empty();
|
||||
}
|
||||
|
||||
} // namespace pdf
|
86
PdfForQtLib/sources/pdfjavascriptscanner.h
Normal file
86
PdfForQtLib/sources/pdfjavascriptscanner.h
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright (C) 2020 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef PDFJAVASCRIPTSCANNER_H
|
||||
#define PDFJAVASCRIPTSCANNER_H
|
||||
|
||||
#include "pdfdocument.h"
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
struct PDFJavaScriptEntry
|
||||
{
|
||||
enum class Type
|
||||
{
|
||||
Invalid,
|
||||
Document,
|
||||
Named,
|
||||
Form,
|
||||
Page,
|
||||
Annotation
|
||||
};
|
||||
|
||||
explicit PDFJavaScriptEntry() = default;
|
||||
explicit PDFJavaScriptEntry(Type type, PDFInteger pageIndex, QString javaScript) :
|
||||
type(type), pageIndex(pageIndex), javaScript(javaScript)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
Type type = Type::Invalid;
|
||||
PDFInteger pageIndex = -1;
|
||||
QString javaScript;
|
||||
};
|
||||
|
||||
/// Scans document for all javascript presence (in actions). Several option
|
||||
/// can be set, for example, scan only document actions, or stop scanning,
|
||||
/// when first javascript is found.
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFJavaScriptScanner
|
||||
{
|
||||
public:
|
||||
explicit PDFJavaScriptScanner(const PDFDocument* document);
|
||||
|
||||
enum Option
|
||||
{
|
||||
AllPages = 0x0001, ///< Scan all pages
|
||||
FindFirstOnly = 0x0002, ///< Return only first javascript found
|
||||
ScanDocument = 0x0004, ///< Scan document related actions for javascript
|
||||
ScanNamed = 0x0008, ///< Scan named javascript in catalog
|
||||
ScanForm = 0x0010, ///< Scan javascript in form actions
|
||||
ScanPage = 0x0020, ///< Scan javascript in page annotations
|
||||
};
|
||||
Q_DECLARE_FLAGS(Options, Option)
|
||||
|
||||
using Entries = std::vector<PDFJavaScriptEntry>;
|
||||
|
||||
/// Scans document for javascript actions using flags
|
||||
Entries scan(const std::vector<PDFInteger>& pages, Options options) const;
|
||||
|
||||
/// Returns true, if document has any java script action. Calling
|
||||
/// this function can be slow.
|
||||
bool hasJavaScript() const;
|
||||
|
||||
private:
|
||||
const PDFDocument* m_document;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
Q_DECLARE_OPERATORS_FOR_FLAGS(pdf::PDFJavaScriptScanner::Options)
|
||||
|
||||
#endif // PDFJAVASCRIPTSCANNER_H
|
Reference in New Issue
Block a user