Tool for gathering info about document

This commit is contained in:
Jakub Melka
2020-10-04 16:56:55 +02:00
parent b226e35208
commit 2acbcd68b2
17 changed files with 758 additions and 37 deletions

View File

@@ -487,7 +487,7 @@ public:
/// Returns pointer to the stored rendition, or nullptr, if no rendition is stored
const PDFRendition* getRendition() const
{
    if (m_rendition)
    {
        return &*m_rendition;
    }

    return nullptr;
}
/// Returns the stored annotation reference (m_annotation)
PDFObjectReference getAnnotation() const { return m_annotation; }
/// Returns the stored operation (m_operation)
Operation getOperation() const { return m_operation; }
/// Returns reference to the stored m_javascript string
const QString& getJavascript() const { return m_javascript; }
/// Returns reference to the stored m_javascript string
const QString& getJavaScript() const { return m_javascript; }
private:
std::optional<PDFRendition> m_rendition;

View File

@@ -1026,6 +1026,10 @@ PDFAnnotationAdditionalActions PDFAnnotationAdditionalActions::parse(const PDFOb
result.m_actions[PageClosed] = PDFAction::parse(storage, dictionary->get("PC"));
result.m_actions[PageShow] = PDFAction::parse(storage, dictionary->get("PV"));
result.m_actions[PageHide] = PDFAction::parse(storage, dictionary->get("PI"));
result.m_actions[FormFieldModified] = PDFAction::parse(storage, dictionary->get("K"));
result.m_actions[FormFieldFormatted] = PDFAction::parse(storage, dictionary->get("F"));
result.m_actions[FormFieldValidated] = PDFAction::parse(storage, dictionary->get("V"));
result.m_actions[FormFieldCalculated] = PDFAction::parse(storage, dictionary->get("C"));
}
result.m_actions[Default] = PDFAction::parse(storage, defaultAction);

View File

@@ -394,6 +394,10 @@ public:
PageClosed,
PageShow,
PageHide,
FormFieldModified,
FormFieldFormatted,
FormFieldValidated,
FormFieldCalculated,
Default,
End
};
@@ -405,6 +409,9 @@ public:
/// \param action Action type
const PDFAction* getAction(Action action) const
{
    // Bounds-checked access into the action array, then hand out the raw pointer
    const PDFActionPtr& actionPtr = m_actions.at(action);
    return actionPtr.get();
}
/// Returns the array with all actions. The array has End elements
/// and is indexed by values of the Action enumeration.
const std::array<PDFActionPtr, End>& getActions() const { return m_actions; }
/// Parses annotation additional actions from the object. If the object is invalid,
/// empty additional actions are constructed.
/// \param storage Object storage

View File

@@ -1120,4 +1120,17 @@ PDFDocumentRequirements::RequirementEntry PDFDocumentRequirements::RequirementEn
return entry;
}
/// Parses page additional actions from the object. If the object does not
/// resolve to a dictionary, empty additional actions are returned. Otherwise
/// entry "O" is parsed as the Open action and entry "C" as the Close action.
/// \param storage Object storage
/// \param object Additional actions object
PDFPageAdditionalActions PDFPageAdditionalActions::parse(const PDFObjectStorage* storage, PDFObject object)
{
    PDFPageAdditionalActions pageActions;

    const PDFDictionary* actionsDictionary = storage->getDictionaryFromObject(object);
    if (!actionsDictionary)
    {
        // Nothing to parse, all actions stay empty
        return pageActions;
    }

    pageActions.m_actions[Open] = PDFAction::parse(storage, actionsDictionary->get("O"));
    pageActions.m_actions[Close] = PDFAction::parse(storage, actionsDictionary->get("C"));
    return pageActions;
}
} // namespace pdf

View File

@@ -526,6 +526,38 @@ private:
std::vector<RequirementEntry> m_requirements;
};
/// Storage for page additional actions - holds one action slot
/// for each value of the Action enumeration.
class PDFPageAdditionalActions
{
public:
    /// Page events, which can have an action assigned. End is not
    /// an event, it is used as the size of the action array.
    enum Action
    {
        Open,
        Close,
        End
    };

    explicit PDFPageAdditionalActions() = default;

    /// Parses page additional actions from the object. If object is invalid, then
    /// empty additional actions are constructed.
    /// \param storage Object storage
    /// \param object Additional actions object
    static PDFPageAdditionalActions parse(const PDFObjectStorage* storage, PDFObject object);

    /// Returns array with all actions, indexed by values of the Action enumeration
    const std::array<PDFActionPtr, End>& getActions() const
    {
        return m_actions;
    }

    /// Returns action for given type. If action is invalid,
    /// or not present, nullptr is returned.
    /// \param action Action type
    const PDFAction* getAction(Action action) const
    {
        const PDFActionPtr& actionPtr = m_actions.at(action);
        return actionPtr.get();
    }

private:
    std::array<PDFActionPtr, End> m_actions;
};
class PDFFORQTLIBSHARED_EXPORT PDFCatalog
{
public:
@@ -582,6 +614,7 @@ public:
/// Returns the stored document security store (m_documentSecurityStore)
const PDFDocumentSecurityStore& getDocumentSecurityStore() const { return m_documentSecurityStore; }
/// Returns the stored article threads (m_threads)
const std::vector<PDFArticleThread>& getArticleThreads() const { return m_threads; }
/// Returns the document action stored for the given action type
/// \param action Document action type
const PDFAction* getDocumentAction(DocumentAction action) const
{
    // Bounds-checked access, then hand out the raw pointer
    const auto& documentAction = m_documentActions.at(action);
    return documentAction.get();
}
/// Returns the container with all document actions (the same container,
/// which is indexed by DocumentAction in getDocumentAction)
const auto& getDocumentActions() const { return m_documentActions; }
/// Returns the stored metadata object (m_metadata)
const PDFObject& getMetadata() const { return m_metadata; }
/// Returns the stored structure tree root object (m_structureTreeRoot)
const PDFObject& getStructureTreeRoot() const { return m_structureTreeRoot; }
/// Returns the stored language string (m_language)
const QString& getLanguage() const { return m_language; }
@@ -661,6 +694,9 @@ public:
/// \returns Rendition, or nullptr
PDFObject getNamedRendition(const QByteArray& key) const;
/// Returns all named JavaScript actions as a map from a byte array key
/// (presumably the name of the action - to be confirmed) to the action
const std::map<QByteArray, PDFActionPtr>& getNamedJavaScriptActions() const { return m_namedJavaScriptActions; }
/// Parses catalog from catalog dictionary. If object cannot be parsed, or error occurs,
/// then exception is thrown.
static PDFCatalog parse(const PDFObject& catalog, const PDFDocument* document);

View File

@@ -217,6 +217,9 @@ public:
/// \param action Action type
const PDFAction* getAction(PDFAnnotationAdditionalActions::Action action) const
{
    // Lookup is delegated to the additional actions container
    const PDFAction* fieldAction = m_additionalActions.getAction(action);
    return fieldAction;
}
/// Returns the container of additional actions (m_additionalActions)
const PDFAnnotationAdditionalActions& getActions() const { return m_additionalActions; }
/// Parses form field from the object reference. If some error occurs
/// then null pointer is returned, no exception is thrown.
/// \param storage Storage

View File

@@ -0,0 +1,199 @@
// Copyright (C) 2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdfjavascriptscanner.h"
#include "pdfaction.h"
#include "pdfform.h"

#include <numeric>
namespace pdf
{
PDFJavaScriptScanner::PDFJavaScriptScanner(const PDFDocument* document) :
m_document(document)
{
}
/// Scans the document for JavaScript stored in actions. Which parts of the document
/// are scanned is selected by \p options: document actions, named JavaScript actions,
/// form field actions, and pages (page additional actions and actions of link, screen
/// and widget annotations). JavaScript actions are always reported, rendition actions
/// are reported only if their JavaScript is not empty.
/// \param pages Indices of pages to be scanned. Used only if ScanPage is set and AllPages
///        is not set. Indices outside of the page range of the document are skipped.
/// \param options Scan options
/// \returns Found JavaScript entries (at most one entry, if FindFirstOnly is set). Entries
///          not related to a page (document, named, form) have page index -1.
PDFJavaScriptScanner::Entries PDFJavaScriptScanner::scan(const std::vector<PDFInteger>& pages, Options options) const
{
    using Type = PDFJavaScriptEntry::Type;

    Entries result;
    const bool findFirstOnly = options.testFlag(FindFirstOnly);

    // Returns true, if only the first JavaScript is requested and it has already been found
    auto isFinished = [&result, findFirstOnly]() -> bool
    {
        return findFirstOnly && !result.empty();
    };

    // Examines a single action (including its action list) and stores found JavaScript
    auto scanAction = [&result, &isFinished](Type type, PDFInteger pageIndex, const PDFAction* action)
    {
        if (!action || isFinished())
        {
            return;
        }

        const std::vector<const PDFAction*> actions = action->getActionList();
        for (const PDFAction* a : actions)
        {
            switch (a->getType())
            {
                case ActionType::JavaScript:
                {
                    const PDFActionJavaScript* javascriptAction = dynamic_cast<const PDFActionJavaScript*>(a);
                    Q_ASSERT(javascriptAction);

                    // Q_ASSERT is compiled out in release builds, so the pointer
                    // must be tested explicitly before it is dereferenced.
                    if (javascriptAction)
                    {
                        result.emplace_back(type, pageIndex, javascriptAction->getJavaScript());
                    }
                    break;
                }

                case ActionType::Rendition:
                {
                    const PDFActionRendition* renditionAction = dynamic_cast<const PDFActionRendition*>(a);
                    Q_ASSERT(renditionAction);

                    if (renditionAction && !renditionAction->getJavaScript().isEmpty())
                    {
                        result.emplace_back(type, pageIndex, renditionAction->getJavaScript());
                    }
                    break;
                }

                default:
                    break;
            }

            if (isFinished())
            {
                break;
            }
        }
    };

    // Examines all actions stored in a container of action pointers
    auto scanContainer = [&scanAction](Type type, PDFInteger pageIndex, const auto& container)
    {
        for (const PDFActionPtr& action : container)
        {
            scanAction(type, pageIndex, action.get());
        }
    };

    const PDFCatalog* catalog = m_document->getCatalog();

    if (options.testFlag(ScanDocument) && !isFinished())
    {
        scanContainer(Type::Document, -1, catalog->getDocumentActions());
    }

    if (options.testFlag(ScanNamed) && !isFinished())
    {
        for (const auto& actionItem : catalog->getNamedJavaScriptActions())
        {
            scanAction(Type::Named, -1, actionItem.second.get());
        }
    }

    if (options.testFlag(ScanForm) && !isFinished())
    {
        PDFForm form = PDFForm::parse(m_document, catalog->getFormObject());
        if (form.isAcroForm() || form.isXFAForm())
        {
            auto fillActions = [&scanContainer](const PDFFormField* formField)
            {
                scanContainer(Type::Form, -1, formField->getActions().getActions());
            };
            form.apply(fillActions);
        }
    }

    if (options.testFlag(ScanPage) && !isFinished())
    {
        const auto* storage = &m_document->getStorage();
        const PDFInteger pageCount = static_cast<PDFInteger>(catalog->getPageCount());

        // Caller's page list is used directly (no copy); the list
        // of all pages is generated only when it is requested.
        std::vector<PDFInteger> allPages;
        const std::vector<PDFInteger>* scannedPages = &pages;
        if (options.testFlag(AllPages))
        {
            allPages.resize(catalog->getPageCount());
            std::iota(allPages.begin(), allPages.end(), PDFInteger(0));
            scannedPages = &allPages;
        }

        for (const PDFInteger pageIndex : *scannedPages)
        {
            if (pageIndex < 0 || pageIndex >= pageCount)
            {
                // Invalid page index, skip it
                continue;
            }

            if (isFinished())
            {
                break;
            }

            const auto* page = catalog->getPage(pageIndex);

            PDFPageAdditionalActions pageActions = PDFPageAdditionalActions::parse(storage, page->getAdditionalActions(storage));
            scanContainer(Type::Page, pageIndex, pageActions.getActions());

            const std::vector<PDFObjectReference>& pageAnnotations = page->getAnnotations();
            for (PDFObjectReference annotationReference : pageAnnotations)
            {
                if (isFinished())
                {
                    // Do not parse remaining annotations, result is already known
                    break;
                }

                PDFAnnotationPtr annotationPtr = PDFAnnotation::parse(storage, annotationReference);
                if (!annotationPtr)
                {
                    continue;
                }

                switch (annotationPtr->getType())
                {
                    case AnnotationType::Link:
                    {
                        const PDFLinkAnnotation* linkAnnotation = dynamic_cast<const PDFLinkAnnotation*>(annotationPtr.get());
                        Q_ASSERT(linkAnnotation);

                        if (linkAnnotation)
                        {
                            scanAction(Type::Annotation, pageIndex, linkAnnotation->getAction());
                        }
                        break;
                    }

                    case AnnotationType::Screen:
                    {
                        const PDFScreenAnnotation* screenAnnotation = dynamic_cast<const PDFScreenAnnotation*>(annotationPtr.get());
                        Q_ASSERT(screenAnnotation);

                        if (screenAnnotation)
                        {
                            scanAction(Type::Annotation, pageIndex, screenAnnotation->getAction());
                            scanContainer(Type::Annotation, pageIndex, screenAnnotation->getAdditionalActions().getActions());
                        }
                        break;
                    }

                    case AnnotationType::Widget:
                    {
                        const PDFWidgetAnnotation* widgetAnnotation = dynamic_cast<const PDFWidgetAnnotation*>(annotationPtr.get());
                        Q_ASSERT(widgetAnnotation);

                        if (widgetAnnotation)
                        {
                            scanAction(Type::Annotation, pageIndex, widgetAnnotation->getAction());
                            scanContainer(Type::Annotation, pageIndex, widgetAnnotation->getAdditionalActions().getActions());
                        }
                        break;
                    }

                    default:
                        break;
                }
            }
        }
    }

    return result;
}
bool PDFJavaScriptScanner::hasJavaScript() const
{
return !scan({ }, Options(AllPages | FindFirstOnly | ScanDocument | ScanNamed | ScanForm | ScanPage)).empty();
}
} // namespace pdf

View File

@@ -0,0 +1,86 @@
// Copyright (C) 2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFJAVASCRIPTSCANNER_H
#define PDFJAVASCRIPTSCANNER_H
#include "pdfdocument.h"
namespace pdf
{
/// Single JavaScript found in the document, together with
/// the kind of place, where it was found.
struct PDFJavaScriptEntry
{
    /// Kind of place, where the JavaScript was found
    enum class Type
    {
        Invalid,
        Document,
        Named,
        Form,
        Page,
        Annotation
    };

    explicit PDFJavaScriptEntry() = default;

    explicit PDFJavaScriptEntry(Type type, PDFInteger pageIndex, QString javaScript) :
        type(type),
        pageIndex(pageIndex),
        javaScript(javaScript)
    {

    }

    /// Kind of place, where the JavaScript was found (Invalid by default)
    Type type{Type::Invalid};

    /// Index of the page (-1 by default)
    PDFInteger pageIndex{-1};

    /// JavaScript code
    QString javaScript;
};
/// Scans a document for JavaScript present in its actions. Several options
/// can be set - for example, to scan only document actions, or to stop
/// scanning, when the first JavaScript is found.
class PDFFORQTLIBSHARED_EXPORT PDFJavaScriptScanner
{
public:
    /// Container of found JavaScript entries
    using Entries = std::vector<PDFJavaScriptEntry>;

    /// Scan options, can be combined as flags
    enum Option
    {
        AllPages        = 1 << 0,   ///< Scan all pages
        FindFirstOnly   = 1 << 1,   ///< Return only first javascript found
        ScanDocument    = 1 << 2,   ///< Scan document related actions for javascript
        ScanNamed       = 1 << 3,   ///< Scan named javascript in catalog
        ScanForm        = 1 << 4,   ///< Scan javascript in form actions
        ScanPage        = 1 << 5,   ///< Scan javascript in page additional actions and page annotations
    };
    Q_DECLARE_FLAGS(Options, Option)

    explicit PDFJavaScriptScanner(const PDFDocument* document);

    /// Scans document for javascript actions using flags
    /// \param pages Indices of pages to be scanned
    /// \param options Scan options
    Entries scan(const std::vector<PDFInteger>& pages, Options options) const;

    /// Returns true, if document has any javascript action. Calling
    /// this function can be slow.
    bool hasJavaScript() const;

private:
    const PDFDocument* m_document;
};
} // namespace pdf
Q_DECLARE_OPERATORS_FOR_FLAGS(pdf::PDFJavaScriptScanner::Options)
#endif // PDFJAVASCRIPTSCANNER_H