diff --git a/PdfForQtLib/sources/pdfjavascriptscanner.h b/PdfForQtLib/sources/pdfjavascriptscanner.h
index 8a54171..d137b3f 100644
--- a/PdfForQtLib/sources/pdfjavascriptscanner.h
+++ b/PdfForQtLib/sources/pdfjavascriptscanner.h
@@ -63,6 +63,8 @@ public:
         ScanNamed = 0x0008, ///< Scan named javascript in catalog
         ScanForm = 0x0010, ///< Scan javascript in form actions
         ScanPage = 0x0020, ///< Scan javascript in page annotations
+
+        ScanMask = ScanDocument | ScanNamed | ScanForm | ScanPage
     };
     Q_DECLARE_FLAGS(Options, Option)
 
diff --git a/PdfForQtLib/sources/pdfutils.cpp b/PdfForQtLib/sources/pdfutils.cpp
index 9f7afbc..bcf2307 100644
--- a/PdfForQtLib/sources/pdfutils.cpp
+++ b/PdfForQtLib/sources/pdfutils.cpp
@@ -308,6 +308,116 @@ QString PDFClosedIntervalSet::toText() const
     return intervals.join(", ");
 }
 
+std::vector<PDFInteger> PDFClosedIntervalSet::unfold() const
+{
+    std::vector<PDFInteger> result(getTotalLength(), 0);
+
+    // Fill the preallocated vector, one closed interval [first, last] at a time
+    auto it = result.begin();
+    for (auto [first, last] : m_intervals)
+    {
+        PDFInteger rangeSize = last - first + 1;
+        std::iota(it, std::next(it, rangeSize), first);
+        std::advance(it, rangeSize);
+    }
+    Q_ASSERT(it == result.end());
+
+    return result;
+}
+
+PDFClosedIntervalSet PDFClosedIntervalSet::parse(PDFInteger first, PDFInteger last, const QString& text, QString* errorMessage)
+{
+    PDFClosedIntervalSet result;
+
+    // Errors are collected locally, so the caller may pass nullptr as errorMessage
+    QString error;
+
+    QStringList parts = text.split(",", Qt::SkipEmptyParts, Qt::CaseSensitive);
+    for (QString part : parts)
+    {
+        part = part.trimmed();
+
+        int separatorPos = part.indexOf(QChar('-'));
+        const bool isRange = separatorPos != -1;
+
+        if (isRange)
+        {
+            // Missing bound ("-11", "12-", "-") defaults to the work range bound
+            const bool isLowerBoundDefined = part.front() != QChar('-');
+            const bool isUpperBoundDefined = part.back() != QChar('-');
+
+            QString lowerString = part.left(separatorPos);
+            QString upperString = part.mid(separatorPos + 1);
+
+            bool ok1 = true;
+            bool ok2 = true;
+
+            PDFInteger lower = isLowerBoundDefined ? lowerString.toLongLong(&ok1) : first;
+            PDFInteger upper = isUpperBoundDefined ? upperString.toLongLong(&ok2) : last;
+
+            if (!ok1)
+            {
+                error = PDFTranslationContext::tr("Can't convert '%1' to a number.").arg(lowerString);
+                break;
+            }
+
+            if (!ok2)
+            {
+                error = PDFTranslationContext::tr("Can't convert '%1' to a number.").arg(upperString);
+                break;
+            }
+
+            if (lower > upper)
+            {
+                error = PDFTranslationContext::tr("Closed interval [%1, %2] is invalid.").arg(lower).arg(upper);
+                break;
+            }
+
+            // Enforce the documented contract: both bounds must lie in [first, last]
+            if (lower < first || upper > last)
+            {
+                error = PDFTranslationContext::tr("Closed interval [%1, %2] is out of range [%3, %4].").arg(lower).arg(upper).arg(first).arg(last);
+                break;
+            }
+
+            result.addInterval(lower, upper);
+        }
+        else
+        {
+            bool ok = true;
+            PDFInteger value = part.toLongLong(&ok);
+
+            if (!ok)
+            {
+                error = PDFTranslationContext::tr("Can't convert '%1' to a number.").arg(part);
+                break;
+            }
+
+            if (value < first || value > last)
+            {
+                error = PDFTranslationContext::tr("Number %1 is out of range [%2, %3].").arg(value).arg(first).arg(last);
+                break;
+            }
+
+            result.addValue(value);
+        }
+    }
+
+    if (!error.isEmpty())
+    {
+        // Clear the result, error occurred
+        result = PDFClosedIntervalSet();
+    }
+
+    if (errorMessage)
+    {
+        *errorMessage = error;
+    }
+
+    result.normalize();
+    return result;
+}
+
 void PDFClosedIntervalSet::normalize()
 {
     // Algorithm:
diff --git a/PdfForQtLib/sources/pdfutils.h b/PdfForQtLib/sources/pdfutils.h
index cf19300..f2055c9 100644
--- a/PdfForQtLib/sources/pdfutils.h
+++ b/PdfForQtLib/sources/pdfutils.h
@@ -614,6 +614,24 @@ public:
     /// Transforms interval set to readable text
     QString toText() const;
 
+    /// Returns all integers from the range
+    std::vector<PDFInteger> unfold() const;
+
+    /// Returns true, if interval set is empty
+    bool isEmpty() const { return m_intervals.empty(); }
+
+    /// Parses text into closed interval set, text should be in form "1,3,4,7,-11,12-,52-53,-",
+    /// where 1,3,4,7 means single pages, -11 means range from \p first to 11, 12- means range
+    /// from 12 to \p last, and 52-53 means closed interval [52, 53]. If text is not in this form,
+    /// then empty interval set is returned and if \p errorMessage is specified, then error message
+    /// is stored here.
Parsed numbers must be equal or greater than \p first and lower or equal
+    /// to \p last, if overflow occurs, then error message is returned.
+    /// \param[in] first Lower bound of work range
+    /// \param[in] last Upper bound of work range
+    /// \param[in] text Text
+    /// \param[out] errorMessage Error message
+    static PDFClosedIntervalSet parse(PDFInteger first, PDFInteger last, const QString& text, QString* errorMessage);
+
 private:
     /// Normalizes interval ranges - merges adjacent intervals
     void normalize();
diff --git a/PdfTool/PdfTool.pro b/PdfTool/PdfTool.pro
index fc78eca..6aa0ff6 100644
--- a/PdfTool/PdfTool.pro
+++ b/PdfTool/PdfTool.pro
@@ -44,6 +44,7 @@ SOURCES += \
     pdftoolabstractapplication.cpp \
     pdftoolattachments.cpp \
     pdftoolinfo.cpp \
+    pdftoolinfojavascript.cpp \
     pdftoolverifysignatures.cpp \
     pdftoolxml.cpp
 
@@ -61,5 +62,6 @@ HEADERS += \
     pdftoolabstractapplication.h \
     pdftoolattachments.h \
     pdftoolinfo.h \
+    pdftoolinfojavascript.h \
     pdftoolverifysignatures.h \
     pdftoolxml.h
diff --git a/PdfTool/pdftoolabstractapplication.cpp b/PdfTool/pdftoolabstractapplication.cpp
index ab3afab..317c5f6 100644
--- a/PdfTool/pdftoolabstractapplication.cpp
+++ b/PdfTool/pdftoolabstractapplication.cpp
@@ -17,6 +17,7 @@
 
 #include "pdftoolabstractapplication.h"
 #include "pdfdocumentreader.h"
+#include "pdfutils.h"
 
 #include
 
@@ -193,6 +194,13 @@ void PDFToolAbstractApplication::initializeCommandLineParser(QCommandLineParser*
     {
         parser->addOption(QCommandLineOption("compute-hashes", "Compute hashes (MD5, SHA1, SHA256...) of document."));
     }
+
+    if (optionFlags.testFlag(PageSelector))
+    {
+        parser->addOption(QCommandLineOption("page-first", "First page of page range.", "number"));
+        parser->addOption(QCommandLineOption("page-last", "Last page of page range.", "number"));
+        parser->addOption(QCommandLineOption("page-select", "Choose arbitrary pages, in form '1,5,3,7-11,-29,43-'.", "number"));
+    }
 }
 
 PDFToolOptions PDFToolAbstractApplication::getOptions(QCommandLineParser* parser) const
@@ -293,6 +301,13 @@ PDFToolOptions PDFToolAbstractApplication::getOptions(QCommandLineParser* parser
         options.computeHashes = parser->isSet("compute-hashes");
     }
 
+    if (optionFlags.testFlag(PageSelector))
+    {
+        options.pageSelectorFirstPage = parser->isSet("page-first") ? parser->value("page-first") : QString();
+        options.pageSelectorLastPage = parser->isSet("page-last") ? parser->value("page-last") : QString();
+        options.pageSelectorSelection = parser->isSet("page-select") ? parser->value("page-select") : QString();
+    }
+
     return options;
 }
 
@@ -386,4 +401,44 @@ PDFToolApplicationStorage* PDFToolApplicationStorage::getInstance()
     return &storage;
 }
 
+std::vector<pdf::PDFInteger> PDFToolOptions::getPageRange(pdf::PDFInteger pageCount, QString& errorMessage) const
+{
+    // Translate the command line options into a single selection text, e.g. "3-7,9"
+    QStringList parts;
+
+    const bool hasFirst = !pageSelectorFirstPage.isEmpty();
+    const bool hasLast = !pageSelectorLastPage.isEmpty();
+    const bool hasSelection = !pageSelectorSelection.isEmpty();
+
+    if (hasFirst && hasLast)
+    {
+        parts << QString("%1-%2").arg(pageSelectorFirstPage, pageSelectorLastPage);
+    }
+    else if (hasFirst)
+    {
+        parts << QString("%1-").arg(pageSelectorFirstPage);
+    }
+    else if (hasLast)
+    {
+        parts << QString("-%1").arg(pageSelectorLastPage);
+    }
+
+    if (hasSelection)
+    {
+        parts << pageSelectorSelection;
+    }
+
+    if (parts.empty())
+    {
+        // No page option was given, "-" selects the whole work range
+        parts << "-";
+    }
+
+    // NOTE(review): work range [0, pageCount] holds pageCount + 1 values, it is probably meant to be
+    // [1, pageCount] or [0, pageCount - 1] - confirm which numbering the consumers of the result expect.
+    QString partsString = parts.join(",");
+    pdf::PDFClosedIntervalSet result = pdf::PDFClosedIntervalSet::parse(0, pageCount, partsString, &errorMessage);
+    return result.unfold();
+}
+
 }   // pdftool
diff --git a/PdfTool/pdftoolabstractapplication.h b/PdfTool/pdftoolabstractapplication.h
index c112cd3..c163f1d 100644
--- a/PdfTool/pdftoolabstractapplication.h
+++ b/PdfTool/pdftoolabstractapplication.h
@@ -74,6 +74,14 @@ struct PDFToolOptions
 
     // For option 'ComputeHashes'
     bool computeHashes = false;
+
+    // For option 'PageSelector'
+    QString pageSelectorFirstPage;
+    QString pageSelectorLastPage;
+    QString pageSelectorSelection;
+
+    /// Returns page range. If page range is invalid, then \p errorMessage is not empty.
+    std::vector<pdf::PDFInteger> getPageRange(pdf::PDFInteger pageCount, QString& errorMessage) const;
 };
 
 /// Base class for all applications
@@ -86,6 +94,7 @@ public:
     enum ExitCodes
     {
         ExitSuccess = EXIT_SUCCESS,
+        ExitFailure = EXIT_FAILURE,
         ErrorNoDocumentSpecified,
         ErrorDocumentReading,
         ErrorInvalidArguments,
@@ -108,6 +117,7 @@ public:
         Attachments = 0x0010, ///< Flags for attachments manipulating
         DateFormat = 0x0020, ///< Date format
         ComputeHashes = 0x0040, ///< Compute hashes
+        PageSelector = 0x0080, ///< Select page range (or all pages)
     };
     Q_DECLARE_FLAGS(Options, Option)
 
diff --git a/PdfTool/pdftoolinfojavascript.cpp b/PdfTool/pdftoolinfojavascript.cpp
new file mode 100644
index 0000000..9a8af0a
--- /dev/null
+++ b/PdfTool/pdftoolinfojavascript.cpp
@@ -0,0 +1,79 @@
+//    Copyright (C) 2020 Jakub Melka
+//
+//    This file is part of PdfForQt.
+//
+//    PdfForQt is free software: you can redistribute it and/or modify
+//    it under the terms of the GNU Lesser General Public License as published by
+//    the Free Software Foundation, either version 3 of the License, or
+//    (at your option) any later version.
+//
+//    PdfForQt is distributed in the hope that it will be useful,
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//    GNU Lesser General Public License for more details.
+//
+//    You should have received a copy of the GNU Lesser General Public License
+//    along with PDFForQt.  If not, see <https://www.gnu.org/licenses/>.
+
+#include "pdftoolinfojavascript.h"
+#include "pdfjavascriptscanner.h"
+
+namespace pdftool
+{
+
+// NOTE(review): presumably registers the application on construction - confirm in PDFToolAbstractApplication
+static PDFToolInfoJavaScriptApplication s_infoJavaScriptApplication;
+
+QString PDFToolInfoJavaScriptApplication::getStandardString(PDFToolAbstractApplication::StandardString standardString) const
+{
+    switch (standardString)
+    {
+        case Command:
+            return "info-javascript";
+
+        case Name:
+            return PDFToolTranslationContext::tr("Info (JavaScript code)");
+
+        case Description:
+            return PDFToolTranslationContext::tr("Retrieve informations about JavaScript usage in a document.");
+
+        default:
+            Q_ASSERT(false);
+            break;
+    }
+
+    return QString();
+}
+
+int PDFToolInfoJavaScriptApplication::execute(const PDFToolOptions& options)
+{
+    pdf::PDFDocument document;
+    QByteArray sourceData;
+    if (!readDocument(options, document, &sourceData))
+    {
+        return ErrorDocumentReading;
+    }
+
+    // Resolve the page selection, non-empty parseError means that the selection is invalid
+    QString parseError;
+    std::vector<pdf::PDFInteger> pages = options.getPageRange(document.getCatalog()->getPageCount(), parseError);
+
+    if (!parseError.isEmpty())
+    {
+        PDFConsole::writeError(parseError, options.outputCodec);
+        return ErrorInvalidArguments;
+    }
+
+    // NOTE(review): the scan result is not written to the console yet
+    pdf::PDFJavaScriptScanner scanner(&document);
+    pdf::PDFJavaScriptScanner::Entries javascripts = scanner.scan(pages, pdf::PDFJavaScriptScanner::ScanMask);
+
+    return ExitSuccess;
+}
+
+PDFToolAbstractApplication::Options PDFToolInfoJavaScriptApplication::getOptionsFlags() const
+{
+    return ConsoleFormat | OpenDocument | PageSelector;
+}
+
+}   // namespace pdftool
diff --git a/PdfTool/pdftoolinfojavascript.h b/PdfTool/pdftoolinfojavascript.h
new file mode 100644
index 0000000..03e04b7
--- /dev/null
+++ b/PdfTool/pdftoolinfojavascript.h
@@ -0,0 +1,37 @@
+//    Copyright (C) 2020 Jakub Melka
+//
+//    This file is part of PdfForQt.
+//
+//    PdfForQt is free software: you can redistribute it and/or modify
+//    it under the terms of the GNU Lesser General Public License as published by
+//    the Free Software Foundation, either version 3 of the License, or
+//    (at your option) any later version.
+//
+//    PdfForQt is distributed in the hope that it will be useful,
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//    GNU Lesser General Public License for more details.
+//
+//    You should have received a copy of the GNU Lesser General Public License
+//    along with PDFForQt.  If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef PDFTOOLINFOJAVASCRIPT_H
+#define PDFTOOLINFOJAVASCRIPT_H
+
+#include "pdftoolabstractapplication.h"
+
+namespace pdftool
+{
+
+/// Application for the "info-javascript" command, it scans the selected pages of a document for JavaScript code
+class PDFToolInfoJavaScriptApplication : public PDFToolAbstractApplication
+{
+public:
+    virtual QString getStandardString(StandardString standardString) const override;
+    virtual int execute(const PDFToolOptions& options) override;
+    virtual Options getOptionsFlags() const override;
+};
+
+}   // namespace pdftool
+
+#endif // PDFTOOLINFOJAVASCRIPT_H