diff --git a/Pdf4QtLib/CMakeLists.txt b/Pdf4QtLib/CMakeLists.txt index 8589a39..399deb8 100644 --- a/Pdf4QtLib/CMakeLists.txt +++ b/Pdf4QtLib/CMakeLists.txt @@ -98,6 +98,8 @@ add_library(Pdf4QtLib SHARED sources/pdfrenderingerrorswidget.ui sources/pdfselectpagesdialog.ui sources/pdfobjecteditorwidget_impl.h + sources/pdfdocumentsanitizer.h + sources/pdfdocumentsanitizer.cpp cmaps.qrc ) diff --git a/Pdf4QtLib/sources/pdfdocumentbuilder.cpp b/Pdf4QtLib/sources/pdfdocumentbuilder.cpp index 6ff160f..843654f 100644 --- a/Pdf4QtLib/sources/pdfdocumentbuilder.cpp +++ b/Pdf4QtLib/sources/pdfdocumentbuilder.cpp @@ -669,6 +669,13 @@ PDFDocumentBuilder::PDFDocumentBuilder(const PDFDocument* document) : } +PDFDocumentBuilder::PDFDocumentBuilder(const PDFObjectStorage& storage, PDFVersion version) : + m_storage(storage), + m_version(version) +{ + +} + void PDFDocumentBuilder::reset() { *this = PDFDocumentBuilder(); @@ -5414,6 +5421,20 @@ void PDFDocumentBuilder::updateTrailerDictionary(PDFInteger objectCount) } +void PDFDocumentBuilder::removePageThumbnail(PDFObjectReference pageReference) +{ + PDFObjectFactory objectBuilder; + + objectBuilder.beginDictionary(); + objectBuilder.beginDictionaryItem("Thumb"); + objectBuilder << PDFObject(); + objectBuilder.endDictionaryItem(); + objectBuilder.endDictionary(); + PDFObject updatedPageObject = objectBuilder.takeObject(); + mergeTo(pageReference, updatedPageObject); +} + + /* END GENERATED CODE */ } // namespace pdf diff --git a/Pdf4QtLib/sources/pdfdocumentbuilder.h b/Pdf4QtLib/sources/pdfdocumentbuilder.h index b7f33fc..30f4551 100644 --- a/Pdf4QtLib/sources/pdfdocumentbuilder.h +++ b/Pdf4QtLib/sources/pdfdocumentbuilder.h @@ -323,6 +323,9 @@ public: /// Creates a new document as modification of old document explicit PDFDocumentBuilder(const PDFDocument* document); + /// Creates a new document from storage + explicit PDFDocumentBuilder(const PDFObjectStorage& storage, PDFVersion version); + /// Resets the object to the initial 
state. /// \warning All data are lost void reset(); @@ -491,8 +494,8 @@ public: PDFObjectReference createActionGoToDocumentPart(PDFObjectReference documentPart); - /// Creates embedded GoTo action. When executed, action points to destination in another document, - /// which is embedded in this document. + /// Creates embedded GoTo action. When executed, action points to destination in another document, which + /// is embedded in this document. /// \param fileSpecification File specification /// \param destination Destination in a embedded document /// \param newWindow Open document in new window @@ -541,8 +544,8 @@ public: bool newWindow); - /// Creates launch action. Launch action executes document opening or printing. This variant for - /// Windows operating system, where additional parameters can be specified. + /// Creates launch action. Launch action executes document opening or printing. This variant for Windows + /// operating system, where additional parameters can be specified. /// \param fileName File name /// \param defaultDirectory Default directory /// \param action Action to be performed. Valid values are 'open' or 'print'. @@ -555,8 +558,8 @@ public: bool newWindow); - /// Creates named action. Named actions are some predefined actions that interactive PDF processor - /// shall support. Valid values are NextPage, PrevPage, FirstPage, LastPage. + /// Creates named action. Named actions are some predefined actions that interactive PDF processor shall + /// support. Valid values are NextPage, PrevPage, FirstPage, LastPage. /// \param name Predefined name PDFObjectReference createActionNamed(QByteArray name); @@ -581,8 +584,7 @@ public: PDFObjectReference createActionResetForm(); - /// Creates reset interactive form action, which resets all fields except those specified in a given list of - /// fields. + /// Creates reset interactive form action, which resets all fields except those specified in a given list of fields. 
/// \param fields Fields to be excluded from reset PDFObjectReference createActionResetFormExcludedFields(PDFObjectReferenceVector fields); @@ -641,9 +643,8 @@ public: PDFObjectReference createActionURI(QString URL); - /// Caret annotations are used to indicate, where text should be inserted (for example, if reviewer - /// reviews the document, and he wants to mark, that some text should be inserted, he uses this - /// annotation). + /// Caret annotations are used to indicate, where text should be inserted (for example, if reviewer reviews the + /// document, and he wants to mark, that some text should be inserted, he uses this annotation). /// \param page Page to which is annotation added /// \param rectangle Area in which is caret displayed /// \param borderWidth Border width @@ -660,15 +661,15 @@ public: QString contents); - /// Circle annotation displays ellipse (or circle). Circle border/fill color can be defined, along with - /// border width. Popup annotation can be attached to this annotation. + /// Circle annotation displays ellipse (or circle). Circle border/fill color can be defined, along with border + /// width. Popup annotation can be attached to this annotation. /// \param page Page to which is annotation added /// \param rectangle Area in which is circle/ellipse displayed /// \param borderWidth Width of the border line of circle/ellipse - /// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled, + /// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled, then + /// use invalid QColor. + /// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a border, /// then use invalid QColor. - /// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a - /// border, then use invalid QColor. 
/// \param title Title (it is displayed as title of popup window) /// \param subject Subject (short description of the subject being adressed by the annotation) /// \param contents Contents (text displayed, for example, in the marked annotation dialog) @@ -697,15 +698,15 @@ public: QString description); - /// Free text annotation displays text directly on a page. Text appears directly on the page, in the - /// same way, as standard text in PDF document. Free text annotations are usually used to comment - /// the document. Free text annotation can also have callout line, with, or without a knee. Specify - /// start/end point parameters of this function to get callout line. + /// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way, + /// as standard text in PDF document. Free text annotations are usually used to comment the document. + /// Free text annotation can also have callout line, with, or without a knee. Specify start/end point + /// parameters of this function to get callout line. /// \param page Page to which is annotation added - /// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both - /// callout line and text rectangle. - /// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to - /// match bounding rectangle. + /// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both callout + /// line and text rectangle. + /// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to match + /// bounding rectangle. /// \param title Title /// \param subject Subject /// \param contents Contents (text displayed) @@ -729,9 +730,9 @@ public: AnnotationLineEnding endLineType); - /// Free text annotation displays text directly on a page. Text appears directly on the page, in the - /// same way, as standard text in PDF document. 
Free text annotations are usually used to comment - /// the document. Free text annotation can also have callout line, with, or without a knee. + /// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way, + /// as standard text in PDF document. Free text annotations are usually used to comment the document. + /// Free text annotation can also have callout line, with, or without a knee. /// \param page Page to which is annotation added /// \param rectangle Area in which is text displayed /// \param title Title @@ -746,15 +747,15 @@ public: TextAlignment textAlignment); - /// Free text annotation displays text directly on a page. Text appears directly on the page, in the - /// same way, as standard text in PDF document. Free text annotations are usually used to comment - /// the document. Free text annotation can also have callout line, with, or without a knee. Specify - /// start/end point parameters of this function to get callout line. + /// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way, + /// as standard text in PDF document. Free text annotations are usually used to comment the document. + /// Free text annotation can also have callout line, with, or without a knee. Specify start/end point + /// parameters of this function to get callout line. /// \param page Page to which is annotation added - /// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both - /// callout line and text rectangle. - /// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to - /// match bounding rectangle. + /// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both callout + /// line and text rectangle. + /// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to match + /// bounding rectangle. 
/// \param title Title /// \param subject Subject /// \param contents Contents (text displayed) @@ -776,9 +777,9 @@ public: AnnotationLineEnding endLineType); - /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain - /// window to be opened (and commented). This annotation is usually used to highlight text, but can - /// also highlight other things, such as images, or other graphics. + /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to + /// be opened (and commented). This annotation is usually used to highlight text, but can also highlight + /// other things, such as images, or other graphics. /// \param page Page to which is annotation added /// \param rectangle Area in which is highlight displayed /// \param color Color @@ -793,9 +794,9 @@ public: QString contents); - /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain - /// window to be opened (and commented). This annotation is usually used to highlight text, but can - /// also highlight other things, such as images, or other graphics. + /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to + /// be opened (and commented). This annotation is usually used to highlight text, but can also highlight + /// other things, such as images, or other graphics. /// \param page Page to which is annotation added /// \param rectangle Area in which is highlight displayed /// \param color Color @@ -804,9 +805,9 @@ public: QColor color); - /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain - /// window to be opened (and commented). This annotation is usually used to highlight text, but can - /// also highlight other things, such as images, or other graphics. + /// Text markup annotation is used to highlight text. 
It is a markup annotation, so it can contain window to + /// be opened (and commented). This annotation is usually used to highlight text, but can also highlight + /// other things, such as images, or other graphics. /// \param page Page to which is annotation added /// \param quadrilaterals Area in which is highlight displayed /// \param color Color @@ -849,9 +850,9 @@ public: QString contents); - /// Line annotation represents straight line, or some more advanced graphics, such as dimension with - /// text. Line annotations are markup annotations, so they can have popup window. Line endings can - /// be specified. + /// Line annotation represents straight line, or some more advanced graphics, such as dimension with text. + /// Line annotations are markup annotations, so they can have popup window. Line endings can be + /// specified. /// \param page Page to which is annotation added /// \param boundingRect Line annotation bounding rectangle /// \param startPoint Line start @@ -878,9 +879,9 @@ public: AnnotationLineEnding endLineType); - /// Line annotation represents straight line, or some more advanced graphics, such as dimension with - /// text. Line annotations are markup annotations, so they can have popup window. Line endings can - /// be specified. + /// Line annotation represents straight line, or some more advanced graphics, such as dimension with text. + /// Line annotations are markup annotations, so they can have popup window. Line endings can be + /// specified. /// \param page Page to which is annotation added /// \param boundingRect Line annotation bounding rectangle /// \param startPoint Line start @@ -893,12 +894,12 @@ public: /// \param contents Contents (text displayed, for example, in the marked annotation dialog) /// \param startLineType Start line ending type /// \param endLineType End line ending type - /// \param leaderLineLength Length of the leader line. 
Leader line extends from each endpoint of - /// the line perpendicular to the line itself. Value can be either positive, negative or zero. If - /// positive, then extension is in plane that is above the annotation line (in clockwise order), - /// if negative, then it is below the annotation line. - /// \param leaderLineOffset Length of leader line offset, which is the amount of empty space - /// between the endpoints of the annotation and beginning of leader lines + /// \param leaderLineLength Length of the leader line. Leader line extends from each endpoint of the line + /// perpendicular to the line itself. Value can be either positive, negative or zero. If positive, then + /// extension is in plane that is above the annotation line (in clockwise order), if negative, then it is + /// below the annotation line. + /// \param leaderLineOffset Length of leader line offset, which is the amount of empty space between the + /// endpoints of the annotation and beginning of leader lines /// \param leaderLineExtension Length of leader line extension, which extends leader lines in 180° /// direction from leader lines (so leader lines continues above drawn line) /// \param displayContents Display contents of the annotation as text along the line @@ -922,9 +923,9 @@ public: bool displayedContentsTopAlign); - /// Creates new link annotation. It usually represents clickable hypertext link. User can also specify - /// action, which can be executed, for example, link can be also in the PDF document (link to some - /// location in document). + /// Creates new link annotation. It usually represents clickable hypertext link. User can also specify action, + /// which can be executed, for example, link can be also in the PDF document (link to some location in + /// document). 
/// \param page Page to which is annotation added /// \param linkRectangle Link rectangle /// \param URL URL to be launched when user clicks on the link @@ -935,9 +936,9 @@ public: LinkHighlightMode highlightMode); - /// Creates new link annotation. It usually represents clickable hypertext link. User can also specify - /// action, which can be executed, for example, link can be also in the PDF document (link to some - /// location in document). + /// Creates new link annotation. It usually represents clickable hypertext link. User can also specify action, + /// which can be executed, for example, link can be also in the PDF document (link to some location in + /// document). /// \param page Page to which is annotation added /// \param linkRectangle Link rectangle /// \param action Action to be performed when user clicks on a link @@ -948,9 +949,9 @@ public: LinkHighlightMode highlightMode); - /// Polygon annotation. When opened, they display pop-up window containing the text of associated - /// note (and window title), if popup annotation is attached. Polygon border/fill color can be defined, - /// along with border width. + /// Polygon annotation. When opened, they display pop-up window containing the text of associated note + /// (and window title), if popup annotation is attached. Polygon border/fill color can be defined, along with + /// border width. /// \param page Page to which is annotation added /// \param polygon Polygon /// \param borderWidth Border line width @@ -969,9 +970,9 @@ public: QString contents); - /// Polyline annotation. When opened, they display pop-up window containing the text of associated - /// note (and window title), if popup annotation is attached. Polyline border/fill color can be defined, - /// along with border width. + /// Polyline annotation. When opened, they display pop-up window containing the text of associated note + /// (and window title), if popup annotation is attached. 
Polyline border/fill color can be defined, along with + /// border width. /// \param page Page to which is annotation added /// \param polyline Polyline /// \param borderWidth Border line width @@ -995,9 +996,9 @@ public: /// Creates a new popup annotation on the page. Popup annotation is represented usually by floating - /// window, which can be opened, or closed. Popup annotation is associated with parent annotation, - /// which can be usually markup annotation. Popup annotation displays parent annotation's texts, for - /// example, title, comment, date etc. + /// window, which can be opened, or closed. Popup annotation is associated with parent annotation, which + /// can be usually markup annotation. Popup annotation displays parent annotation's texts, for example, + /// title, comment, date etc. /// \param page Page to which is annotation added /// \param parentAnnotation Parent annotation (for which is popup window displayed) /// \param rectangle Area on the page, where popup window appears @@ -1026,16 +1027,16 @@ public: QColor color); - /// Square annotation displays rectangle (or square). When opened, they display pop-up window - /// containing the text of associated note (and window title), if popup annotation is attached. Square - /// border/fill color can be defined, along with border width. + /// Square annotation displays rectangle (or square). When opened, they display pop-up window containing + /// the text of associated note (and window title), if popup annotation is attached. Square border/fill color + /// can be defined, along with border width. /// \param page Page to which is annotation added /// \param rectangle Area in which is rectangle displayed /// \param borderWidth Width of the border line of rectangle - /// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled, + /// \param fillColor Fill color of rectangle (interior color). 
If you do not want to have area color filled, then + /// use invalid QColor. + /// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a border, /// then use invalid QColor. - /// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a - /// border, then use invalid QColor. /// \param title Title (it is displayed as title of popup window) /// \param subject Subject (short description of the subject being adressed by the annotation) /// \param contents Contents (text displayed, for example, in the marked annotation dialog) @@ -1049,8 +1050,8 @@ public: QString contents); - /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can - /// contain window to be opened (and commented). + /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain + /// window to be opened (and commented). /// \param page Page to which is annotation added /// \param rectangle Area in which is markup displayed /// \param color Color @@ -1059,8 +1060,8 @@ public: QColor color); - /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can - /// contain window to be opened (and commented). + /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain + /// window to be opened (and commented). /// \param page Page to which is annotation added /// \param quadrilaterals Area in which is markup displayed /// \param color Color @@ -1069,8 +1070,8 @@ public: QColor color); - /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can - /// contain window to be opened (and commented). + /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain + /// window to be opened (and commented). 
/// \param page Page to which is annotation added /// \param rectangle Area in which is markup displayed /// \param color Color @@ -1100,8 +1101,8 @@ public: QString contents); - /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain - /// window to be opened (and commented). + /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to + /// be opened (and commented). /// \param page Page to which is annotation added /// \param rectangle Area in which is markup displayed /// \param color Color @@ -1116,8 +1117,8 @@ public: QString contents); - /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain - /// window to be opened (and commented). + /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to + /// be opened (and commented). /// \param page Page to which is annotation added /// \param rectangle Area in which is markup displayed /// \param color Color @@ -1126,8 +1127,8 @@ public: QColor color); - /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain - /// window to be opened (and commented). + /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to + /// be opened (and commented). /// \param page Page to which is annotation added /// \param quadrilaterals Area in which is markup displayed /// \param color Color @@ -1136,11 +1137,10 @@ public: QColor color); - /// Creates text annotation. Text annotation is "sticky note" attached to a point in the PDF document. - /// When closed, it is displayed as icon, if opened, widget appears with attached text. Text annotations - /// do not scale or rotate, they appear independent of zoom/rotate. So, they behave as if flags - /// NoZoom or NoRotate to the annotations are being set. 
Popup annotation is automatically created - /// for this annotation. + /// Creates text annotation. Text annotation is "sticky note" attached to a point in the PDF document. When + /// closed, it is displayed as icon, if opened, widget appears with attached text. Text annotations do not scale + /// or rotate, they appear independent of zoom/rotate. So, they behave as if flags NoZoom or NoRotate to + /// the annotations are being set. Popup annotation is automatically created for this annotation. /// \param page Page to which is annotation added /// \param rectangle Area in which is icon displayed /// \param iconType Icon type @@ -1157,8 +1157,8 @@ public: bool open); - /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain - /// window to be opened (and commented). + /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to + /// be opened (and commented). /// \param page Page to which is annotation added /// \param rectangle Area in which is markup displayed /// \param color Color @@ -1167,8 +1167,8 @@ public: QColor color); - /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain - /// window to be opened (and commented). + /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to + /// be opened (and commented). /// \param page Page to which is annotation added /// \param quadrilaterals Area in which is markup displayed /// \param color Color @@ -1177,8 +1177,8 @@ public: QColor color); - /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain - /// window to be opened (and commented). + /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to + /// be opened (and commented). 
/// \param page Page to which is annotation added /// \param rectangle Area in which is markup displayed /// \param color Color @@ -1193,13 +1193,13 @@ public: QString contents); - /// Creates empty catalog. This function is used, when a new document is being created. Do not call - /// this function manually. + /// Creates empty catalog. This function is used, when a new document is being created. Do not call this + /// function manually. PDFObjectReference createCatalog(); - /// Creates page tree root for the catalog. This function is only called when new document is being - /// created. Do not call this function manually. + /// Creates page tree root for the catalog. This function is only called when new document is being created. + /// Do not call this function manually. PDFObjectReference createCatalogPageTreeRoot(); @@ -1258,8 +1258,7 @@ public: /// Creates signature dictionary used for preparation in signing process. Can define parameters of the /// signature. /// \param filter Filter (for example, Adobe.PPKLite, Entrust.PPKEF, CiCi.SignIt, ...) - /// \param subfilter Subfilter (for example, adbe.pkcs7.detached, adbe.pkcs7.sha1, - /// ETSI.CAdES.detached, ...) + /// \param subfilter Subfilter (for example, adbe.pkcs7.detached, adbe.pkcs7.sha1, ETSI.CAdES.detached, ...) /// \param contents Contents (reserved data for signature). /// \param signingTime Signing date/time /// \param byteRangeItem Item which will fill byte range array. @@ -1270,8 +1269,8 @@ public: PDFInteger byteRangeItem); - /// This function is used to create a new trailer dictionary, when blank document is created. Do not - /// call this function manually. + /// This function is used to create a new trailer dictionary, when blank document is created. Do not call this + /// function manually. /// \param catalog Reference to document catalog PDFObject createTrailerDictionary(PDFObjectReference catalog); @@ -1358,9 +1357,9 @@ public: bool isOpen); - /// Sets annotation quadrilaterals. 
Quadrilaterals are sequence of 4 points, where first two points are - /// on the upper side of quadrilateral, and the last two points are on the lower side of quadrilateral. - /// Quadrilaterals are represented as unclosed polygon with 4 * n vertices. + /// Sets annotation quadrilaterals. Quadrilaterals are sequence of 4 points, where first two points are on the + /// upper side of quadrilateral, and the last two points are on the lower side of quadrilateral. Quadrilaterals + /// are represented as unclosed polygon with 4 * n vertices. /// \param annotation Annotation /// \param quadrilaterals Quadrilaterals void setAnnotationQuadPoints(PDFObjectReference annotation, @@ -1461,9 +1460,9 @@ public: PDFInteger topIndex); - /// Sets form field value. Value must be correct for this form field, no checking is performed. Also, if - /// you use this function, annotation widgets, which are attached to this form field, should also be - /// updated (for example, appearance state and sometimes appearance streams). + /// Sets form field value. Value must be correct for this form field, no checking is performed. Also, if you use + /// this function, annotation widgets, which are attached to this form field, should also be updated (for + /// example, appearance state and sometimes appearance streams). /// \param formField Form field /// \param value Value void setFormFieldValue(PDFObjectReference formField, @@ -1476,9 +1475,9 @@ public: /// Set document language. - /// \param language Document language. It should be a language identifier, as defined in ISO 639 - /// and ISO 3166. For example, "en-US", where first two letter means language code (en = - /// english), and the latter two is country code (US - United States). + /// \param language Document language. It should be a language identifier, as defined in ISO 639 and + /// ISO 3166. For example, "en-US", where first two letter means language code (en = english), and + /// the latter two is country code (US - United States). 
void setLanguage(QString language); @@ -1494,17 +1493,16 @@ public: QRectF box); - /// Sets bleed box to the page. Bleed box is, basically, a clipping box for output in a production - /// environment. Default value is the page's crop box. + /// Sets bleed box to the page. Bleed box is, basically, a clipping box for output in a production environment. + /// Default value is the page's crop box. /// \param page Page /// \param box Box void setPageBleedBox(PDFObjectReference page, QRectF box); - /// Sets crop box to the page. Crop box defines clipping region of the page. Page contents are clipped - /// to this region, graphics outside of clipping box will not be printed. Default value is same, as media - /// box. + /// Sets crop box to the page. Crop box defines clipping region of the page. Page contents are clipped to + /// this region, graphics outside of clipping box will not be printed. Default value is same, as media box. /// \param page Page /// \param box Box void setPageCropBox(PDFObjectReference page, @@ -1518,8 +1516,8 @@ public: PDFObjectReference documentPart); - /// Sets media box to the page. The media box defines size of physical medium, onto which the page - /// is to be printed. + /// Sets media box to the page. The media box defines size of physical medium, onto which the page is to be + /// printed. /// \param page Page /// \param box Box void setPageMediaBox(PDFObjectReference page, @@ -1561,12 +1559,17 @@ public: QString reasonText); - /// This function is used to update trailer dictionary. Must be called each time the final document is - /// being built. + /// This function is used to update trailer dictionary. Must be called each time the final document is being + /// built. /// \param objectCount Number of objects (including empty ones) void updateTrailerDictionary(PDFInteger objectCount); + /// + /// \param pageReference Removes page thumbnail. 
+ void removePageThumbnail(PDFObjectReference pageReference); + + /* END GENERATED CODE */ private: diff --git a/Pdf4QtLib/sources/pdfdocumentsanitizer.cpp b/Pdf4QtLib/sources/pdfdocumentsanitizer.cpp new file mode 100644 index 0000000..e3be937 --- /dev/null +++ b/Pdf4QtLib/sources/pdfdocumentsanitizer.cpp @@ -0,0 +1,306 @@ +// Copyright (C) 2023 Jakub Melka +// +// This file is part of PDF4QT. +// +// PDF4QT is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// with the written consent of the copyright owner, any later version. +// +// PDF4QT is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with PDF4QT. If not, see . 
+ +#include "pdfdocumentsanitizer.h" +#include "pdfvisitor.h" +#include "pdfexecutionpolicy.h" +#include "pdfoptimizer.h" +#include "pdfdocumentbuilder.h" + +namespace pdf +{ + +class PDFRemoveMetadataVisitor : public PDFUpdateObjectVisitor +{ +public: + explicit PDFRemoveMetadataVisitor(const PDFObjectStorage* storage, std::atomic* counter) : + PDFUpdateObjectVisitor(storage), + m_counter(counter) + { + + } + + virtual void visitDictionary(const PDFDictionary* dictionary) override; + +private: + std::atomic* m_counter; +}; + +void PDFRemoveMetadataVisitor::visitDictionary(const PDFDictionary* dictionary) +{ + Q_ASSERT(dictionary); + + std::vector entries; + entries.reserve(dictionary->getCount()); + + for (size_t i = 0, count = dictionary->getCount(); i < count; ++i) + { + dictionary->getValue(i).accept(this); + Q_ASSERT(!m_objectStack.empty()); + if (dictionary->getKey(i) != "Metadata") + { + entries.emplace_back(dictionary->getKey(i), m_objectStack.back()); + } + else + { + ++*m_counter; + } + m_objectStack.pop_back(); + } + + m_objectStack.push_back(PDFObject::createDictionary(std::make_shared(qMove(entries)))); +} + +PDFDocumentSanitizer::PDFDocumentSanitizer(SanitizationFlag flags, QObject* parent) : + QObject(parent), + m_flags(flags) +{ + +} + +void PDFDocumentSanitizer::sanitize() +{ + Q_EMIT sanitizationStarted(); + + if (m_flags.testFlag(DocumentInfo)) + { + performSanitizeDocumentInfo(); + } + + if (m_flags.testFlag(Metadata)) + { + performSanitizeMetadata(); + } + + if (m_flags.testFlag(Bookmarks)) + { + performSanitizeBookmarks(); + } + + if (m_flags.testFlag(FileAttachments)) + { + performSanitizeFileAttachments(); + } + + if (m_flags.testFlag(EmbeddedSearchIndex)) + { + performSanitizeEmbeddedSearchIndex(); + } + + if (m_flags.testFlag(MarkupAnnotations)) + { + performSanitizeMarkupAnnotations(); + } + + if (m_flags.testFlag(PageThumbnails)) + { + performSanitizePageThumbnails(); + } + + // Optimize - remove unused objects + PDFOptimizer 
optimizer(PDFOptimizer::OptimizationFlags(PDFOptimizer::RemoveUnusedObjects | PDFOptimizer::ShrinkObjectStorage | PDFOptimizer::RemoveNullObjects), nullptr); + optimizer.setStorage(m_storage); + optimizer.optimize(); + m_storage = optimizer.takeStorage(); + + Q_EMIT sanitizationFinished(); +} + +PDFDocumentSanitizer::SanitizationFlags PDFDocumentSanitizer::getFlags() const +{ + return m_flags; +} + +void PDFDocumentSanitizer::setFlags(SanitizationFlags flags) +{ + m_flags = flags; +} + +void PDFDocumentSanitizer::performSanitizeDocumentInfo() +{ + PDFObjectReference emptyDocumentInfoReference = m_storage.addObject(PDFObject()); + + PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0)); + const bool hasDocumentInfo = builder.getDocumentInfo().isValid(); + builder.setDocumentInfo(emptyDocumentInfoReference); + PDFDocument document = builder.build(); + m_storage = document.getStorage(); + + if (hasDocumentInfo) + { + Q_EMIT sanitizationProgress(tr("Document info was removed.")); + } +} + +void PDFDocumentSanitizer::performSanitizeMetadata() +{ + std::atomic counter = 0; + + PDFObjectStorage::PDFObjects objects = m_storage.getObjects(); + auto processEntry = [this, &counter](PDFObjectStorage::Entry& entry) + { + PDFRemoveMetadataVisitor visitor(&m_storage, &counter); + entry.object.accept(&visitor); + entry.object = visitor.getObject(); + }; + + PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Unknown, objects.begin(), objects.end(), processEntry); + m_storage.setObjects(qMove(objects)); + Q_EMIT sanitizationProgress(tr("Metadata streams removed: %1").arg(counter)); +} + +void PDFDocumentSanitizer::performSanitizeBookmarks() +{ + PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0)); + PDFObject catalogObject = builder.getObjectByReference(builder.getCatalogReference()); + const PDFDictionary* catalogDictionary = builder.getDictionaryFromObject(catalogObject); + const bool hasOutline = catalogDictionary && catalogDictionary->hasKey("Outlines"); + + if 
(hasOutline) + { + builder.removeOutline(); + PDFDocument document = builder.build(); + m_storage = document.getStorage(); + Q_EMIT sanitizationProgress(tr("Outline was removed.")); + } +} + +void PDFDocumentSanitizer::performSanitizeFileAttachments() +{ + auto filter = [](const PDFAnnotation* annotation) + { + return annotation->getType() == AnnotationType::FileAttachment; + }; + removeAnnotations(filter, tr("File attachments removed: %1.")); +} + +void PDFDocumentSanitizer::performSanitizeEmbeddedSearchIndex() +{ + PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0)); + PDFObject catalogObject = builder.getObjectByReference(builder.getCatalogReference()); + const PDFDictionary* catalogDictionary = builder.getDictionaryFromObject(catalogObject); + const bool hasPieceInfo = catalogDictionary && catalogDictionary->hasKey("PieceInfo"); + + if (hasPieceInfo) + { + PDFObject pieceInfoObject = builder.getObject(catalogDictionary->get("PieceInfo")); + const PDFDictionary* pieceInfoDictionary = builder.getDictionaryFromObject(pieceInfoObject); + if (pieceInfoDictionary->hasKey("SearchIndex")) + { + PDFDictionary dictionaryCopy = *pieceInfoDictionary; + dictionaryCopy.setEntry(PDFInplaceOrMemoryString("SearchIndex"), PDFObject()); + pieceInfoObject = PDFObject::createDictionary(std::make_shared(qMove(dictionaryCopy))); + + PDFObjectFactory factory; + factory.beginDictionary(); + factory.beginDictionaryItem("PieceInfo"); + factory << pieceInfoObject; + factory.endDictionaryItem(); + factory.endDictionary(); + PDFObject newCatalog = factory.takeObject(); + builder.mergeTo(builder.getCatalogReference(), std::move(newCatalog)); + PDFDocument document = builder.build(); + m_storage = document.getStorage(); + Q_EMIT sanitizationProgress(tr("Search index was removed.")); + } + } +} + +void PDFDocumentSanitizer::performSanitizeMarkupAnnotations() +{ + auto filter = [](const PDFAnnotation* annotation) + { + return annotation->asMarkupAnnotation() != nullptr; + }; + 
removeAnnotations(filter, tr("Markup annotations removed: %1.")); +} + +void PDFDocumentSanitizer::performSanitizePageThumbnails() +{ + PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0)); + builder.flattenPageTree(); + std::vector pageReferences = builder.getPages(); + std::vector pagesWithThumbnail; + + for (const PDFObjectReference& pageReference : pageReferences) + { + const PDFDictionary* pageDictionary = builder.getDictionaryFromObject(builder.getObjectByReference(pageReference)); + if (pageDictionary && pageDictionary->hasKey("Thumb")) + { + pagesWithThumbnail.push_back(pageReference); + } + } + + if (!pagesWithThumbnail.empty()) + { + for (const auto& pageReference : pagesWithThumbnail) + { + builder.removePageThumbnail(pageReference); + } + + PDFDocument document = builder.build(); + m_storage = document.getStorage(); + Q_EMIT sanitizationProgress(tr("Page thumbnails removed: %1.").arg(pagesWithThumbnail.size())); + } +} + +void PDFDocumentSanitizer::removeAnnotations(const std::function& filter, + QString message) +{ + PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0)); + builder.flattenPageTree(); + std::vector pageReferences = builder.getPages(); + std::vector> annotationsToBeRemoved; + + PDFDocumentDataLoaderDecorator loader(&m_storage); + for (const PDFObjectReference pageReference : pageReferences) + { + const PDFObject& pageObject = m_storage.getObjectByReference(pageReference); + const PDFDictionary* pageDictionary = m_storage.getDictionaryFromObject(pageObject); + + if (!pageDictionary) + { + continue; + } + + std::vector annotationReferences = loader.readReferenceArrayFromDictionary(pageDictionary, "Annots"); + for (const PDFObjectReference& annotationReference : annotationReferences) + { + PDFAnnotationPtr annotation = PDFAnnotation::parse(&m_storage, annotationReference); + if (filter(annotation.get())) + { + annotationsToBeRemoved.emplace_back(pageReference, annotationReference); + } + } + } + + if (!annotationsToBeRemoved.empty()) + 
{ + for (const auto& item : annotationsToBeRemoved) + { + const PDFObjectReference pageReference = item.first; + const PDFObjectReference annotationReference = item.second; + builder.removeAnnotation(pageReference, annotationReference); + } + + PDFDocument document = builder.build(); + m_storage = document.getStorage(); + Q_EMIT sanitizationProgress(message.arg(annotationsToBeRemoved.size())); + } +} + +} // namespace pdf diff --git a/Pdf4QtLib/sources/pdfdocumentsanitizer.h b/Pdf4QtLib/sources/pdfdocumentsanitizer.h new file mode 100644 index 0000000..7113385 --- /dev/null +++ b/Pdf4QtLib/sources/pdfdocumentsanitizer.h @@ -0,0 +1,99 @@ +// Copyright (C) 2023 Jakub Melka +// +// This file is part of PDF4QT. +// +// PDF4QT is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// with the written consent of the copyright owner, any later version. +// +// PDF4QT is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with PDF4QT. If not, see . + +#ifndef PDFDOCUMENTSANITIZER_H +#define PDFDOCUMENTSANITIZER_H + +#include "pdfdocument.h" + +namespace pdf +{ +class PDFAnnotation; + +/// Class for sanitizing documents. Can remove sensitive content from the document, +/// except the content streams. Sanitization is configurable, user can specify, +/// which content should be removed. 
+class PDF4QTLIBSHARED_EXPORT PDFDocumentSanitizer : public QObject +{ + Q_OBJECT + +public: + + enum SanitizationFlag + { + None = 0x0000, ///< No sanitization is performed + DocumentInfo = 0x0001, ///< Remove document information + Metadata = 0x0002, ///< Remove all metadata streams in all objects + Bookmarks = 0x0004, ///< Remove bookmarks + FileAttachments = 0x0008, ///< Remove file attachments + EmbeddedSearchIndex = 0x0010, ///< Remove embedded search index + MarkupAnnotations = 0x0020, ///< Remove markup annotations from all pages + PageThumbnails = 0x0040, ///< Remove page thumbnails + All = 0xFFFF, ///< All sanitization turned on + }; + Q_DECLARE_FLAGS(SanitizationFlags, SanitizationFlag) + + explicit PDFDocumentSanitizer(SanitizationFlag flags, QObject* parent); + + /// Set document, which should be sanitized + /// \param document Document to be sanitized + void setDocument(const PDFDocument* document) { setStorage(document->getStorage()); } + + /// Set storage directly (storage must be valid and filled with objects) + /// \param storage Storage + void setStorage(const PDFObjectStorage& storage) { m_storage = storage; } + + /// Perform document sanitization. During optimization process, various + /// signals are emitted to view progress. + void sanitize(); + + /// Returns object storage used for optimization + const PDFObjectStorage& getStorage() const { return m_storage; } + + /// Returns object storage by move semantics, old object storage is destroyed + PDFObjectStorage takeStorage() { return qMove(m_storage); } + + /// Returns sanitized document. Object storage is cleared after + /// this function call. 
+ PDFDocument takeSanitizedDocument() { return PDFDocument(qMove(m_storage), PDFVersion(2, 0)); } + + SanitizationFlags getFlags() const; + void setFlags(SanitizationFlags flags); + +signals: + void sanitizationStarted(); + void sanitizationProgress(QString progressText); + void sanitizationFinished(); + +private: + void performSanitizeDocumentInfo(); + void performSanitizeMetadata(); + void performSanitizeBookmarks(); + void performSanitizeFileAttachments(); + void performSanitizeEmbeddedSearchIndex(); + void performSanitizeMarkupAnnotations(); + void performSanitizePageThumbnails(); + + void removeAnnotations(const std::function& filter, QString message); + + SanitizationFlags m_flags; + PDFObjectStorage m_storage; +}; + +} // namespace pdf + +#endif // PDFDOCUMENTSANITIZER_H diff --git a/Pdf4QtLib/sources/pdfoptimizer.cpp b/Pdf4QtLib/sources/pdfoptimizer.cpp index 7331148..0a0b342 100644 --- a/Pdf4QtLib/sources/pdfoptimizer.cpp +++ b/Pdf4QtLib/sources/pdfoptimizer.cpp @@ -28,120 +28,6 @@ namespace pdf { -class PDFUpdateObjectVisitor : public PDFAbstractVisitor -{ -public: - explicit inline PDFUpdateObjectVisitor(const PDFObjectStorage* storage) : - m_storage(storage) - { - m_objectStack.reserve(32); - } - - virtual void visitNull() override; - virtual void visitBool(bool value) override; - virtual void visitInt(PDFInteger value) override; - virtual void visitReal(PDFReal value) override; - virtual void visitString(PDFStringRef string) override; - virtual void visitName(PDFStringRef name) override; - virtual void visitArray(const PDFArray* array) override; - virtual void visitDictionary(const PDFDictionary* dictionary) override; - virtual void visitStream(const PDFStream* stream) override; - virtual void visitReference(const PDFObjectReference reference) override; - - PDFObject getObject(); - -protected: - const PDFObjectStorage* m_storage; - std::vector m_objectStack; -}; - -void PDFUpdateObjectVisitor::visitNull() -{ - 
m_objectStack.push_back(PDFObject::createNull()); -} - -void PDFUpdateObjectVisitor::visitBool(bool value) -{ - m_objectStack.push_back(PDFObject::createBool(value)); -} - -void PDFUpdateObjectVisitor::visitInt(PDFInteger value) -{ - m_objectStack.push_back(PDFObject::createInteger(value)); -} - -void PDFUpdateObjectVisitor::visitReal(PDFReal value) -{ - m_objectStack.push_back(PDFObject::createReal(value)); -} - -void PDFUpdateObjectVisitor::visitString(PDFStringRef string) -{ - m_objectStack.push_back(PDFObject::createString(string)); -} - -void PDFUpdateObjectVisitor::visitName(PDFStringRef name) -{ - m_objectStack.push_back(PDFObject::createName(name)); -} - -void PDFUpdateObjectVisitor::visitArray(const PDFArray* array) -{ - acceptArray(array); - - // We have all objects on the stack - Q_ASSERT(array->getCount() <= m_objectStack.size()); - - auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount()); - std::vector objects(it, m_objectStack.cend()); - PDFObject object = PDFObject::createArray(std::make_shared(qMove(objects))); - m_objectStack.erase(it, m_objectStack.cend()); - m_objectStack.push_back(object); -} - -void PDFUpdateObjectVisitor::visitDictionary(const PDFDictionary* dictionary) -{ - Q_ASSERT(dictionary); - - std::vector entries; - entries.reserve(dictionary->getCount()); - - for (size_t i = 0, count = dictionary->getCount(); i < count; ++i) - { - dictionary->getValue(i).accept(this); - Q_ASSERT(!m_objectStack.empty()); - entries.emplace_back(dictionary->getKey(i), m_objectStack.back()); - m_objectStack.pop_back(); - } - - m_objectStack.push_back(PDFObject::createDictionary(std::make_shared(qMove(entries)))); -} - -void PDFUpdateObjectVisitor::visitStream(const PDFStream* stream) -{ - const PDFDictionary* dictionary = stream->getDictionary(); - - visitDictionary(dictionary); - - Q_ASSERT(!m_objectStack.empty()); - PDFObject dictionaryObject = m_objectStack.back(); - m_objectStack.pop_back(); - - PDFDictionary 
newDictionary(*dictionaryObject.getDictionary()); - m_objectStack.push_back(PDFObject::createStream(std::make_shared(qMove(newDictionary), QByteArray(*stream->getContent())))); -} - -void PDFUpdateObjectVisitor::visitReference(const PDFObjectReference reference) -{ - m_objectStack.push_back(PDFObject::createReference(reference)); -} - -PDFObject PDFUpdateObjectVisitor::getObject() -{ - Q_ASSERT(m_objectStack.size() == 1); - return qMove(m_objectStack.back()); -} - class PDFRemoveSimpleObjectsVisitor : public PDFUpdateObjectVisitor { public: diff --git a/Pdf4QtLib/sources/pdfvisitor.cpp b/Pdf4QtLib/sources/pdfvisitor.cpp index 168b063..925e289 100644 --- a/Pdf4QtLib/sources/pdfvisitor.cpp +++ b/Pdf4QtLib/sources/pdfvisitor.cpp @@ -192,4 +192,91 @@ void PDFStatisticsCollector::collectStatisticsOfSimpleObject(PDFObject::Type typ statistics.memoryConsumptionEstimate += sizeof(PDFObject); } +void PDFUpdateObjectVisitor::visitNull() +{ + m_objectStack.push_back(PDFObject::createNull()); +} + +void PDFUpdateObjectVisitor::visitBool(bool value) +{ + m_objectStack.push_back(PDFObject::createBool(value)); +} + +void PDFUpdateObjectVisitor::visitInt(PDFInteger value) +{ + m_objectStack.push_back(PDFObject::createInteger(value)); +} + +void PDFUpdateObjectVisitor::visitReal(PDFReal value) +{ + m_objectStack.push_back(PDFObject::createReal(value)); +} + +void PDFUpdateObjectVisitor::visitString(PDFStringRef string) +{ + m_objectStack.push_back(PDFObject::createString(string)); +} + +void PDFUpdateObjectVisitor::visitName(PDFStringRef name) +{ + m_objectStack.push_back(PDFObject::createName(name)); +} + +void PDFUpdateObjectVisitor::visitArray(const PDFArray* array) +{ + acceptArray(array); + + // We have all objects on the stack + Q_ASSERT(array->getCount() <= m_objectStack.size()); + + auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount()); + std::vector objects(it, m_objectStack.cend()); + PDFObject object = 
PDFObject::createArray(std::make_shared(qMove(objects))); + m_objectStack.erase(it, m_objectStack.cend()); + m_objectStack.push_back(object); +} + +void PDFUpdateObjectVisitor::visitDictionary(const PDFDictionary* dictionary) +{ + Q_ASSERT(dictionary); + + std::vector entries; + entries.reserve(dictionary->getCount()); + + for (size_t i = 0, count = dictionary->getCount(); i < count; ++i) + { + dictionary->getValue(i).accept(this); + Q_ASSERT(!m_objectStack.empty()); + entries.emplace_back(dictionary->getKey(i), m_objectStack.back()); + m_objectStack.pop_back(); + } + + m_objectStack.push_back(PDFObject::createDictionary(std::make_shared(qMove(entries)))); +} + +void PDFUpdateObjectVisitor::visitStream(const PDFStream* stream) +{ + const PDFDictionary* dictionary = stream->getDictionary(); + + visitDictionary(dictionary); + + Q_ASSERT(!m_objectStack.empty()); + PDFObject dictionaryObject = m_objectStack.back(); + m_objectStack.pop_back(); + + PDFDictionary newDictionary(*dictionaryObject.getDictionary()); + m_objectStack.push_back(PDFObject::createStream(std::make_shared(qMove(newDictionary), QByteArray(*stream->getContent())))); +} + +void PDFUpdateObjectVisitor::visitReference(const PDFObjectReference reference) +{ + m_objectStack.push_back(PDFObject::createReference(reference)); +} + +PDFObject PDFUpdateObjectVisitor::getObject() +{ + Q_ASSERT(m_objectStack.size() == 1); + return qMove(m_objectStack.back()); +} + } // namespace pdf diff --git a/Pdf4QtLib/sources/pdfvisitor.h b/Pdf4QtLib/sources/pdfvisitor.h index 8d76a2f..56ee23c 100644 --- a/Pdf4QtLib/sources/pdfvisitor.h +++ b/Pdf4QtLib/sources/pdfvisitor.h @@ -206,6 +206,33 @@ struct PDFApplyVisitorImpl } }; +class PDFUpdateObjectVisitor : public PDFAbstractVisitor +{ +public: + explicit inline PDFUpdateObjectVisitor(const PDFObjectStorage* storage) : + m_storage(storage) + { + m_objectStack.reserve(32); + } + + virtual void visitNull() override; + virtual void visitBool(bool value) override; + virtual void 
visitInt(PDFInteger value) override; + virtual void visitReal(PDFReal value) override; + virtual void visitString(PDFStringRef string) override; + virtual void visitName(PDFStringRef name) override; + virtual void visitArray(const PDFArray* array) override; + virtual void visitDictionary(const PDFDictionary* dictionary) override; + virtual void visitStream(const PDFStream* stream) override; + virtual void visitReference(const PDFObjectReference reference) override; + + PDFObject getObject(); + +protected: + const PDFObjectStorage* m_storage; + std::vector m_objectStack; +}; + } // namespace pdf #endif // PDFVISITOR_H diff --git a/Pdf4QtViewer/CMakeLists.txt b/Pdf4QtViewer/CMakeLists.txt index 29862e4..386afd0 100644 --- a/Pdf4QtViewer/CMakeLists.txt +++ b/Pdf4QtViewer/CMakeLists.txt @@ -44,6 +44,9 @@ add_library(Pdf4QtViewer SHARED pdfviewermainwindow.ui pdfviewermainwindowlite.ui pdfviewersettingsdialog.ui + pdfsanitizedocumentdialog.ui + pdfsanitizedocumentdialog.cpp + pdfsanitizedocumentdialog.h pdf4qtviewer.qrc ) diff --git a/Pdf4QtViewer/pdfprogramcontroller.cpp b/Pdf4QtViewer/pdfprogramcontroller.cpp index 1725cdb..20560fc 100644 --- a/Pdf4QtViewer/pdfprogramcontroller.cpp +++ b/Pdf4QtViewer/pdfprogramcontroller.cpp @@ -32,6 +32,7 @@ #include "pdfundoredomanager.h" #include "pdfrendertoimagesdialog.h" #include "pdfoptimizedocumentdialog.h" +#include "pdfsanitizedocumentdialog.h" #include "pdfviewersettingsdialog.h" #include "pdfaboutdialog.h" #include "pdfrenderingerrorswidget.h" @@ -447,6 +448,10 @@ void PDFProgramController::initialize(Features features, { connect(action, &QAction::triggered, this, &PDFProgramController::onActionOptimizeTriggered); } + if (QAction* action = m_actionManager->getAction(PDFActionManager::Sanitize)) + { + connect(action, &QAction::triggered, this, &PDFProgramController::onActionSanitizeTriggered); + } if (QAction* action = m_actionManager->getAction(PDFActionManager::Encryption)) { connect(action, &QAction::triggered, this, 
&PDFProgramController::onActionEncryptionTriggered); @@ -1173,6 +1178,18 @@ void PDFProgramController::onActionOptimizeTriggered() } } +void PDFProgramController::onActionSanitizeTriggered() +{ + PDFSanitizeDocumentDialog dialog(m_pdfDocument.data(), m_mainWindow); + + if (dialog.exec() == QDialog::Accepted) + { + pdf::PDFDocumentPointer pointer(new pdf::PDFDocument(dialog.takeSanitizedDocument())); + pdf::PDFModifiedDocument document(qMove(pointer), m_optionalContentActivity, pdf::PDFModifiedDocument::Reset); + onDocumentModified(qMove(document)); + } +} + void PDFProgramController::onActionEncryptionTriggered() { auto queryPassword = [this](bool* ok) @@ -1492,6 +1509,7 @@ void PDFProgramController::updateActionsAvailability() m_actionManager->setEnabled(PDFActionManager::Print, hasValidDocument && canPrint); m_actionManager->setEnabled(PDFActionManager::RenderToImages, hasValidDocument && canPrint); m_actionManager->setEnabled(PDFActionManager::Optimize, hasValidDocument); + m_actionManager->setEnabled(PDFActionManager::Sanitize, hasValidDocument); m_actionManager->setEnabled(PDFActionManager::Encryption, hasValidDocument); m_actionManager->setEnabled(PDFActionManager::Save, hasValidDocument); m_actionManager->setEnabled(PDFActionManager::SaveAs, hasValidDocument); diff --git a/Pdf4QtViewer/pdfprogramcontroller.h b/Pdf4QtViewer/pdfprogramcontroller.h index 319cee6..9572ce1 100644 --- a/Pdf4QtViewer/pdfprogramcontroller.h +++ b/Pdf4QtViewer/pdfprogramcontroller.h @@ -105,6 +105,7 @@ public: SendByMail, RenderToImages, Optimize, + Sanitize, Encryption, FitPage, FitWidth, @@ -327,6 +328,7 @@ private: void onActionSendByEMailTriggered(); void onActionRenderToImagesTriggered(); void onActionOptimizeTriggered(); + void onActionSanitizeTriggered(); void onActionEncryptionTriggered(); void onActionFitPageTriggered(); void onActionFitWidthTriggered(); diff --git a/Pdf4QtViewer/pdfsanitizedocumentdialog.cpp b/Pdf4QtViewer/pdfsanitizedocumentdialog.cpp new file mode 100644 
index 0000000..93e34e3 --- /dev/null +++ b/Pdf4QtViewer/pdfsanitizedocumentdialog.cpp @@ -0,0 +1,164 @@ +// Copyright (C) 2023 Jakub Melka +// +// This file is part of PDF4QT. +// +// PDF4QT is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// with the written consent of the copyright owner, any later version. +// +// PDF4QT is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with PDF4QT. If not, see . + +#include "pdfsanitizedocumentdialog.h" +#include "ui_pdfsanitizedocumentdialog.h" + +#include "pdfwidgetutils.h" +#include "pdfdocumentwriter.h" +#include "pdfdbgheap.h" + +#include +#include +#include +#include + +namespace pdfviewer +{ + +PDFSanitizeDocumentDialog::PDFSanitizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent) : + QDialog(parent), + ui(new Ui::PDFSanitizeDocumentDialog), + m_document(document), + m_sanitizer(pdf::PDFDocumentSanitizer::All, nullptr), + m_sanitizeButton(nullptr), + m_sanitizationInProgress(false), + m_wasSanitized(false) +{ + ui->setupUi(this); + + auto addCheckBox = [this](QString text, pdf::PDFDocumentSanitizer::SanitizationFlag flag) + { + QCheckBox* checkBox = new QCheckBox(text, this); + checkBox->setChecked(m_sanitizer.getFlags().testFlag(flag)); + connect(checkBox, &QCheckBox::clicked, this, [this, flag](bool checked) { m_sanitizer.setFlags(m_sanitizer.getFlags().setFlag(flag, checked)); }); + ui->groupBoxLayout->addWidget(checkBox); + }; + + addCheckBox(tr("Remove document info"), pdf::PDFDocumentSanitizer::DocumentInfo); + addCheckBox(tr("Remove all metadata"), 
pdf::PDFDocumentSanitizer::Metadata);
+    addCheckBox(tr("Remove outline (bookmarks)"), pdf::PDFDocumentSanitizer::Bookmarks);
+    addCheckBox(tr("Remove file attachments"), pdf::PDFDocumentSanitizer::FileAttachments);
+    addCheckBox(tr("Remove embedded search index"), pdf::PDFDocumentSanitizer::EmbeddedSearchIndex);
+    addCheckBox(tr("Remove comments and other markup annotations"), pdf::PDFDocumentSanitizer::MarkupAnnotations);
+    addCheckBox(tr("Remove page thumbnails"), pdf::PDFDocumentSanitizer::PageThumbnails);
+
+    m_sanitizeButton = ui->buttonBox->addButton(tr("Sanitize"), QDialogButtonBox::ActionRole);
+
+    connect(m_sanitizeButton, &QPushButton::clicked, this, &PDFSanitizeDocumentDialog::onSanitizeButtonClicked);
+    connect(&m_sanitizer, &pdf::PDFDocumentSanitizer::sanitizationStarted, this, &PDFSanitizeDocumentDialog::onSanitizationStarted);
+    connect(&m_sanitizer, &pdf::PDFDocumentSanitizer::sanitizationProgress, this, &PDFSanitizeDocumentDialog::onSanitizationProgress);
+    connect(&m_sanitizer, &pdf::PDFDocumentSanitizer::sanitizationFinished, this, &PDFSanitizeDocumentDialog::onSanitizationFinished);
+    connect(this, &PDFSanitizeDocumentDialog::displaySanitizationInfo, this, &PDFSanitizeDocumentDialog::onDisplaySanitizationInfo);
+
+    pdf::PDFWidgetUtils::scaleWidget(this, QSize(640, 380));
+    updateUi();
+    pdf::PDFWidgetUtils::style(this);
+}
+
+/// Destroys the dialog. Sanitization is expected to be finished at this point.
+PDFSanitizeDocumentDialog::~PDFSanitizeDocumentDialog()
+{
+    Q_ASSERT(!m_sanitizationInProgress);
+    Q_ASSERT(!m_future.isRunning());
+
+    // The worker lambda started in onSanitizeButtonClicked() captures 'this'.
+    // When the assertions above are compiled out, block until the worker is
+    // done so that it can never access an already destroyed dialog.
+    m_future.waitForFinished();
+
+    delete ui;
+}
+
+/// Sanitizes the document and collects timing and size statistics. This
+/// function is executed via QtConcurrent::run (see onSanitizeButtonClicked()),
+/// results are announced by the displaySanitizationInfo() signal.
+void PDFSanitizeDocumentDialog::sanitize()
+{
+    QElapsedTimer timer;
+    timer.start();
+
+    m_sanitizer.setDocument(m_document);
+    m_sanitizer.sanitize();
+    m_sanitizedDocument = m_sanitizer.takeSanitizedDocument();
+
+    // Only the sanitization itself is measured, not the size computation below
+    qreal msecsElapsed = timer.nsecsElapsed() / 1000000.0;
+    timer.invalidate();
+
+    m_sanitizationInfo.msecsElapsed = msecsElapsed;
+    m_sanitizationInfo.bytesBeforeSanitization = pdf::PDFDocumentWriter::getDocumentFileSize(m_document);
+    m_sanitizationInfo.bytesAfterSanitization = pdf::PDFDocumentWriter::getDocumentFileSize(&m_sanitizedDocument);
+    Q_EMIT displaySanitizationInfo();
+}
+
+/// Starts the sanitization asynchronously and locks the user interface.
+void PDFSanitizeDocumentDialog::onSanitizeButtonClicked()
+{
+    Q_ASSERT(!m_sanitizationInProgress);
+    Q_ASSERT(!m_future.isRunning());
+
+    m_sanitizationInProgress = true;
+    m_future = QtConcurrent::run([this]() { sanitize(); });
+    updateUi();
+}
+
+/// Resets the log to the initial "started" message.
+void PDFSanitizeDocumentDialog::onSanitizationStarted()
+{
+    Q_ASSERT(m_sanitizationInProgress);
+    ui->logTextEdit->setPlainText(tr("Sanitization started!"));
+}
+
+/// Appends a progress message to the log.
+/// \param progressText Text to be appended on a new line
+void PDFSanitizeDocumentDialog::onSanitizationProgress(QString progressText)
+{
+    Q_ASSERT(m_sanitizationInProgress);
+    ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), progressText));
+}
+
+/// Appends the "finished" message to the log, waits for the worker
+/// to finish and unlocks the user interface.
+void PDFSanitizeDocumentDialog::onSanitizationFinished()
+{
+    ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), tr("Sanitization finished!")));
+    m_future.waitForFinished();
+    m_sanitizationInProgress = false;
+    m_wasSanitized = true;
+    updateUi();
+}
+
+/// Appends the statistics stored in m_sanitizationInfo to the log. Size
+/// statistics are printed only if both sizes differ from -1.
+void PDFSanitizeDocumentDialog::onDisplaySanitizationInfo()
+{
+    QStringList texts;
+    texts << tr("Sanitized in %1 msecs").arg(m_sanitizationInfo.msecsElapsed);
+    if (m_sanitizationInfo.bytesBeforeSanitization != -1 &&
+        m_sanitizationInfo.bytesAfterSanitization != -1)
+    {
+        texts << tr("Bytes before sanitization: %1").arg(m_sanitizationInfo.bytesBeforeSanitization);
+        texts << tr("Bytes after sanitization: %1").arg(m_sanitizationInfo.bytesAfterSanitization);
+        texts << tr("Bytes saved by sanitization: %1").arg(m_sanitizationInfo.bytesBeforeSanitization - m_sanitizationInfo.bytesAfterSanitization);
+
+        // Size after sanitization as a percentage of the original size;
+        // stays at 100 if the original size is not positive (no division by zero)
+        qreal ratio = 100.0;
+        if (m_sanitizationInfo.bytesBeforeSanitization > 0)
+        {
+            ratio = 100.0 * qreal(m_sanitizationInfo.bytesAfterSanitization) / qreal(m_sanitizationInfo.bytesBeforeSanitization);
+        }
+
+        texts << tr("Compression ratio: %1 %").arg(ratio);
+    }
+    ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), texts.join("\n")));
+}
+
+/// Enables or disables the controls according to the sanitization state.
+/// Check boxes, Cancel and Sanitize buttons are disabled while sanitization
+/// is running, Ok button is enabled only after a sanitization has finished.
+void PDFSanitizeDocumentDialog::updateUi()
+{
+    // Element type must be given explicitly, it cannot be deduced from the arguments
+    for (QCheckBox* checkBox : findChildren<QCheckBox*>(QString(), Qt::FindChildrenRecursively))
+    {
+        checkBox->setEnabled(!m_sanitizationInProgress);
+    }
+
+    ui->buttonBox->button(QDialogButtonBox::Ok)->setEnabled(m_wasSanitized && !m_sanitizationInProgress);
+    ui->buttonBox->button(QDialogButtonBox::Cancel)->setEnabled(!m_sanitizationInProgress);
+    m_sanitizeButton->setEnabled(!m_sanitizationInProgress);
+}
+
+} // namespace pdfviewer
diff --git a/Pdf4QtViewer/pdfsanitizedocumentdialog.h b/Pdf4QtViewer/pdfsanitizedocumentdialog.h
new file mode 100644
index 0000000..c85623c
--- /dev/null
+++ b/Pdf4QtViewer/pdfsanitizedocumentdialog.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2023 Jakub Melka
+//
+// This file is part of PDF4QT.
+//
+// PDF4QT is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// with the written consent of the copyright owner, any later version.
+//
+// PDF4QT is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef PDFSANITIZEDOCUMENTDIALOG_H
+#define PDFSANITIZEDOCUMENTDIALOG_H
+
+#include "pdfdocumentsanitizer.h"
+
+#include <QDialog>
+#include <QFuture>
+
+namespace Ui
+{
+class PDFSanitizeDocumentDialog;
+}
+
+namespace pdfviewer
+{
+
+/// Dialog, which sanitizes a document using pdf::PDFDocumentSanitizer.
+/// The sanitization itself is started asynchronously and its progress
+/// is written into the dialog's log.
+class PDFSanitizeDocumentDialog : public QDialog
+{
+    Q_OBJECT
+
+public:
+    /// Constructs the dialog
+    /// \param document Document to be sanitized (not owned by the dialog)
+    /// \param parent Parent widget
+    explicit PDFSanitizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent);
+    virtual ~PDFSanitizeDocumentDialog() override;
+
+    /// Returns the sanitized document. The document is moved out of the
+    /// dialog, so this function should be called only once.
+    pdf::PDFDocument takeSanitizedDocument() { return qMove(m_sanitizedDocument); }
+
+signals:
+    /// Emitted by sanitize(), when statistics in m_sanitizationInfo are filled
+    void displaySanitizationInfo();
+
+private:
+    /// Performs the sanitization and fills the statistics (run asynchronously)
+    void sanitize();
+    void onSanitizeButtonClicked();
+    void onSanitizationStarted();
+    void onSanitizationProgress(QString progressText);
+    void onSanitizationFinished();
+    void onDisplaySanitizationInfo();
+
+    /// Enables/disables controls according to the sanitization state
+    void updateUi();
+
+    /// Statistics of the last sanitization. Sizes are -1 until they are computed.
+    struct SanitizationInfo
+    {
+        qreal msecsElapsed = 0.0;
+        qint64 bytesBeforeSanitization = -1;
+        qint64 bytesAfterSanitization = -1;
+    };
+
+    Ui::PDFSanitizeDocumentDialog* ui;
+    const pdf::PDFDocument* m_document;     ///< Source document
+    pdf::PDFDocumentSanitizer m_sanitizer;
+    QPushButton* m_sanitizeButton;          ///< "Sanitize" button added to the button box
+    bool m_sanitizationInProgress;          ///< True while the asynchronous sanitization runs
+    bool m_wasSanitized;                    ///< True after a sanitization has finished
+    QFuture<void> m_future;                 ///< Future of the asynchronous sanitization
+    pdf::PDFDocument m_sanitizedDocument;   ///< Result of the sanitization
+    SanitizationInfo m_sanitizationInfo;
+};
+
+} // namespace pdfviewer
+
+#endif // PDFSANITIZEDOCUMENTDIALOG_H
diff --git a/Pdf4QtViewer/pdfsanitizedocumentdialog.ui b/Pdf4QtViewer/pdfsanitizedocumentdialog.ui
new file mode 100644
index 0000000..f2d8e0e
--- /dev/null
+++ b/Pdf4QtViewer/pdfsanitizedocumentdialog.ui
@@ -0,0 +1,82 @@
+ + + PDFSanitizeDocumentDialog + + + + 0 + 0 + 741 + 530 + + + + Document sanitization + + + + + + Sanitization Settings + + + + + + + + false + + + true + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + + + + + buttonBox + accepted() + PDFSanitizeDocumentDialog + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + PDFSanitizeDocumentDialog + reject() + + + 316 + 260 + + + 286 + 274
+ + + + diff --git a/Pdf4QtViewer/pdfviewermainwindow.cpp b/Pdf4QtViewer/pdfviewermainwindow.cpp index 39bbbdf..760ebe7 100644 --- a/Pdf4QtViewer/pdfviewermainwindow.cpp +++ b/Pdf4QtViewer/pdfviewermainwindow.cpp @@ -24,7 +24,6 @@ #include "pdfviewersettingsdialog.h" #include "pdfdocumentpropertiesdialog.h" #include "pdfrendertoimagesdialog.h" -#include "pdfoptimizedocumentdialog.h" #include "pdfdbgheap.h" #include "pdfdocumentreader.h" @@ -161,6 +160,7 @@ PDFViewerMainWindow::PDFViewerMainWindow(QWidget* parent) : m_actionManager->setAction(PDFActionManager::SendByMail, ui->actionSend_by_E_Mail); m_actionManager->setAction(PDFActionManager::RenderToImages, ui->actionRender_to_Images); m_actionManager->setAction(PDFActionManager::Optimize, ui->actionOptimize); + m_actionManager->setAction(PDFActionManager::Sanitize, ui->actionSanitize); m_actionManager->setAction(PDFActionManager::Encryption, ui->actionEncryption); m_actionManager->setAction(PDFActionManager::FitPage, ui->actionFitPage); m_actionManager->setAction(PDFActionManager::FitWidth, ui->actionFitWidth); diff --git a/Pdf4QtViewer/pdfviewermainwindow.ui b/Pdf4QtViewer/pdfviewermainwindow.ui index 1a051b9..4888098 100644 --- a/Pdf4QtViewer/pdfviewermainwindow.ui +++ b/Pdf4QtViewer/pdfviewermainwindow.ui @@ -20,7 +20,7 @@ 0 0 770 - 37 + 21 @@ -142,6 +142,7 @@ + @@ -635,6 +636,9 @@ Optimize... + + Optimizes document to reduce file size. + @@ -924,6 +928,14 @@ Certificates... + + + Sanitize... + + + Sanitize document to remove sensitive information. 
+ + diff --git a/Pdf4QtViewer/pdfviewermainwindowlite.cpp b/Pdf4QtViewer/pdfviewermainwindowlite.cpp index 5f21f21..b33625b 100644 --- a/Pdf4QtViewer/pdfviewermainwindowlite.cpp +++ b/Pdf4QtViewer/pdfviewermainwindowlite.cpp @@ -24,7 +24,6 @@ #include "pdfviewersettingsdialog.h" #include "pdfdocumentpropertiesdialog.h" #include "pdfrendertoimagesdialog.h" -#include "pdfoptimizedocumentdialog.h" #include "pdfdbgheap.h" #include "pdfdocumentreader.h" diff --git a/RELEASES.txt b/RELEASES.txt index 115f8ab..0d8026e 100644 --- a/RELEASES.txt +++ b/RELEASES.txt @@ -1,4 +1,5 @@ CURRENT: + - Issue #40: Sanitization of documents V: 1.3.2 1.2.2023 - Issue #39: Code signed installation diff --git a/generated_code_definition.xml b/generated_code_definition.xml index fa6ec83..9a9ae5c 100644 --- a/generated_code_definition.xml +++ b/generated_code_definition.xml @@ -12163,5 +12163,62 @@ updateDocumentInfo(qMove(updatedInfoDictionary)); This function is used to update trailer dictionary. Must be called each time the final document is being built. _void + + + + + + + + + + pageReference + _PDFObjectReference + Removes page thumbnail. + + + Parameters + + _void + + + + + + + + + + + + Thumb + DictionaryItemSimple + PDFObject() + + + + Dictionary + + + + CreateObject + updatedPageObject + _PDFObject + + + + + + Code + + _void + mergeTo(pageReference, updatedPageObject); + + + Structure + removePageThumbnail + + _void +