diff --git a/Pdf4QtLib/CMakeLists.txt b/Pdf4QtLib/CMakeLists.txt
index 8589a39..399deb8 100644
--- a/Pdf4QtLib/CMakeLists.txt
+++ b/Pdf4QtLib/CMakeLists.txt
@@ -98,6 +98,8 @@ add_library(Pdf4QtLib SHARED
sources/pdfrenderingerrorswidget.ui
sources/pdfselectpagesdialog.ui
sources/pdfobjecteditorwidget_impl.h
+ sources/pdfdocumentsanitizer.h
+ sources/pdfdocumentsanitizer.cpp
cmaps.qrc
)
diff --git a/Pdf4QtLib/sources/pdfdocumentbuilder.cpp b/Pdf4QtLib/sources/pdfdocumentbuilder.cpp
index 6ff160f..843654f 100644
--- a/Pdf4QtLib/sources/pdfdocumentbuilder.cpp
+++ b/Pdf4QtLib/sources/pdfdocumentbuilder.cpp
@@ -669,6 +669,13 @@ PDFDocumentBuilder::PDFDocumentBuilder(const PDFDocument* document) :
}
+PDFDocumentBuilder::PDFDocumentBuilder(const PDFObjectStorage& storage, PDFVersion version) :
+ m_storage(storage), // builder storage is initialized from the caller's storage
+ m_version(version) // PDF version kept by the builder
+{
+ // Constructs a builder from an existing object storage and version; the initializer list does all the work.
+}
+
void PDFDocumentBuilder::reset()
{
*this = PDFDocumentBuilder();
@@ -5414,6 +5421,20 @@ void PDFDocumentBuilder::updateTrailerDictionary(PDFInteger objectCount)
}
+void PDFDocumentBuilder::removePageThumbnail(PDFObjectReference pageReference)
+{
+ PDFObjectFactory objectBuilder;
+ // Build a dictionary with a single "Thumb" item and merge it into the object referenced by pageReference.
+ objectBuilder.beginDictionary();
+ objectBuilder.beginDictionaryItem("Thumb");
+ objectBuilder << PDFObject(); // default-constructed object (presumably the null object - confirm)
+ objectBuilder.endDictionaryItem();
+ objectBuilder.endDictionary();
+ PDFObject updatedPageObject = objectBuilder.takeObject();
+ mergeTo(pageReference, updatedPageObject); // NOTE(review): assumes mergeTo() drops or overrides the existing "Thumb" entry - confirm
+}
+
+
/* END GENERATED CODE */
} // namespace pdf
diff --git a/Pdf4QtLib/sources/pdfdocumentbuilder.h b/Pdf4QtLib/sources/pdfdocumentbuilder.h
index b7f33fc..30f4551 100644
--- a/Pdf4QtLib/sources/pdfdocumentbuilder.h
+++ b/Pdf4QtLib/sources/pdfdocumentbuilder.h
@@ -323,6 +323,9 @@ public:
/// Creates a new document as modification of old document
explicit PDFDocumentBuilder(const PDFDocument* document);
+ /// Creates a builder initialized from the given object storage and PDF version
+ explicit PDFDocumentBuilder(const PDFObjectStorage& storage, PDFVersion version);
+
/// Resets the object to the initial state.
/// \warning All data are lost
void reset();
@@ -491,8 +494,8 @@ public:
PDFObjectReference createActionGoToDocumentPart(PDFObjectReference documentPart);
- /// Creates embedded GoTo action. When executed, action points to destination in another document,
- /// which is embedded in this document.
+ /// Creates embedded GoTo action. When executed, action points to destination in another document, which
+ /// is embedded in this document.
/// \param fileSpecification File specification
/// \param destination Destination in a embedded document
/// \param newWindow Open document in new window
@@ -541,8 +544,8 @@ public:
bool newWindow);
- /// Creates launch action. Launch action executes document opening or printing. This variant for
- /// Windows operating system, where additional parameters can be specified.
+ /// Creates launch action. Launch action executes document opening or printing. This variant is for Windows
+ /// operating system, where additional parameters can be specified.
/// \param fileName File name
/// \param defaultDirectory Default directory
/// \param action Action to be performed. Valid values are 'open' or 'print'.
@@ -555,8 +558,8 @@ public:
bool newWindow);
- /// Creates named action. Named actions are some predefined actions that interactive PDF processor
- /// shall support. Valid values are NextPage, PrevPage, FirstPage, LastPage.
+ /// Creates named action. Named actions are some predefined actions that interactive PDF processor shall
+ /// support. Valid values are NextPage, PrevPage, FirstPage, LastPage.
/// \param name Predefined name
PDFObjectReference createActionNamed(QByteArray name);
@@ -581,8 +584,7 @@ public:
PDFObjectReference createActionResetForm();
- /// Creates reset interactive form action, which resets all fields except those specified in a given list of
- /// fields.
+ /// Creates reset interactive form action, which resets all fields except those specified in a given list of fields.
/// \param fields Fields to be excluded from reset
PDFObjectReference createActionResetFormExcludedFields(PDFObjectReferenceVector fields);
@@ -641,9 +643,8 @@ public:
PDFObjectReference createActionURI(QString URL);
- /// Caret annotations are used to indicate, where text should be inserted (for example, if reviewer
- /// reviews the document, and he wants to mark, that some text should be inserted, he uses this
- /// annotation).
+ /// Caret annotations are used to indicate, where text should be inserted (for example, if reviewer reviews the
+ /// document, and he wants to mark, that some text should be inserted, he uses this annotation).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is caret displayed
/// \param borderWidth Border width
@@ -660,15 +661,15 @@ public:
QString contents);
- /// Circle annotation displays ellipse (or circle). Circle border/fill color can be defined, along with
- /// border width. Popup annotation can be attached to this annotation.
+ /// Circle annotation displays ellipse (or circle). Circle border/fill color can be defined, along with border
+ /// width. Popup annotation can be attached to this annotation.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is circle/ellipse displayed
/// \param borderWidth Width of the border line of circle/ellipse
- /// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled,
+ /// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled, then
+ /// use invalid QColor.
+ /// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a border,
/// then use invalid QColor.
- /// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a
- /// border, then use invalid QColor.
/// \param title Title (it is displayed as title of popup window)
/// \param subject Subject (short description of the subject being adressed by the annotation)
/// \param contents Contents (text displayed, for example, in the marked annotation dialog)
@@ -697,15 +698,15 @@ public:
QString description);
- /// Free text annotation displays text directly on a page. Text appears directly on the page, in the
- /// same way, as standard text in PDF document. Free text annotations are usually used to comment
- /// the document. Free text annotation can also have callout line, with, or without a knee. Specify
- /// start/end point parameters of this function to get callout line.
+ /// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way,
+ /// as standard text in PDF document. Free text annotations are usually used to comment the document.
+ /// Free text annotation can also have callout line, with, or without a knee. Specify start/end point
+ /// parameters of this function to get callout line.
/// \param page Page to which is annotation added
- /// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both
- /// callout line and text rectangle.
- /// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to
- /// match bounding rectangle.
+ /// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both callout
+ /// line and text rectangle.
+ /// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to match
+ /// bounding rectangle.
/// \param title Title
/// \param subject Subject
/// \param contents Contents (text displayed)
@@ -729,9 +730,9 @@ public:
AnnotationLineEnding endLineType);
- /// Free text annotation displays text directly on a page. Text appears directly on the page, in the
- /// same way, as standard text in PDF document. Free text annotations are usually used to comment
- /// the document. Free text annotation can also have callout line, with, or without a knee.
+ /// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way,
+ /// as standard text in PDF document. Free text annotations are usually used to comment the document.
+ /// Free text annotation can also have callout line, with, or without a knee.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is text displayed
/// \param title Title
@@ -746,15 +747,15 @@ public:
TextAlignment textAlignment);
- /// Free text annotation displays text directly on a page. Text appears directly on the page, in the
- /// same way, as standard text in PDF document. Free text annotations are usually used to comment
- /// the document. Free text annotation can also have callout line, with, or without a knee. Specify
- /// start/end point parameters of this function to get callout line.
+ /// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way,
+ /// as standard text in PDF document. Free text annotations are usually used to comment the document.
+ /// Free text annotation can also have callout line, with, or without a knee. Specify start/end point
+ /// parameters of this function to get callout line.
/// \param page Page to which is annotation added
- /// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both
- /// callout line and text rectangle.
- /// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to
- /// match bounding rectangle.
+ /// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both callout
+ /// line and text rectangle.
+ /// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to match
+ /// bounding rectangle.
/// \param title Title
/// \param subject Subject
/// \param contents Contents (text displayed)
@@ -776,9 +777,9 @@ public:
AnnotationLineEnding endLineType);
- /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain
- /// window to be opened (and commented). This annotation is usually used to highlight text, but can
- /// also highlight other things, such as images, or other graphics.
+ /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to
+ /// be opened (and commented). This annotation is usually used to highlight text, but can also highlight
+ /// other things, such as images, or other graphics.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is highlight displayed
/// \param color Color
@@ -793,9 +794,9 @@ public:
QString contents);
- /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain
- /// window to be opened (and commented). This annotation is usually used to highlight text, but can
- /// also highlight other things, such as images, or other graphics.
+ /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to
+ /// be opened (and commented). This annotation is usually used to highlight text, but can also highlight
+ /// other things, such as images, or other graphics.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is highlight displayed
/// \param color Color
@@ -804,9 +805,9 @@ public:
QColor color);
- /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain
- /// window to be opened (and commented). This annotation is usually used to highlight text, but can
- /// also highlight other things, such as images, or other graphics.
+ /// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to
+ /// be opened (and commented). This annotation is usually used to highlight text, but can also highlight
+ /// other things, such as images, or other graphics.
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is highlight displayed
/// \param color Color
@@ -849,9 +850,9 @@ public:
QString contents);
- /// Line annotation represents straight line, or some more advanced graphics, such as dimension with
- /// text. Line annotations are markup annotations, so they can have popup window. Line endings can
- /// be specified.
+ /// Line annotation represents straight line, or some more advanced graphics, such as dimension with text.
+ /// Line annotations are markup annotations, so they can have popup window. Line endings can be
+ /// specified.
/// \param page Page to which is annotation added
/// \param boundingRect Line annotation bounding rectangle
/// \param startPoint Line start
@@ -878,9 +879,9 @@ public:
AnnotationLineEnding endLineType);
- /// Line annotation represents straight line, or some more advanced graphics, such as dimension with
- /// text. Line annotations are markup annotations, so they can have popup window. Line endings can
- /// be specified.
+ /// Line annotation represents straight line, or some more advanced graphics, such as dimension with text.
+ /// Line annotations are markup annotations, so they can have popup window. Line endings can be
+ /// specified.
/// \param page Page to which is annotation added
/// \param boundingRect Line annotation bounding rectangle
/// \param startPoint Line start
@@ -893,12 +894,12 @@ public:
/// \param contents Contents (text displayed, for example, in the marked annotation dialog)
/// \param startLineType Start line ending type
/// \param endLineType End line ending type
- /// \param leaderLineLength Length of the leader line. Leader line extends from each endpoint of
- /// the line perpendicular to the line itself. Value can be either positive, negative or zero. If
- /// positive, then extension is in plane that is above the annotation line (in clockwise order),
- /// if negative, then it is below the annotation line.
- /// \param leaderLineOffset Length of leader line offset, which is the amount of empty space
- /// between the endpoints of the annotation and beginning of leader lines
+ /// \param leaderLineLength Length of the leader line. Leader line extends from each endpoint of the line
+ /// perpendicular to the line itself. Value can be either positive, negative or zero. If positive, then
+ /// extension is in plane that is above the annotation line (in clockwise order), if negative, then it is
+ /// below the annotation line.
+ /// \param leaderLineOffset Length of leader line offset, which is the amount of empty space between the
+ /// endpoints of the annotation and beginning of leader lines
/// \param leaderLineExtension Length of leader line extension, which extends leader lines in 180°
/// direction from leader lines (so leader lines continues above drawn line)
/// \param displayContents Display contents of the annotation as text along the line
@@ -922,9 +923,9 @@ public:
bool displayedContentsTopAlign);
- /// Creates new link annotation. It usually represents clickable hypertext link. User can also specify
- /// action, which can be executed, for example, link can be also in the PDF document (link to some
- /// location in document).
+ /// Creates new link annotation. It usually represents clickable hypertext link. User can also specify action,
+ /// which can be executed, for example, link can be also in the PDF document (link to some location in
+ /// document).
/// \param page Page to which is annotation added
/// \param linkRectangle Link rectangle
/// \param URL URL to be launched when user clicks on the link
@@ -935,9 +936,9 @@ public:
LinkHighlightMode highlightMode);
- /// Creates new link annotation. It usually represents clickable hypertext link. User can also specify
- /// action, which can be executed, for example, link can be also in the PDF document (link to some
- /// location in document).
+ /// Creates new link annotation. It usually represents clickable hypertext link. User can also specify action,
+ /// which can be executed, for example, link can be also in the PDF document (link to some location in
+ /// document).
/// \param page Page to which is annotation added
/// \param linkRectangle Link rectangle
/// \param action Action to be performed when user clicks on a link
@@ -948,9 +949,9 @@ public:
LinkHighlightMode highlightMode);
- /// Polygon annotation. When opened, they display pop-up window containing the text of associated
- /// note (and window title), if popup annotation is attached. Polygon border/fill color can be defined,
- /// along with border width.
+ /// Polygon annotation. When opened, they display pop-up window containing the text of associated note
+ /// (and window title), if popup annotation is attached. Polygon border/fill color can be defined, along with
+ /// border width.
/// \param page Page to which is annotation added
/// \param polygon Polygon
/// \param borderWidth Border line width
@@ -969,9 +970,9 @@ public:
QString contents);
- /// Polyline annotation. When opened, they display pop-up window containing the text of associated
- /// note (and window title), if popup annotation is attached. Polyline border/fill color can be defined,
- /// along with border width.
+ /// Polyline annotation. When opened, they display pop-up window containing the text of associated note
+ /// (and window title), if popup annotation is attached. Polyline border/fill color can be defined, along with
+ /// border width.
/// \param page Page to which is annotation added
/// \param polyline Polyline
/// \param borderWidth Border line width
@@ -995,9 +996,9 @@ public:
/// Creates a new popup annotation on the page. Popup annotation is represented usually by floating
- /// window, which can be opened, or closed. Popup annotation is associated with parent annotation,
- /// which can be usually markup annotation. Popup annotation displays parent annotation's texts, for
- /// example, title, comment, date etc.
+ /// window, which can be opened, or closed. Popup annotation is associated with parent annotation, which
+ /// can be usually markup annotation. Popup annotation displays parent annotation's texts, for example,
+ /// title, comment, date etc.
/// \param page Page to which is annotation added
/// \param parentAnnotation Parent annotation (for which is popup window displayed)
/// \param rectangle Area on the page, where popup window appears
@@ -1026,16 +1027,16 @@ public:
QColor color);
- /// Square annotation displays rectangle (or square). When opened, they display pop-up window
- /// containing the text of associated note (and window title), if popup annotation is attached. Square
- /// border/fill color can be defined, along with border width.
+ /// Square annotation displays rectangle (or square). When opened, they display pop-up window containing
+ /// the text of associated note (and window title), if popup annotation is attached. Square border/fill color
+ /// can be defined, along with border width.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is rectangle displayed
/// \param borderWidth Width of the border line of rectangle
- /// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled,
+ /// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled, then
+ /// use invalid QColor.
+ /// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a border,
/// then use invalid QColor.
- /// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a
- /// border, then use invalid QColor.
/// \param title Title (it is displayed as title of popup window)
/// \param subject Subject (short description of the subject being adressed by the annotation)
/// \param contents Contents (text displayed, for example, in the marked annotation dialog)
@@ -1049,8 +1050,8 @@ public:
QString contents);
- /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can
- /// contain window to be opened (and commented).
+ /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain
+ /// window to be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1059,8 +1060,8 @@ public:
QColor color);
- /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can
- /// contain window to be opened (and commented).
+ /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain
+ /// window to be opened (and commented).
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is markup displayed
/// \param color Color
@@ -1069,8 +1070,8 @@ public:
QColor color);
- /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can
- /// contain window to be opened (and commented).
+ /// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain
+ /// window to be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1100,8 +1101,8 @@ public:
QString contents);
- /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain
- /// window to be opened (and commented).
+ /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to
+ /// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1116,8 +1117,8 @@ public:
QString contents);
- /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain
- /// window to be opened (and commented).
+ /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to
+ /// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1126,8 +1127,8 @@ public:
QColor color);
- /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain
- /// window to be opened (and commented).
+ /// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to
+ /// be opened (and commented).
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is markup displayed
/// \param color Color
@@ -1136,11 +1137,10 @@ public:
QColor color);
- /// Creates text annotation. Text annotation is "sticky note" attached to a point in the PDF document.
- /// When closed, it is displayed as icon, if opened, widget appears with attached text. Text annotations
- /// do not scale or rotate, they appear independent of zoom/rotate. So, they behave as if flags
- /// NoZoom or NoRotate to the annotations are being set. Popup annotation is automatically created
- /// for this annotation.
+ /// Creates text annotation. Text annotation is "sticky note" attached to a point in the PDF document. When
+ /// closed, it is displayed as icon, if opened, widget appears with attached text. Text annotations do not scale
+ /// or rotate, they appear independent of zoom/rotate. So, they behave as if flags NoZoom or NoRotate to
+ /// the annotations are being set. Popup annotation is automatically created for this annotation.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is icon displayed
/// \param iconType Icon type
@@ -1157,8 +1157,8 @@ public:
bool open);
- /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain
- /// window to be opened (and commented).
+ /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to
+ /// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1167,8 +1167,8 @@ public:
QColor color);
- /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain
- /// window to be opened (and commented).
+ /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to
+ /// be opened (and commented).
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is markup displayed
/// \param color Color
@@ -1177,8 +1177,8 @@ public:
QColor color);
- /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain
- /// window to be opened (and commented).
+ /// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to
+ /// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1193,13 +1193,13 @@ public:
QString contents);
- /// Creates empty catalog. This function is used, when a new document is being created. Do not call
- /// this function manually.
+ /// Creates empty catalog. This function is used, when a new document is being created. Do not call this
+ /// function manually.
PDFObjectReference createCatalog();
- /// Creates page tree root for the catalog. This function is only called when new document is being
- /// created. Do not call this function manually.
+ /// Creates page tree root for the catalog. This function is only called when new document is being created.
+ /// Do not call this function manually.
PDFObjectReference createCatalogPageTreeRoot();
@@ -1258,8 +1258,7 @@ public:
/// Creates signature dictionary used for preparation in signing process. Can define parameters of the
/// signature.
/// \param filter Filter (for example, Adobe.PPKLite, Entrust.PPKEF, CiCi.SignIt, ...)
- /// \param subfilter Subfilter (for example, adbe.pkcs7.detached, adbe.pkcs7.sha1,
- /// ETSI.CAdES.detached, ...)
+ /// \param subfilter Subfilter (for example, adbe.pkcs7.detached, adbe.pkcs7.sha1, ETSI.CAdES.detached, ...)
/// \param contents Contents (reserved data for signature).
/// \param signingTime Signing date/time
/// \param byteRangeItem Item which will fill byte range array.
@@ -1270,8 +1269,8 @@ public:
PDFInteger byteRangeItem);
- /// This function is used to create a new trailer dictionary, when blank document is created. Do not
- /// call this function manually.
+ /// This function is used to create a new trailer dictionary, when blank document is created. Do not call this
+ /// function manually.
/// \param catalog Reference to document catalog
PDFObject createTrailerDictionary(PDFObjectReference catalog);
@@ -1358,9 +1357,9 @@ public:
bool isOpen);
- /// Sets annotation quadrilaterals. Quadrilaterals are sequence of 4 points, where first two points are
- /// on the upper side of quadrilateral, and the last two points are on the lower side of quadrilateral.
- /// Quadrilaterals are represented as unclosed polygon with 4 * n vertices.
+ /// Sets annotation quadrilaterals. Quadrilaterals are sequence of 4 points, where first two points are on the
+ /// upper side of quadrilateral, and the last two points are on the lower side of quadrilateral. Quadrilaterals
+ /// are represented as unclosed polygon with 4 * n vertices.
/// \param annotation Annotation
/// \param quadrilaterals Quadrilaterals
void setAnnotationQuadPoints(PDFObjectReference annotation,
@@ -1461,9 +1460,9 @@ public:
PDFInteger topIndex);
- /// Sets form field value. Value must be correct for this form field, no checking is performed. Also, if
- /// you use this function, annotation widgets, which are attached to this form field, should also be
- /// updated (for example, appearance state and sometimes appearance streams).
+ /// Sets form field value. Value must be correct for this form field, no checking is performed. Also, if you use
+ /// this function, annotation widgets, which are attached to this form field, should also be updated (for
+ /// example, appearance state and sometimes appearance streams).
/// \param formField Form field
/// \param value Value
void setFormFieldValue(PDFObjectReference formField,
@@ -1476,9 +1475,9 @@ public:
/// Set document language.
- /// \param language Document language. It should be a language identifier, as defined in ISO 639
- /// and ISO 3166. For example, "en-US", where first two letter means language code (en =
- /// english), and the latter two is country code (US - United States).
+ /// \param language Document language. It should be a language identifier, as defined in ISO 639 and
+ /// ISO 3166. For example, "en-US", where first two letter means language code (en = english), and
+ /// the latter two is country code (US - United States).
void setLanguage(QString language);
@@ -1494,17 +1493,16 @@ public:
QRectF box);
- /// Sets bleed box to the page. Bleed box is, basically, a clipping box for output in a production
- /// environment. Default value is the page's crop box.
+ /// Sets bleed box to the page. Bleed box is, basically, a clipping box for output in a production environment.
+ /// Default value is the page's crop box.
/// \param page Page
/// \param box Box
void setPageBleedBox(PDFObjectReference page,
QRectF box);
- /// Sets crop box to the page. Crop box defines clipping region of the page. Page contents are clipped
- /// to this region, graphics outside of clipping box will not be printed. Default value is same, as media
- /// box.
+ /// Sets crop box to the page. Crop box defines clipping region of the page. Page contents are clipped to
+ /// this region, graphics outside of clipping box will not be printed. Default value is same, as media box.
/// \param page Page
/// \param box Box
void setPageCropBox(PDFObjectReference page,
@@ -1518,8 +1516,8 @@ public:
PDFObjectReference documentPart);
- /// Sets media box to the page. The media box defines size of physical medium, onto which the page
- /// is to be printed.
+ /// Sets media box to the page. The media box defines size of physical medium, onto which the page is to be
+ /// printed.
/// \param page Page
/// \param box Box
void setPageMediaBox(PDFObjectReference page,
@@ -1561,12 +1559,17 @@ public:
QString reasonText);
- /// This function is used to update trailer dictionary. Must be called each time the final document is
- /// being built.
+ /// This function is used to update trailer dictionary. Must be called each time the final document is being
+ /// built.
/// \param objectCount Number of objects (including empty ones)
void updateTrailerDictionary(PDFInteger objectCount);
+ /// Removes page thumbnail.
+ /// \param pageReference Reference to the page whose thumbnail is removed
+ void removePageThumbnail(PDFObjectReference pageReference);
+
+
/* END GENERATED CODE */
private:
diff --git a/Pdf4QtLib/sources/pdfdocumentsanitizer.cpp b/Pdf4QtLib/sources/pdfdocumentsanitizer.cpp
new file mode 100644
index 0000000..e3be937
--- /dev/null
+++ b/Pdf4QtLib/sources/pdfdocumentsanitizer.cpp
@@ -0,0 +1,306 @@
+// Copyright (C) 2023 Jakub Melka
+//
+// This file is part of PDF4QT.
+//
+// PDF4QT is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// with the written consent of the copyright owner, any later version.
+//
+// PDF4QT is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
+
+#include "pdfdocumentsanitizer.h"
+#include "pdfvisitor.h"
+#include "pdfexecutionpolicy.h"
+#include "pdfoptimizer.h"
+#include "pdfdocumentbuilder.h"
+
+namespace pdf
+{
+
+// Visitor, which rebuilds the visited object and leaves out all dictionary
+// entries with key "Metadata". Each left out entry increments the counter
+// passed to the constructor.
+class PDFRemoveMetadataVisitor : public PDFUpdateObjectVisitor
+{
+public:
+ // \param storage Object storage, passed to the base visitor
+ // \param counter Counter of removed "Metadata" entries; only the pointer is
+ // stored, so the counter must stay alive while the visitor is used
+ explicit PDFRemoveMetadataVisitor(const PDFObjectStorage* storage, std::atomic* counter) :
+ PDFUpdateObjectVisitor(storage),
+ m_counter(counter)
+ {
+
+ }
+
+ virtual void visitDictionary(const PDFDictionary* dictionary) override;
+
+private:
+ std::atomic* m_counter; // Incremented in visitDictionary for each removed entry
+};
+
+// Rebuilds the dictionary without entries, whose key is "Metadata". Values of all
+// entries are visited (so nested objects are rebuilt too); the result of a "Metadata"
+// entry is discarded and the counter is incremented instead.
+void PDFRemoveMetadataVisitor::visitDictionary(const PDFDictionary* dictionary)
+{
+ Q_ASSERT(dictionary);
+
+ std::vector entries;
+ entries.reserve(dictionary->getCount());
+
+ for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
+ {
+ // Visiting the value is expected to leave its rebuilt copy on top of the object stack
+ dictionary->getValue(i).accept(this);
+ Q_ASSERT(!m_objectStack.empty());
+ if (dictionary->getKey(i) != "Metadata")
+ {
+ entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
+ }
+ else
+ {
+ ++*m_counter;
+ }
+ // The value's result is removed from the stack in both cases
+ m_objectStack.pop_back();
+ }
+
+ m_objectStack.push_back(PDFObject::createDictionary(std::make_shared(qMove(entries))));
+}
+
+// Creates sanitizer with initial sanitization flags; the object storage stays
+// default-constructed until setDocument()/setStorage() is called.
+// NOTE(review): parameter type is a single SanitizationFlag, not SanitizationFlags -
+// a combination of flags has to be set by setFlags().
+PDFDocumentSanitizer::PDFDocumentSanitizer(SanitizationFlag flags, QObject* parent) :
+ QObject(parent),
+ m_flags(flags)
+{
+
+}
+
+// Runs all sanitization steps selected by the flags (in fixed order), then
+// cleans up the storage using the optimizer. Signals sanitizationStarted()
+// and sanitizationFinished() are emitted at the beginning and at the end.
+void PDFDocumentSanitizer::sanitize()
+{
+    Q_EMIT sanitizationStarted();
+
+    // Sanitization steps in order of execution; a step runs only if its flag is set
+    struct SanitizationStep
+    {
+        SanitizationFlag flag;
+        void (PDFDocumentSanitizer::*perform)();
+    };
+
+    const SanitizationStep steps[] =
+    {
+        { DocumentInfo, &PDFDocumentSanitizer::performSanitizeDocumentInfo },
+        { Metadata, &PDFDocumentSanitizer::performSanitizeMetadata },
+        { Bookmarks, &PDFDocumentSanitizer::performSanitizeBookmarks },
+        { FileAttachments, &PDFDocumentSanitizer::performSanitizeFileAttachments },
+        { EmbeddedSearchIndex, &PDFDocumentSanitizer::performSanitizeEmbeddedSearchIndex },
+        { MarkupAnnotations, &PDFDocumentSanitizer::performSanitizeMarkupAnnotations },
+        { PageThumbnails, &PDFDocumentSanitizer::performSanitizePageThumbnails },
+    };
+
+    for (const SanitizationStep& step : steps)
+    {
+        if (m_flags.testFlag(step.flag))
+        {
+            (this->*step.perform)();
+        }
+    }
+
+    // Objects detached by the steps above are still in the storage - let the
+    // optimizer remove unused and null objects and shrink the storage
+    const PDFOptimizer::OptimizationFlags cleanupFlags(PDFOptimizer::RemoveUnusedObjects | PDFOptimizer::ShrinkObjectStorage | PDFOptimizer::RemoveNullObjects);
+    PDFOptimizer cleanupOptimizer(cleanupFlags, nullptr);
+    cleanupOptimizer.setStorage(m_storage);
+    cleanupOptimizer.optimize();
+    m_storage = cleanupOptimizer.takeStorage();
+
+    Q_EMIT sanitizationFinished();
+}
+
+// Returns flags, which select the sanitization steps performed by sanitize().
+PDFDocumentSanitizer::SanitizationFlags PDFDocumentSanitizer::getFlags() const
+{
+ return m_flags;
+}
+
+// Replaces flags, which select the sanitization steps performed by sanitize().
+void PDFDocumentSanitizer::setFlags(SanitizationFlags flags)
+{
+ m_flags = flags;
+}
+
+// Redirects the document info to a newly added null object. The storage is
+// always rebuilt; progress is reported only if the document had a valid
+// document info before.
+void PDFDocumentSanitizer::performSanitizeDocumentInfo()
+{
+    // The null object must be added before the builder takes its copy of the storage
+    const PDFObjectReference nullInfoReference = m_storage.addObject(PDFObject());
+
+    PDFDocumentBuilder documentBuilder(m_storage, PDFVersion(2, 0));
+    const bool hadDocumentInfo = documentBuilder.getDocumentInfo().isValid();
+    documentBuilder.setDocumentInfo(nullInfoReference);
+    m_storage = documentBuilder.build().getStorage();
+
+    if (!hadDocumentInfo)
+    {
+        return;
+    }
+
+    Q_EMIT sanitizationProgress(tr("Document info was removed."));
+}
+
+// Removes "Metadata" entries from dictionaries of all objects in the storage.
+// The work is done on a copy of the object list, which is written back to the
+// storage after all entries are processed. Number of removed entries is reported.
+void PDFDocumentSanitizer::performSanitizeMetadata()
+{
+ // Shared by all visitors created below
+ std::atomic counter = 0;
+
+ PDFObjectStorage::PDFObjects objects = m_storage.getObjects();
+ auto processEntry = [this, &counter](PDFObjectStorage::Entry& entry)
+ {
+ // Each entry gets its own visitor; the entry's object is replaced by the rebuilt one
+ PDFRemoveMetadataVisitor visitor(&m_storage, &counter);
+ entry.object.accept(&visitor);
+ entry.object = visitor.getObject();
+ };
+
+ // NOTE(review): the execution policy presumably may process entries in parallel,
+ // which would be the reason for the atomic counter - confirm
+ PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Unknown, objects.begin(), objects.end(), processEntry);
+ m_storage.setObjects(qMove(objects));
+ Q_EMIT sanitizationProgress(tr("Metadata streams removed: %1").arg(counter));
+}
+
+// Removes document outline (bookmarks). The storage is rebuilt and progress
+// reported only if the catalog has the "Outlines" entry.
+void PDFDocumentSanitizer::performSanitizeBookmarks()
+{
+    PDFDocumentBuilder documentBuilder(m_storage, PDFVersion(2, 0));
+    PDFObject catalog = documentBuilder.getObjectByReference(documentBuilder.getCatalogReference());
+    const PDFDictionary* catalogDictionary = documentBuilder.getDictionaryFromObject(catalog);
+
+    if (!catalogDictionary || !catalogDictionary->hasKey("Outlines"))
+    {
+        // No outline in the document, nothing to do
+        return;
+    }
+
+    documentBuilder.removeOutline();
+    m_storage = documentBuilder.build().getStorage();
+    Q_EMIT sanitizationProgress(tr("Outline was removed."));
+}
+
+// Removes all annotations of type FileAttachment from all pages.
+void PDFDocumentSanitizer::performSanitizeFileAttachments()
+{
+    const QString messageTemplate = tr("File attachments removed: %1.");
+    removeAnnotations([](const PDFAnnotation* annotation) { return annotation->getType() == AnnotationType::FileAttachment; },
+                      messageTemplate);
+}
+
+// Removes embedded search index, i.e. the "SearchIndex" entry of the catalog's
+// "PieceInfo" dictionary. The storage is rebuilt and progress reported only
+// if such an entry exists.
+void PDFDocumentSanitizer::performSanitizeEmbeddedSearchIndex()
+{
+    PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
+    PDFObject catalogObject = builder.getObjectByReference(builder.getCatalogReference());
+    const PDFDictionary* catalogDictionary = builder.getDictionaryFromObject(catalogObject);
+
+    if (!catalogDictionary || !catalogDictionary->hasKey("PieceInfo"))
+    {
+        return;
+    }
+
+    PDFObject pieceInfoObject = builder.getObject(catalogDictionary->get("PieceInfo"));
+    const PDFDictionary* pieceInfoDictionary = builder.getDictionaryFromObject(pieceInfoObject);
+
+    // "PieceInfo" comes from the processed document and need not be a dictionary
+    // (malformed file) - in that case there is no dictionary to look into
+    if (!pieceInfoDictionary || !pieceInfoDictionary->hasKey("SearchIndex"))
+    {
+        return;
+    }
+
+    // Copy of the piece info with "SearchIndex" set to a null object
+    PDFDictionary dictionaryCopy = *pieceInfoDictionary;
+    dictionaryCopy.setEntry(PDFInplaceOrMemoryString("SearchIndex"), PDFObject());
+    pieceInfoObject = PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(dictionaryCopy)));
+
+    // Catalog update, which carries just the new "PieceInfo"; it is merged to the existing catalog
+    PDFObjectFactory factory;
+    factory.beginDictionary();
+    factory.beginDictionaryItem("PieceInfo");
+    factory << pieceInfoObject;
+    factory.endDictionaryItem();
+    factory.endDictionary();
+    PDFObject newCatalog = factory.takeObject();
+    builder.mergeTo(builder.getCatalogReference(), std::move(newCatalog));
+
+    PDFDocument document = builder.build();
+    m_storage = document.getStorage();
+    Q_EMIT sanitizationProgress(tr("Search index was removed."));
+}
+
+// Removes all annotations, which are markup annotations, from all pages.
+void PDFDocumentSanitizer::performSanitizeMarkupAnnotations()
+{
+    const QString messageTemplate = tr("Markup annotations removed: %1.");
+    removeAnnotations([](const PDFAnnotation* annotation) { return annotation->asMarkupAnnotation() != nullptr; },
+                      messageTemplate);
+}
+
+// Removes thumbnails of all pages, which have the "Thumb" entry. The storage
+// is rebuilt and progress reported only if at least one such page exists.
+void PDFDocumentSanitizer::performSanitizePageThumbnails()
+{
+ PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
+ builder.flattenPageTree();
+ std::vector pageReferences = builder.getPages();
+ std::vector pagesWithThumbnail;
+
+ // First pass - collect pages with thumbnail (page dictionaries are read from the builder)
+ for (const PDFObjectReference& pageReference : pageReferences)
+ {
+ const PDFDictionary* pageDictionary = builder.getDictionaryFromObject(builder.getObjectByReference(pageReference));
+ if (pageDictionary && pageDictionary->hasKey("Thumb"))
+ {
+ pagesWithThumbnail.push_back(pageReference);
+ }
+ }
+
+ // Second pass - modify the collected pages; when nothing was collected, the
+ // builder (including the flattened page tree) is thrown away
+ if (!pagesWithThumbnail.empty())
+ {
+ for (const auto& pageReference : pagesWithThumbnail)
+ {
+ builder.removePageThumbnail(pageReference);
+ }
+
+ PDFDocument document = builder.build();
+ m_storage = document.getStorage();
+ Q_EMIT sanitizationProgress(tr("Page thumbnails removed: %1.").arg(pagesWithThumbnail.size()));
+ }
+}
+
+// Removes all annotations accepted by the filter from all pages. The storage is
+// rebuilt and progress reported only if at least one annotation is removed.
+// \param filter Returns true for annotations, which should be removed; it is
+//        never called with a null pointer
+// \param message Progress message with placeholder %1 for number of removed annotations
+void PDFDocumentSanitizer::removeAnnotations(const std::function<bool(const PDFAnnotation*)>& filter,
+                                             QString message)
+{
+    PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
+    builder.flattenPageTree();
+    std::vector<PDFObjectReference> pageReferences = builder.getPages();
+
+    // Pairs (page, annotation) collected in the first pass
+    std::vector<std::pair<PDFObjectReference, PDFObjectReference>> annotationsToBeRemoved;
+
+    PDFDocumentDataLoaderDecorator loader(&m_storage);
+    for (const PDFObjectReference& pageReference : pageReferences)
+    {
+        const PDFObject& pageObject = m_storage.getObjectByReference(pageReference);
+        const PDFDictionary* pageDictionary = m_storage.getDictionaryFromObject(pageObject);
+
+        if (!pageDictionary)
+        {
+            continue;
+        }
+
+        std::vector<PDFObjectReference> annotationReferences = loader.readReferenceArrayFromDictionary(pageDictionary, "Annots");
+        for (const PDFObjectReference& annotationReference : annotationReferences)
+        {
+            PDFAnnotationPtr annotation = PDFAnnotation::parse(&m_storage, annotationReference);
+
+            // The "Annots" array comes from the processed document; an item, which cannot
+            // be parsed as an annotation, presumably yields an empty pointer. Filters
+            // dereference the pointer, so such items must be skipped.
+            const PDFAnnotation* annotationPointer = annotation.get();
+            if (annotationPointer && filter(annotationPointer))
+            {
+                annotationsToBeRemoved.emplace_back(pageReference, annotationReference);
+            }
+        }
+    }
+
+    if (annotationsToBeRemoved.empty())
+    {
+        return;
+    }
+
+    for (const auto& item : annotationsToBeRemoved)
+    {
+        const PDFObjectReference pageReference = item.first;
+        const PDFObjectReference annotationReference = item.second;
+        builder.removeAnnotation(pageReference, annotationReference);
+    }
+
+    PDFDocument document = builder.build();
+    m_storage = document.getStorage();
+    Q_EMIT sanitizationProgress(message.arg(annotationsToBeRemoved.size()));
+}
+
+} // namespace pdf
diff --git a/Pdf4QtLib/sources/pdfdocumentsanitizer.h b/Pdf4QtLib/sources/pdfdocumentsanitizer.h
new file mode 100644
index 0000000..7113385
--- /dev/null
+++ b/Pdf4QtLib/sources/pdfdocumentsanitizer.h
@@ -0,0 +1,99 @@
+// Copyright (C) 2023 Jakub Melka
+//
+// This file is part of PDF4QT.
+//
+// PDF4QT is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// with the written consent of the copyright owner, any later version.
+//
+// PDF4QT is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef PDFDOCUMENTSANITIZER_H
+#define PDFDOCUMENTSANITIZER_H
+
+#include "pdfdocument.h"
+
+namespace pdf
+{
+class PDFAnnotation;
+
+/// Class for sanitizing documents. Can remove sensitive content from the document,
+/// except the content streams. Sanitization is configurable, user can specify,
+/// which content should be removed.
+class PDF4QTLIBSHARED_EXPORT PDFDocumentSanitizer : public QObject
+{
+ Q_OBJECT
+
+public:
+
+ /// Sanitization steps, which can be combined into SanitizationFlags
+ enum SanitizationFlag
+ {
+ None = 0x0000, ///< No sanitization is performed
+ DocumentInfo = 0x0001, ///< Remove document information
+ Metadata = 0x0002, ///< Remove all metadata streams in all objects
+ Bookmarks = 0x0004, ///< Remove bookmarks
+ FileAttachments = 0x0008, ///< Remove file attachments
+ EmbeddedSearchIndex = 0x0010, ///< Remove embedded search index
+ MarkupAnnotations = 0x0020, ///< Remove markup annotations from all pages
+ PageThumbnails = 0x0040, ///< Remove page thumbnails
+ All = 0xFFFF, ///< All sanitization turned on
+ };
+ Q_DECLARE_FLAGS(SanitizationFlags, SanitizationFlag)
+
+ /// Creates sanitizer with initial sanitization flags.
+ /// \param flags Initial sanitization flags. NOTE(review): the parameter type
+ /// is a single SanitizationFlag, a combination of flags has to be set
+ /// by setFlags().
+ /// \param parent Parent object
+ explicit PDFDocumentSanitizer(SanitizationFlag flags, QObject* parent);
+
+ /// Set document, which should be sanitized (storage of the document is copied)
+ /// \param document Document to be sanitized
+ void setDocument(const PDFDocument* document) { setStorage(document->getStorage()); }
+
+ /// Set storage directly (storage must be valid and filled with objects)
+ /// \param storage Storage
+ void setStorage(const PDFObjectStorage& storage) { m_storage = storage; }
+
+ /// Perform document sanitization. During sanitization process, various
+ /// signals are emitted to view progress.
+ void sanitize();
+
+ /// Returns object storage used for sanitization
+ const PDFObjectStorage& getStorage() const { return m_storage; }
+
+ /// Returns object storage by move semantics, old object storage is destroyed
+ PDFObjectStorage takeStorage() { return qMove(m_storage); }
+
+ /// Returns sanitized document. Object storage is cleared after
+ /// this function call. The document is always created with version 2.0.
+ PDFDocument takeSanitizedDocument() { return PDFDocument(qMove(m_storage), PDFVersion(2, 0)); }
+
+ /// Returns flags, which select the sanitization steps
+ SanitizationFlags getFlags() const;
+
+ /// Sets flags, which select the sanitization steps
+ /// \param flags Sanitization flags
+ void setFlags(SanitizationFlags flags);
+
+signals:
+ /// Emitted at the beginning of sanitize()
+ void sanitizationStarted();
+
+ /// Emitted by a sanitization step, which modified the document
+ /// \param progressText Translated description of what was removed
+ void sanitizationProgress(QString progressText);
+
+ /// Emitted at the end of sanitize()
+ void sanitizationFinished();
+
+private:
+ // Sanitization steps, one for each sanitization flag (except None and All)
+ void performSanitizeDocumentInfo();
+ void performSanitizeMetadata();
+ void performSanitizeBookmarks();
+ void performSanitizeFileAttachments();
+ void performSanitizeEmbeddedSearchIndex();
+ void performSanitizeMarkupAnnotations();
+ void performSanitizePageThumbnails();
+
+ /// Removes annotations accepted by the filter from all pages
+ /// \param filter Returns true for annotations, which should be removed
+ /// \param message Progress message with placeholder %1 for number of removed annotations
+ void removeAnnotations(const std::function& filter, QString message);
+
+ SanitizationFlags m_flags; ///< Selected sanitization steps
+ PDFObjectStorage m_storage; ///< Storage being sanitized (own copy)
+};
+
+} // namespace pdf
+
+#endif // PDFDOCUMENTSANITIZER_H
diff --git a/Pdf4QtLib/sources/pdfoptimizer.cpp b/Pdf4QtLib/sources/pdfoptimizer.cpp
index 7331148..0a0b342 100644
--- a/Pdf4QtLib/sources/pdfoptimizer.cpp
+++ b/Pdf4QtLib/sources/pdfoptimizer.cpp
@@ -28,120 +28,6 @@
namespace pdf
{
-class PDFUpdateObjectVisitor : public PDFAbstractVisitor
-{
-public:
- explicit inline PDFUpdateObjectVisitor(const PDFObjectStorage* storage) :
- m_storage(storage)
- {
- m_objectStack.reserve(32);
- }
-
- virtual void visitNull() override;
- virtual void visitBool(bool value) override;
- virtual void visitInt(PDFInteger value) override;
- virtual void visitReal(PDFReal value) override;
- virtual void visitString(PDFStringRef string) override;
- virtual void visitName(PDFStringRef name) override;
- virtual void visitArray(const PDFArray* array) override;
- virtual void visitDictionary(const PDFDictionary* dictionary) override;
- virtual void visitStream(const PDFStream* stream) override;
- virtual void visitReference(const PDFObjectReference reference) override;
-
- PDFObject getObject();
-
-protected:
- const PDFObjectStorage* m_storage;
- std::vector m_objectStack;
-};
-
-void PDFUpdateObjectVisitor::visitNull()
-{
- m_objectStack.push_back(PDFObject::createNull());
-}
-
-void PDFUpdateObjectVisitor::visitBool(bool value)
-{
- m_objectStack.push_back(PDFObject::createBool(value));
-}
-
-void PDFUpdateObjectVisitor::visitInt(PDFInteger value)
-{
- m_objectStack.push_back(PDFObject::createInteger(value));
-}
-
-void PDFUpdateObjectVisitor::visitReal(PDFReal value)
-{
- m_objectStack.push_back(PDFObject::createReal(value));
-}
-
-void PDFUpdateObjectVisitor::visitString(PDFStringRef string)
-{
- m_objectStack.push_back(PDFObject::createString(string));
-}
-
-void PDFUpdateObjectVisitor::visitName(PDFStringRef name)
-{
- m_objectStack.push_back(PDFObject::createName(name));
-}
-
-void PDFUpdateObjectVisitor::visitArray(const PDFArray* array)
-{
- acceptArray(array);
-
- // We have all objects on the stack
- Q_ASSERT(array->getCount() <= m_objectStack.size());
-
- auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount());
- std::vector objects(it, m_objectStack.cend());
- PDFObject object = PDFObject::createArray(std::make_shared(qMove(objects)));
- m_objectStack.erase(it, m_objectStack.cend());
- m_objectStack.push_back(object);
-}
-
-void PDFUpdateObjectVisitor::visitDictionary(const PDFDictionary* dictionary)
-{
- Q_ASSERT(dictionary);
-
- std::vector entries;
- entries.reserve(dictionary->getCount());
-
- for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
- {
- dictionary->getValue(i).accept(this);
- Q_ASSERT(!m_objectStack.empty());
- entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
- m_objectStack.pop_back();
- }
-
- m_objectStack.push_back(PDFObject::createDictionary(std::make_shared(qMove(entries))));
-}
-
-void PDFUpdateObjectVisitor::visitStream(const PDFStream* stream)
-{
- const PDFDictionary* dictionary = stream->getDictionary();
-
- visitDictionary(dictionary);
-
- Q_ASSERT(!m_objectStack.empty());
- PDFObject dictionaryObject = m_objectStack.back();
- m_objectStack.pop_back();
-
- PDFDictionary newDictionary(*dictionaryObject.getDictionary());
- m_objectStack.push_back(PDFObject::createStream(std::make_shared(qMove(newDictionary), QByteArray(*stream->getContent()))));
-}
-
-void PDFUpdateObjectVisitor::visitReference(const PDFObjectReference reference)
-{
- m_objectStack.push_back(PDFObject::createReference(reference));
-}
-
-PDFObject PDFUpdateObjectVisitor::getObject()
-{
- Q_ASSERT(m_objectStack.size() == 1);
- return qMove(m_objectStack.back());
-}
-
class PDFRemoveSimpleObjectsVisitor : public PDFUpdateObjectVisitor
{
public:
diff --git a/Pdf4QtLib/sources/pdfvisitor.cpp b/Pdf4QtLib/sources/pdfvisitor.cpp
index 168b063..925e289 100644
--- a/Pdf4QtLib/sources/pdfvisitor.cpp
+++ b/Pdf4QtLib/sources/pdfvisitor.cpp
@@ -192,4 +192,91 @@ void PDFStatisticsCollector::collectStatisticsOfSimpleObject(PDFObject::Type typ
statistics.memoryConsumptionEstimate += sizeof(PDFObject);
}
+// Pushes a null object onto the object stack.
+void PDFUpdateObjectVisitor::visitNull()
+{
+ m_objectStack.push_back(PDFObject::createNull());
+}
+
+// Pushes a boolean object with the visited value onto the object stack.
+void PDFUpdateObjectVisitor::visitBool(bool value)
+{
+ m_objectStack.push_back(PDFObject::createBool(value));
+}
+
+// Pushes an integer object with the visited value onto the object stack.
+void PDFUpdateObjectVisitor::visitInt(PDFInteger value)
+{
+ m_objectStack.push_back(PDFObject::createInteger(value));
+}
+
+// Pushes a real number object with the visited value onto the object stack.
+void PDFUpdateObjectVisitor::visitReal(PDFReal value)
+{
+ m_objectStack.push_back(PDFObject::createReal(value));
+}
+
+// Pushes a string object created from the visited string onto the object stack.
+void PDFUpdateObjectVisitor::visitString(PDFStringRef string)
+{
+ m_objectStack.push_back(PDFObject::createString(string));
+}
+
+// Pushes a name object created from the visited name onto the object stack.
+void PDFUpdateObjectVisitor::visitName(PDFStringRef name)
+{
+ m_objectStack.push_back(PDFObject::createName(name));
+}
+
+// Rebuilds the array. Items are visited first (acceptArray), then the topmost
+// array->getCount() objects of the stack are replaced by a single new array object.
+void PDFUpdateObjectVisitor::visitArray(const PDFArray* array)
+{
+ acceptArray(array);
+
+ // We have all objects on the stack
+ Q_ASSERT(array->getCount() <= m_objectStack.size());
+
+ // Iterator to the first of the items' results (the last array->getCount() objects)
+ auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount());
+ std::vector objects(it, m_objectStack.cend());
+ PDFObject object = PDFObject::createArray(std::make_shared(qMove(objects)));
+ m_objectStack.erase(it, m_objectStack.cend());
+ m_objectStack.push_back(object);
+}
+
+// Rebuilds the dictionary. Each value is visited and its result is taken from
+// the stack and stored under the original key; the new dictionary object is
+// pushed onto the stack.
+void PDFUpdateObjectVisitor::visitDictionary(const PDFDictionary* dictionary)
+{
+ Q_ASSERT(dictionary);
+
+ std::vector entries;
+ entries.reserve(dictionary->getCount());
+
+ for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
+ {
+ // Visiting the value is expected to leave its result on top of the stack
+ dictionary->getValue(i).accept(this);
+ Q_ASSERT(!m_objectStack.empty());
+ entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
+ m_objectStack.pop_back();
+ }
+
+ m_objectStack.push_back(PDFObject::createDictionary(std::make_shared(qMove(entries))));
+}
+
+// Rebuilds the stream. The stream's dictionary is rebuilt by visitDictionary
+// (virtual call, so overrides in derived visitors apply to stream dictionaries too),
+// the content is taken over from the original stream.
+void PDFUpdateObjectVisitor::visitStream(const PDFStream* stream)
+{
+ const PDFDictionary* dictionary = stream->getDictionary();
+
+ visitDictionary(dictionary);
+
+ // Take the rebuilt dictionary from the stack, the stream object replaces it
+ Q_ASSERT(!m_objectStack.empty());
+ PDFObject dictionaryObject = m_objectStack.back();
+ m_objectStack.pop_back();
+
+ PDFDictionary newDictionary(*dictionaryObject.getDictionary());
+ m_objectStack.push_back(PDFObject::createStream(std::make_shared(qMove(newDictionary), QByteArray(*stream->getContent()))));
+}
+
+// Pushes a reference object onto the object stack (the reference is not followed).
+void PDFUpdateObjectVisitor::visitReference(const PDFObjectReference reference)
+{
+ m_objectStack.push_back(PDFObject::createReference(reference));
+}
+
+// Returns the rebuilt object. Exactly one object is expected on the stack
+// (i.e. one object was visited). The object is moved out, but the moved-from
+// element stays on the stack.
+PDFObject PDFUpdateObjectVisitor::getObject()
+{
+ Q_ASSERT(m_objectStack.size() == 1);
+ return qMove(m_objectStack.back());
+}
+
} // namespace pdf
diff --git a/Pdf4QtLib/sources/pdfvisitor.h b/Pdf4QtLib/sources/pdfvisitor.h
index 8d76a2f..56ee23c 100644
--- a/Pdf4QtLib/sources/pdfvisitor.h
+++ b/Pdf4QtLib/sources/pdfvisitor.h
@@ -206,6 +206,33 @@ struct PDFApplyVisitorImpl
}
};
+/// Visitor, which creates a copy of the visited object. Result of each visit
+/// function is pushed onto the object stack; composite objects (arrays,
+/// dictionaries, streams) are assembled from results of their parts. Derived
+/// classes can override visit functions to alter the copy.
+class PDFUpdateObjectVisitor : public PDFAbstractVisitor
+{
+public:
+ /// \param storage Object storage (only the pointer is stored)
+ explicit inline PDFUpdateObjectVisitor(const PDFObjectStorage* storage) :
+ m_storage(storage)
+ {
+ m_objectStack.reserve(32);
+ }
+
+ virtual void visitNull() override;
+ virtual void visitBool(bool value) override;
+ virtual void visitInt(PDFInteger value) override;
+ virtual void visitReal(PDFReal value) override;
+ virtual void visitString(PDFStringRef string) override;
+ virtual void visitName(PDFStringRef name) override;
+ virtual void visitArray(const PDFArray* array) override;
+ virtual void visitDictionary(const PDFDictionary* dictionary) override;
+ virtual void visitStream(const PDFStream* stream) override;
+ virtual void visitReference(const PDFObjectReference reference) override;
+
+ /// Returns the result; exactly one object must be on the stack
+ PDFObject getObject();
+
+protected:
+ const PDFObjectStorage* m_storage; ///< Storage passed to the constructor
+ std::vector m_objectStack; ///< Results of visit functions
+};
+
} // namespace pdf
#endif // PDFVISITOR_H
diff --git a/Pdf4QtViewer/CMakeLists.txt b/Pdf4QtViewer/CMakeLists.txt
index 29862e4..386afd0 100644
--- a/Pdf4QtViewer/CMakeLists.txt
+++ b/Pdf4QtViewer/CMakeLists.txt
@@ -44,6 +44,9 @@ add_library(Pdf4QtViewer SHARED
pdfviewermainwindow.ui
pdfviewermainwindowlite.ui
pdfviewersettingsdialog.ui
+ pdfsanitizedocumentdialog.ui
+ pdfsanitizedocumentdialog.cpp
+ pdfsanitizedocumentdialog.h
pdf4qtviewer.qrc
)
diff --git a/Pdf4QtViewer/pdfprogramcontroller.cpp b/Pdf4QtViewer/pdfprogramcontroller.cpp
index 1725cdb..20560fc 100644
--- a/Pdf4QtViewer/pdfprogramcontroller.cpp
+++ b/Pdf4QtViewer/pdfprogramcontroller.cpp
@@ -32,6 +32,7 @@
#include "pdfundoredomanager.h"
#include "pdfrendertoimagesdialog.h"
#include "pdfoptimizedocumentdialog.h"
+#include "pdfsanitizedocumentdialog.h"
#include "pdfviewersettingsdialog.h"
#include "pdfaboutdialog.h"
#include "pdfrenderingerrorswidget.h"
@@ -447,6 +448,10 @@ void PDFProgramController::initialize(Features features,
{
connect(action, &QAction::triggered, this, &PDFProgramController::onActionOptimizeTriggered);
}
+ if (QAction* action = m_actionManager->getAction(PDFActionManager::Sanitize))
+ {
+ connect(action, &QAction::triggered, this, &PDFProgramController::onActionSanitizeTriggered);
+ }
if (QAction* action = m_actionManager->getAction(PDFActionManager::Encryption))
{
connect(action, &QAction::triggered, this, &PDFProgramController::onActionEncryptionTriggered);
@@ -1173,6 +1178,18 @@ void PDFProgramController::onActionOptimizeTriggered()
}
}
+// Shows the sanitization dialog for the current document. If the dialog is
+// accepted, the current document is replaced by the sanitized one.
+void PDFProgramController::onActionSanitizeTriggered()
+{
+    PDFSanitizeDocumentDialog sanitizeDialog(m_pdfDocument.data(), m_mainWindow);
+    if (sanitizeDialog.exec() != QDialog::Accepted)
+    {
+        return;
+    }
+
+    pdf::PDFDocumentPointer sanitizedDocument(new pdf::PDFDocument(sanitizeDialog.takeSanitizedDocument()));
+    pdf::PDFModifiedDocument modifiedDocument(qMove(sanitizedDocument), m_optionalContentActivity, pdf::PDFModifiedDocument::Reset);
+    onDocumentModified(qMove(modifiedDocument));
+}
+
void PDFProgramController::onActionEncryptionTriggered()
{
auto queryPassword = [this](bool* ok)
@@ -1492,6 +1509,7 @@ void PDFProgramController::updateActionsAvailability()
m_actionManager->setEnabled(PDFActionManager::Print, hasValidDocument && canPrint);
m_actionManager->setEnabled(PDFActionManager::RenderToImages, hasValidDocument && canPrint);
m_actionManager->setEnabled(PDFActionManager::Optimize, hasValidDocument);
+ m_actionManager->setEnabled(PDFActionManager::Sanitize, hasValidDocument);
m_actionManager->setEnabled(PDFActionManager::Encryption, hasValidDocument);
m_actionManager->setEnabled(PDFActionManager::Save, hasValidDocument);
m_actionManager->setEnabled(PDFActionManager::SaveAs, hasValidDocument);
diff --git a/Pdf4QtViewer/pdfprogramcontroller.h b/Pdf4QtViewer/pdfprogramcontroller.h
index 319cee6..9572ce1 100644
--- a/Pdf4QtViewer/pdfprogramcontroller.h
+++ b/Pdf4QtViewer/pdfprogramcontroller.h
@@ -105,6 +105,7 @@ public:
SendByMail,
RenderToImages,
Optimize,
+ Sanitize,
Encryption,
FitPage,
FitWidth,
@@ -327,6 +328,7 @@ private:
void onActionSendByEMailTriggered();
void onActionRenderToImagesTriggered();
void onActionOptimizeTriggered();
+ void onActionSanitizeTriggered();
void onActionEncryptionTriggered();
void onActionFitPageTriggered();
void onActionFitWidthTriggered();
diff --git a/Pdf4QtViewer/pdfsanitizedocumentdialog.cpp b/Pdf4QtViewer/pdfsanitizedocumentdialog.cpp
new file mode 100644
index 0000000..93e34e3
--- /dev/null
+++ b/Pdf4QtViewer/pdfsanitizedocumentdialog.cpp
@@ -0,0 +1,164 @@
+// Copyright (C) 2023 Jakub Melka
+//
+// This file is part of PDF4QT.
+//
+// PDF4QT is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// with the written consent of the copyright owner, any later version.
+//
+// PDF4QT is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
+
+#include "pdfsanitizedocumentdialog.h"
+#include "ui_pdfsanitizedocumentdialog.h"
+
+#include "pdfwidgetutils.h"
+#include "pdfdocumentwriter.h"
+#include "pdfdbgheap.h"
+
+#include
+#include
+#include
+#include
+
+namespace pdfviewer
+{
+
+// Creates the dialog for the given document. The sanitizer starts with all
+// sanitization steps turned on; for each step a check box is created, which
+// toggles the respective flag of the sanitizer.
+PDFSanitizeDocumentDialog::PDFSanitizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent) :
+ QDialog(parent),
+ ui(new Ui::PDFSanitizeDocumentDialog),
+ m_document(document),
+ m_sanitizer(pdf::PDFDocumentSanitizer::All, nullptr),
+ m_sanitizeButton(nullptr),
+ m_sanitizationInProgress(false),
+ m_wasSanitized(false)
+{
+ ui->setupUi(this);
+
+ // Adds check box, whose initial state is taken from the sanitizer's flags
+ // and whose clicks set/clear the flag in the sanitizer
+ auto addCheckBox = [this](QString text, pdf::PDFDocumentSanitizer::SanitizationFlag flag)
+ {
+ QCheckBox* checkBox = new QCheckBox(text, this);
+ checkBox->setChecked(m_sanitizer.getFlags().testFlag(flag));
+ connect(checkBox, &QCheckBox::clicked, this, [this, flag](bool checked) { m_sanitizer.setFlags(m_sanitizer.getFlags().setFlag(flag, checked)); });
+ ui->groupBoxLayout->addWidget(checkBox);
+ };
+
+ addCheckBox(tr("Remove document info"), pdf::PDFDocumentSanitizer::DocumentInfo);
+ addCheckBox(tr("Remove all metadata"), pdf::PDFDocumentSanitizer::Metadata);
+ addCheckBox(tr("Remove outline (bookmarks)"), pdf::PDFDocumentSanitizer::Bookmarks);
+ addCheckBox(tr("Remove file attachments"), pdf::PDFDocumentSanitizer::FileAttachments);
+ addCheckBox(tr("Remove embedded search index"), pdf::PDFDocumentSanitizer::EmbeddedSearchIndex);
+ addCheckBox(tr("Remove comments and other markup annotations"), pdf::PDFDocumentSanitizer::MarkupAnnotations);
+ addCheckBox(tr("Remove page thumbnails"), pdf::PDFDocumentSanitizer::PageThumbnails);
+
+ m_sanitizeButton = ui->buttonBox->addButton(tr("Sanitize"), QDialogButtonBox::ActionRole);
+
+ // The sanitizer's signals are emitted from sanitize(), which is started by
+ // QtConcurrent::run in onSanitizeButtonClicked.
+ // NOTE(review): presumably these connections are delivered as queued calls
+ // in the dialog's thread, because the receiver is the dialog - confirm
+ connect(m_sanitizeButton, &QPushButton::clicked, this, &PDFSanitizeDocumentDialog::onSanitizeButtonClicked);
+ connect(&m_sanitizer, &pdf::PDFDocumentSanitizer::sanitizationStarted, this, &PDFSanitizeDocumentDialog::onSanitizationStarted);
+ connect(&m_sanitizer, &pdf::PDFDocumentSanitizer::sanitizationProgress, this, &PDFSanitizeDocumentDialog::onSanitizationProgress);
+ connect(&m_sanitizer, &pdf::PDFDocumentSanitizer::sanitizationFinished, this, &PDFSanitizeDocumentDialog::onSanitizationFinished);
+ connect(this, &PDFSanitizeDocumentDialog::displaySanitizationInfo, this, &PDFSanitizeDocumentDialog::onDisplaySanitizationInfo);
+
+ pdf::PDFWidgetUtils::scaleWidget(this, QSize(640, 380));
+ updateUi();
+ pdf::PDFWidgetUtils::style(this);
+}
+
+// Destroys the dialog; waits for a running sanitization first.
+PDFSanitizeDocumentDialog::~PDFSanitizeDocumentDialog()
+{
+    // Disabled buttons do not prevent the dialog from being rejected (QDialog
+    // rejects on the Escape key), so the dialog can be destroyed while the
+    // sanitization started by QtConcurrent::run still uses its members.
+    // Wait for it; the call returns immediately if nothing is running.
+    m_future.waitForFinished();
+
+    delete ui;
+}
+
+// Sanitizes the document and stores the result together with statistics
+// (elapsed time, document file size before/after). Finally the signal
+// displaySanitizationInfo() is emitted.
+void PDFSanitizeDocumentDialog::sanitize()
+{
+    QElapsedTimer stopwatch;
+    stopwatch.start();
+
+    m_sanitizer.setDocument(m_document);
+    m_sanitizer.sanitize();
+    m_sanitizedDocument = m_sanitizer.takeSanitizedDocument();
+
+    // Only the sanitization itself is measured, not the file size estimation below
+    const qreal elapsedMilliseconds = stopwatch.nsecsElapsed() / 1000000.0;
+    stopwatch.invalidate();
+
+    SanitizationInfo& info = m_sanitizationInfo;
+    info.msecsElapsed = elapsedMilliseconds;
+    info.bytesBeforeSanitization = pdf::PDFDocumentWriter::getDocumentFileSize(m_document);
+    info.bytesAfterSanitization = pdf::PDFDocumentWriter::getDocumentFileSize(&m_sanitizedDocument);
+
+    Q_EMIT displaySanitizationInfo();
+}
+
+// Starts the sanitization by QtConcurrent::run and disables the controls.
+void PDFSanitizeDocumentDialog::onSanitizeButtonClicked()
+{
+ Q_ASSERT(!m_sanitizationInProgress);
+ Q_ASSERT(!m_future.isRunning());
+
+ // The flag is set before the work is started, slots connected to the
+ // sanitizer's signals assert it
+ m_sanitizationInProgress = true;
+ m_future = QtConcurrent::run([this]() { sanitize(); });
+ updateUi();
+}
+
+// Starts a new log - previous text of the log is replaced.
+void PDFSanitizeDocumentDialog::onSanitizationStarted()
+{
+ Q_ASSERT(m_sanitizationInProgress);
+ ui->logTextEdit->setPlainText(tr("Sanitization started!"));
+}
+
+// Appends the progress text as a new line of the log.
+void PDFSanitizeDocumentDialog::onSanitizationProgress(QString progressText)
+{
+ Q_ASSERT(m_sanitizationInProgress);
+ ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), progressText));
+}
+
+// Logs the end of sanitization, waits for the started work and enables the controls.
+void PDFSanitizeDocumentDialog::onSanitizationFinished()
+{
+ ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), tr("Sanitization finished!")));
+ // The sanitizer emits its finished signal before sanitize() of this dialog
+ // stores the result and computes the statistics - wait until that is done
+ m_future.waitForFinished();
+ m_sanitizationInProgress = false;
+ m_wasSanitized = true;
+ updateUi();
+}
+
+// Appends statistics of the sanitization to the log - elapsed time and, if both
+// file sizes are known (not -1), sizes before/after and their ratio.
+void PDFSanitizeDocumentDialog::onDisplaySanitizationInfo()
+{
+    const SanitizationInfo& info = m_sanitizationInfo;
+
+    QStringList lines;
+    lines << tr("Sanitized in %1 msecs").arg(info.msecsElapsed);
+
+    const bool areSizesKnown = info.bytesBeforeSanitization != -1 && info.bytesAfterSanitization != -1;
+    if (areSizesKnown)
+    {
+        lines << tr("Bytes before sanitization: %1").arg(info.bytesBeforeSanitization);
+        lines << tr("Bytes after sanitization: %1").arg(info.bytesAfterSanitization);
+        lines << tr("Bytes saved by sanitization: %1").arg(info.bytesBeforeSanitization - info.bytesAfterSanitization);
+
+        // Ratio is 100 %, if the size before sanitization is not positive
+        const qreal ratio = (info.bytesBeforeSanitization > 0)
+                                ? 100.0 * qreal(info.bytesAfterSanitization) / qreal(info.bytesBeforeSanitization)
+                                : 100.0;
+        lines << tr("Compression ratio: %1 %").arg(ratio);
+    }
+
+    const QString previousLog = ui->logTextEdit->toPlainText();
+    ui->logTextEdit->setPlainText(QString("%1\n%2").arg(previousLog, lines.join("\n")));
+}
+
+// Enables/disables the controls according to the state of the dialog. All
+// controls are disabled during sanitization; the Ok button is enabled only
+// after a sanitization was done.
+void PDFSanitizeDocumentDialog::updateUi()
+{
+ for (QCheckBox* checkBox : findChildren(QString(), Qt::FindChildrenRecursively))
+ {
+ checkBox->setEnabled(!m_sanitizationInProgress);
+ }
+
+ ui->buttonBox->button(QDialogButtonBox::Ok)->setEnabled(m_wasSanitized && !m_sanitizationInProgress);
+ ui->buttonBox->button(QDialogButtonBox::Cancel)->setEnabled(!m_sanitizationInProgress);
+ m_sanitizeButton->setEnabled(!m_sanitizationInProgress);
+}
+
+} // namespace pdfviewer
diff --git a/Pdf4QtViewer/pdfsanitizedocumentdialog.h b/Pdf4QtViewer/pdfsanitizedocumentdialog.h
new file mode 100644
index 0000000..c85623c
--- /dev/null
+++ b/Pdf4QtViewer/pdfsanitizedocumentdialog.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2023 Jakub Melka
+//
+// This file is part of PDF4QT.
+//
+// PDF4QT is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// with the written consent of the copyright owner, any later version.
+//
+// PDF4QT is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef PDFSANITIZEDOCUMENTDIALOG_H
+#define PDFSANITIZEDOCUMENTDIALOG_H
+
+#include "pdfdocumentsanitizer.h"
+
+#include
+#include
+
+namespace Ui
+{
+class PDFSanitizeDocumentDialog;
+}
+
+namespace pdfviewer
+{
+
+/// Dialog for document sanitization. User selects sanitization steps and
+/// starts the sanitization by the "Sanitize" button; progress is displayed
+/// in a log. Sanitized document can be taken from the accepted dialog.
+class PDFSanitizeDocumentDialog : public QDialog
+{
+ Q_OBJECT
+
+public:
+ /// \param document Document to be sanitized (only the pointer is stored)
+ /// \param parent Parent widget
+ explicit PDFSanitizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent);
+ virtual ~PDFSanitizeDocumentDialog() override;
+
+ /// Returns sanitized document by move semantics
+ pdf::PDFDocument takeSanitizedDocument() { return qMove(m_sanitizedDocument); }
+
+signals:
+ /// Emitted by sanitize(), when the statistics are ready to be displayed
+ void displaySanitizationInfo();
+
+private:
+ /// Performs the sanitization (started by QtConcurrent::run)
+ void sanitize();
+ void onSanitizeButtonClicked();
+ void onSanitizationStarted();
+ void onSanitizationProgress(QString progressText);
+ void onSanitizationFinished();
+ void onDisplaySanitizationInfo();
+
+ /// Enables/disables controls according to the state of the dialog
+ void updateUi();
+
+ /// Statistics of the last sanitization
+ struct SanitizationInfo
+ {
+ qreal msecsElapsed = 0.0; ///< Duration of the sanitization
+ qint64 bytesBeforeSanitization = -1; ///< Document file size before sanitization, -1 is treated as unknown
+ qint64 bytesAfterSanitization = -1; ///< Document file size after sanitization, -1 is treated as unknown
+ };
+
+ Ui::PDFSanitizeDocumentDialog* ui;
+ const pdf::PDFDocument* m_document; ///< Document to be sanitized
+ pdf::PDFDocumentSanitizer m_sanitizer;
+ QPushButton* m_sanitizeButton;
+ bool m_sanitizationInProgress; ///< Set in onSanitizeButtonClicked, cleared in onSanitizationFinished
+ bool m_wasSanitized; ///< Set after the first finished sanitization, enables the Ok button
+ QFuture m_future; ///< Future of the running sanitization
+ pdf::PDFDocument m_sanitizedDocument; ///< Result of the last sanitization
+ SanitizationInfo m_sanitizationInfo;
+};
+
+} // namespace pdfviewer
+
+#endif // PDFSANITIZEDOCUMENTDIALOG_H
diff --git a/Pdf4QtViewer/pdfsanitizedocumentdialog.ui b/Pdf4QtViewer/pdfsanitizedocumentdialog.ui
new file mode 100644
index 0000000..f2d8e0e
--- /dev/null
+++ b/Pdf4QtViewer/pdfsanitizedocumentdialog.ui
@@ -0,0 +1,82 @@
+
+
+ PDFSanitizeDocumentDialog
+
+
+
+ 0
+ 0
+ 741
+ 530
+
+
+
+ Document sanitization
+
+
+ -
+
+
+ Sanitization Settings
+
+
+
+
+ -
+
+
+ false
+
+
+ true
+
+
+
+ -
+
+
+ Qt::Horizontal
+
+
+ QDialogButtonBox::Cancel|QDialogButtonBox::Ok
+
+
+
+
+
+
+
+
+ buttonBox
+ accepted()
+ PDFSanitizeDocumentDialog
+ accept()
+
+
+ 248
+ 254
+
+
+ 157
+ 274
+
+
+
+
+ buttonBox
+ rejected()
+ PDFSanitizeDocumentDialog
+ reject()
+
+
+ 316
+ 260
+
+
+ 286
+ 274
+
+
+
+
+
diff --git a/Pdf4QtViewer/pdfviewermainwindow.cpp b/Pdf4QtViewer/pdfviewermainwindow.cpp
index 39bbbdf..760ebe7 100644
--- a/Pdf4QtViewer/pdfviewermainwindow.cpp
+++ b/Pdf4QtViewer/pdfviewermainwindow.cpp
@@ -24,7 +24,6 @@
#include "pdfviewersettingsdialog.h"
#include "pdfdocumentpropertiesdialog.h"
#include "pdfrendertoimagesdialog.h"
-#include "pdfoptimizedocumentdialog.h"
#include "pdfdbgheap.h"
#include "pdfdocumentreader.h"
@@ -161,6 +160,7 @@ PDFViewerMainWindow::PDFViewerMainWindow(QWidget* parent) :
m_actionManager->setAction(PDFActionManager::SendByMail, ui->actionSend_by_E_Mail);
m_actionManager->setAction(PDFActionManager::RenderToImages, ui->actionRender_to_Images);
m_actionManager->setAction(PDFActionManager::Optimize, ui->actionOptimize);
+ m_actionManager->setAction(PDFActionManager::Sanitize, ui->actionSanitize);
m_actionManager->setAction(PDFActionManager::Encryption, ui->actionEncryption);
m_actionManager->setAction(PDFActionManager::FitPage, ui->actionFitPage);
m_actionManager->setAction(PDFActionManager::FitWidth, ui->actionFitWidth);
diff --git a/Pdf4QtViewer/pdfviewermainwindow.ui b/Pdf4QtViewer/pdfviewermainwindow.ui
index 1a051b9..4888098 100644
--- a/Pdf4QtViewer/pdfviewermainwindow.ui
+++ b/Pdf4QtViewer/pdfviewermainwindow.ui
@@ -20,7 +20,7 @@
0
0
770
- 37
+ 21
diff --git a/Pdf4QtViewer/pdfviewermainwindowlite.cpp b/Pdf4QtViewer/pdfviewermainwindowlite.cpp
index 5f21f21..b33625b 100644
--- a/Pdf4QtViewer/pdfviewermainwindowlite.cpp
+++ b/Pdf4QtViewer/pdfviewermainwindowlite.cpp
@@ -24,7 +24,6 @@
#include "pdfviewersettingsdialog.h"
#include "pdfdocumentpropertiesdialog.h"
#include "pdfrendertoimagesdialog.h"
-#include "pdfoptimizedocumentdialog.h"
#include "pdfdbgheap.h"
#include "pdfdocumentreader.h"
diff --git a/RELEASES.txt b/RELEASES.txt
index 115f8ab..0d8026e 100644
--- a/RELEASES.txt
+++ b/RELEASES.txt
@@ -1,4 +1,5 @@
CURRENT:
+ - Issue #40: Sanitization of documents
V: 1.3.2 1.2.2023
- Issue #39: Code signed installation
diff --git a/generated_code_definition.xml b/generated_code_definition.xml
index fa6ec83..9a9ae5c 100644
--- a/generated_code_definition.xml
+++ b/generated_code_definition.xml
@@ -12163,5 +12163,62 @@ updateDocumentInfo(qMove(updatedInfoDictionary));
This function is used to update trailer dictionary. Must be called each time the final document is being built.
_void
+
+
+
+
+
+
+
+
+
+ pageReference
+ _PDFObjectReference
+ Removes page thumbnail.
+
+
+ Parameters
+
+ _void
+
+
+
+
+
+
+
+
+
+
+
+ Thumb
+ DictionaryItemSimple
+ PDFObject()
+
+
+
+ Dictionary
+
+
+
+ CreateObject
+ updatedPageObject
+ _PDFObject
+
+
+
+
+
+ Code
+
+ _void
+ mergeTo(pageReference, updatedPageObject);
+
+
+ Structure
+ removePageThumbnail
+
+ _void
+