Issue #40: Sanitize document

This commit is contained in:
Jakub Melka 2023-02-19 18:36:46 +01:00
parent 4a603c80c0
commit 361ee247e5
19 changed files with 1089 additions and 243 deletions

View File

@ -98,6 +98,8 @@ add_library(Pdf4QtLib SHARED
sources/pdfrenderingerrorswidget.ui
sources/pdfselectpagesdialog.ui
sources/pdfobjecteditorwidget_impl.h
sources/pdfdocumentsanitizer.h
sources/pdfdocumentsanitizer.cpp
cmaps.qrc
)

View File

@ -669,6 +669,13 @@ PDFDocumentBuilder::PDFDocumentBuilder(const PDFDocument* document) :
}
/// Creates a builder whose storage and version members are initialized
/// from the given arguments.
/// \param storage Object storage used to initialize the builder's storage
/// \param version PDF version used to initialize the builder's version
PDFDocumentBuilder::PDFDocumentBuilder(const PDFObjectStorage& storage, PDFVersion version) :
m_storage(storage),
m_version(version)
{
}
void PDFDocumentBuilder::reset()
{
*this = PDFDocumentBuilder();
@ -5414,6 +5421,20 @@ void PDFDocumentBuilder::updateTrailerDictionary(PDFInteger objectCount)
}
/// Builds a dictionary containing a single "Thumb" entry whose value is
/// a default-constructed PDFObject and merges it into the object referenced
/// by \p pageReference.
/// NOTE(review): presumably merging the default (null) object is what drops
/// the page's thumbnail entry - confirm against mergeTo.
/// \param pageReference Reference to the page object to be updated
void PDFDocumentBuilder::removePageThumbnail(PDFObjectReference pageReference)
{
    PDFObjectFactory thumbnailPatch;

    thumbnailPatch.beginDictionary();
    thumbnailPatch.beginDictionaryItem("Thumb");
    thumbnailPatch << PDFObject();
    thumbnailPatch.endDictionaryItem();
    thumbnailPatch.endDictionary();

    // Hand the finished patch dictionary straight to the merge routine.
    mergeTo(pageReference, thumbnailPatch.takeObject());
}
/* END GENERATED CODE */
} // namespace pdf

View File

@ -323,6 +323,9 @@ public:
/// Creates a new document as modification of old document
explicit PDFDocumentBuilder(const PDFDocument* document);
/// Creates a new document from storage
explicit PDFDocumentBuilder(const PDFObjectStorage& storage, PDFVersion version);
/// Resets the object to the initial state.
/// \warning All data are lost
void reset();
@ -491,8 +494,8 @@ public:
PDFObjectReference createActionGoToDocumentPart(PDFObjectReference documentPart);
/// Creates embedded GoTo action. When executed, action points to destination in another document,
/// which is embedded in this document.
/// Creates embedded GoTo action. When executed, action points to destination in another document, which
/// is embedded in this document.
/// \param fileSpecification File specification
/// \param destination Destination in an embedded document
/// \param newWindow Open document in new window
@ -541,8 +544,8 @@ public:
bool newWindow);
/// Creates launch action. Launch action executes document opening or printing. This variant for
/// Windows operating system, where additional parameters can be specified.
/// Creates launch action. Launch action executes document opening or printing. This variant for Windows
/// operating system, where additional parameters can be specified.
/// \param fileName File name
/// \param defaultDirectory Default directory
/// \param action Action to be performed. Valid values are 'open' or 'print'.
@ -555,8 +558,8 @@ public:
bool newWindow);
/// Creates named action. Named actions are some predefined actions that interactive PDF processor
/// shall support. Valid values are NextPage, PrevPage, FirstPage, LastPage.
/// Creates named action. Named actions are some predefined actions that interactive PDF processor shall
/// support. Valid values are NextPage, PrevPage, FirstPage, LastPage.
/// \param name Predefined name
PDFObjectReference createActionNamed(QByteArray name);
@ -581,8 +584,7 @@ public:
PDFObjectReference createActionResetForm();
/// Creates reset interactive form action, which resets all fields except those specified in a given list of
/// fields.
/// Creates reset interactive form action, which resets all fields except those specified in a given list of fields.
/// \param fields Fields to be excluded from reset
PDFObjectReference createActionResetFormExcludedFields(PDFObjectReferenceVector fields);
@ -641,9 +643,8 @@ public:
PDFObjectReference createActionURI(QString URL);
/// Caret annotations are used to indicate, where text should be inserted (for example, if reviewer
/// reviews the document, and he wants to mark, that some text should be inserted, he uses this
/// annotation).
/// Caret annotations are used to indicate, where text should be inserted (for example, if reviewer reviews the
/// document, and he wants to mark, that some text should be inserted, he uses this annotation).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is caret displayed
/// \param borderWidth Border width
@ -660,15 +661,15 @@ public:
QString contents);
/// Circle annotation displays ellipse (or circle). Circle border/fill color can be defined, along with
/// border width. Popup annotation can be attached to this annotation.
/// Circle annotation displays ellipse (or circle). Circle border/fill color can be defined, along with border
/// width. Popup annotation can be attached to this annotation.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is circle/ellipse displayed
/// \param borderWidth Width of the border line of circle/ellipse
/// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled,
/// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled, then
/// use invalid QColor.
/// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a border,
/// then use invalid QColor.
/// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a
/// border, then use invalid QColor.
/// \param title Title (it is displayed as title of popup window)
/// \param subject Subject (short description of the subject being addressed by the annotation)
/// \param contents Contents (text displayed, for example, in the marked annotation dialog)
@ -697,15 +698,15 @@ public:
QString description);
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the
/// same way, as standard text in PDF document. Free text annotations are usually used to comment
/// the document. Free text annotation can also have callout line, with, or without a knee. Specify
/// start/end point parameters of this function to get callout line.
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way,
/// as standard text in PDF document. Free text annotations are usually used to comment the document.
/// Free text annotation can also have callout line, with, or without a knee. Specify start/end point
/// parameters of this function to get callout line.
/// \param page Page to which is annotation added
/// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both
/// callout line and text rectangle.
/// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to
/// match bounding rectangle.
/// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both callout
/// line and text rectangle.
/// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to match
/// bounding rectangle.
/// \param title Title
/// \param subject Subject
/// \param contents Contents (text displayed)
@ -729,9 +730,9 @@ public:
AnnotationLineEnding endLineType);
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the
/// same way, as standard text in PDF document. Free text annotations are usually used to comment
/// the document. Free text annotation can also have callout line, with, or without a knee.
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way,
/// as standard text in PDF document. Free text annotations are usually used to comment the document.
/// Free text annotation can also have callout line, with, or without a knee.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is text displayed
/// \param title Title
@ -746,15 +747,15 @@ public:
TextAlignment textAlignment);
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the
/// same way, as standard text in PDF document. Free text annotations are usually used to comment
/// the document. Free text annotation can also have callout line, with, or without a knee. Specify
/// start/end point parameters of this function to get callout line.
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way,
/// as standard text in PDF document. Free text annotations are usually used to comment the document.
/// Free text annotation can also have callout line, with, or without a knee. Specify start/end point
/// parameters of this function to get callout line.
/// \param page Page to which is annotation added
/// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both
/// callout line and text rectangle.
/// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to
/// match bounding rectangle.
/// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both callout
/// line and text rectangle.
/// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to match
/// bounding rectangle.
/// \param title Title
/// \param subject Subject
/// \param contents Contents (text displayed)
@ -776,9 +777,9 @@ public:
AnnotationLineEnding endLineType);
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain
/// window to be opened (and commented). This annotation is usually used to highlight text, but can
/// also highlight other things, such as images, or other graphics.
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to
/// be opened (and commented). This annotation is usually used to highlight text, but can also highlight
/// other things, such as images, or other graphics.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is highlight displayed
/// \param color Color
@ -793,9 +794,9 @@ public:
QString contents);
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain
/// window to be opened (and commented). This annotation is usually used to highlight text, but can
/// also highlight other things, such as images, or other graphics.
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to
/// be opened (and commented). This annotation is usually used to highlight text, but can also highlight
/// other things, such as images, or other graphics.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is highlight displayed
/// \param color Color
@ -804,9 +805,9 @@ public:
QColor color);
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain
/// window to be opened (and commented). This annotation is usually used to highlight text, but can
/// also highlight other things, such as images, or other graphics.
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to
/// be opened (and commented). This annotation is usually used to highlight text, but can also highlight
/// other things, such as images, or other graphics.
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is highlight displayed
/// \param color Color
@ -849,9 +850,9 @@ public:
QString contents);
/// Line annotation represents straight line, or some more advanced graphics, such as dimension with
/// text. Line annotations are markup annotations, so they can have popup window. Line endings can
/// be specified.
/// Line annotation represents straight line, or some more advanced graphics, such as dimension with text.
/// Line annotations are markup annotations, so they can have popup window. Line endings can be
/// specified.
/// \param page Page to which is annotation added
/// \param boundingRect Line annotation bounding rectangle
/// \param startPoint Line start
@ -878,9 +879,9 @@ public:
AnnotationLineEnding endLineType);
/// Line annotation represents straight line, or some more advanced graphics, such as dimension with
/// text. Line annotations are markup annotations, so they can have popup window. Line endings can
/// be specified.
/// Line annotation represents straight line, or some more advanced graphics, such as dimension with text.
/// Line annotations are markup annotations, so they can have popup window. Line endings can be
/// specified.
/// \param page Page to which is annotation added
/// \param boundingRect Line annotation bounding rectangle
/// \param startPoint Line start
@ -893,12 +894,12 @@ public:
/// \param contents Contents (text displayed, for example, in the marked annotation dialog)
/// \param startLineType Start line ending type
/// \param endLineType End line ending type
/// \param leaderLineLength Length of the leader line. Leader line extends from each endpoint of
/// the line perpendicular to the line itself. Value can be either positive, negative or zero. If
/// positive, then extension is in plane that is above the annotation line (in clockwise order),
/// if negative, then it is below the annotation line.
/// \param leaderLineOffset Length of leader line offset, which is the amount of empty space
/// between the endpoints of the annotation and beginning of leader lines
/// \param leaderLineLength Length of the leader line. Leader line extends from each endpoint of the line
/// perpendicular to the line itself. Value can be either positive, negative or zero. If positive, then
/// extension is in plane that is above the annotation line (in clockwise order), if negative, then it is
/// below the annotation line.
/// \param leaderLineOffset Length of leader line offset, which is the amount of empty space between the
/// endpoints of the annotation and beginning of leader lines
/// \param leaderLineExtension Length of leader line extension, which extends leader lines in 180°
/// direction from leader lines (so leader lines continues above drawn line)
/// \param displayContents Display contents of the annotation as text along the line
@ -922,9 +923,9 @@ public:
bool displayedContentsTopAlign);
/// Creates new link annotation. It usually represents clickable hypertext link. User can also specify
/// action, which can be executed, for example, link can be also in the PDF document (link to some
/// location in document).
/// Creates new link annotation. It usually represents clickable hypertext link. User can also specify action,
/// which can be executed, for example, link can be also in the PDF document (link to some location in
/// document).
/// \param page Page to which is annotation added
/// \param linkRectangle Link rectangle
/// \param URL URL to be launched when user clicks on the link
@ -935,9 +936,9 @@ public:
LinkHighlightMode highlightMode);
/// Creates new link annotation. It usually represents clickable hypertext link. User can also specify
/// action, which can be executed, for example, link can be also in the PDF document (link to some
/// location in document).
/// Creates new link annotation. It usually represents clickable hypertext link. User can also specify action,
/// which can be executed, for example, link can be also in the PDF document (link to some location in
/// document).
/// \param page Page to which is annotation added
/// \param linkRectangle Link rectangle
/// \param action Action to be performed when user clicks on a link
@ -948,9 +949,9 @@ public:
LinkHighlightMode highlightMode);
/// Polygon annotation. When opened, they display pop-up window containing the text of associated
/// note (and window title), if popup annotation is attached. Polygon border/fill color can be defined,
/// along with border width.
/// Polygon annotation. When opened, they display pop-up window containing the text of associated note
/// (and window title), if popup annotation is attached. Polygon border/fill color can be defined, along with
/// border width.
/// \param page Page to which is annotation added
/// \param polygon Polygon
/// \param borderWidth Border line width
@ -969,9 +970,9 @@ public:
QString contents);
/// Polyline annotation. When opened, they display pop-up window containing the text of associated
/// note (and window title), if popup annotation is attached. Polyline border/fill color can be defined,
/// along with border width.
/// Polyline annotation. When opened, they display pop-up window containing the text of associated note
/// (and window title), if popup annotation is attached. Polyline border/fill color can be defined, along with
/// border width.
/// \param page Page to which is annotation added
/// \param polyline Polyline
/// \param borderWidth Border line width
@ -995,9 +996,9 @@ public:
/// Creates a new popup annotation on the page. Popup annotation is represented usually by floating
/// window, which can be opened, or closed. Popup annotation is associated with parent annotation,
/// which can be usually markup annotation. Popup annotation displays parent annotation's texts, for
/// example, title, comment, date etc.
/// window, which can be opened, or closed. Popup annotation is associated with parent annotation, which
/// can be usually markup annotation. Popup annotation displays parent annotation's texts, for example,
/// title, comment, date etc.
/// \param page Page to which is annotation added
/// \param parentAnnotation Parent annotation (for which is popup window displayed)
/// \param rectangle Area on the page, where popup window appears
@ -1026,16 +1027,16 @@ public:
QColor color);
/// Square annotation displays rectangle (or square). When opened, they display pop-up window
/// containing the text of associated note (and window title), if popup annotation is attached. Square
/// border/fill color can be defined, along with border width.
/// Square annotation displays rectangle (or square). When opened, they display pop-up window containing
/// the text of associated note (and window title), if popup annotation is attached. Square border/fill color
/// can be defined, along with border width.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is rectangle displayed
/// \param borderWidth Width of the border line of rectangle
/// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled,
/// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled, then
/// use invalid QColor.
/// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a border,
/// then use invalid QColor.
/// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a
/// border, then use invalid QColor.
/// \param title Title (it is displayed as title of popup window)
/// \param subject Subject (short description of the subject being addressed by the annotation)
/// \param contents Contents (text displayed, for example, in the marked annotation dialog)
@ -1049,8 +1050,8 @@ public:
QString contents);
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can
/// contain window to be opened (and commented).
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@ -1059,8 +1060,8 @@ public:
QColor color);
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can
/// contain window to be opened (and commented).
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is markup displayed
/// \param color Color
@ -1069,8 +1070,8 @@ public:
QColor color);
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can
/// contain window to be opened (and commented).
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@ -1100,8 +1101,8 @@ public:
QString contents);
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@ -1116,8 +1117,8 @@ public:
QString contents);
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@ -1126,8 +1127,8 @@ public:
QColor color);
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is markup displayed
/// \param color Color
@ -1136,11 +1137,10 @@ public:
QColor color);
/// Creates text annotation. Text annotation is "sticky note" attached to a point in the PDF document.
/// When closed, it is displayed as icon, if opened, widget appears with attached text. Text annotations
/// do not scale or rotate, they appear independent of zoom/rotate. So, they behave as if flags
/// NoZoom or NoRotate to the annotations are being set. Popup annotation is automatically created
/// for this annotation.
/// Creates text annotation. Text annotation is "sticky note" attached to a point in the PDF document. When
/// closed, it is displayed as icon, if opened, widget appears with attached text. Text annotations do not scale
/// or rotate, they appear independent of zoom/rotate. So, they behave as if flags NoZoom or NoRotate to
/// the annotations are being set. Popup annotation is automatically created for this annotation.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is icon displayed
/// \param iconType Icon type
@ -1157,8 +1157,8 @@ public:
bool open);
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@ -1167,8 +1167,8 @@ public:
QColor color);
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is markup displayed
/// \param color Color
@ -1177,8 +1177,8 @@ public:
QColor color);
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@ -1193,13 +1193,13 @@ public:
QString contents);
/// Creates empty catalog. This function is used, when a new document is being created. Do not call
/// this function manually.
/// Creates empty catalog. This function is used, when a new document is being created. Do not call this
/// function manually.
PDFObjectReference createCatalog();
/// Creates page tree root for the catalog. This function is only called when new document is being
/// created. Do not call this function manually.
/// Creates page tree root for the catalog. This function is only called when new document is being created.
/// Do not call this function manually.
PDFObjectReference createCatalogPageTreeRoot();
@ -1258,8 +1258,7 @@ public:
/// Creates signature dictionary used for preparation in signing process. Can define parameters of the
/// signature.
/// \param filter Filter (for example, Adobe.PPKLite, Entrust.PPKEF, CiCi.SignIt, ...)
/// \param subfilter Subfilter (for example, adbe.pkcs7.detached, adbe.pkcs7.sha1,
/// ETSI.CAdES.detached, ...)
/// \param subfilter Subfilter (for example, adbe.pkcs7.detached, adbe.pkcs7.sha1, ETSI.CAdES.detached, ...)
/// \param contents Contents (reserved data for signature).
/// \param signingTime Signing date/time
/// \param byteRangeItem Item which will fill byte range array.
@ -1270,8 +1269,8 @@ public:
PDFInteger byteRangeItem);
/// This function is used to create a new trailer dictionary, when blank document is created. Do not
/// call this function manually.
/// This function is used to create a new trailer dictionary, when blank document is created. Do not call this
/// function manually.
/// \param catalog Reference to document catalog
PDFObject createTrailerDictionary(PDFObjectReference catalog);
@ -1358,9 +1357,9 @@ public:
bool isOpen);
/// Sets annotation quadrilaterals. Quadrilaterals are sequence of 4 points, where first two points are
/// on the upper side of quadrilateral, and the last two points are on the lower side of quadrilateral.
/// Quadrilaterals are represented as unclosed polygon with 4 * n vertices.
/// Sets annotation quadrilaterals. Quadrilaterals are sequence of 4 points, where first two points are on the
/// upper side of quadrilateral, and the last two points are on the lower side of quadrilateral. Quadrilaterals
/// are represented as unclosed polygon with 4 * n vertices.
/// \param annotation Annotation
/// \param quadrilaterals Quadrilaterals
void setAnnotationQuadPoints(PDFObjectReference annotation,
@ -1461,9 +1460,9 @@ public:
PDFInteger topIndex);
/// Sets form field value. Value must be correct for this form field, no checking is performed. Also, if
/// you use this function, annotation widgets, which are attached to this form field, should also be
/// updated (for example, appearance state and sometimes appearance streams).
/// Sets form field value. Value must be correct for this form field, no checking is performed. Also, if you use
/// this function, annotation widgets, which are attached to this form field, should also be updated (for
/// example, appearance state and sometimes appearance streams).
/// \param formField Form field
/// \param value Value
void setFormFieldValue(PDFObjectReference formField,
@ -1476,9 +1475,9 @@ public:
/// Set document language.
/// \param language Document language. It should be a language identifier, as defined in ISO 639
/// and ISO 3166. For example, "en-US", where first two letter means language code (en =
/// english), and the latter two is country code (US - United States).
/// \param language Document language. It should be a language identifier, as defined in ISO 639 and
/// ISO 3166. For example, "en-US", where first two letter means language code (en = english), and
/// the latter two is country code (US - United States).
void setLanguage(QString language);
@ -1494,17 +1493,16 @@ public:
QRectF box);
/// Sets bleed box to the page. Bleed box is, basically, a clipping box for output in a production
/// environment. Default value is the page's crop box.
/// Sets bleed box to the page. Bleed box is, basically, a clipping box for output in a production environment.
/// Default value is the page's crop box.
/// \param page Page
/// \param box Box
void setPageBleedBox(PDFObjectReference page,
QRectF box);
/// Sets crop box to the page. Crop box defines clipping region of the page. Page contents are clipped
/// to this region, graphics outside of clipping box will not be printed. Default value is same, as media
/// box.
/// Sets crop box to the page. Crop box defines clipping region of the page. Page contents are clipped to
/// this region, graphics outside of clipping box will not be printed. Default value is same, as media box.
/// \param page Page
/// \param box Box
void setPageCropBox(PDFObjectReference page,
@ -1518,8 +1516,8 @@ public:
PDFObjectReference documentPart);
/// Sets media box to the page. The media box defines size of physical medium, onto which the page
/// is to be printed.
/// Sets media box to the page. The media box defines size of physical medium, onto which the page is to be
/// printed.
/// \param page Page
/// \param box Box
void setPageMediaBox(PDFObjectReference page,
@ -1561,12 +1559,17 @@ public:
QString reasonText);
/// This function is used to update trailer dictionary. Must be called each time the final document is
/// being built.
/// This function is used to update trailer dictionary. Must be called each time the final document is being
/// built.
/// \param objectCount Number of objects (including empty ones)
void updateTrailerDictionary(PDFInteger objectCount);
/// Removes page thumbnail.
/// \param pageReference Reference to the page whose thumbnail is removed
void removePageThumbnail(PDFObjectReference pageReference);
/* END GENERATED CODE */
private:

View File

@ -0,0 +1,306 @@
// Copyright (C) 2023 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#include "pdfdocumentsanitizer.h"
#include "pdfvisitor.h"
#include "pdfexecutionpolicy.h"
#include "pdfoptimizer.h"
#include "pdfdocumentbuilder.h"
namespace pdf
{
class PDFRemoveMetadataVisitor : public PDFUpdateObjectVisitor
{
public:
explicit PDFRemoveMetadataVisitor(const PDFObjectStorage* storage, std::atomic<PDFInteger>* counter) :
PDFUpdateObjectVisitor(storage),
m_counter(counter)
{
}
virtual void visitDictionary(const PDFDictionary* dictionary) override;
private:
std::atomic<PDFInteger>* m_counter;
};
void PDFRemoveMetadataVisitor::visitDictionary(const PDFDictionary* dictionary)
{
Q_ASSERT(dictionary);
std::vector<PDFDictionary::DictionaryEntry> entries;
entries.reserve(dictionary->getCount());
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
dictionary->getValue(i).accept(this);
Q_ASSERT(!m_objectStack.empty());
if (dictionary->getKey(i) != "Metadata")
{
entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
}
else
{
++*m_counter;
}
m_objectStack.pop_back();
}
m_objectStack.push_back(PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(entries))));
}
/// Constructs the sanitizer and stores the requested sanitization flags.
/// NOTE(review): the parameter is typed as the single enum value
/// SanitizationFlag, while setFlags() accepts SanitizationFlags - confirm
/// against the header whether a flag combination was intended here.
/// \param flags Sanitization flag(s) stored into m_flags
/// \param parent Parent object passed to QObject
PDFDocumentSanitizer::PDFDocumentSanitizer(SanitizationFlag flags, QObject* parent) :
QObject(parent),
m_flags(flags)
{
}
void PDFDocumentSanitizer::sanitize()
{
Q_EMIT sanitizationStarted();
if (m_flags.testFlag(DocumentInfo))
{
performSanitizeDocumentInfo();
}
if (m_flags.testFlag(Metadata))
{
performSanitizeMetadata();
}
if (m_flags.testFlag(Bookmarks))
{
performSanitizeBookmarks();
}
if (m_flags.testFlag(FileAttachments))
{
performSanitizeFileAttachments();
}
if (m_flags.testFlag(EmbeddedSearchIndex))
{
performSanitizeEmbeddedSearchIndex();
}
if (m_flags.testFlag(MarkupAnnotations))
{
performSanitizeMarkupAnnotations();
}
if (m_flags.testFlag(PageThumbnails))
{
performSanitizePageThumbnails();
}
// Optimize - remove unused objects
PDFOptimizer optimizer(PDFOptimizer::OptimizationFlags(PDFOptimizer::RemoveUnusedObjects | PDFOptimizer::ShrinkObjectStorage | PDFOptimizer::RemoveNullObjects), nullptr);
optimizer.setStorage(m_storage);
optimizer.optimize();
m_storage = optimizer.takeStorage();
Q_EMIT sanitizationFinished();
}
/// Returns the currently configured sanitization flags.
PDFDocumentSanitizer::SanitizationFlags PDFDocumentSanitizer::getFlags() const
{
return m_flags;
}
/// Replaces the sanitization flags; they are read by the next sanitize() call.
/// \param flags New set of flags
void PDFDocumentSanitizer::setFlags(SanitizationFlags flags)
{
m_flags = flags;
}
void PDFDocumentSanitizer::performSanitizeDocumentInfo()
{
PDFObjectReference emptyDocumentInfoReference = m_storage.addObject(PDFObject());
PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
const bool hasDocumentInfo = builder.getDocumentInfo().isValid();
builder.setDocumentInfo(emptyDocumentInfoReference);
PDFDocument document = builder.build();
m_storage = document.getStorage();
if (hasDocumentInfo)
{
Q_EMIT sanitizationProgress(tr("Document info was removed."));
}
}
void PDFDocumentSanitizer::performSanitizeMetadata()
{
std::atomic<PDFInteger> counter = 0;
PDFObjectStorage::PDFObjects objects = m_storage.getObjects();
auto processEntry = [this, &counter](PDFObjectStorage::Entry& entry)
{
PDFRemoveMetadataVisitor visitor(&m_storage, &counter);
entry.object.accept(&visitor);
entry.object = visitor.getObject();
};
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Unknown, objects.begin(), objects.end(), processEntry);
m_storage.setObjects(qMove(objects));
Q_EMIT sanitizationProgress(tr("Metadata streams removed: %1").arg(counter));
}
void PDFDocumentSanitizer::performSanitizeBookmarks()
{
PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
PDFObject catalogObject = builder.getObjectByReference(builder.getCatalogReference());
const PDFDictionary* catalogDictionary = builder.getDictionaryFromObject(catalogObject);
const bool hasOutline = catalogDictionary && catalogDictionary->hasKey("Outlines");
if (hasOutline)
{
builder.removeOutline();
PDFDocument document = builder.build();
m_storage = document.getStorage();
Q_EMIT sanitizationProgress(tr("Outline was removed."));
}
}
void PDFDocumentSanitizer::performSanitizeFileAttachments()
{
auto filter = [](const PDFAnnotation* annotation)
{
return annotation->getType() == AnnotationType::FileAttachment;
};
removeAnnotations(filter, tr("File attachments removed: %1."));
}
/// Clears the "SearchIndex" entry of the catalog's "PieceInfo" dictionary
/// (the entry is set to a null object) and rebuilds the storage. Nothing is
/// changed or reported if the catalog has no "PieceInfo", if "PieceInfo" is
/// not a dictionary, or if it has no "SearchIndex" entry.
void PDFDocumentSanitizer::performSanitizeEmbeddedSearchIndex()
{
    PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
    PDFObject catalogObject = builder.getObjectByReference(builder.getCatalogReference());
    const PDFDictionary* catalogDictionary = builder.getDictionaryFromObject(catalogObject);

    if (!catalogDictionary || !catalogDictionary->hasKey("PieceInfo"))
    {
        return;
    }

    PDFObject pieceInfoObject = builder.getObject(catalogDictionary->get("PieceInfo"));
    const PDFDictionary* pieceInfoDictionary = builder.getDictionaryFromObject(pieceInfoObject);

    // "PieceInfo" can be present without being a dictionary (malformed
    // document), in which case the pointer is null and must not be used.
    if (!pieceInfoDictionary || !pieceInfoDictionary->hasKey("SearchIndex"))
    {
        return;
    }

    PDFDictionary dictionaryCopy = *pieceInfoDictionary;
    dictionaryCopy.setEntry(PDFInplaceOrMemoryString("SearchIndex"), PDFObject());
    pieceInfoObject = PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(dictionaryCopy)));

    // Catalog fragment carrying only the updated "PieceInfo"; it is merged
    // into the existing catalog below.
    PDFObjectFactory factory;
    factory.beginDictionary();
    factory.beginDictionaryItem("PieceInfo");
    factory << pieceInfoObject;
    factory.endDictionaryItem();
    factory.endDictionary();
    PDFObject newCatalog = factory.takeObject();
    builder.mergeTo(builder.getCatalogReference(), std::move(newCatalog));

    PDFDocument document = builder.build();
    m_storage = document.getStorage();
    Q_EMIT sanitizationProgress(tr("Search index was removed."));
}
void PDFDocumentSanitizer::performSanitizeMarkupAnnotations()
{
auto filter = [](const PDFAnnotation* annotation)
{
return annotation->asMarkupAnnotation() != nullptr;
};
removeAnnotations(filter, tr("Markup annotations removed: %1."));
}
void PDFDocumentSanitizer::performSanitizePageThumbnails()
{
PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
builder.flattenPageTree();
std::vector<PDFObjectReference> pageReferences = builder.getPages();
std::vector<PDFObjectReference> pagesWithThumbnail;
for (const PDFObjectReference& pageReference : pageReferences)
{
const PDFDictionary* pageDictionary = builder.getDictionaryFromObject(builder.getObjectByReference(pageReference));
if (pageDictionary && pageDictionary->hasKey("Thumb"))
{
pagesWithThumbnail.push_back(pageReference);
}
}
if (!pagesWithThumbnail.empty())
{
for (const auto& pageReference : pagesWithThumbnail)
{
builder.removePageThumbnail(pageReference);
}
PDFDocument document = builder.build();
m_storage = document.getStorage();
Q_EMIT sanitizationProgress(tr("Page thumbnails removed: %1.").arg(pagesWithThumbnail.size()));
}
}
/// Removes all page annotations accepted by the filter.
/// \param filter Predicate called with each successfully parsed annotation;
///        annotations for which it returns true are removed. It is never
///        called with a null pointer.
/// \param message Progress message with a %1 placeholder, which is replaced
///        by the number of removed annotations. It is emitted only when at
///        least one annotation was removed.
void PDFDocumentSanitizer::removeAnnotations(const std::function<bool (const PDFAnnotation*)>& filter,
                                             QString message)
{
    PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
    builder.flattenPageTree();
    std::vector<PDFObjectReference> pageReferences = builder.getPages();

    // Pairs of (page, annotation) scheduled for removal
    std::vector<std::pair<PDFObjectReference, PDFObjectReference>> annotationsToBeRemoved;

    PDFDocumentDataLoaderDecorator loader(&m_storage);
    for (const PDFObjectReference pageReference : pageReferences)
    {
        const PDFObject& pageObject = m_storage.getObjectByReference(pageReference);
        const PDFDictionary* pageDictionary = m_storage.getDictionaryFromObject(pageObject);

        if (!pageDictionary)
        {
            continue;
        }

        std::vector<PDFObjectReference> annotationReferences = loader.readReferenceArrayFromDictionary(pageDictionary, "Annots");
        for (const PDFObjectReference& annotationReference : annotationReferences)
        {
            PDFAnnotationPtr annotation = PDFAnnotation::parse(&m_storage, annotationReference);

            // The filters dereference the annotation unconditionally, so an
            // annotation which could not be parsed (null pointer) is skipped.
            const PDFAnnotation* annotationPointer = annotation.get();
            if (annotationPointer && filter(annotationPointer))
            {
                annotationsToBeRemoved.emplace_back(pageReference, annotationReference);
            }
        }
    }

    if (!annotationsToBeRemoved.empty())
    {
        for (const auto& item : annotationsToBeRemoved)
        {
            const PDFObjectReference pageReference = item.first;
            const PDFObjectReference annotationReference = item.second;
            builder.removeAnnotation(pageReference, annotationReference);
        }

        PDFDocument document = builder.build();
        m_storage = document.getStorage();
        Q_EMIT sanitizationProgress(message.arg(annotationsToBeRemoved.size()));
    }
}
} // namespace pdf

View File

@ -0,0 +1,99 @@
// Copyright (C) 2023 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFDOCUMENTSANITIZER_H
#define PDFDOCUMENTSANITIZER_H
#include "pdfdocument.h"
namespace pdf
{
class PDFAnnotation;
/// Class for sanitizing documents. Can remove sensitive content from the document,
/// except the content streams. Sanitization is configurable, user can specify,
/// which content should be removed.
class PDF4QTLIBSHARED_EXPORT PDFDocumentSanitizer : public QObject
{
Q_OBJECT
public:
/// Individual sanitization steps; values are bit flags and can be combined.
enum SanitizationFlag
{
None = 0x0000, ///< No sanitization is performed
DocumentInfo = 0x0001, ///< Remove document information
Metadata = 0x0002, ///< Remove all metadata streams in all objects
Bookmarks = 0x0004, ///< Remove bookmarks
FileAttachments = 0x0008, ///< Remove file attachments
EmbeddedSearchIndex = 0x0010, ///< Remove embedded search index
MarkupAnnotations = 0x0020, ///< Remove markup annotations from all pages
PageThumbnails = 0x0040, ///< Remove page thumbnails
All = 0xFFFF, ///< All sanitization turned on
};
Q_DECLARE_FLAGS(SanitizationFlags, SanitizationFlag)
/// Constructs the sanitizer
/// \param flags Initial sanitization flag(s)
/// \param parent Parent object
explicit PDFDocumentSanitizer(SanitizationFlag flags, QObject* parent);
/// Set document, which should be sanitized (its object storage is copied)
/// \param document Document to be sanitized
void setDocument(const PDFDocument* document) { setStorage(document->getStorage()); }
/// Set storage directly (storage must be valid and filled with objects)
/// \param storage Storage
void setStorage(const PDFObjectStorage& storage) { m_storage = storage; }
/// Perform document sanitization. During the sanitization process, various
/// signals are emitted to view progress.
void sanitize();
/// Returns object storage used for sanitization
const PDFObjectStorage& getStorage() const { return m_storage; }
/// Returns object storage by move semantics, old object storage is destroyed
PDFObjectStorage takeStorage() { return qMove(m_storage); }
/// Returns sanitized document. Object storage is cleared after
/// this function call.
PDFDocument takeSanitizedDocument() { return PDFDocument(qMove(m_storage), PDFVersion(2, 0)); }
/// Returns currently configured sanitization flags
SanitizationFlags getFlags() const;
/// Sets sanitization flags used by the next call of sanitize()
/// \param flags Sanitization flags
void setFlags(SanitizationFlags flags);
signals:
/// Emitted at the beginning of sanitize()
void sanitizationStarted();
/// Emitted by a sanitization step which changed the document
/// \param progressText Translated description of what was removed
void sanitizationProgress(QString progressText);
/// Emitted at the end of sanitize()
void sanitizationFinished();
private:
// One function per sanitization flag; each is called by sanitize() only
// if the corresponding flag is set.
void performSanitizeDocumentInfo();
void performSanitizeMetadata();
void performSanitizeBookmarks();
void performSanitizeFileAttachments();
void performSanitizeEmbeddedSearchIndex();
void performSanitizeMarkupAnnotations();
void performSanitizePageThumbnails();
/// Removes page annotations accepted by the filter and reports their count
/// \param filter Returns true for annotations which should be removed
/// \param message Progress message with %1 placeholder for the count
void removeAnnotations(const std::function<bool(const PDFAnnotation*)>& filter, QString message);
SanitizationFlags m_flags; ///< Steps performed by sanitize()
PDFObjectStorage m_storage; ///< Working copy of the sanitized objects
};
} // namespace pdf
#endif // PDFDOCUMENTSANITIZER_H

View File

@ -28,120 +28,6 @@
namespace pdf
{
/// Visitor which rebuilds the visited object on an internal stack. Every
/// visit function pushes a newly created equivalent of the visited item,
/// composite items (arrays, dictionaries, streams) are assembled from the
/// objects pushed by their children. Derived visitors override individual
/// visit functions to alter the rebuilt object.
class PDFUpdateObjectVisitor : public PDFAbstractVisitor
{
public:
/// \param storage Object storage, stored for use by derived visitors
explicit inline PDFUpdateObjectVisitor(const PDFObjectStorage* storage) :
m_storage(storage)
{
m_objectStack.reserve(32);
}
virtual void visitNull() override;
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
virtual void visitReference(const PDFObjectReference reference) override;
/// Returns the rebuilt object; exactly one object must be on the stack.
/// The object is moved out of the stack.
PDFObject getObject();
protected:
const PDFObjectStorage* m_storage;
/// Stack of rebuilt objects, topmost item is the most recently visited one
std::vector<PDFObject> m_objectStack;
};
/// Pushes a null object onto the object stack.
void PDFUpdateObjectVisitor::visitNull()
{
    m_objectStack.emplace_back(PDFObject::createNull());
}
/// Pushes a boolean object holding the visited value onto the object stack.
void PDFUpdateObjectVisitor::visitBool(bool value)
{
    m_objectStack.emplace_back(PDFObject::createBool(value));
}
/// Pushes an integer object holding the visited value onto the object stack.
void PDFUpdateObjectVisitor::visitInt(PDFInteger value)
{
    m_objectStack.emplace_back(PDFObject::createInteger(value));
}
/// Pushes a real number object holding the visited value onto the object stack.
void PDFUpdateObjectVisitor::visitReal(PDFReal value)
{
    m_objectStack.emplace_back(PDFObject::createReal(value));
}
/// Pushes a string object created from the visited string onto the object stack.
void PDFUpdateObjectVisitor::visitString(PDFStringRef string)
{
    m_objectStack.emplace_back(PDFObject::createString(string));
}
/// Pushes a name object created from the visited name onto the object stack.
void PDFUpdateObjectVisitor::visitName(PDFStringRef name)
{
    m_objectStack.emplace_back(PDFObject::createName(name));
}
void PDFUpdateObjectVisitor::visitArray(const PDFArray* array)
{
acceptArray(array);
// We have all objects on the stack
Q_ASSERT(array->getCount() <= m_objectStack.size());
auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount());
std::vector<PDFObject> objects(it, m_objectStack.cend());
PDFObject object = PDFObject::createArray(std::make_shared<PDFArray>(qMove(objects)));
m_objectStack.erase(it, m_objectStack.cend());
m_objectStack.push_back(object);
}
void PDFUpdateObjectVisitor::visitDictionary(const PDFDictionary* dictionary)
{
Q_ASSERT(dictionary);
std::vector<PDFDictionary::DictionaryEntry> entries;
entries.reserve(dictionary->getCount());
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
dictionary->getValue(i).accept(this);
Q_ASSERT(!m_objectStack.empty());
entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
m_objectStack.pop_back();
}
m_objectStack.push_back(PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(entries))));
}
void PDFUpdateObjectVisitor::visitStream(const PDFStream* stream)
{
const PDFDictionary* dictionary = stream->getDictionary();
visitDictionary(dictionary);
Q_ASSERT(!m_objectStack.empty());
PDFObject dictionaryObject = m_objectStack.back();
m_objectStack.pop_back();
PDFDictionary newDictionary(*dictionaryObject.getDictionary());
m_objectStack.push_back(PDFObject::createStream(std::make_shared<PDFStream>(qMove(newDictionary), QByteArray(*stream->getContent()))));
}
/// Pushes a reference object onto the object stack; the referenced object
/// itself is not visited.
void PDFUpdateObjectVisitor::visitReference(const PDFObjectReference reference)
{
    m_objectStack.emplace_back(PDFObject::createReference(reference));
}
/// Returns the rebuilt object. Exactly one object must be on the stack; it is
/// moved out, the (moved-from) item stays on the stack.
PDFObject PDFUpdateObjectVisitor::getObject()
{
    Q_ASSERT(m_objectStack.size() == 1);

    PDFObject rebuiltObject = qMove(m_objectStack.back());
    return rebuiltObject;
}
class PDFRemoveSimpleObjectsVisitor : public PDFUpdateObjectVisitor
{
public:

View File

@ -192,4 +192,91 @@ void PDFStatisticsCollector::collectStatisticsOfSimpleObject(PDFObject::Type typ
statistics.memoryConsumptionEstimate += sizeof(PDFObject);
}
/// Pushes a null object onto the object stack.
void PDFUpdateObjectVisitor::visitNull()
{
    m_objectStack.emplace_back(PDFObject::createNull());
}
/// Pushes a boolean object holding the visited value onto the object stack.
void PDFUpdateObjectVisitor::visitBool(bool value)
{
    m_objectStack.emplace_back(PDFObject::createBool(value));
}
/// Pushes an integer object holding the visited value onto the object stack.
void PDFUpdateObjectVisitor::visitInt(PDFInteger value)
{
    m_objectStack.emplace_back(PDFObject::createInteger(value));
}
/// Pushes a real number object holding the visited value onto the object stack.
void PDFUpdateObjectVisitor::visitReal(PDFReal value)
{
    m_objectStack.emplace_back(PDFObject::createReal(value));
}
/// Pushes a string object created from the visited string onto the object stack.
void PDFUpdateObjectVisitor::visitString(PDFStringRef string)
{
    m_objectStack.emplace_back(PDFObject::createString(string));
}
/// Pushes a name object created from the visited name onto the object stack.
void PDFUpdateObjectVisitor::visitName(PDFStringRef name)
{
    m_objectStack.emplace_back(PDFObject::createName(name));
}
void PDFUpdateObjectVisitor::visitArray(const PDFArray* array)
{
acceptArray(array);
// We have all objects on the stack
Q_ASSERT(array->getCount() <= m_objectStack.size());
auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount());
std::vector<PDFObject> objects(it, m_objectStack.cend());
PDFObject object = PDFObject::createArray(std::make_shared<PDFArray>(qMove(objects)));
m_objectStack.erase(it, m_objectStack.cend());
m_objectStack.push_back(object);
}
void PDFUpdateObjectVisitor::visitDictionary(const PDFDictionary* dictionary)
{
Q_ASSERT(dictionary);
std::vector<PDFDictionary::DictionaryEntry> entries;
entries.reserve(dictionary->getCount());
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
dictionary->getValue(i).accept(this);
Q_ASSERT(!m_objectStack.empty());
entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
m_objectStack.pop_back();
}
m_objectStack.push_back(PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(entries))));
}
void PDFUpdateObjectVisitor::visitStream(const PDFStream* stream)
{
const PDFDictionary* dictionary = stream->getDictionary();
visitDictionary(dictionary);
Q_ASSERT(!m_objectStack.empty());
PDFObject dictionaryObject = m_objectStack.back();
m_objectStack.pop_back();
PDFDictionary newDictionary(*dictionaryObject.getDictionary());
m_objectStack.push_back(PDFObject::createStream(std::make_shared<PDFStream>(qMove(newDictionary), QByteArray(*stream->getContent()))));
}
/// Pushes a reference object onto the object stack; the referenced object
/// itself is not visited.
void PDFUpdateObjectVisitor::visitReference(const PDFObjectReference reference)
{
    m_objectStack.emplace_back(PDFObject::createReference(reference));
}
/// Returns the rebuilt object. Exactly one object must be on the stack; it is
/// moved out, the (moved-from) item stays on the stack.
PDFObject PDFUpdateObjectVisitor::getObject()
{
    Q_ASSERT(m_objectStack.size() == 1);

    PDFObject rebuiltObject = qMove(m_objectStack.back());
    return rebuiltObject;
}
} // namespace pdf

View File

@ -206,6 +206,33 @@ struct PDFApplyVisitorImpl<Visitor, PDFAbstractVisitor::Strategy::Sequential>
}
};
/// Visitor which rebuilds the visited object on an internal stack. Every
/// visit function pushes a newly created equivalent of the visited item,
/// composite items (arrays, dictionaries, streams) are assembled from the
/// objects pushed by their children. Derived visitors override individual
/// visit functions to alter the rebuilt object.
class PDFUpdateObjectVisitor : public PDFAbstractVisitor
{
public:
/// \param storage Object storage, stored for use by derived visitors
explicit inline PDFUpdateObjectVisitor(const PDFObjectStorage* storage) :
m_storage(storage)
{
m_objectStack.reserve(32);
}
virtual void visitNull() override;
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
virtual void visitReference(const PDFObjectReference reference) override;
/// Returns the rebuilt object; exactly one object must be on the stack.
/// The object is moved out of the stack.
PDFObject getObject();
protected:
const PDFObjectStorage* m_storage;
/// Stack of rebuilt objects, topmost item is the most recently visited one
std::vector<PDFObject> m_objectStack;
};
} // namespace pdf
#endif // PDFVISITOR_H

View File

@ -44,6 +44,9 @@ add_library(Pdf4QtViewer SHARED
pdfviewermainwindow.ui
pdfviewermainwindowlite.ui
pdfviewersettingsdialog.ui
pdfsanitizedocumentdialog.ui
pdfsanitizedocumentdialog.cpp
pdfsanitizedocumentdialog.h
pdf4qtviewer.qrc
)

View File

@ -32,6 +32,7 @@
#include "pdfundoredomanager.h"
#include "pdfrendertoimagesdialog.h"
#include "pdfoptimizedocumentdialog.h"
#include "pdfsanitizedocumentdialog.h"
#include "pdfviewersettingsdialog.h"
#include "pdfaboutdialog.h"
#include "pdfrenderingerrorswidget.h"
@ -447,6 +448,10 @@ void PDFProgramController::initialize(Features features,
{
connect(action, &QAction::triggered, this, &PDFProgramController::onActionOptimizeTriggered);
}
if (QAction* action = m_actionManager->getAction(PDFActionManager::Sanitize))
{
connect(action, &QAction::triggered, this, &PDFProgramController::onActionSanitizeTriggered);
}
if (QAction* action = m_actionManager->getAction(PDFActionManager::Encryption))
{
connect(action, &QAction::triggered, this, &PDFProgramController::onActionEncryptionTriggered);
@ -1173,6 +1178,18 @@ void PDFProgramController::onActionOptimizeTriggered()
}
}
void PDFProgramController::onActionSanitizeTriggered()
{
PDFSanitizeDocumentDialog dialog(m_pdfDocument.data(), m_mainWindow);
if (dialog.exec() == QDialog::Accepted)
{
pdf::PDFDocumentPointer pointer(new pdf::PDFDocument(dialog.takeSanitizedDocument()));
pdf::PDFModifiedDocument document(qMove(pointer), m_optionalContentActivity, pdf::PDFModifiedDocument::Reset);
onDocumentModified(qMove(document));
}
}
void PDFProgramController::onActionEncryptionTriggered()
{
auto queryPassword = [this](bool* ok)
@ -1492,6 +1509,7 @@ void PDFProgramController::updateActionsAvailability()
m_actionManager->setEnabled(PDFActionManager::Print, hasValidDocument && canPrint);
m_actionManager->setEnabled(PDFActionManager::RenderToImages, hasValidDocument && canPrint);
m_actionManager->setEnabled(PDFActionManager::Optimize, hasValidDocument);
m_actionManager->setEnabled(PDFActionManager::Sanitize, hasValidDocument);
m_actionManager->setEnabled(PDFActionManager::Encryption, hasValidDocument);
m_actionManager->setEnabled(PDFActionManager::Save, hasValidDocument);
m_actionManager->setEnabled(PDFActionManager::SaveAs, hasValidDocument);

View File

@ -105,6 +105,7 @@ public:
SendByMail,
RenderToImages,
Optimize,
Sanitize,
Encryption,
FitPage,
FitWidth,
@ -327,6 +328,7 @@ private:
void onActionSendByEMailTriggered();
void onActionRenderToImagesTriggered();
void onActionOptimizeTriggered();
void onActionSanitizeTriggered();
void onActionEncryptionTriggered();
void onActionFitPageTriggered();
void onActionFitWidthTriggered();

View File

@ -0,0 +1,164 @@
// Copyright (C) 2023 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#include "pdfsanitizedocumentdialog.h"
#include "ui_pdfsanitizedocumentdialog.h"
#include "pdfwidgetutils.h"
#include "pdfdocumentwriter.h"
#include "pdfdbgheap.h"
#include <QCheckBox>
#include <QPushButton>
#include <QElapsedTimer>
#include <QtConcurrent/QtConcurrent>
namespace pdfviewer
{
/// Builds the dialog: one check box per sanitization flag (all checked by
/// default), the "Sanitize" action button and the connections to the
/// sanitizer's progress signals.
/// \param document Document to be sanitized (not owned, not modified)
/// \param parent Parent widget
PDFSanitizeDocumentDialog::PDFSanitizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent) :
    QDialog(parent),
    ui(new Ui::PDFSanitizeDocumentDialog),
    m_document(document),
    m_sanitizer(pdf::PDFDocumentSanitizer::All, nullptr),
    m_sanitizeButton(nullptr),
    m_sanitizationInProgress(false),
    m_wasSanitized(false)
{
    ui->setupUi(this);

    struct CheckBoxDefinition
    {
        QString text;
        pdf::PDFDocumentSanitizer::SanitizationFlag flag;
    };

    // Check boxes appear in the group box in this order
    const CheckBoxDefinition checkBoxDefinitions[] =
    {
        { tr("Remove document info"), pdf::PDFDocumentSanitizer::DocumentInfo },
        { tr("Remove all metadata"), pdf::PDFDocumentSanitizer::Metadata },
        { tr("Remove outline (bookmarks)"), pdf::PDFDocumentSanitizer::Bookmarks },
        { tr("Remove file attachments"), pdf::PDFDocumentSanitizer::FileAttachments },
        { tr("Remove embedded search index"), pdf::PDFDocumentSanitizer::EmbeddedSearchIndex },
        { tr("Remove comments and other markup annotations"), pdf::PDFDocumentSanitizer::MarkupAnnotations },
        { tr("Remove page thumbnails"), pdf::PDFDocumentSanitizer::PageThumbnails },
    };

    for (const CheckBoxDefinition& definition : checkBoxDefinitions)
    {
        const pdf::PDFDocumentSanitizer::SanitizationFlag flag = definition.flag;

        QCheckBox* checkBox = new QCheckBox(definition.text, this);
        checkBox->setChecked(m_sanitizer.getFlags().testFlag(flag));
        connect(checkBox, &QCheckBox::clicked, this, [this, flag](bool checked) { m_sanitizer.setFlags(m_sanitizer.getFlags().setFlag(flag, checked)); });
        ui->groupBoxLayout->addWidget(checkBox);
    }

    m_sanitizeButton = ui->buttonBox->addButton(tr("Sanitize"), QDialogButtonBox::ActionRole);
    connect(m_sanitizeButton, &QPushButton::clicked, this, &PDFSanitizeDocumentDialog::onSanitizeButtonClicked);

    // Sanitizer signals drive the log and the dialog state
    connect(&m_sanitizer, &pdf::PDFDocumentSanitizer::sanitizationStarted, this, &PDFSanitizeDocumentDialog::onSanitizationStarted);
    connect(&m_sanitizer, &pdf::PDFDocumentSanitizer::sanitizationProgress, this, &PDFSanitizeDocumentDialog::onSanitizationProgress);
    connect(&m_sanitizer, &pdf::PDFDocumentSanitizer::sanitizationFinished, this, &PDFSanitizeDocumentDialog::onSanitizationFinished);
    connect(this, &PDFSanitizeDocumentDialog::displaySanitizationInfo, this, &PDFSanitizeDocumentDialog::onDisplaySanitizationInfo);

    pdf::PDFWidgetUtils::scaleWidget(this, QSize(640, 380));
    updateUi();
    pdf::PDFWidgetUtils::style(this);
}
/// Destroys the dialog. Waits for a still running sanitization task first.
PDFSanitizeDocumentDialog::~PDFSanitizeDocumentDialog()
{
    // The dialog can be rejected (Escape key, window close button) while the
    // task started in onSanitizeButtonClicked() still runs and uses members of
    // this object, so block until it is done before anything is destroyed.
    // In that case onSanitizationFinished() may not have been delivered yet,
    // therefore m_sanitizationInProgress is intentionally not asserted here.
    m_future.waitForFinished();
    Q_ASSERT(!m_future.isRunning());

    delete ui;
}
void PDFSanitizeDocumentDialog::sanitize()
{
QElapsedTimer timer;
timer.start();
m_sanitizer.setDocument(m_document);
m_sanitizer.sanitize();
m_sanitizedDocument = m_sanitizer.takeSanitizedDocument();
qreal msecsElapsed = timer.nsecsElapsed() / 1000000.0;
timer.invalidate();
m_sanitizationInfo.msecsElapsed = msecsElapsed;
m_sanitizationInfo.bytesBeforeSanitization = pdf::PDFDocumentWriter::getDocumentFileSize(m_document);
m_sanitizationInfo.bytesAfterSanitization = pdf::PDFDocumentWriter::getDocumentFileSize(&m_sanitizedDocument);
Q_EMIT displaySanitizationInfo();
}
/// Starts the sanitization via QtConcurrent::run and disables the controls
/// for its duration.
void PDFSanitizeDocumentDialog::onSanitizeButtonClicked()
{
    Q_ASSERT(!m_sanitizationInProgress);
    Q_ASSERT(!m_future.isRunning());

    // Flag is set before the task starts, slots triggered by it rely on it
    m_sanitizationInProgress = true;

    auto sanitizationTask = [this]() { sanitize(); };
    m_future = QtConcurrent::run(sanitizationTask);

    updateUi();
}
void PDFSanitizeDocumentDialog::onSanitizationStarted()
{
Q_ASSERT(m_sanitizationInProgress);
ui->logTextEdit->setPlainText(tr("Sanitization started!"));
}
/// Appends the progress text to the log as a new line.
/// \param progressText Text reported by the sanitizer
void PDFSanitizeDocumentDialog::onSanitizationProgress(QString progressText)
{
    Q_ASSERT(m_sanitizationInProgress);

    const QString currentLog = ui->logTextEdit->toPlainText();
    ui->logTextEdit->setPlainText(QString("%1\n%2").arg(currentLog, progressText));
}
void PDFSanitizeDocumentDialog::onSanitizationFinished()
{
ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), tr("Sanitization finished!")));
m_future.waitForFinished();
m_sanitizationInProgress = false;
m_wasSanitized = true;
updateUi();
}
void PDFSanitizeDocumentDialog::onDisplaySanitizationInfo()
{
QStringList texts;
texts << tr("Sanitized in %1 msecs").arg(m_sanitizationInfo.msecsElapsed);
if (m_sanitizationInfo.bytesBeforeSanitization != -1 &&
m_sanitizationInfo.bytesAfterSanitization != -1)
{
texts << tr("Bytes before sanitization: %1").arg(m_sanitizationInfo.bytesBeforeSanitization);
texts << tr("Bytes after sanitization: %1").arg(m_sanitizationInfo.bytesAfterSanitization);
texts << tr("Bytes saved by sanitization: %1").arg(m_sanitizationInfo.bytesBeforeSanitization - m_sanitizationInfo.bytesAfterSanitization);
qreal ratio = 100.0;
if (m_sanitizationInfo.bytesBeforeSanitization > 0)
{
ratio = 100.0 * qreal(m_sanitizationInfo.bytesAfterSanitization) / qreal(m_sanitizationInfo.bytesBeforeSanitization);
}
texts << tr("Compression ratio: %1 %").arg(ratio);
}
ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), texts.join("\n")));
}
void PDFSanitizeDocumentDialog::updateUi()
{
for (QCheckBox* checkBox : findChildren<QCheckBox*>(QString(), Qt::FindChildrenRecursively))
{
checkBox->setEnabled(!m_sanitizationInProgress);
}
ui->buttonBox->button(QDialogButtonBox::Ok)->setEnabled(m_wasSanitized && !m_sanitizationInProgress);
ui->buttonBox->button(QDialogButtonBox::Cancel)->setEnabled(!m_sanitizationInProgress);
m_sanitizeButton->setEnabled(!m_sanitizationInProgress);
}
} // namespace pdfviewer

View File

@ -0,0 +1,77 @@
// Copyright (C) 2023 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFSANITIZEDOCUMENTDIALOG_H
#define PDFSANITIZEDOCUMENTDIALOG_H
#include "pdfdocumentsanitizer.h"
#include <QDialog>
#include <QFuture>
namespace Ui
{
class PDFSanitizeDocumentDialog;
}
namespace pdfviewer
{
/// Dialog which lets the user choose sanitization steps, runs the sanitizer
/// via QtConcurrent and shows its progress in a log. The sanitized document
/// can be taken from the dialog afterwards.
class PDFSanitizeDocumentDialog : public QDialog
{
Q_OBJECT
public:
/// \param document Document to be sanitized (not owned)
/// \param parent Parent widget
explicit PDFSanitizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent);
virtual ~PDFSanitizeDocumentDialog() override;
/// Returns the sanitized document by move semantics
pdf::PDFDocument takeSanitizedDocument() { return qMove(m_sanitizedDocument); }
signals:
/// Emitted by sanitize() when the statistics in m_sanitizationInfo are ready
void displaySanitizationInfo();
private:
/// Performs the sanitization and collects statistics
void sanitize();
void onSanitizeButtonClicked();
void onSanitizationStarted();
void onSanitizationProgress(QString progressText);
void onSanitizationFinished();
void onDisplaySanitizationInfo();
/// Enables/disables the controls according to the dialog state
void updateUi();
/// Statistics of the last sanitization run; size -1 means "not known"
struct SanitizationInfo
{
qreal msecsElapsed = 0.0;
qint64 bytesBeforeSanitization = -1;
qint64 bytesAfterSanitization = -1;
};
Ui::PDFSanitizeDocumentDialog* ui;
const pdf::PDFDocument* m_document; // source document, not owned
pdf::PDFDocumentSanitizer m_sanitizer;
QPushButton* m_sanitizeButton;
bool m_sanitizationInProgress; // true from button click until onSanitizationFinished()
bool m_wasSanitized; // true once a sanitization run has finished
QFuture<void> m_future; // handle of the task running sanitize()
pdf::PDFDocument m_sanitizedDocument;
SanitizationInfo m_sanitizationInfo;
};
} // namespace pdfviewer
#endif // PDFSANITIZEDOCUMENTDIALOG_H

View File

@ -0,0 +1,82 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>PDFSanitizeDocumentDialog</class>
<widget class="QDialog" name="PDFSanitizeDocumentDialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>741</width>
<height>530</height>
</rect>
</property>
<property name="windowTitle">
<string>Document sanitization</string>
</property>
<layout class="QVBoxLayout" name="dialogLayout">
<item>
<widget class="QGroupBox" name="sanitizationSettingsGroupBox">
<property name="title">
<string>Sanitization Settings</string>
</property>
<layout class="QVBoxLayout" name="groupBoxLayout"/>
</widget>
</item>
<item>
<widget class="QPlainTextEdit" name="logTextEdit">
<property name="undoRedoEnabled">
<bool>false</bool>
</property>
<property name="readOnly">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons">
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>
<connections>
<connection>
<sender>buttonBox</sender>
<signal>accepted()</signal>
<receiver>PDFSanitizeDocumentDialog</receiver>
<slot>accept()</slot>
<hints>
<hint type="sourcelabel">
<x>248</x>
<y>254</y>
</hint>
<hint type="destinationlabel">
<x>157</x>
<y>274</y>
</hint>
</hints>
</connection>
<connection>
<sender>buttonBox</sender>
<signal>rejected()</signal>
<receiver>PDFSanitizeDocumentDialog</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
</hint>
<hint type="destinationlabel">
<x>286</x>
<y>274</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -24,7 +24,6 @@
#include "pdfviewersettingsdialog.h"
#include "pdfdocumentpropertiesdialog.h"
#include "pdfrendertoimagesdialog.h"
#include "pdfoptimizedocumentdialog.h"
#include "pdfdbgheap.h"
#include "pdfdocumentreader.h"
@ -161,6 +160,7 @@ PDFViewerMainWindow::PDFViewerMainWindow(QWidget* parent) :
m_actionManager->setAction(PDFActionManager::SendByMail, ui->actionSend_by_E_Mail);
m_actionManager->setAction(PDFActionManager::RenderToImages, ui->actionRender_to_Images);
m_actionManager->setAction(PDFActionManager::Optimize, ui->actionOptimize);
m_actionManager->setAction(PDFActionManager::Sanitize, ui->actionSanitize);
m_actionManager->setAction(PDFActionManager::Encryption, ui->actionEncryption);
m_actionManager->setAction(PDFActionManager::FitPage, ui->actionFitPage);
m_actionManager->setAction(PDFActionManager::FitWidth, ui->actionFitWidth);

View File

@ -20,7 +20,7 @@
<x>0</x>
<y>0</y>
<width>770</width>
<height>37</height>
<height>21</height>
</rect>
</property>
<widget class="QMenu" name="menuFile">
@ -142,6 +142,7 @@
<addaction name="separator"/>
<addaction name="actionEncryption"/>
<addaction name="actionOptimize"/>
<addaction name="actionSanitize"/>
<addaction name="separator"/>
</widget>
<widget class="QMenu" name="menuInsert">
@ -635,6 +636,9 @@
<property name="text">
<string>Optimize...</string>
</property>
<property name="statusTip">
<string>Optimizes document to reduce file size.</string>
</property>
</action>
<action name="actionSave_As">
<property name="icon">
@ -924,6 +928,14 @@
<string>Certificates...</string>
</property>
</action>
<action name="actionSanitize">
<property name="text">
<string>Sanitize...</string>
</property>
<property name="toolTip">
<string>Sanitize document to remove sensitive information.</string>
</property>
</action>
</widget>
<layoutdefault spacing="6" margin="11"/>
<resources>

View File

@ -24,7 +24,6 @@
#include "pdfviewersettingsdialog.h"
#include "pdfdocumentpropertiesdialog.h"
#include "pdfrendertoimagesdialog.h"
#include "pdfoptimizedocumentdialog.h"
#include "pdfdbgheap.h"
#include "pdfdocumentreader.h"

View File

@ -1,4 +1,5 @@
CURRENT:
- Issue #40: Sanitization of documents
V: 1.3.2 1.2.2023
- Issue #39: Code signed installation

View File

@ -12163,5 +12163,62 @@ updateDocumentInfo(qMove(updatedInfoDictionary));</property>
<property name="functionDescription">This function is used to update trailer dictionary. Must be called each time the final document is being built.</property>
<property name="returnType">_void</property>
</QObject>
<QObject class="codegen::GeneratedFunction">
<property name="objectName"></property>
<property name="items">
<QObject class="codegen::GeneratedAction">
<property name="objectName"></property>
<property name="items">
<QObject class="codegen::GeneratedParameter">
<property name="objectName"></property>
<property name="items"/>
<property name="parameterName">pageReference</property>
<property name="parameterType">_PDFObjectReference</property>
<property name="parameterDescription">Reference to the page whose thumbnail should be removed.</property>
</QObject>
</property>
<property name="actionType">Parameters</property>
<property name="variableName"></property>
<property name="variableType">_void</property>
<property name="code"></property>
</QObject>
<QObject class="codegen::GeneratedAction">
<property name="objectName"></property>
<property name="items">
<QObject class="codegen::GeneratedPDFObject">
<property name="objectName"></property>
<property name="items">
<QObject class="codegen::GeneratedPDFObject">
<property name="objectName"></property>
<property name="items"/>
<property name="dictionaryItemName">Thumb</property>
<property name="objectType">DictionaryItemSimple</property>
<property name="value">PDFObject()</property>
</QObject>
</property>
<property name="dictionaryItemName"></property>
<property name="objectType">Dictionary</property>
<property name="value"></property>
</QObject>
</property>
<property name="actionType">CreateObject</property>
<property name="variableName">updatedPageObject</property>
<property name="variableType">_PDFObject</property>
<property name="code"></property>
</QObject>
<QObject class="codegen::GeneratedAction">
<property name="objectName"></property>
<property name="items"/>
<property name="actionType">Code</property>
<property name="variableName"></property>
<property name="variableType">_void</property>
<property name="code">mergeTo(pageReference, updatedPageObject);</property>
</QObject>
</property>
<property name="functionType">Structure</property>
<property name="functionName">removePageThumbnail</property>
<property name="functionDescription">Removes the thumbnail image of the given page by clearing the Thumb entry in the page dictionary.</property>
<property name="returnType">_void</property>
</QObject>
</property>
</root>