Issue #40: Sanitize document

This commit is contained in:
Jakub Melka
2023-02-19 18:36:46 +01:00
parent 4a603c80c0
commit 361ee247e5
19 changed files with 1089 additions and 243 deletions

View File

@@ -98,6 +98,8 @@ add_library(Pdf4QtLib SHARED
sources/pdfrenderingerrorswidget.ui
sources/pdfselectpagesdialog.ui
sources/pdfobjecteditorwidget_impl.h
sources/pdfdocumentsanitizer.h
sources/pdfdocumentsanitizer.cpp
cmaps.qrc
)

View File

@@ -669,6 +669,13 @@ PDFDocumentBuilder::PDFDocumentBuilder(const PDFDocument* document) :
}
// Creates a builder from an existing object storage. The members m_storage
// and m_version are initialized from the given arguments; nothing else is
// done in the constructor body.
PDFDocumentBuilder::PDFDocumentBuilder(const PDFObjectStorage& storage, PDFVersion version) :
m_storage(storage),
m_version(version)
{
}
void PDFDocumentBuilder::reset()
{
*this = PDFDocumentBuilder();
@@ -5414,6 +5421,20 @@ void PDFDocumentBuilder::updateTrailerDictionary(PDFInteger objectCount)
}
// Removes the thumbnail of a page. A dictionary holding a single "Thumb"
// item, whose value is a default-constructed PDFObject (presumably the null
// object), is assembled and then merged into the object referenced by
// pageReference.
void PDFDocumentBuilder::removePageThumbnail(PDFObjectReference pageReference)
{
    PDFObjectFactory factory;

    // Assemble the update dictionary: << /Thumb (default object) >>
    factory.beginDictionary();
    factory.beginDictionaryItem("Thumb");
    factory << PDFObject();
    factory.endDictionaryItem();
    factory.endDictionary();

    // Apply the update to the page object
    PDFObject thumbnailUpdate = factory.takeObject();
    mergeTo(pageReference, thumbnailUpdate);
}
/* END GENERATED CODE */
} // namespace pdf

View File

@@ -323,6 +323,9 @@ public:
/// Creates a new document as modification of old document
explicit PDFDocumentBuilder(const PDFDocument* document);
/// Creates a new document from storage
explicit PDFDocumentBuilder(const PDFObjectStorage& storage, PDFVersion version);
/// Resets the object to the initial state.
/// \warning All data are lost
void reset();
@@ -491,8 +494,8 @@ public:
PDFObjectReference createActionGoToDocumentPart(PDFObjectReference documentPart);
/// Creates embedded GoTo action. When executed, action points to destination in another document,
/// which is embedded in this document.
/// Creates embedded GoTo action. When executed, action points to destination in another document, which
/// is embedded in this document.
/// \param fileSpecification File specification
/// \param destination Destination in an embedded document
/// \param newWindow Open document in new window
@@ -541,8 +544,8 @@ public:
bool newWindow);
/// Creates launch action. Launch action executes document opening or printing. This variant for
/// Windows operating system, where additional parameters can be specified.
/// Creates launch action. Launch action executes document opening or printing. This variant for Windows
/// operating system, where additional parameters can be specified.
/// \param fileName File name
/// \param defaultDirectory Default directory
/// \param action Action to be performed. Valid values are 'open' or 'print'.
@@ -555,8 +558,8 @@ public:
bool newWindow);
/// Creates named action. Named actions are some predefined actions that interactive PDF processor
/// shall support. Valid values are NextPage, PrevPage, FirstPage, LastPage.
/// Creates named action. Named actions are some predefined actions that interactive PDF processor shall
/// support. Valid values are NextPage, PrevPage, FirstPage, LastPage.
/// \param name Predefined name
PDFObjectReference createActionNamed(QByteArray name);
@@ -581,8 +584,7 @@ public:
PDFObjectReference createActionResetForm();
/// Creates reset interactive form action, which resets all fields except those specified in a given list of
/// fields.
/// Creates reset interactive form action, which resets all fields except those specified in a given list of fields.
/// \param fields Fields to be excluded from reset
PDFObjectReference createActionResetFormExcludedFields(PDFObjectReferenceVector fields);
@@ -641,9 +643,8 @@ public:
PDFObjectReference createActionURI(QString URL);
/// Caret annotations are used to indicate, where text should be inserted (for example, if reviewer
/// reviews the document, and he wants to mark, that some text should be inserted, he uses this
/// annotation).
/// Caret annotations are used to indicate, where text should be inserted (for example, if reviewer reviews the
/// document, and he wants to mark, that some text should be inserted, he uses this annotation).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is caret displayed
/// \param borderWidth Border width
@@ -660,15 +661,15 @@ public:
QString contents);
/// Circle annotation displays ellipse (or circle). Circle border/fill color can be defined, along with
/// border width. Popup annotation can be attached to this annotation.
/// Circle annotation displays ellipse (or circle). Circle border/fill color can be defined, along with border
/// width. Popup annotation can be attached to this annotation.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is circle/ellipse displayed
/// \param borderWidth Width of the border line of circle/ellipse
/// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled,
/// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled, then
/// use invalid QColor.
/// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a border,
/// then use invalid QColor.
/// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a
/// border, then use invalid QColor.
/// \param title Title (it is displayed as title of popup window)
/// \param subject Subject (short description of the subject being addressed by the annotation)
/// \param contents Contents (text displayed, for example, in the marked annotation dialog)
@@ -697,15 +698,15 @@ public:
QString description);
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the
/// same way, as standard text in PDF document. Free text annotations are usually used to comment
/// the document. Free text annotation can also have callout line, with, or without a knee. Specify
/// start/end point parameters of this function to get callout line.
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way,
/// as standard text in PDF document. Free text annotations are usually used to comment the document.
/// Free text annotation can also have callout line, with, or without a knee. Specify start/end point
/// parameters of this function to get callout line.
/// \param page Page to which is annotation added
/// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both
/// callout line and text rectangle.
/// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to
/// match bounding rectangle.
/// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both callout
/// line and text rectangle.
/// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to match
/// bounding rectangle.
/// \param title Title
/// \param subject Subject
/// \param contents Contents (text displayed)
@@ -729,9 +730,9 @@ public:
AnnotationLineEnding endLineType);
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the
/// same way, as standard text in PDF document. Free text annotations are usually used to comment
/// the document. Free text annotation can also have callout line, with, or without a knee.
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way,
/// as standard text in PDF document. Free text annotations are usually used to comment the document.
/// Free text annotation can also have callout line, with, or without a knee.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is text displayed
/// \param title Title
@@ -746,15 +747,15 @@ public:
TextAlignment textAlignment);
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the
/// same way, as standard text in PDF document. Free text annotations are usually used to comment
/// the document. Free text annotation can also have callout line, with, or without a knee. Specify
/// start/end point parameters of this function to get callout line.
/// Free text annotation displays text directly on a page. Text appears directly on the page, in the same way,
/// as standard text in PDF document. Free text annotations are usually used to comment the document.
/// Free text annotation can also have callout line, with, or without a knee. Specify start/end point
/// parameters of this function to get callout line.
/// \param page Page to which is annotation added
/// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both
/// callout line and text rectangle.
/// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to
/// match bounding rectangle.
/// \param boundingRectangle Bounding rectangle of free text annotation. It must contain both callout
/// line and text rectangle.
/// \param textRectangle Rectangle with text, in absolute coordinates. They are then recomputed to match
/// bounding rectangle.
/// \param title Title
/// \param subject Subject
/// \param contents Contents (text displayed)
@@ -776,9 +777,9 @@ public:
AnnotationLineEnding endLineType);
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain
/// window to be opened (and commented). This annotation is usually used to highlight text, but can
/// also highlight other things, such as images, or other graphics.
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to
/// be opened (and commented). This annotation is usually used to highlight text, but can also highlight
/// other things, such as images, or other graphics.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is highlight displayed
/// \param color Color
@@ -793,9 +794,9 @@ public:
QString contents);
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain
/// window to be opened (and commented). This annotation is usually used to highlight text, but can
/// also highlight other things, such as images, or other graphics.
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to
/// be opened (and commented). This annotation is usually used to highlight text, but can also highlight
/// other things, such as images, or other graphics.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is highlight displayed
/// \param color Color
@@ -804,9 +805,9 @@ public:
QColor color);
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain
/// window to be opened (and commented). This annotation is usually used to highlight text, but can
/// also highlight other things, such as images, or other graphics.
/// Text markup annotation is used to highlight text. It is a markup annotation, so it can contain window to
/// be opened (and commented). This annotation is usually used to highlight text, but can also highlight
/// other things, such as images, or other graphics.
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is highlight displayed
/// \param color Color
@@ -849,9 +850,9 @@ public:
QString contents);
/// Line annotation represents straight line, or some more advanced graphics, such as dimension with
/// text. Line annotations are markup annotations, so they can have popup window. Line endings can
/// be specified.
/// Line annotation represents straight line, or some more advanced graphics, such as dimension with text.
/// Line annotations are markup annotations, so they can have popup window. Line endings can be
/// specified.
/// \param page Page to which is annotation added
/// \param boundingRect Line annotation bounding rectangle
/// \param startPoint Line start
@@ -878,9 +879,9 @@ public:
AnnotationLineEnding endLineType);
/// Line annotation represents straight line, or some more advanced graphics, such as dimension with
/// text. Line annotations are markup annotations, so they can have popup window. Line endings can
/// be specified.
/// Line annotation represents straight line, or some more advanced graphics, such as dimension with text.
/// Line annotations are markup annotations, so they can have popup window. Line endings can be
/// specified.
/// \param page Page to which is annotation added
/// \param boundingRect Line annotation bounding rectangle
/// \param startPoint Line start
@@ -893,12 +894,12 @@ public:
/// \param contents Contents (text displayed, for example, in the marked annotation dialog)
/// \param startLineType Start line ending type
/// \param endLineType End line ending type
/// \param leaderLineLength Length of the leader line. Leader line extends from each endpoint of
/// the line perpendicular to the line itself. Value can be either positive, negative or zero. If
/// positive, then extension is in plane that is above the annotation line (in clockwise order),
/// if negative, then it is below the annotation line.
/// \param leaderLineOffset Length of leader line offset, which is the amount of empty space
/// between the endpoints of the annotation and beginning of leader lines
/// \param leaderLineLength Length of the leader line. Leader line extends from each endpoint of the line
/// perpendicular to the line itself. Value can be either positive, negative or zero. If positive, then
/// extension is in plane that is above the annotation line (in clockwise order), if negative, then it is
/// below the annotation line.
/// \param leaderLineOffset Length of leader line offset, which is the amount of empty space between the
/// endpoints of the annotation and beginning of leader lines
/// \param leaderLineExtension Length of leader line extension, which extends leader lines in 180°
/// direction from leader lines (so leader lines continues above drawn line)
/// \param displayContents Display contents of the annotation as text along the line
@@ -922,9 +923,9 @@ public:
bool displayedContentsTopAlign);
/// Creates new link annotation. It usually represents clickable hypertext link. User can also specify
/// action, which can be executed, for example, link can be also in the PDF document (link to some
/// location in document).
/// Creates new link annotation. It usually represents clickable hypertext link. User can also specify action,
/// which can be executed, for example, link can be also in the PDF document (link to some location in
/// document).
/// \param page Page to which is annotation added
/// \param linkRectangle Link rectangle
/// \param URL URL to be launched when user clicks on the link
@@ -935,9 +936,9 @@ public:
LinkHighlightMode highlightMode);
/// Creates new link annotation. It usually represents clickable hypertext link. User can also specify
/// action, which can be executed, for example, link can be also in the PDF document (link to some
/// location in document).
/// Creates new link annotation. It usually represents clickable hypertext link. User can also specify action,
/// which can be executed, for example, link can be also in the PDF document (link to some location in
/// document).
/// \param page Page to which is annotation added
/// \param linkRectangle Link rectangle
/// \param action Action to be performed when user clicks on a link
@@ -948,9 +949,9 @@ public:
LinkHighlightMode highlightMode);
/// Polygon annotation. When opened, they display pop-up window containing the text of associated
/// note (and window title), if popup annotation is attached. Polygon border/fill color can be defined,
/// along with border width.
/// Polygon annotation. When opened, they display pop-up window containing the text of associated note
/// (and window title), if popup annotation is attached. Polygon border/fill color can be defined, along with
/// border width.
/// \param page Page to which is annotation added
/// \param polygon Polygon
/// \param borderWidth Border line width
@@ -969,9 +970,9 @@ public:
QString contents);
/// Polyline annotation. When opened, they display pop-up window containing the text of associated
/// note (and window title), if popup annotation is attached. Polyline border/fill color can be defined,
/// along with border width.
/// Polyline annotation. When opened, they display pop-up window containing the text of associated note
/// (and window title), if popup annotation is attached. Polyline border/fill color can be defined, along with
/// border width.
/// \param page Page to which is annotation added
/// \param polyline Polyline
/// \param borderWidth Border line width
@@ -995,9 +996,9 @@ public:
/// Creates a new popup annotation on the page. Popup annotation is represented usually by floating
/// window, which can be opened, or closed. Popup annotation is associated with parent annotation,
/// which can be usually markup annotation. Popup annotation displays parent annotation's texts, for
/// example, title, comment, date etc.
/// window, which can be opened, or closed. Popup annotation is associated with parent annotation, which
/// can be usually markup annotation. Popup annotation displays parent annotation's texts, for example,
/// title, comment, date etc.
/// \param page Page to which is annotation added
/// \param parentAnnotation Parent annotation (for which is popup window displayed)
/// \param rectangle Area on the page, where popup window appears
@@ -1026,16 +1027,16 @@ public:
QColor color);
/// Square annotation displays rectangle (or square). When opened, they display pop-up window
/// containing the text of associated note (and window title), if popup annotation is attached. Square
/// border/fill color can be defined, along with border width.
/// Square annotation displays rectangle (or square). When opened, they display pop-up window containing
/// the text of associated note (and window title), if popup annotation is attached. Square border/fill color
/// can be defined, along with border width.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is rectangle displayed
/// \param borderWidth Width of the border line of rectangle
/// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled,
/// \param fillColor Fill color of rectangle (interior color). If you do not want to have area color filled, then
/// use invalid QColor.
/// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a border,
/// then use invalid QColor.
/// \param strokeColor Stroke color (color of the rectangle border). If you do not want to have a
/// border, then use invalid QColor.
/// \param title Title (it is displayed as title of popup window)
/// \param subject Subject (short description of the subject being addressed by the annotation)
/// \param contents Contents (text displayed, for example, in the marked annotation dialog)
@@ -1049,8 +1050,8 @@ public:
QString contents);
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can
/// contain window to be opened (and commented).
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1059,8 +1060,8 @@ public:
QColor color);
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can
/// contain window to be opened (and commented).
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is markup displayed
/// \param color Color
@@ -1069,8 +1070,8 @@ public:
QColor color);
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can
/// contain window to be opened (and commented).
/// Text markup annotation is used to squiggly underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1100,8 +1101,8 @@ public:
QString contents);
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1116,8 +1117,8 @@ public:
QString contents);
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1126,8 +1127,8 @@ public:
QColor color);
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to strikeout text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is markup displayed
/// \param color Color
@@ -1136,11 +1137,10 @@ public:
QColor color);
/// Creates text annotation. Text annotation is "sticky note" attached to a point in the PDF document.
/// When closed, it is displayed as icon, if opened, widget appears with attached text. Text annotations
/// do not scale or rotate, they appear independent of zoom/rotate. So, they behave as if flags
/// NoZoom or NoRotate to the annotations are being set. Popup annotation is automatically created
/// for this annotation.
/// Creates text annotation. Text annotation is "sticky note" attached to a point in the PDF document. When
/// closed, it is displayed as icon, if opened, widget appears with attached text. Text annotations do not scale
/// or rotate, they appear independent of zoom/rotate. So, they behave as if flags NoZoom or NoRotate to
/// the annotations are being set. Popup annotation is automatically created for this annotation.
/// \param page Page to which is annotation added
/// \param rectangle Area in which is icon displayed
/// \param iconType Icon type
@@ -1157,8 +1157,8 @@ public:
bool open);
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1167,8 +1167,8 @@ public:
QColor color);
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param quadrilaterals Area in which is markup displayed
/// \param color Color
@@ -1177,8 +1177,8 @@ public:
QColor color);
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain
/// window to be opened (and commented).
/// Text markup annotation is used to underline text. It is a markup annotation, so it can contain window to
/// be opened (and commented).
/// \param page Page to which is annotation added
/// \param rectangle Area in which is markup displayed
/// \param color Color
@@ -1193,13 +1193,13 @@ public:
QString contents);
/// Creates empty catalog. This function is used, when a new document is being created. Do not call
/// this function manually.
/// Creates empty catalog. This function is used, when a new document is being created. Do not call this
/// function manually.
PDFObjectReference createCatalog();
/// Creates page tree root for the catalog. This function is only called when new document is being
/// created. Do not call this function manually.
/// Creates page tree root for the catalog. This function is only called when new document is being created.
/// Do not call this function manually.
PDFObjectReference createCatalogPageTreeRoot();
@@ -1258,8 +1258,7 @@ public:
/// Creates signature dictionary used for preparation in signing process. Can define parameters of the
/// signature.
/// \param filter Filter (for example, Adobe.PPKLite, Entrust.PPKEF, CiCi.SignIt, ...)
/// \param subfilter Subfilter (for example, adbe.pkcs7.detached, adbe.pkcs7.sha1,
/// ETSI.CAdES.detached, ...)
/// \param subfilter Subfilter (for example, adbe.pkcs7.detached, adbe.pkcs7.sha1, ETSI.CAdES.detached, ...)
/// \param contents Contents (reserved data for signature).
/// \param signingTime Signing date/time
/// \param byteRangeItem Item which will fill byte range array.
@@ -1270,8 +1269,8 @@ public:
PDFInteger byteRangeItem);
/// This function is used to create a new trailer dictionary, when blank document is created. Do not
/// call this function manually.
/// This function is used to create a new trailer dictionary, when blank document is created. Do not call this
/// function manually.
/// \param catalog Reference to document catalog
PDFObject createTrailerDictionary(PDFObjectReference catalog);
@@ -1358,9 +1357,9 @@ public:
bool isOpen);
/// Sets annotation quadrilaterals. Quadrilaterals are sequence of 4 points, where first two points are
/// on the upper side of quadrilateral, and the last two points are on the lower side of quadrilateral.
/// Quadrilaterals are represented as unclosed polygon with 4 * n vertices.
/// Sets annotation quadrilaterals. Quadrilaterals are sequence of 4 points, where first two points are on the
/// upper side of quadrilateral, and the last two points are on the lower side of quadrilateral. Quadrilaterals
/// are represented as unclosed polygon with 4 * n vertices.
/// \param annotation Annotation
/// \param quadrilaterals Quadrilaterals
void setAnnotationQuadPoints(PDFObjectReference annotation,
@@ -1461,9 +1460,9 @@ public:
PDFInteger topIndex);
/// Sets form field value. Value must be correct for this form field, no checking is performed. Also, if
/// you use this function, annotation widgets, which are attached to this form field, should also be
/// updated (for example, appearance state and sometimes appearance streams).
/// Sets form field value. Value must be correct for this form field, no checking is performed. Also, if you use
/// this function, annotation widgets, which are attached to this form field, should also be updated (for
/// example, appearance state and sometimes appearance streams).
/// \param formField Form field
/// \param value Value
void setFormFieldValue(PDFObjectReference formField,
@@ -1476,9 +1475,9 @@ public:
/// Set document language.
/// \param language Document language. It should be a language identifier, as defined in ISO 639
/// and ISO 3166. For example, "en-US", where first two letter means language code (en =
/// english), and the latter two is country code (US - United States).
/// \param language Document language. It should be a language identifier, as defined in ISO 639 and
/// ISO 3166. For example, "en-US", where first two letter means language code (en = english), and
/// the latter two is country code (US - United States).
void setLanguage(QString language);
@@ -1494,17 +1493,16 @@ public:
QRectF box);
/// Sets bleed box to the page. Bleed box is, basically, a clipping box for output in a production
/// environment. Default value is the page's crop box.
/// Sets bleed box to the page. Bleed box is, basically, a clipping box for output in a production environment.
/// Default value is the page's crop box.
/// \param page Page
/// \param box Box
void setPageBleedBox(PDFObjectReference page,
QRectF box);
/// Sets crop box to the page. Crop box defines clipping region of the page. Page contents are clipped
/// to this region, graphics outside of clipping box will not be printed. Default value is same, as media
/// box.
/// Sets crop box to the page. Crop box defines clipping region of the page. Page contents are clipped to
/// this region, graphics outside of clipping box will not be printed. Default value is same, as media box.
/// \param page Page
/// \param box Box
void setPageCropBox(PDFObjectReference page,
@@ -1518,8 +1516,8 @@ public:
PDFObjectReference documentPart);
/// Sets media box to the page. The media box defines size of physical medium, onto which the page
/// is to be printed.
/// Sets media box to the page. The media box defines size of physical medium, onto which the page is to be
/// printed.
/// \param page Page
/// \param box Box
void setPageMediaBox(PDFObjectReference page,
@@ -1561,12 +1559,17 @@ public:
QString reasonText);
/// This function is used to update trailer dictionary. Must be called each time the final document is
/// being built.
/// This function is used to update trailer dictionary. Must be called each time the final document is being
/// built.
/// \param objectCount Number of objects (including empty ones)
void updateTrailerDictionary(PDFInteger objectCount);
/// Removes page thumbnail.
/// \param pageReference Reference to the page, from which the thumbnail is removed
void removePageThumbnail(PDFObjectReference pageReference);
/* END GENERATED CODE */
private:

View File

@@ -0,0 +1,306 @@
// Copyright (C) 2023 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#include "pdfdocumentsanitizer.h"
#include "pdfvisitor.h"
#include "pdfexecutionpolicy.h"
#include "pdfoptimizer.h"
#include "pdfdocumentbuilder.h"
namespace pdf
{
// Update visitor which rebuilds visited dictionaries without their
// "Metadata" entries. Each dropped entry increments the counter supplied
// by the caller.
class PDFRemoveMetadataVisitor : public PDFUpdateObjectVisitor
{
public:
// \param storage Object storage, forwarded to the base visitor
// \param counter Counter incremented for each removed "Metadata" entry;
//        only the pointer is stored, the visitor never deletes it
explicit PDFRemoveMetadataVisitor(const PDFObjectStorage* storage, std::atomic<PDFInteger>* counter) :
PDFUpdateObjectVisitor(storage),
m_counter(counter)
{
}
// Rebuilds the dictionary, skipping entries with key "Metadata"
virtual void visitDictionary(const PDFDictionary* dictionary) override;
private:
// Number of removed "Metadata" entries (atomic, supplied by the caller)
std::atomic<PDFInteger>* m_counter;
};
void PDFRemoveMetadataVisitor::visitDictionary(const PDFDictionary* dictionary)
{
Q_ASSERT(dictionary);
std::vector<PDFDictionary::DictionaryEntry> entries;
entries.reserve(dictionary->getCount());
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
dictionary->getValue(i).accept(this);
Q_ASSERT(!m_objectStack.empty());
if (dictionary->getKey(i) != "Metadata")
{
entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
}
else
{
++*m_counter;
}
m_objectStack.pop_back();
}
m_objectStack.push_back(PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(entries))));
}
// Creates the sanitizer with the given parent object and stores the
// requested sanitization flags into m_flags.
// NOTE(review): the parameter is declared as a single SanitizationFlag,
// while getFlags()/setFlags() work with SanitizationFlags - confirm that
// this is intended.
PDFDocumentSanitizer::PDFDocumentSanitizer(SanitizationFlag flags, QObject* parent) :
QObject(parent),
m_flags(flags)
{
}
void PDFDocumentSanitizer::sanitize()
{
Q_EMIT sanitizationStarted();
if (m_flags.testFlag(DocumentInfo))
{
performSanitizeDocumentInfo();
}
if (m_flags.testFlag(Metadata))
{
performSanitizeMetadata();
}
if (m_flags.testFlag(Bookmarks))
{
performSanitizeBookmarks();
}
if (m_flags.testFlag(FileAttachments))
{
performSanitizeFileAttachments();
}
if (m_flags.testFlag(EmbeddedSearchIndex))
{
performSanitizeEmbeddedSearchIndex();
}
if (m_flags.testFlag(MarkupAnnotations))
{
performSanitizeMarkupAnnotations();
}
if (m_flags.testFlag(PageThumbnails))
{
performSanitizePageThumbnails();
}
// Optimize - remove unused objects
PDFOptimizer optimizer(PDFOptimizer::OptimizationFlags(PDFOptimizer::RemoveUnusedObjects | PDFOptimizer::ShrinkObjectStorage | PDFOptimizer::RemoveNullObjects), nullptr);
optimizer.setStorage(m_storage);
optimizer.optimize();
m_storage = optimizer.takeStorage();
Q_EMIT sanitizationFinished();
}
/// Returns the currently configured sanitization flags.
PDFDocumentSanitizer::SanitizationFlags PDFDocumentSanitizer::getFlags() const
{
    const SanitizationFlags currentFlags = m_flags;
    return currentFlags;
}
/// Replaces the configured sanitization flags.
/// \param flags New set of flags used by the next sanitize() call
void PDFDocumentSanitizer::setFlags(SanitizationFlags flags)
{
    this->m_flags = flags;
}
void PDFDocumentSanitizer::performSanitizeDocumentInfo()
{
PDFObjectReference emptyDocumentInfoReference = m_storage.addObject(PDFObject());
PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
const bool hasDocumentInfo = builder.getDocumentInfo().isValid();
builder.setDocumentInfo(emptyDocumentInfoReference);
PDFDocument document = builder.build();
m_storage = document.getStorage();
if (hasDocumentInfo)
{
Q_EMIT sanitizationProgress(tr("Document info was removed."));
}
}
void PDFDocumentSanitizer::performSanitizeMetadata()
{
std::atomic<PDFInteger> counter = 0;
PDFObjectStorage::PDFObjects objects = m_storage.getObjects();
auto processEntry = [this, &counter](PDFObjectStorage::Entry& entry)
{
PDFRemoveMetadataVisitor visitor(&m_storage, &counter);
entry.object.accept(&visitor);
entry.object = visitor.getObject();
};
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Unknown, objects.begin(), objects.end(), processEntry);
m_storage.setObjects(qMove(objects));
Q_EMIT sanitizationProgress(tr("Metadata streams removed: %1").arg(counter));
}
void PDFDocumentSanitizer::performSanitizeBookmarks()
{
PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
PDFObject catalogObject = builder.getObjectByReference(builder.getCatalogReference());
const PDFDictionary* catalogDictionary = builder.getDictionaryFromObject(catalogObject);
const bool hasOutline = catalogDictionary && catalogDictionary->hasKey("Outlines");
if (hasOutline)
{
builder.removeOutline();
PDFDocument document = builder.build();
m_storage = document.getStorage();
Q_EMIT sanitizationProgress(tr("Outline was removed."));
}
}
void PDFDocumentSanitizer::performSanitizeFileAttachments()
{
auto filter = [](const PDFAnnotation* annotation)
{
return annotation->getType() == AnnotationType::FileAttachment;
};
removeAnnotations(filter, tr("File attachments removed: %1."));
}
/// Removes the embedded search index, i.e. the "SearchIndex" entry of the
/// catalog's "PieceInfo" dictionary. The entry is overwritten with a null
/// object and the updated "PieceInfo" dictionary is merged back into the
/// catalog. Nothing is changed and nothing is reported when the catalog has
/// no "PieceInfo" entry, when that entry does not resolve to a dictionary,
/// or when it has no "SearchIndex" key.
void PDFDocumentSanitizer::performSanitizeEmbeddedSearchIndex()
{
    PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));

    PDFObject catalogObject = builder.getObjectByReference(builder.getCatalogReference());
    const PDFDictionary* catalogDictionary = builder.getDictionaryFromObject(catalogObject);

    if (!catalogDictionary || !catalogDictionary->hasKey("PieceInfo"))
    {
        return;
    }

    PDFObject pieceInfoObject = builder.getObject(catalogDictionary->get("PieceInfo"));
    const PDFDictionary* pieceInfoDictionary = builder.getDictionaryFromObject(pieceInfoObject);

    // In a malformed document "PieceInfo" may resolve to something else than
    // a dictionary; the pointer must be checked before it is dereferenced.
    if (!pieceInfoDictionary || !pieceInfoDictionary->hasKey("SearchIndex"))
    {
        return;
    }

    // Overwrite the search index with a null object in a copy of the dictionary
    PDFDictionary dictionaryCopy = *pieceInfoDictionary;
    dictionaryCopy.setEntry(PDFInplaceOrMemoryString("SearchIndex"), PDFObject());
    pieceInfoObject = PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(dictionaryCopy)));

    // Build a partial catalog holding only the updated "PieceInfo" and merge it in
    PDFObjectFactory factory;
    factory.beginDictionary();
    factory.beginDictionaryItem("PieceInfo");
    factory << pieceInfoObject;
    factory.endDictionaryItem();
    factory.endDictionary();
    PDFObject newCatalog = factory.takeObject();
    builder.mergeTo(builder.getCatalogReference(), std::move(newCatalog));

    PDFDocument document = builder.build();
    m_storage = document.getStorage();

    Q_EMIT sanitizationProgress(tr("Search index was removed."));
}
void PDFDocumentSanitizer::performSanitizeMarkupAnnotations()
{
auto filter = [](const PDFAnnotation* annotation)
{
return annotation->asMarkupAnnotation() != nullptr;
};
removeAnnotations(filter, tr("Markup annotations removed: %1."));
}
void PDFDocumentSanitizer::performSanitizePageThumbnails()
{
PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
builder.flattenPageTree();
std::vector<PDFObjectReference> pageReferences = builder.getPages();
std::vector<PDFObjectReference> pagesWithThumbnail;
for (const PDFObjectReference& pageReference : pageReferences)
{
const PDFDictionary* pageDictionary = builder.getDictionaryFromObject(builder.getObjectByReference(pageReference));
if (pageDictionary && pageDictionary->hasKey("Thumb"))
{
pagesWithThumbnail.push_back(pageReference);
}
}
if (!pagesWithThumbnail.empty())
{
for (const auto& pageReference : pagesWithThumbnail)
{
builder.removePageThumbnail(pageReference);
}
PDFDocument document = builder.build();
m_storage = document.getStorage();
Q_EMIT sanitizationProgress(tr("Page thumbnails removed: %1.").arg(pagesWithThumbnail.size()));
}
}
/// Removes every annotation accepted by the filter from all pages.
/// Annotations are read from the "Annots" array of each page, parsed and
/// offered to the filter. Entries which cannot be parsed into an annotation
/// are left untouched and are never passed to the filter. The storage is
/// rebuilt and a progress message emitted only if at least one annotation
/// was removed.
/// \param filter Predicate returning true for annotations to be removed;
///        it is always called with a non-null pointer
/// \param message Progress message with a %1 placeholder, which is replaced
///        by the number of removed annotations
void PDFDocumentSanitizer::removeAnnotations(const std::function<bool (const PDFAnnotation*)>& filter,
                                             QString message)
{
    PDFDocumentBuilder builder(m_storage, PDFVersion(2, 0));
    builder.flattenPageTree();
    const std::vector<PDFObjectReference> pageReferences = builder.getPages();

    // Pairs of (page, annotation) collected first, removed afterwards
    std::vector<std::pair<PDFObjectReference, PDFObjectReference>> annotationsToBeRemoved;

    PDFDocumentDataLoaderDecorator loader(&m_storage);
    for (const PDFObjectReference& pageReference : pageReferences)
    {
        const PDFObject& pageObject = m_storage.getObjectByReference(pageReference);
        const PDFDictionary* pageDictionary = m_storage.getDictionaryFromObject(pageObject);

        if (!pageDictionary)
        {
            continue;
        }

        const std::vector<PDFObjectReference> annotationReferences = loader.readReferenceArrayFromDictionary(pageDictionary, "Annots");
        for (const PDFObjectReference& annotationReference : annotationReferences)
        {
            PDFAnnotationPtr annotation = PDFAnnotation::parse(&m_storage, annotationReference);

            // Guard against a null result (e.g. a broken or unknown annotation);
            // the filters dereference the pointer unconditionally.
            if (annotation && filter(annotation.get()))
            {
                annotationsToBeRemoved.emplace_back(pageReference, annotationReference);
            }
        }
    }

    if (annotationsToBeRemoved.empty())
    {
        return;
    }

    for (const auto& item : annotationsToBeRemoved)
    {
        builder.removeAnnotation(item.first, item.second);
    }

    PDFDocument document = builder.build();
    m_storage = document.getStorage();

    Q_EMIT sanitizationProgress(message.arg(annotationsToBeRemoved.size()));
}
} // namespace pdf

View File

@@ -0,0 +1,99 @@
// Copyright (C) 2023 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFDOCUMENTSANITIZER_H
#define PDFDOCUMENTSANITIZER_H
#include "pdfdocument.h"
namespace pdf
{
class PDFAnnotation;
/// Class for sanitizing documents. Can remove sensitive content from the document,
/// except the content streams. Sanitization is configurable, the user can specify
/// which content should be removed. Typical usage: set the flags, set the document
/// (or the storage), call sanitize() and then take the resulting storage or document.
class PDF4QTLIBSHARED_EXPORT PDFDocumentSanitizer : public QObject
{
Q_OBJECT
public:
/// Individual sanitization steps, which can be combined into SanitizationFlags
enum SanitizationFlag
{
None = 0x0000, ///< No sanitization is performed
DocumentInfo = 0x0001, ///< Remove document information
Metadata = 0x0002, ///< Remove all "Metadata" dictionary entries in all objects
Bookmarks = 0x0004, ///< Remove bookmarks (document outline)
FileAttachments = 0x0008, ///< Remove file attachment annotations from all pages
EmbeddedSearchIndex = 0x0010, ///< Remove embedded search index
MarkupAnnotations = 0x0020, ///< Remove markup annotations from all pages
PageThumbnails = 0x0040, ///< Remove page thumbnails
All = 0xFFFF, ///< All sanitization turned on
};
Q_DECLARE_FLAGS(SanitizationFlags, SanitizationFlag)
/// Constructs the sanitizer.
/// \param flags Initial sanitization flag
/// \param parent Parent object
/// NOTE(review): the parameter is a single SanitizationFlag, not SanitizationFlags;
/// use setFlags() to configure a combination of flags.
explicit PDFDocumentSanitizer(SanitizationFlag flags, QObject* parent);
/// Set document, which should be sanitized (its object storage is copied)
/// \param document Document to be sanitized, must not be null
void setDocument(const PDFDocument* document) { setStorage(document->getStorage()); }
/// Set storage directly (storage must be valid and filled with objects)
/// \param storage Storage
void setStorage(const PDFObjectStorage& storage) { m_storage = storage; }
/// Perform document sanitization. During the sanitization process, various
/// signals are emitted to report progress.
void sanitize();
/// Returns object storage used for sanitization
const PDFObjectStorage& getStorage() const { return m_storage; }
/// Returns object storage by move semantics, the storage held by this object is moved out
PDFObjectStorage takeStorage() { return qMove(m_storage); }
/// Returns sanitized document (created with PDF version 2.0). Object storage
/// held by this object is moved out by this function call.
PDFDocument takeSanitizedDocument() { return PDFDocument(qMove(m_storage), PDFVersion(2, 0)); }
/// Returns currently configured sanitization flags
SanitizationFlags getFlags() const;
/// Sets sanitization flags
/// \param flags Combination of sanitization steps to be performed
void setFlags(SanitizationFlags flags);
signals:
/// Emitted at the beginning of sanitize()
void sanitizationStarted();
/// Emitted by individual sanitization steps
/// \param progressText Translated text describing what was removed
void sanitizationProgress(QString progressText);
/// Emitted at the end of sanitize()
void sanitizationFinished();
private:
// One function per sanitization flag, called from sanitize()
void performSanitizeDocumentInfo();
void performSanitizeMetadata();
void performSanitizeBookmarks();
void performSanitizeFileAttachments();
void performSanitizeEmbeddedSearchIndex();
void performSanitizeMarkupAnnotations();
void performSanitizePageThumbnails();
/// Removes all annotations accepted by the filter from all pages
/// \param filter Returns true for annotations, which should be removed
/// \param message Progress message with %1 placeholder for number of removed annotations
void removeAnnotations(const std::function<bool(const PDFAnnotation*)>& filter, QString message);
SanitizationFlags m_flags; ///< Sanitization steps to be performed
PDFObjectStorage m_storage; ///< Storage being sanitized
};
} // namespace pdf
#endif // PDFDOCUMENTSANITIZER_H

View File

@@ -28,120 +28,6 @@
namespace pdf
{
class PDFUpdateObjectVisitor : public PDFAbstractVisitor
{
public:
explicit inline PDFUpdateObjectVisitor(const PDFObjectStorage* storage) :
m_storage(storage)
{
m_objectStack.reserve(32);
}
virtual void visitNull() override;
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
virtual void visitReference(const PDFObjectReference reference) override;
PDFObject getObject();
protected:
const PDFObjectStorage* m_storage;
std::vector<PDFObject> m_objectStack;
};
/// Pushes a null object onto the stack.
void PDFUpdateObjectVisitor::visitNull()
{
    m_objectStack.emplace_back(PDFObject::createNull());
}
/// Pushes a boolean object with the given value onto the stack.
void PDFUpdateObjectVisitor::visitBool(bool value)
{
    m_objectStack.emplace_back(PDFObject::createBool(value));
}
/// Pushes an integer object with the given value onto the stack.
void PDFUpdateObjectVisitor::visitInt(PDFInteger value)
{
    m_objectStack.emplace_back(PDFObject::createInteger(value));
}
/// Pushes a real number object with the given value onto the stack.
void PDFUpdateObjectVisitor::visitReal(PDFReal value)
{
    m_objectStack.emplace_back(PDFObject::createReal(value));
}
/// Pushes a string object created from the given string onto the stack.
void PDFUpdateObjectVisitor::visitString(PDFStringRef string)
{
    m_objectStack.emplace_back(PDFObject::createString(string));
}
/// Pushes a name object created from the given name onto the stack.
void PDFUpdateObjectVisitor::visitName(PDFStringRef name)
{
    m_objectStack.emplace_back(PDFObject::createName(name));
}
void PDFUpdateObjectVisitor::visitArray(const PDFArray* array)
{
acceptArray(array);
// We have all objects on the stack
Q_ASSERT(array->getCount() <= m_objectStack.size());
auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount());
std::vector<PDFObject> objects(it, m_objectStack.cend());
PDFObject object = PDFObject::createArray(std::make_shared<PDFArray>(qMove(objects)));
m_objectStack.erase(it, m_objectStack.cend());
m_objectStack.push_back(object);
}
void PDFUpdateObjectVisitor::visitDictionary(const PDFDictionary* dictionary)
{
Q_ASSERT(dictionary);
std::vector<PDFDictionary::DictionaryEntry> entries;
entries.reserve(dictionary->getCount());
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
dictionary->getValue(i).accept(this);
Q_ASSERT(!m_objectStack.empty());
entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
m_objectStack.pop_back();
}
m_objectStack.push_back(PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(entries))));
}
void PDFUpdateObjectVisitor::visitStream(const PDFStream* stream)
{
const PDFDictionary* dictionary = stream->getDictionary();
visitDictionary(dictionary);
Q_ASSERT(!m_objectStack.empty());
PDFObject dictionaryObject = m_objectStack.back();
m_objectStack.pop_back();
PDFDictionary newDictionary(*dictionaryObject.getDictionary());
m_objectStack.push_back(PDFObject::createStream(std::make_shared<PDFStream>(qMove(newDictionary), QByteArray(*stream->getContent()))));
}
/// Pushes a reference object onto the stack; the referenced object itself
/// is not visited here.
void PDFUpdateObjectVisitor::visitReference(const PDFObjectReference reference)
{
    m_objectStack.emplace_back(PDFObject::createReference(reference));
}
/// Returns the rebuilt object by moving it out of the stack. Exactly one
/// object is expected on the stack; the moved-from element stays there.
PDFObject PDFUpdateObjectVisitor::getObject()
{
    Q_ASSERT(m_objectStack.size() == 1);

    PDFObject& rebuiltObject = m_objectStack.back();
    return qMove(rebuiltObject);
}
class PDFRemoveSimpleObjectsVisitor : public PDFUpdateObjectVisitor
{
public:

View File

@@ -192,4 +192,91 @@ void PDFStatisticsCollector::collectStatisticsOfSimpleObject(PDFObject::Type typ
statistics.memoryConsumptionEstimate += sizeof(PDFObject);
}
/// Pushes a null object onto the stack.
void PDFUpdateObjectVisitor::visitNull()
{
    m_objectStack.emplace_back(PDFObject::createNull());
}
/// Pushes a boolean object with the given value onto the stack.
void PDFUpdateObjectVisitor::visitBool(bool value)
{
    m_objectStack.emplace_back(PDFObject::createBool(value));
}
/// Pushes an integer object with the given value onto the stack.
void PDFUpdateObjectVisitor::visitInt(PDFInteger value)
{
    m_objectStack.emplace_back(PDFObject::createInteger(value));
}
/// Pushes a real number object with the given value onto the stack.
void PDFUpdateObjectVisitor::visitReal(PDFReal value)
{
    m_objectStack.emplace_back(PDFObject::createReal(value));
}
/// Pushes a string object created from the given string onto the stack.
void PDFUpdateObjectVisitor::visitString(PDFStringRef string)
{
    m_objectStack.emplace_back(PDFObject::createString(string));
}
/// Pushes a name object created from the given name onto the stack.
void PDFUpdateObjectVisitor::visitName(PDFStringRef name)
{
    m_objectStack.emplace_back(PDFObject::createName(name));
}
void PDFUpdateObjectVisitor::visitArray(const PDFArray* array)
{
acceptArray(array);
// We have all objects on the stack
Q_ASSERT(array->getCount() <= m_objectStack.size());
auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount());
std::vector<PDFObject> objects(it, m_objectStack.cend());
PDFObject object = PDFObject::createArray(std::make_shared<PDFArray>(qMove(objects)));
m_objectStack.erase(it, m_objectStack.cend());
m_objectStack.push_back(object);
}
void PDFUpdateObjectVisitor::visitDictionary(const PDFDictionary* dictionary)
{
Q_ASSERT(dictionary);
std::vector<PDFDictionary::DictionaryEntry> entries;
entries.reserve(dictionary->getCount());
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
dictionary->getValue(i).accept(this);
Q_ASSERT(!m_objectStack.empty());
entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
m_objectStack.pop_back();
}
m_objectStack.push_back(PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(entries))));
}
void PDFUpdateObjectVisitor::visitStream(const PDFStream* stream)
{
const PDFDictionary* dictionary = stream->getDictionary();
visitDictionary(dictionary);
Q_ASSERT(!m_objectStack.empty());
PDFObject dictionaryObject = m_objectStack.back();
m_objectStack.pop_back();
PDFDictionary newDictionary(*dictionaryObject.getDictionary());
m_objectStack.push_back(PDFObject::createStream(std::make_shared<PDFStream>(qMove(newDictionary), QByteArray(*stream->getContent()))));
}
/// Pushes a reference object onto the stack; the referenced object itself
/// is not visited here.
void PDFUpdateObjectVisitor::visitReference(const PDFObjectReference reference)
{
    m_objectStack.emplace_back(PDFObject::createReference(reference));
}
/// Returns the rebuilt object by moving it out of the stack. Exactly one
/// object is expected on the stack; the moved-from element stays there.
PDFObject PDFUpdateObjectVisitor::getObject()
{
    Q_ASSERT(m_objectStack.size() == 1);

    PDFObject& rebuiltObject = m_objectStack.back();
    return qMove(rebuiltObject);
}
} // namespace pdf

View File

@@ -206,6 +206,33 @@ struct PDFApplyVisitorImpl<Visitor, PDFAbstractVisitor::Strategy::Sequential>
}
};
class PDFUpdateObjectVisitor : public PDFAbstractVisitor
{
public:
explicit inline PDFUpdateObjectVisitor(const PDFObjectStorage* storage) :
m_storage(storage)
{
m_objectStack.reserve(32);
}
virtual void visitNull() override;
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
virtual void visitReference(const PDFObjectReference reference) override;
PDFObject getObject();
protected:
const PDFObjectStorage* m_storage;
std::vector<PDFObject> m_objectStack;
};
} // namespace pdf
#endif // PDFVISITOR_H