Merge remote-tracking branch 'remotes/origin/branch/pdfdiff-REBASED'

This commit is contained in:
Jakub Melka
2021-10-18 18:50:04 +02:00
71 changed files with 9508 additions and 64 deletions

View File

@@ -44,11 +44,13 @@ DESTDIR = $$OUT_PWD/..
SOURCES += \
sources/pdfaction.cpp \
sources/pdfadvancedtools.cpp \
sources/pdfalgorithmlcs.cpp \
sources/pdfannotation.cpp \
sources/pdfblendfunction.cpp \
sources/pdfccittfaxdecoder.cpp \
sources/pdfcms.cpp \
sources/pdfcompiler.cpp \
sources/pdfdiff.cpp \
sources/pdfdocumentbuilder.cpp \
sources/pdfdocumentmanipulator.cpp \
sources/pdfdocumenttextflow.cpp \
@@ -110,11 +112,13 @@ SOURCES += \
HEADERS += \
sources/pdfaction.h \
sources/pdfadvancedtools.h \
sources/pdfalgorithmlcs.h \
sources/pdfannotation.h \
sources/pdfblendfunction.h \
sources/pdfccittfaxdecoder.h \
sources/pdfcms.h \
sources/pdfcompiler.h \
sources/pdfdiff.h \
sources/pdfdocumentbuilder.h \
sources/pdfdocumentdrawinterface.h \
sources/pdfdocumentmanipulator.h \

View File

@@ -0,0 +1,177 @@
// Copyright (C) 2021 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#include "pdfalgorithmlcs.h"
namespace pdf
{
/// Rewrites an aligned sequence so that it carries moved/replaced/removed/added
/// flags. Matched items are copied unchanged. Every maximal run of unmatched
/// items is processed as a group: items found in the moved index lists are
/// emitted first (flagged as moved left/right), then the remaining left and right
/// items are paired one-to-one into replaced items, and the leftovers of the
/// longer side are flagged as removed (left) or added (right). Finally, every
/// unmodified item with both indices valid and different is flagged as moved.
/// \param sequence Sequence to be marked, it is overwritten with the marked sequence
/// \param movedItemsLeft Sorted left indices (index1), which have been moved
/// \param movedItemsRight Sorted right indices (index2), which have been moved
void PDFAlgorithmLongestCommonSubsequenceBase::markSequence(Sequence& sequence,
                                                            const std::vector<size_t>& movedItemsLeft,
                                                            const std::vector<size_t>& movedItemsRight)
{
    // Lookups below use binary search, so both index lists must be sorted
    Q_ASSERT(std::is_sorted(movedItemsLeft.cbegin(), movedItemsLeft.cend()));
    Q_ASSERT(std::is_sorted(movedItemsRight.cbegin(), movedItemsRight.cend()));

    const auto containsIndex = [](const std::vector<size_t>& sortedIndices, size_t index)
    {
        return std::binary_search(sortedIndices.cbegin(), sortedIndices.cend(), index);
    };

    Sequence marked;
    const auto sequenceEnd = sequence.cend();
    auto cursor = sequence.cbegin();

    while (cursor != sequenceEnd)
    {
        if (cursor->isMatch())
        {
            // Matched items are taken over as they are
            marked.push_back(*cursor);
            ++cursor;
            continue;
        }

        // Consume the whole run of unmatched items. Moved items go to the
        // output immediately, the other ones wait for pairing.
        Sequence pendingLeft;
        Sequence pendingRight;

        while (cursor != sequenceEnd && !cursor->isMatch())
        {
            const SequenceItem& source = *cursor;
            Q_ASSERT(source.isLeft() || source.isRight());

            if (source.isLeft())
            {
                if (containsIndex(movedItemsLeft, source.index1))
                {
                    SequenceItem movedItem = source;
                    movedItem.markMovedLeft();
                    marked.push_back(movedItem);
                }
                else
                {
                    pendingLeft.push_back(source);
                }
            }
            else if (source.isRight())
            {
                if (containsIndex(movedItemsRight, source.index2))
                {
                    SequenceItem movedItem = source;
                    movedItem.markMovedRight();
                    marked.push_back(movedItem);
                }
                else
                {
                    pendingRight.push_back(source);
                }
            }

            ++cursor;
        }

        // Pair left and right items in their original order, as long as
        // both sides have some item available.
        const size_t pairCount = std::min(pendingLeft.size(), pendingRight.size());
        const bool hasReplacement = pairCount > 0;

        for (size_t i = 0; i < pairCount; ++i)
        {
            SequenceItem replacedItem;
            replacedItem.index1 = pendingLeft[i].index1;
            replacedItem.index2 = pendingRight[i].index2;
            replacedItem.markReplaced();
            marked.push_back(replacedItem);
        }

        // Unpaired left items are removed; they are also marked as replaced,
        // if the group contains at least one replacement.
        for (size_t i = pairCount; i < pendingLeft.size(); ++i)
        {
            SequenceItem removedItem = pendingLeft[i];
            removedItem.markRemoved();
            if (hasReplacement)
            {
                removedItem.markReplaced();
            }
            marked.push_back(removedItem);
        }

        // Unpaired right items are added, with the same rule for the replaced flag
        for (size_t i = pairCount; i < pendingRight.size(); ++i)
        {
            SequenceItem addedItem = pendingRight[i];
            addedItem.markAdded();
            if (hasReplacement)
            {
                addedItem.markReplaced();
            }
            marked.push_back(addedItem);
        }
    }

    // Unmodified items with both indices valid, but different, changed their position
    for (SequenceItem& item : marked)
    {
        if (item.isMatch() && !item.isModified() && item.index1 != item.index2)
        {
            item.markMoved();
        }
    }

    sequence = qMove(marked);
}
/// Finds all maximal runs of consecutive modified items (added, removed
/// or replaced) in the sequence.
/// \param sequence Sequence
/// \returns Iterator pairs [begin, end) pointing into \p sequence, in order
///          of appearance. Iterators are valid only as long as the sequence
///          is not changed.
PDFAlgorithmLongestCommonSubsequenceBase::SequenceItemRanges PDFAlgorithmLongestCommonSubsequenceBase::getModifiedRanges(Sequence& sequence)
{
    const auto isModified = [](const SequenceItem& item) { return item.isModified(); };
    const auto sequenceEnd = sequence.end();

    SequenceItemRanges ranges;

    // Jump to the first modified item, then find the end of its run,
    // and continue searching behind the run.
    auto rangeBegin = std::find_if(sequence.begin(), sequenceEnd, isModified);
    while (rangeBegin != sequenceEnd)
    {
        auto rangeEnd = std::find_if_not(rangeBegin, sequenceEnd, isModified);
        ranges.emplace_back(rangeBegin, rangeEnd);
        rangeBegin = std::find_if(rangeEnd, sequenceEnd, isModified);
    }

    return ranges;
}
/// Collects flags of all items in the given range (bitwise OR of item flags).
/// \param range Iterator pair [first, second) of sequence items
/// \returns Union of flags of all items in the range, empty flag set
///          for an empty range
PDFAlgorithmLongestCommonSubsequenceBase::SequenceItemFlags PDFAlgorithmLongestCommonSubsequenceBase::collectFlags(const SequenceItemRange& range)
{
    // Start with the empty flag set. The named enumerator is used instead of
    // literal zero, in the same way as the default value of SequenceItem::flags.
    SequenceItemFlags flags = None;

    for (auto it = range.first; it != range.second; ++it)
    {
        flags |= it->flags;
    }

    return flags;
}
} // namespace pdf

View File

@@ -0,0 +1,260 @@
// Copyright (C) 2021 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFALGORITHMLCS_H
#define PDFALGORITHMLCS_H
#include "pdfglobal.h"
namespace pdf
{
/// Non-template part of the longest common subsequence algorithm. Declares
/// types describing an alignment of two sequences (sequence no. 1 is "left",
/// sequence no. 2 is "right") and static helpers operating on such an alignment.
class PDFAlgorithmLongestCommonSubsequenceBase
{
public:
/// Flags of an aligned item. Each flag occupies a separate bit,
/// so flags can be combined together.
enum SequenceItemFlag
{
None = 0x0000,
MovedLeft = 0x0001, ///< Item has been moved from this position (is present in a sequence no. 1)
MovedRight = 0x0002, ///< Item has been moved to this position (is present in a sequence no. 2)
Moved = 0x0004, ///< Index of item has been changed
Added = 0x0008, ///< Item has been added to a sequence no. 2
Removed = 0x0010, ///< Item has been removed from a sequence no. 1
Replaced = 0x0020, ///< Item has been replaced (or sequence of items has been replaced)
};
Q_DECLARE_FLAGS(SequenceItemFlags, SequenceItemFlag)
/// Single item of an aligned sequence. Index equal to
/// std::numeric_limits<size_t>::max() means, that the item
/// is not present on the respective side.
struct SequenceItem
{
/// Index into sequence no. 1 (left), max() if not present
size_t index1 = std::numeric_limits<size_t>::max();
/// Index into sequence no. 2 (right), max() if not present
size_t index2 = std::numeric_limits<size_t>::max();
/// Flags of the item, no flag is set by default
SequenceItemFlags flags = None;
// Tests, if the index of the given side is present
bool isLeftValid() const { return index1 != std::numeric_limits<size_t>::max(); }
bool isRightValid() const { return index2 != std::numeric_limits<size_t>::max(); }
// Item is present on exactly one side (left only / right only)
bool isLeft() const { return isLeftValid() && !isRightValid(); }
bool isRight() const { return isRightValid() && !isLeftValid(); }
// Both indices are present. Note, that this is also true for replaced items
// created by markSequence, because they carry both indices.
bool isMatch() const { return isLeftValid() && isRightValid(); }
// Tests of individual flags
bool isMovedLeft() const { return flags.testFlag(MovedLeft); }
bool isMovedRight() const { return flags.testFlag(MovedRight); }
bool isMoved() const { return flags.testFlag(Moved); }
bool isAdded() const { return flags.testFlag(Added); }
bool isRemoved() const { return flags.testFlag(Removed); }
bool isReplaced() const { return flags.testFlag(Replaced); }
// Item is modified, if it is added, removed or replaced. Moved flags
// alone do not make the item modified.
bool isModified() const { return isAdded() || isRemoved() || isReplaced(); }
// Setters of individual flags (flags are only added, never cleared)
void markMovedLeft() { flags.setFlag(MovedLeft); }
void markMovedRight() { flags.setFlag(MovedRight); }
void markMoved() { flags.setFlag(Moved); }
void markAdded() { flags.setFlag(Added); }
void markRemoved() { flags.setFlag(Removed); }
void markReplaced() { flags.setFlag(Replaced); }
};
// Aligned sequence, and iterator range(s) [first, second) into it
using Sequence = typename std::vector<SequenceItem>;
using SequenceIterator = typename Sequence::iterator;
using SequenceItemRange = typename std::pair<SequenceIterator, SequenceIterator>;
using SequenceItemRanges = typename std::vector<SequenceItemRange>;
/// Marks a sequence with set of flags representing added/removed/replaced/moved
/// items. Moved items sequences must be sorted.
/// \param sequence Sequence to be marked
/// \param movedItemsLeft Sorted sequence of left indices, which have been moved
/// \param movedItemsRight sorted sequence of right indices, which have been moved
static void markSequence(Sequence& sequence,
const std::vector<size_t>& movedItemsLeft,
const std::vector<size_t>& movedItemsRight);
/// Returns item ranges, which should be checked - for example,
/// for text modification. Each range is a maximal run of consecutive
/// modified items; returned iterators point into the passed sequence.
/// \param sequence Sequence
static SequenceItemRanges getModifiedRanges(Sequence& sequence);
/// Collect flags from given item range (bitwise OR of flags of all items)
/// \param range Range
static SequenceItemFlags collectFlags(const SequenceItemRange& range);
};
/// Algorithm for computing longest common subsequence of two sequences
/// of objects. Objects are compared by the supplied comparator, which
/// is called as comparator(*it1, *it2) and its result is used as a boolean
/// (true means, that items are equal). Constructor takes bidirectional
/// iterators to the sequences. So, iterators are required to be bidirectional.
/// Result is computed by perform() and can be read using getSequence().
template<typename Iterator, typename Comparator>
class PDFAlgorithmLongestCommonSubsequence : public PDFAlgorithmLongestCommonSubsequenceBase
{
public:
/// Constructs the algorithm for sequences [it1, it1End) and [it2, it2End)
/// \param it1 Begin of sequence no. 1 (left)
/// \param it1End End of sequence no. 1 (left)
/// \param it2 Begin of sequence no. 2 (right)
/// \param it2End End of sequence no. 2 (right)
/// \param comparator Comparator of items of both sequences
PDFAlgorithmLongestCommonSubsequence(Iterator it1,
Iterator it1End,
Iterator it2,
Iterator it2End,
Comparator comparator);
/// Computes the alignment of both sequences, and stores it
/// into the result sequence (previous result is discarded).
void perform();
/// Returns the result sequence, it is empty until perform() is called
const Sequence& getSequence() const { return m_sequence; }
private:
Iterator m_it1;
Iterator m_it1End;
Iterator m_it2;
Iterator m_it2End;
// Sizes are lengths of the sequences plus one (set in the constructor),
// matrix size is product of both sizes.
size_t m_size1;
size_t m_size2;
size_t m_matrixSize;
Comparator m_comparator;
// Backtrack matrix stored by rows of length m_size1. Value true means,
// that perform() took the value from the left cell, false from the upper cell.
std::vector<bool> m_backtrackData;
Sequence m_sequence;
};
/// Stores both iterator ranges and the comparator, and computes dimensions
/// of the matrix used by perform(). Each dimension is the length of the
/// sequence plus one (extra row/column stands for the empty prefix).
/// \param it1 Begin of sequence no. 1 (left)
/// \param it1End End of sequence no. 1 (left)
/// \param it2 Begin of sequence no. 2 (right)
/// \param it2End End of sequence no. 2 (right)
/// \param comparator Comparator of items of both sequences
template<typename Iterator, typename Comparator>
PDFAlgorithmLongestCommonSubsequence<Iterator, Comparator>::PDFAlgorithmLongestCommonSubsequence(Iterator it1,
                                                                                                 Iterator it1End,
                                                                                                 Iterator it2,
                                                                                                 Iterator it2End,
                                                                                                 Comparator comparator) :
    m_it1(std::move(it1)),
    m_it1End(std::move(it1End)),
    m_it2(std::move(it2)),
    m_it2End(std::move(it2End)),
    m_size1(0),
    m_size2(0),
    m_matrixSize(0),
    m_comparator(std::move(comparator))
{
    // Lengths are measured on the stored iterators, parameters are already moved from
    const auto length1 = std::distance(m_it1, m_it1End);
    const auto length2 = std::distance(m_it2, m_it2End);

    m_size1 = length1 + 1;
    m_size2 = length2 + 1;
    m_matrixSize = m_size1 * m_size2;
}
/// Computes the longest common subsequence of both sequences and stores
/// the resulting alignment into m_sequence (ordered from the start of the
/// sequences). Items with both indices are common items, items with left
/// index only are present just in sequence no. 1, items with right index
/// only are present just in sequence no. 2.
template<typename Iterator, typename Comparator>
void PDFAlgorithmLongestCommonSubsequence<Iterator, Comparator>::perform()
{
    m_backtrackData.resize(m_matrixSize);
    m_sequence.clear();

    // Columns correspond to it1...it1End, rows to it2...it2End. Only two rows
    // of the length table are needed at once: the previous (upper) row and the
    // row being computed. Index zero of each row stands for the empty prefix.
    std::vector<size_t> previousRow(m_size1, size_t());
    std::vector<size_t> currentRow(m_size1, size_t());

    auto rowIt = m_it2;
    for (size_t row = 1; row < m_size2; ++row, ++rowIt)
    {
        const size_t rowOffset = row * m_size1;

        auto columnIt = m_it1;
        for (size_t column = 1; column < m_size1; ++column, ++columnIt)
        {
            if (m_comparator(*columnIt, *rowIt))
            {
                // Match - extend the subsequence from the diagonal cell
                currentRow[column] = previousRow[column - 1] + 1;
                continue;
            }

            // No match - take the better of left/upper cell and remember
            // the direction (upper cell wins ties).
            const size_t fromLeft = currentRow[column - 1];
            const size_t fromAbove = previousRow[column];
            const bool preferLeft = fromLeft > fromAbove;

            currentRow[column] = preferLeft ? fromLeft : fromAbove;
            m_backtrackData[rowOffset + column] = preferLeft;
        }

        // Computed row will become the upper row of the next iteration
        std::swap(previousRow, currentRow);
    }

    // Backtracking from the bottom right corner. While both coordinates are
    // inside the matrix, direction is given by the comparator (diagonal step)
    // or by backtrack data; when one border is reached, remaining items
    // of the other sequence are emitted.
    size_t column = m_size1 - 1;
    size_t row = m_size2 - 1;

    while (column > 0 || row > 0)
    {
        bool stepLeft = column > 0;
        bool stepUp = row > 0;

        if (stepLeft && stepUp)
        {
            auto leftIt = std::next(m_it1, column - 1);
            auto rightIt = std::next(m_it2, row - 1);

            if (!m_comparator(*leftIt, *rightIt))
            {
                // Items differ, so only one of the steps is performed
                stepLeft = m_backtrackData[row * m_size1 + column];
                stepUp = !stepLeft;
            }
        }

        SequenceItem item;

        if (stepLeft)
        {
            item.index1 = column - 1;
            --column;
        }

        if (stepUp)
        {
            item.index2 = row - 1;
            --row;
        }

        m_sequence.push_back(item);
    }

    // Items were collected from the end, so the order must be reversed
    std::reverse(m_sequence.begin(), m_sequence.end());
}
} // namespace pdf
#endif // PDFALGORITHMLCS_H

View File

@@ -39,12 +39,13 @@ class PDFDocument;
/// to be used in viewer application.
enum class PageLayout
{
SinglePage, ///< Display one page at time (single page on screen)
OneColumn, ///< Displays pages in one column (continuous mode)
TwoColumnLeft, ///< Display pages in two continuous columns, odd numbered pages are on the left
TwoColumnRight, ///< Display pages in two continuous columns, even numbered pages are on the left
TwoPagesLeft, ///< Display two pages on the screen, odd numbered pages are on the left
TwoPagesRight ///< Display two pages on the screen, even numbered pages are on the left
SinglePage, ///< Display one page at time (single page on screen)
OneColumn, ///< Displays pages in one column (continuous mode)
TwoColumnLeft, ///< Display pages in two continuous columns, odd numbered pages are on the left
TwoColumnRight, ///< Display pages in two continuous columns, even numbered pages are on the left
TwoPagesLeft, ///< Display two pages on the screen, odd numbered pages are on the left
TwoPagesRight, ///< Display two pages on the screen, even numbered pages are on the left
Custom ///< Custom layout, multiple columns can be used, -1 as page index means page is omitted
};
/// Specifies, how the document should be displayed in the viewer application.

File diff suppressed because it is too large Load Diff

408
Pdf4QtLib/sources/pdfdiff.h Normal file
View File

@@ -0,0 +1,408 @@
// Copyright (C) 2021 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFDIFF_H
#define PDFDIFF_H
#include "pdfdocument.h"
#include "pdfprogress.h"
#include "pdfutils.h"
#include "pdfalgorithmlcs.h"
#include "pdfdocumenttextflow.h"
#include <QObject>
#include <QFuture>
#include <QFutureWatcher>
#include <atomic>
class QIODevice;
class QXmlStreamWriter;
namespace pdf
{
struct PDFDiffPageContext;
/// Result of a comparison of two documents. Stores list of detected
/// differences, rectangles with page indices belonging to them, texts,
/// page sequence and result of the operation. Content is filled by class
/// PDFDiff (declared as a friend), member functions are defined
/// in the implementation file.
class PDF4QTLIBSHARED_EXPORT PDFDiffResult
{
public:
explicit PDFDiffResult();
/// Type of a single difference. Each type occupies a separate bit,
/// so types can be combined into the FLAGS_* masks declared below.
enum class Type : uint32_t
{
Invalid = 0x0000,
PageMoved = 0x0001,
PageAdded = 0x0002,
PageRemoved = 0x0004,
RemovedTextCharContent = 0x0008,
RemovedVectorGraphicContent = 0x0010,
RemovedImageContent = 0x0020,
RemovedShadingContent = 0x0040,
AddedTextCharContent = 0x0080,
AddedVectorGraphicContent = 0x0100,
AddedImageContent = 0x0200,
AddedShadingContent = 0x0400,
TextReplaced = 0x0800,
TextAdded = 0x1000,
TextRemoved = 0x2000,
};
/// Pair of left/right page indices, both are -1 by default.
/// NOTE(review): -1 presumably means, that there is no page
/// on given side - confirm in the implementation.
struct PageSequenceItem
{
PDFInteger leftPage = -1;
PDFInteger rightPage = -1;
};
using PageSequence = std::vector<PageSequenceItem>;
// Rectangles paired with a page index, and constant iterator to them
using RectInfos = std::vector<std::pair<PDFInteger, QRectF>>;
using RectInfosIt = typename RectInfos::const_iterator;
// Result of the comparison operation itself
void setResult(PDFOperationResult result) { m_result = std::move(result); }
const PDFOperationResult& getResult() const { return m_result; }
/// Returns true, if some difference was found
bool isChanged() const { return getDifferencesCount() > 0; }
/// Returns true, if no difference was found
bool isSame() const { return !isChanged(); }
/// Returns number of detected changes
size_t getDifferencesCount() const { return m_differences.size(); }
/// Returns message describing difference in a page content
/// \param index Index
QString getMessage(size_t index) const;
/// Returns index of left page (or -1, if difference occurred
/// only on a right page)
/// \param index Index
PDFInteger getLeftPage(size_t index) const;
/// Returns index of right page (or -1, if difference occurred
/// only on a left page)
/// \param index Index
PDFInteger getRightPage(size_t index) const;
/// Return type of difference
/// \param index Index
Type getType(size_t index) const;
/// Returns text description of type
/// \param index Index
QString getTypeDescription(size_t index) const;
/// Returns iterator range for rectangles of "left" pages of an item
/// \param index Index
std::pair<RectInfosIt, RectInfosIt> getLeftRectangles(size_t index) const;
/// Returns iterator range for rectangles of "right" pages of an item
/// \param index Index
std::pair<RectInfosIt, RectInfosIt> getRightRectangles(size_t index) const;
// Classification of a difference at given index.
// NOTE(review): presumably these test the type of the difference
// against the FLAGS_TYPE_* masks below - confirm in the implementation.
bool isPageMoveAddRemoveDifference(size_t index) const;
bool isPageMoveDifference(size_t index) const;
bool isAddDifference(size_t index) const;
bool isRemoveDifference(size_t index) const;
bool isReplaceDifference(size_t index) const;
// Each function returns true, if stored type flags have at least
// one bit in common with the respective category mask
bool hasPageMoveDifferences() const { return m_typeFlags & FLAGS_PAGE_MOVE; }
bool hasTextDifferences() const { return m_typeFlags & FLAGS_TEXT; }
bool hasVectorGraphicsDifferences() const { return m_typeFlags & FLAGS_VECTOR_GRAPHICS; }
bool hasImageDifferences() const { return m_typeFlags & FLAGS_IMAGE; }
bool hasShadingDifferences() const { return m_typeFlags & FLAGS_SHADING; }
/// Returns sorted changed page indices from left document
std::vector<PDFInteger> getChangedLeftPageIndices() const;
/// Returns sorted changed page indices from right document
std::vector<PDFInteger> getChangedRightPageIndices() const;
/// Filters results using given criteria.
/// NOTE(review): it is not visible in this header, whether a true
/// value keeps or drops given category - confirm in the implementation.
/// \param filterPageMoveDifferences Filter page move differences?
/// \param filterTextDifferences Filter text differences?
/// \param filterVectorGraphicsDifferences Filter vector graphics differences?
/// \param filterImageDifferences Filter image differences?
/// \param filterShadingDifferences Filter shading differences?
PDFDiffResult filter(bool filterPageMoveDifferences,
bool filterTextDifferences,
bool filterVectorGraphicsDifferences,
bool filterImageDifferences,
bool filterShadingDifferences);
// Page sequence accessors
const PageSequence& getPageSequence() const;
void setPageSequence(PageSequence pageSequence);
/// Saves all differences to a XML stream
/// represented by device
/// \param device Output device
void saveToXML(QIODevice* device) const;
/// Saves all differences to a byte array
/// \param byteArray Output byte array
void saveToXML(QByteArray* byteArray) const;
/// Saves all differences to a string
/// \param string Output string
void saveToXML(QString* string) const;
private:
friend class PDFDiff;
// Masks grouping difference types by content category
static constexpr uint32_t FLAGS_PAGE_MOVE = uint32_t(Type::PageMoved) | uint32_t(Type::PageAdded) | uint32_t(Type::PageRemoved);
static constexpr uint32_t FLAGS_TEXT = uint32_t(Type::RemovedTextCharContent) | uint32_t(Type::AddedTextCharContent) | uint32_t(Type::TextReplaced) | uint32_t(Type::TextAdded) | uint32_t(Type::TextRemoved);
static constexpr uint32_t FLAGS_VECTOR_GRAPHICS = uint32_t(Type::RemovedVectorGraphicContent) | uint32_t(Type::AddedVectorGraphicContent);
static constexpr uint32_t FLAGS_IMAGE = uint32_t(Type::RemovedImageContent) | uint32_t(Type::AddedImageContent);
static constexpr uint32_t FLAGS_SHADING = uint32_t(Type::RemovedShadingContent) | uint32_t(Type::AddedShadingContent);
// Masks grouping difference types by kind of change (move/add/remove/replace)
static constexpr uint32_t FLAGS_TYPE_PAGE_MOVE = uint32_t(Type::PageMoved);
static constexpr uint32_t FLAGS_TYPE_PAGE_MOVE_ADD_REMOVE = uint32_t(Type::PageMoved) | uint32_t(Type::PageAdded) | uint32_t(Type::PageRemoved);
static constexpr uint32_t FLAGS_TYPE_ADD = uint32_t(Type::PageAdded) | uint32_t(Type::AddedTextCharContent) | uint32_t(Type::AddedVectorGraphicContent) | uint32_t(Type::AddedImageContent) | uint32_t(Type::AddedShadingContent) | uint32_t(Type::TextAdded);
static constexpr uint32_t FLAGS_TYPE_REMOVE = uint32_t(Type::PageRemoved) | uint32_t(Type::RemovedTextCharContent) | uint32_t(Type::RemovedVectorGraphicContent) | uint32_t(Type::RemovedImageContent) | uint32_t(Type::RemovedShadingContent) | uint32_t(Type::TextRemoved);
static constexpr uint32_t FLAGS_TYPE_REPLACE = uint32_t(Type::TextReplaced);
// Functions for adding differences, one for each difference type.
// They are private, so only this class and its friend PDFDiff can call them.
void addPageMoved(PDFInteger pageIndex1, PDFInteger pageIndex2);
void addPageAdded(PDFInteger pageIndex);
void addPageRemoved(PDFInteger pageIndex);
void addRemovedTextCharContent(PDFInteger pageIndex, QRectF rect);
void addRemovedVectorGraphicContent(PDFInteger pageIndex, QRectF rect);
void addRemovedImageContent(PDFInteger pageIndex, QRectF rect);
void addRemovedShadingContent(PDFInteger pageIndex, QRectF rect);
void addAddedTextCharContent(PDFInteger pageIndex, QRectF rect);
void addAddedVectorGraphicContent(PDFInteger pageIndex, QRectF rect);
void addAddedImageContent(PDFInteger pageIndex, QRectF rect);
void addAddedShadingContent(PDFInteger pageIndex, QRectF rect);
void addTextAdded(PDFInteger pageIndex, QString text, const RectInfos& rectInfos);
void addTextRemoved(PDFInteger pageIndex, QString text, const RectInfos& rectInfos);
void addTextReplaced(PDFInteger pageIndex1,
PDFInteger pageIndex2,
QString textRemoved,
QString textAdded,
const RectInfos& rectInfos1,
const RectInfos& rectInfos2);
void saveToStream(QXmlStreamWriter* stream) const;
void finalize();
uint32_t getTypeFlags(size_t index) const;
/// Single content difference descriptor. It describes type
/// of difference (such as graphics, image, text change) on a page
/// or on a list of multiple pages.
/// NOTE(review): rect index/count members presumably address a range
/// in m_rects, and text indices an item in m_strings (-1 = no text) -
/// confirm in the implementation.
struct Difference
{
Type type = Type::Invalid;
PDFInteger pageIndex1 = -1;
PDFInteger pageIndex2 = -1;
size_t leftRectIndex = 0;
size_t leftRectCount = 0;
size_t rightRectIndex = 0;
size_t rightRectCount = 0;
int textAddedIndex = -1;
int textRemovedIndex = -1;
};
using Differences = std::vector<Difference>;
void addLeftItem(Type type, PDFInteger pageIndex, QRectF rect);
void addRightItem(Type type, PDFInteger pageIndex, QRectF rect);
void addRectLeft(Difference& difference, QRectF rect);
void addRectRight(Difference& difference, QRectF rect);
Differences m_differences;
RectInfos m_rects; ///< Rectangles with page indices
PDFOperationResult m_result;
QStringList m_strings;
// Bit mask tested by has*Differences() functions against category masks
uint32_t m_typeFlags = 0;
PageSequence m_pageSequence;
};
/// Class for result navigation, can go to next, or previous result.
/// Navigator stores only a pointer to the result set by setResult()
/// and an index of the current result; change of selection is announced
/// by signal selectionChanged().
class PDF4QTLIBSHARED_EXPORT PDFDiffResultNavigator : public QObject
{
Q_OBJECT
public:
explicit PDFDiffResultNavigator(QObject* parent);
virtual ~PDFDiffResultNavigator() override;
/// Sets the difference result to be navigated. Only the pointer is stored.
/// NOTE(review): pointed object presumably must outlive its use
/// by the navigator - confirm with callers.
/// \param diffResult Difference result
void setResult(const PDFDiffResult* diffResult);
/// Returns true, if valid result is selected
bool isSelected() const;
/// Returns true if action go to next result can be performed,
/// otherwise false is returned.
bool canGoNext() const;
/// Returns true if action go to previous result can be performed,
/// otherwise false is returned.
bool canGoPrevious() const;
/// Goes to next result. If action cannot be performed,
/// nothing happens and signal is not emitted.
void goNext();
/// Goes to previous result. If action cannot be performed,
/// nothing happens and signal is not emitted.
void goPrevious();
/// Updates selection, if difference result was changed
void update();
/// Selects current index
/// \param currentIndex Index of a result to be selected
void select(size_t currentIndex);
signals:
/// Signal announcing change of selected result
/// \param currentIndex Index of selected result
void selectionChanged(size_t currentIndex);
private:
/// Returns number of differences in the result, or zero, if no result is set
size_t getLimit() const { return m_diffResult ? m_diffResult->getDifferencesCount() : 0; }
// Both members have no in-class initializer here
const PDFDiffResult* m_diffResult;
size_t m_currentIndex;
};
/// Diff engine for comparing two pdf documents. Documents, pages to be
/// compared, options and progress object are set using setters, then
/// the comparison is run by start() and result is read by getResult().
class PDF4QTLIBSHARED_EXPORT PDFDiff : public QObject
{
Q_OBJECT
private:
using BaseClass = QObject;
public:
explicit PDFDiff(QObject* parent);
virtual ~PDFDiff() override;
/// Options of the comparator engine. Each option occupies
/// a separate bit, so options can be combined.
enum Option
{
None = 0x0000,
Asynchronous = 0x0001, ///< Compare document asynchronously
PC_Text = 0x0002, ///< Use text to compare pages (determine, which pages correspond to each other)
PC_VectorGraphics = 0x0004, ///< Use vector graphics to compare pages (determine, which pages correspond to each other)
PC_Images = 0x0008, ///< Use images to compare pages (determine, which pages correspond to each other)
PC_Mesh = 0x0010, ///< Use mesh to compare pages (determine, which pages correspond to each other)
CompareTextsAsVector = 0x0020, ///< Compare texts as vector graphics
CompareWords = 0x0040, ///< Compare words, not just characters
};
Q_DECLARE_FLAGS(Options, Option)
/// Source document (left)
/// \param leftDocument Document
void setLeftDocument(const PDFDocument* leftDocument);
/// Source document (right)
/// \param rightDocument Document
void setRightDocument(const PDFDocument* rightDocument);
/// Source pages to be compared (left document)
/// \param pagesForLeftDocument Page indices
void setPagesForLeftDocument(PDFClosedIntervalSet pagesForLeftDocument);
/// Source pages to be compared (right document)
/// \param pagesForRightDocument Page indices
void setPagesForRightDocument(PDFClosedIntervalSet pagesForRightDocument);
/// Sets progress object (only the pointer is stored)
/// \param progress Progress object
void setProgress(PDFProgress* progress) { m_progress = progress; }
/// Enables or disables comparator engine option
/// \param option Option
/// \param enable Enable or disable option?
void setOption(Option option, bool enable) { m_options.setFlag(option, enable); }
/// Starts comparator engine. If asynchronous engine option
/// is enabled, then separate thread is started, in which two
/// documents are compared, and then signal \p comparationFinished
/// is emitted, otherwise this function is blocking until comparation
/// process is finished.
void start();
/// Stops comparator engine. Result data are cleared.
void stop();
/// Returns result of a comparation process
const PDFDiffResult& getResult() const { return m_result; }
// Algorithm used for text analysis of compared documents
PDFDocumentTextFlowFactory::Algorithm getTextAnalysisAlgorithm() const;
void setTextAnalysisAlgorithm(PDFDocumentTextFlowFactory::Algorithm textAnalysisAlgorithm);
signals:
/// Signal of finished comparation, see start()
void comparationFinished();
private:
/// Steps of the comparation process. StepLast is not a step,
/// its value is equal to the number of steps.
enum Steps
{
StepExtractContentLeftDocument,
StepExtractContentRightDocument,
StepMatchPages,
StepExtractTextLeftDocument,
StepExtractTextRightDocument,
StepCompare,
StepLast
};
// Implementation of the comparation process, functions are defined
// in the implementation file
PDFDiffResult perform();
void stepProgress();
void performSteps(const std::vector<PDFInteger>& leftPages,
const std::vector<PDFInteger>& rightPages,
PDFDiffResult& result);
void performPageMatching(const std::vector<PDFDiffPageContext>& leftPreparedPages,
const std::vector<PDFDiffPageContext>& rightPreparedPages,
PDFAlgorithmLongestCommonSubsequenceBase::Sequence& pageSequence,
std::map<size_t, size_t>& pageMatches);
void performCompare(const std::vector<PDFDiffPageContext>& leftPreparedPages,
const std::vector<PDFDiffPageContext>& rightPreparedPages,
PDFAlgorithmLongestCommonSubsequenceBase::Sequence& pageSequence,
const std::map<size_t, size_t>& pageMatches,
PDFDiffResult& result);
void finalizeGraphicsPieces(PDFDiffPageContext& context);
void onComparationPerformed();
/// Calculates real epsilon for a page. Epsilon is used in page
/// comparation process, where points closer than epsilon
/// are recognized as equal.
/// \param page Page
PDFReal calculateEpsilonForPage(const PDFPage* page) const;
// Members below have no in-class initializers here
PDFProgress* m_progress;
const PDFDocument* m_leftDocument;
const PDFDocument* m_rightDocument;
PDFClosedIntervalSet m_pagesForLeftDocument;
PDFClosedIntervalSet m_pagesForRightDocument;
Options m_options;
PDFReal m_epsilon;
std::atomic_bool m_cancelled;
PDFDiffResult m_result;
PDFDocumentTextFlowFactory::Algorithm m_textAnalysisAlgorithm;
// NOTE(review): future and its watcher presumably serve the asynchronous
// mode of start() - confirm in the implementation
QFuture<PDFDiffResult> m_future;
std::optional<QFutureWatcher<PDFDiffResult>> m_futureWatcher;
};
} // namespace pdf
#endif // PDFDIFF_H

View File

@@ -99,6 +99,31 @@ PDFOperationResult PDFDocumentManipulator::assemble(const AssembledPages& pages)
return true;
}
/// Creates assembled pages for all pages of a document, in the order
/// of pages in the document catalog. Each assembled page refers to the
/// given document index, has no image (image index -1), and takes over
/// rotation and media box size of the source page.
/// \param documentIndex Document index stored into each assembled page
/// \param document Document, whose pages are enumerated
/// \returns Assembled pages, one for each page of the document
PDFDocumentManipulator::AssembledPages PDFDocumentManipulator::createAllDocumentPages(int documentIndex, const PDFDocument* document)
{
    AssembledPages result;

    const auto catalog = document->getCatalog();
    const size_t count = catalog->getPageCount();

    for (size_t index = 0; index < count; ++index)
    {
        const PDFPage* sourcePage = catalog->getPage(index);

        AssembledPage entry;
        entry.documentIndex = documentIndex;
        entry.imageIndex = -1;
        entry.pageIndex = index;
        entry.pageRotation = sourcePage->getPageRotation();
        entry.pageSize = sourcePage->getMediaBox().size();

        result.push_back(entry);
    }

    return result;
}
PDFDocumentManipulator::ProcessedPages PDFDocumentManipulator::processPages(PDFDocumentBuilder& documentBuilder, const AssembledPages& pages)
{
ProcessedPages processedPages;

View File

@@ -91,6 +91,8 @@ public:
/// \returns Assembled document
PDFDocument&& takeAssembledDocument() { return std::move(m_assembledDocument); }
/// Creates assembled pages for all pages of the given document
/// \param documentIndex Document index stored into each assembled page
/// \param document Document, whose pages are enumerated
static AssembledPages createAllDocumentPages(int documentIndex, const PDFDocument* document);
/// Creates assembled page from document index, page index, page size and rotation.
/// The second initializer is fixed to -1 (presumably the image index, meaning
/// that no image is used - verify against the AssembledPage declaration).
static constexpr AssembledPage createDocumentPage(int documentIndex, int pageIndex, QSizeF pageSize, PageRotation pageRotation) { return AssembledPage{ documentIndex, -1, pageIndex, pageSize, pageRotation}; }
/// Creates assembled page from image index, page size and rotation.
/// The first and the third initializer are fixed to -1 (presumably
/// the document index and the page index).
static constexpr AssembledPage createImagePage(int imageIndex, QSizeF pageSize, PageRotation pageRotation) { return AssembledPage{ -1, imageIndex, -1, pageSize, pageRotation}; }
/// Creates assembled page from page size and rotation only,
/// the first three initializers are fixed to -1.
static constexpr AssembledPage createBlankPage(QSizeF pageSize, PageRotation pageRotation) { return AssembledPage{ -1, -1, -1, pageSize, pageRotation}; }

View File

@@ -89,21 +89,22 @@ struct PDFStructureTreeTextItem
};
PDFStructureTreeTextItem() = default;
PDFStructureTreeTextItem(Type type, const PDFStructureItem* item, QString text, PDFInteger pageIndex, QRectF boundingRect) :
type(type), item(item), text(qMove(text)), pageIndex(pageIndex), boundingRect(boundingRect)
PDFStructureTreeTextItem(Type type, const PDFStructureItem* item, QString text, PDFInteger pageIndex, QRectF boundingRect, std::vector<QRectF> characterBoundingRects) :
type(type), item(item), text(qMove(text)), pageIndex(pageIndex), boundingRect(boundingRect), characterBoundingRects(std::move(characterBoundingRects))
{
}
static PDFStructureTreeTextItem createText(QString text, PDFInteger pageIndex, QRectF boundingRect) { return PDFStructureTreeTextItem(Type::Text, nullptr, qMove(text), pageIndex, boundingRect); }
static PDFStructureTreeTextItem createStartTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::StartTag, item, QString(), -1, QRectF()); }
static PDFStructureTreeTextItem createEndTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::EndTag, item, QString(), -1, QRectF()); }
static PDFStructureTreeTextItem createText(QString text, PDFInteger pageIndex, QRectF boundingRect, std::vector<QRectF> characterBoundingRects) { return PDFStructureTreeTextItem(Type::Text, nullptr, qMove(text), pageIndex, boundingRect, std::move(characterBoundingRects)); }
static PDFStructureTreeTextItem createStartTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::StartTag, item, QString(), -1, QRectF(), { }); }
static PDFStructureTreeTextItem createEndTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::EndTag, item, QString(), -1, QRectF(), { }); }
Type type = Type::Text;
const PDFStructureItem* item = nullptr;
QString text;
PDFInteger pageIndex = -1;
QRectF boundingRect;
std::vector<QRectF> characterBoundingRects;
};
using PDFStructureTreeTextSequence = std::vector<PDFStructureTreeTextItem>;
@@ -147,6 +148,7 @@ public:
QRectF boundingRect;
PDFInteger pageIndex = -1;
QString text;
std::vector<QRectF> characterBoundingRects;
};
using TextItems = std::vector<TextItem>;
@@ -204,7 +206,6 @@ protected:
virtual void performOutputCharacter(const PDFTextCharacterInfo& info) override;
virtual void performMarkedContentBegin(const QByteArray& tag, const PDFObject& properties) override;
virtual void performMarkedContentEnd() override;
virtual void performPathPainting(const QPainterPath& path, bool stroke, bool fill, bool text, Qt::FillRule fillRule) override;
private:
const PDFStructureItem* getStructureTreeItemFromMCID(PDFInteger mcid) const;
@@ -232,33 +233,25 @@ private:
QStringList m_unmatchedText;
PDFStructureTreeTextExtractor::Options m_extractorOptions;
PDFInteger m_pageIndex;
std::vector<QRectF> m_characterBoundingRects;
};
void PDFStructureTreeTextContentProcessor::performPathPainting(const QPainterPath& path, bool stroke, bool fill, bool text, Qt::FillRule fillRule)
{
if (!text)
{
// Jakub Melka: This should not occur
return;
}
if (!m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::BoundingBoxes))
{
return;
}
Q_UNUSED(stroke);
Q_UNUSED(fill);
Q_UNUSED(fillRule);
QMatrix matrix = getCurrentWorldMatrix();
QPainterPath worldPath = matrix.map(path);
m_currentBoundingBox = m_currentBoundingBox.united(worldPath.controlPointRect());
}
void PDFStructureTreeTextContentProcessor::finishText()
{
m_currentText = m_currentText.trimmed();
QString trimmedText = m_currentText.trimmed();
const int index = m_currentText.indexOf(trimmedText);
Q_ASSERT(index != -1);
if (trimmedText.size() < m_currentText.size())
{
// Fix character bounding boxes...
if (m_characterBoundingRects.size() == m_currentText.size())
{
std::vector<QRectF> boundingRects(std::next(m_characterBoundingRects.cbegin(), index), std::next(m_characterBoundingRects.cbegin(), index + trimmedText.length()));
m_characterBoundingRects = std::move(boundingRects);
}
m_currentText = std::move(trimmedText);
}
if (!m_currentText.isEmpty() && (!m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::SkipArtifact) || !isArtifact()))
{
if (m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::AdjustReversedText) && isReversedText())
@@ -270,11 +263,14 @@ void PDFStructureTreeTextContentProcessor::finishText()
reversed.push_back(*it);
}
m_currentText = qMove(reversed);
std::reverse(m_characterBoundingRects.begin(), m_characterBoundingRects.end());
}
m_textSequence.emplace_back(PDFStructureTreeTextItem::createText(qMove(m_currentText), m_pageIndex, m_currentBoundingBox));
Q_ASSERT(m_currentText.size() == m_characterBoundingRects.size() || m_characterBoundingRects.empty());
m_textSequence.emplace_back(PDFStructureTreeTextItem::createText(std::move(m_currentText), m_pageIndex, m_currentBoundingBox, std::move(m_characterBoundingRects)));
}
m_currentText = QString();
m_currentBoundingBox = QRectF();
m_characterBoundingRects.clear();
}
bool PDFStructureTreeTextContentProcessor::isArtifact() const
@@ -346,6 +342,7 @@ void PDFStructureTreeTextContentProcessor::performMarkedContentEnd()
m_unmatchedText << qMove(m_currentText);
}
m_currentBoundingBox = QRectF();
m_characterBoundingRects.clear();
}
}
@@ -374,8 +371,6 @@ bool PDFStructureTreeTextContentProcessor::isContentKindSuppressed(ContentKind k
switch (kind)
{
case ContentKind::Text:
return !m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::BoundingBoxes);
case ContentKind::Shapes:
case ContentKind::Images:
case ContentKind::Shading:
@@ -401,6 +396,18 @@ void PDFStructureTreeTextContentProcessor::performOutputCharacter(const PDFTextC
if (!info.character.isNull() && info.character != QChar(QChar::SoftHyphen))
{
m_currentText.push_back(info.character);
QPainterPath worldPath = info.matrix.map(info.outline);
if (!worldPath.isEmpty())
{
QRectF boundingRect = worldPath.controlPointRect();
m_currentBoundingBox = m_currentBoundingBox.united(boundingRect);
m_characterBoundingRects.push_back(boundingRect);
}
else
{
m_characterBoundingRects.push_back(QRectF());
}
}
}
}
@@ -464,17 +471,26 @@ void PDFStructureTreeTextExtractor::perform(const std::vector<PDFInteger>& pageI
switch (sequenceItem.type)
{
case PDFStructureTreeTextItem::Type::StartTag:
{
stack.push(sequenceItem.item);
break;
}
case PDFStructureTreeTextItem::Type::EndTag:
{
stack.pop();
break;
}
case PDFStructureTreeTextItem::Type::Text:
{
if (!stack.empty())
{
m_textForItems[stack.top()].emplace_back(TextItem{ sequenceItem.boundingRect, sequenceItem.pageIndex, sequenceItem.text });
m_textForItems[stack.top()].emplace_back(TextItem{ sequenceItem.boundingRect, sequenceItem.pageIndex, sequenceItem.text, sequenceItem.characterBoundingRects });
}
break;
}
default:
break;
}
}
}
@@ -598,7 +614,7 @@ void PDFStructureTreeTextFlowCollector::visitStructureElement(const PDFStructure
for (const auto& textItem : m_extractor->getText(structureElement))
{
markHasContent();
m_items->push_back(PDFDocumentTextFlow::Item{ textItem.boundingRect, textItem.pageIndex, textItem.text, PDFDocumentTextFlow::Text });
m_items->push_back(PDFDocumentTextFlow::Item{ textItem.boundingRect, textItem.pageIndex, textItem.text, PDFDocumentTextFlow::Text, textItem.characterBoundingRects });
}
acceptChildren(structureElement);
@@ -688,7 +704,7 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume
flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1), PDFDocumentTextFlow::PageStart });
for (const PDFTextFlow& textFlow : textFlows)
{
flowItems.emplace_back(PDFDocumentTextFlow::Item{ textFlow.getBoundingBox(), pageIndex, textFlow.getText(), PDFDocumentTextFlow::Text });
flowItems.emplace_back(PDFDocumentTextFlow::Item{ textFlow.getBoundingBox(), pageIndex, textFlow.getText(), PDFDocumentTextFlow::Text, textFlow.getBoundingBoxes() });
}
flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, QString(), PDFDocumentTextFlow::PageEnd });
@@ -748,7 +764,7 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume
{
if (sequenceItem.type == PDFStructureTreeTextItem::Type::Text)
{
flowItems.emplace_back(PDFDocumentTextFlow::Item{ sequenceItem.boundingRect, pageIndex, sequenceItem.text, PDFDocumentTextFlow::Text });
flowItems.emplace_back(PDFDocumentTextFlow::Item{ sequenceItem.boundingRect, pageIndex, sequenceItem.text, PDFDocumentTextFlow::Text, sequenceItem.characterBoundingRects });
}
}
flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, QString(), PDFDocumentTextFlow::PageEnd });
@@ -1040,4 +1056,36 @@ void PDFDocumentTextFlowEditor::updateModifiedFlag(size_t index)
item->editedItemFlags.setFlag(Modified, isModified);
}
/// Distributes items of this text flow into per-page text flows, keyed by
/// the page index of each item. Only items having at least one flag
/// in common with \p mask are copied, relative order of items is kept.
std::map<PDFInteger, PDFDocumentTextFlow> PDFDocumentTextFlow::split(Flags mask) const
{
    std::map<PDFInteger, PDFDocumentTextFlow> flowsByPage;

    for (const Item& current : m_items)
    {
        if (!(current.flags & mask))
        {
            // Item is masked out
            continue;
        }

        flowsByPage[current.pageIndex].addItem(current);
    }

    return flowsByPage;
}
void PDFDocumentTextFlow::append(const PDFDocumentTextFlow& textFlow)
{
m_items.insert(m_items.end(), textFlow.m_items.cbegin(), textFlow.m_items.cend());
}
/// Returns text of all items as a single string. Text of each item
/// is trimmed, and the trimmed texts are joined by a single space.
QString PDFDocumentTextFlow::getText() const
{
    QStringList trimmedTexts;

    for (const Item& item : m_items)
    {
        trimmedTexts.append(item.text.trimmed());
    }

    return trimmedTexts.join(" ");
}
} // namespace pdf

View File

@@ -56,6 +56,7 @@ public:
PDFInteger pageIndex = 0;
QString text;
Flags flags = None;
std::vector<QRectF> characterBoundingRects;
bool isText() const { return flags.testFlag(Text); }
bool isSpecial() const { return !isText(); }
@@ -71,6 +72,9 @@ public:
}
/// Add text item
void addItem(Item item) { m_items.emplace_back(std::move(item)); }
const Items& getItems() const { return m_items; }
/// Returns item at a given index
@@ -83,6 +87,18 @@ public:
/// Returns true, if text flow is empty
bool isEmpty() const { return m_items.empty(); }
/// Split text flow to pages using given mask. Items, which
/// are masked out, are not added.
/// \param mask Mask
std::map<PDFInteger, PDFDocumentTextFlow> split(Flags mask) const;
/// Appends document text flow to this one
/// \param textFlow Text flow
void append(const PDFDocumentTextFlow& textFlow);
/// Returns text concatenated from all items
QString getText() const;
private:
Items m_items;
};

View File

@@ -154,6 +154,20 @@ void PDFDrawSpaceController::setPageRotation(PageRotation pageRotation)
}
}
/// Stores new custom layout items. If the items are equal to the currently
/// stored ones, nothing happens. The draw space is recalculated only when
/// the custom page layout mode is active.
void PDFDrawSpaceController::setCustomLayout(LayoutItems customLayoutItems)
{
    if (m_customLayoutItems == customLayoutItems)
    {
        // Nothing changed
        return;
    }

    m_customLayoutItems = std::move(customLayoutItems);

    // Stored items affect the draw space only in the custom layout mode
    if (m_pageLayoutMode == PageLayout::Custom)
    {
        recalculate();
    }
}
void PDFDrawSpaceController::recalculate()
{
if (!m_document)
@@ -181,7 +195,7 @@ void PDFDrawSpaceController::recalculate()
QSizeF pageSize = PDFPage::getRotatedBox(catalog->getPage(leftIndex)->getRotatedMediaBoxMM(), m_pageRotation).size();
PDFReal xPos = -pageSize.width() - m_horizontalSpacingMM * 0.5;
QRectF rect(xPos, yPos, pageSize.width(), pageSize.height());
m_layoutItems.emplace_back(blockIndex, leftIndex, rect);
m_layoutItems.emplace_back(blockIndex, leftIndex, -1, rect);
yPosAdvance = qMax(yPosAdvance, pageSize.height());
boundingRect = boundingRect.united(rect);
}
@@ -191,7 +205,7 @@ void PDFDrawSpaceController::recalculate()
QSizeF pageSize = PDFPage::getRotatedBox(catalog->getPage(rightIndex)->getRotatedMediaBoxMM(), m_pageRotation).size();
PDFReal xPos = m_horizontalSpacingMM * 0.5;
QRectF rect(xPos, yPos, pageSize.width(), pageSize.height());
m_layoutItems.emplace_back(blockIndex, rightIndex, rect);
m_layoutItems.emplace_back(blockIndex, rightIndex, -1, rect);
yPosAdvance = qMax(yPosAdvance, pageSize.height());
boundingRect = boundingRect.united(rect);
}
@@ -253,7 +267,7 @@ void PDFDrawSpaceController::recalculate()
{
QSizeF pageSize = PDFPage::getRotatedBox(catalog->getPage(i)->getRotatedMediaBoxMM(), m_pageRotation).size();
QRectF rect(-pageSize.width() * 0.5, -pageSize.height() * 0.5, pageSize.width(), pageSize.height());
m_layoutItems.emplace_back(i, i, rect);
m_layoutItems.emplace_back(i, i, -1, rect);
m_blockItems.emplace_back(rect);
}
@@ -274,7 +288,7 @@ void PDFDrawSpaceController::recalculate()
// Top of current page is at yPos.
QSizeF pageSize = PDFPage::getRotatedBox(catalog->getPage(i)->getRotatedMediaBoxMM(), m_pageRotation).size();
QRectF rect(-pageSize.width() * 0.5, yPos, pageSize.width(), pageSize.height());
m_layoutItems.emplace_back(0, i, rect);
m_layoutItems.emplace_back(0, i, -1, rect);
yPos += pageSize.height() + m_verticalSpacingMM;
boundingRectangle = boundingRectangle.united(rect);
}
@@ -361,6 +375,54 @@ void PDFDrawSpaceController::recalculate()
break;
}
case PageLayout::Custom:
{
m_layoutItems = m_customLayoutItems;
// We do not support page rotation for custom layout
Q_ASSERT(m_pageRotation == PageRotation::None);
// Assure, that layout items are sorted by block and page group
auto comparator = [](const LayoutItem& l, const LayoutItem& r)
{
return std::tie(l.blockIndex, l.groupIndex) < std::tie(r.blockIndex, r.groupIndex);
};
std::stable_sort(m_layoutItems.begin(), m_layoutItems.end(), comparator);
// Now, compute blocks
if (!m_layoutItems.empty())
{
m_blockItems.reserve(m_layoutItems.back().blockIndex + 1);
QRectF currentBoundingRect;
PDFInteger blockIndex = -1;
for (const LayoutItem& layoutItem : m_layoutItems)
{
if (blockIndex != layoutItem.blockIndex)
{
blockIndex = layoutItem.blockIndex;
if (currentBoundingRect.isValid())
{
m_blockItems.push_back(LayoutBlock(currentBoundingRect));
currentBoundingRect = QRectF();
}
}
currentBoundingRect = currentBoundingRect.united(layoutItem.pageRectMM);
}
if (currentBoundingRect.isValid())
{
m_blockItems.push_back(LayoutBlock(currentBoundingRect));
currentBoundingRect = QRectF();
}
}
break;
}
default:
{
Q_ASSERT(false);
@@ -504,7 +566,7 @@ void PDFDrawWidgetProxy::update()
m_layout.items.reserve(items.size());
for (const PDFDrawSpaceController::LayoutItem& item : items)
{
m_layout.items.emplace_back(item.pageIndex, fromDeviceSpace(item.pageRectMM).toRect());
m_layout.items.emplace_back(item.pageIndex, item.groupIndex, fromDeviceSpace(item.pageRectMM).toRect());
}
m_layout.blockRect = fromDeviceSpace(rectangle).toRect();
@@ -700,8 +762,13 @@ void PDFDrawWidgetProxy::drawPages(QPainter* painter, QRect rect, PDFRenderer::F
QRect placedRect = item.pageRect.translated(m_horizontalOffset - m_layout.blockRect.left(), m_verticalOffset - m_layout.blockRect.top());
if (placedRect.intersects(rect))
{
GroupInfo groupInfo = getGroupInfo(item.groupIndex);
// Clear the page space by paper color
painter->fillRect(placedRect, paperColor);
if (groupInfo.drawPaper)
{
painter->fillRect(placedRect, paperColor);
}
const PDFPrecompiledPage* compiledPage = m_compiler->getCompiledPage(item.pageIndex, true);
if (compiledPage && compiledPage->isValid())
@@ -711,7 +778,7 @@ void PDFDrawWidgetProxy::drawPages(QPainter* painter, QRect rect, PDFRenderer::F
const PDFPage* page = m_controller->getDocument()->getCatalog()->getPage(item.pageIndex);
QMatrix matrix = createPagePointToDevicePointMatrix(page, placedRect) * baseMatrix;
compiledPage->draw(painter, page->getCropBox(), matrix, features);
compiledPage->draw(painter, page->getCropBox(), matrix, features, groupInfo.transparency);
PDFTextLayoutGetter layoutGetter = m_textLayoutCompiler->getTextLayoutLazy(item.pageIndex);
// Draw text blocks/text lines, if it is enabled
@@ -939,6 +1006,22 @@ PDFWidgetSnapshot PDFDrawWidgetProxy::getSnapshot() const
return snapshot;
}
/// Stores rendering settings for the page group \p groupIndex. Settings equal
/// to a default-constructed GroupInfo are not stored - an existing entry
/// for the group is removed instead.
void PDFDrawWidgetProxy::setGroupTransparency(PDFInteger groupIndex, bool drawPaper, PDFReal transparency)
{
    GroupInfo requestedInfo;
    requestedInfo.drawPaper = drawPaper;
    requestedInfo.transparency = transparency;

    const bool isDefault = (requestedInfo == GroupInfo());
    if (!isDefault)
    {
        m_groupInfos[groupIndex] = std::move(requestedInfo);
        return;
    }

    // Default settings are represented by a missing entry
    m_groupInfos.erase(groupIndex);
}
QRect PDFDrawWidgetProxy::getPagesIntersectingRectBoundingBox(QRect rect) const
{
QRect resultRect;
@@ -1156,6 +1239,15 @@ void PDFDrawWidgetProxy::setPageLayout(PageLayout pageLayout)
}
}
/// Passes new custom layout items to the draw space controller and emits
/// pageLayoutChanged(). Nothing is done, if the controller already holds
/// equal layout items.
void PDFDrawWidgetProxy::setCustomPageLayout(PDFDrawSpaceController::LayoutItems layoutItems)
{
    const bool isUnchanged = (m_controller->getCustomLayout() == layoutItems);
    if (isUnchanged)
    {
        return;
    }

    m_controller->setCustomLayout(std::move(layoutItems));
    emit pageLayoutChanged();
}
QRectF PDFDrawWidgetProxy::fromDeviceSpace(const QRectF& rect) const
{
Q_ASSERT(rect.isValid());
@@ -1185,6 +1277,9 @@ bool PDFDrawWidgetProxy::isBlockMode() const
case PageLayout::TwoPagesLeft:
case PageLayout::TwoPagesRight:
return true;
case PageLayout::Custom:
return m_controller->getBlockCount() > 1;
}
Q_ASSERT(false);
@@ -1217,6 +1312,10 @@ void PDFDrawWidgetProxy::prefetchPages(PDFInteger pageIndex)
prefetchCount = 2;
break;
case PageLayout::Custom:
prefetchCount = 0;
break;
default:
Q_ASSERT(false);
break;
@@ -1307,6 +1406,17 @@ void PDFDrawWidgetProxy::updateVerticalScrollbarFromOffset()
}
}
/// Returns rendering settings stored for the page group \p groupIndex,
/// or a default-constructed GroupInfo, if nothing is stored for the group.
// NOTE(review): parameter is 'int', while the map is keyed by PDFInteger and
// setGroupTransparency() takes PDFInteger - confirm the narrowing is intended.
PDFDrawWidgetProxy::GroupInfo PDFDrawWidgetProxy::getGroupInfo(int groupIndex) const
{
    const auto found = m_groupInfos.find(groupIndex);
    return (found == m_groupInfos.cend()) ? GroupInfo() : found->second;
}
PDFWidgetAnnotationManager* PDFDrawWidgetProxy::getAnnotationManager() const
{
return m_widget->getAnnotationManager();

View File

@@ -77,14 +77,17 @@ public:
/// page and page rectangle, in which the page is contained.
struct LayoutItem
{
constexpr inline explicit LayoutItem() : blockIndex(-1), pageIndex(-1) { }
constexpr inline explicit LayoutItem(PDFInteger blockIndex, PDFInteger pageIndex, const QRectF& pageRectMM) :
blockIndex(blockIndex), pageIndex(pageIndex), pageRectMM(pageRectMM) { }
constexpr inline explicit LayoutItem() : blockIndex(-1), pageIndex(-1), groupIndex(-1) { }
constexpr inline explicit LayoutItem(PDFInteger blockIndex, PDFInteger pageIndex, PDFInteger groupIndex, const QRectF& pageRectMM) :
blockIndex(blockIndex), pageIndex(pageIndex), groupIndex(groupIndex), pageRectMM(pageRectMM) { }
bool operator ==(const LayoutItem&) const = default;
bool isValid() const { return pageIndex != -1; }
PDFInteger blockIndex;
PDFInteger pageIndex;
PDFInteger groupIndex; ///< Page group index
QRectF pageRectMM;
};
@@ -123,6 +126,15 @@ public:
/// Sets page rotation
void setPageRotation(PageRotation pageRotation);
/// Sets custom layout. Custom layout provides a way to define
/// a custom page layout, including blocks. Block indices must be properly defined,
/// which means that block indices must start at zero and must be continuous. If these
/// criteria are not fulfilled, behaviour is undefined.
void setCustomLayout(LayoutItems customLayoutItems);
/// Returns custom layout
const LayoutItems& getCustomLayout() const { return m_customLayoutItems; }
signals:
void drawSpaceChanged();
void repaintNeeded();
@@ -155,6 +167,7 @@ private:
PDFReal m_verticalSpacingMM;
PDFReal m_horizontalSpacingMM;
PageRotation m_pageRotation;
LayoutItems m_customLayoutItems;
/// Font cache
PDFFontCache m_fontCache;
@@ -282,6 +295,11 @@ public:
/// \param pageLayout Page layout
void setPageLayout(PageLayout pageLayout);
/// Sets custom page layout. If this function is used, page layout mode
/// must be set to 'Custom'.
/// \param layoutItems Layout items
void setCustomPageLayout(PDFDrawSpaceController::LayoutItems layoutItems);
/// Returns the page layout
PageLayout getPageLayout() const { return m_controller->getPageLayout(); }
@@ -354,13 +372,20 @@ public:
/// Returns snapshot of current view area
PDFWidgetSnapshot getSnapshot() const;
/// Sets page group transparency settings. All pages with a given group index
/// will be displayed with these transparency settings.
/// \param groupIndex Group index
/// \param drawPaper Draw background paper
/// \param transparency Page graphics transparency
void setGroupTransparency(PDFInteger groupIndex, bool drawPaper = true, PDFReal transparency = 1.0);
PDFWidgetAnnotationManager* getAnnotationManager() const;
signals:
void drawSpaceChanged();
void pageLayoutChanged();
void renderingError(PDFInteger pageIndex, const QList<PDFRenderError>& errors);
void renderingError(pdf::PDFInteger pageIndex, const QList<pdf::PDFRenderError>& errors);
void repaintNeeded();
void pageImageChanged(bool all, const std::vector<PDFInteger>& pages);
void textLayoutChanged();
@@ -368,12 +393,13 @@ signals:
private:
struct LayoutItem
{
constexpr inline explicit LayoutItem() : pageIndex(-1) { }
constexpr inline explicit LayoutItem(PDFInteger pageIndex, const QRect& pageRect) :
pageIndex(pageIndex), pageRect(pageRect) { }
constexpr inline explicit LayoutItem() : pageIndex(-1), groupIndex(-1) { }
constexpr inline explicit LayoutItem(PDFInteger pageIndex, PDFInteger groupIndex, const QRect& pageRect) :
pageIndex(pageIndex), groupIndex(groupIndex), pageRect(pageRect) { }
PDFInteger pageIndex;
PDFInteger groupIndex; ///< Used to create group of pages (for transparency and overlay)
QRect pageRect;
};
@@ -389,6 +415,14 @@ private:
QRect blockRect;
};
/// Rendering settings of a page group. Default-constructed value
/// (paper drawn, transparency 1.0) is used for groups without stored settings.
struct GroupInfo
{
bool operator==(const GroupInfo&) const = default;
/// If true, page rectangle is filled with paper color before the page is drawn
bool drawPaper = true;
/// Value passed as opacity, when the compiled page is drawn
PDFReal transparency = 1.0;
};
static constexpr size_t INVALID_BLOCK_INDEX = std::numeric_limits<size_t>::max();
// Minimal/maximal zoom is from 8% to 6400 %, according to the PDF 1.7 Reference,
@@ -413,6 +447,8 @@ private:
void updateHorizontalScrollbarFromOffset();
void updateVerticalScrollbarFromOffset();
GroupInfo getGroupInfo(int groupIndex) const;
template<typename T>
struct Range
{
@@ -501,6 +537,11 @@ private:
/// Surface format for OpenGL
QSurfaceFormat m_surfaceFormat;
/// Page group info for rendering. Group of pages
/// can be rendered with transparency or without paper
/// as overlay.
std::map<PDFInteger, GroupInfo> m_groupInfos;
};
} // namespace pdf

View File

@@ -103,7 +103,7 @@ public:
void addInputInterface(IDrawWidgetInputInterface* inputInterface);
signals:
void pageRenderingErrorsChanged(PDFInteger pageIndex, int errorsCount);
void pageRenderingErrorsChanged(pdf::PDFInteger pageIndex, int errorsCount);
private:
void updateRendererImpl();

View File

@@ -107,7 +107,7 @@ public:
// into buckets of appropriate size.
if (scope != Scope::Page)
{
const int buckets = 32 * QThread::idealThreadCount();
const int buckets = 8 * QThread::idealThreadCount();
bucketSize = qMax(1, count / buckets);
}

View File

@@ -20,6 +20,7 @@
#include "pdfcms.h"
#include <QPainter>
#include <QCryptographicHash>
namespace pdf
{
@@ -500,13 +501,18 @@ void PDFPrecompiledPageGenerator::setCompositionMode(QPainter::CompositionMode m
m_precompiledPage->addSetCompositionMode(mode);
}
void PDFPrecompiledPage::draw(QPainter* painter, const QRectF& cropBox, const QMatrix& pagePointToDevicePointMatrix, PDFRenderer::Features features) const
void PDFPrecompiledPage::draw(QPainter* painter,
const QRectF& cropBox,
const QMatrix& pagePointToDevicePointMatrix,
PDFRenderer::Features features,
PDFReal opacity) const
{
Q_ASSERT(painter);
Q_ASSERT(pagePointToDevicePointMatrix.isInvertible());
painter->save();
painter->setWorldMatrix(QMatrix());
painter->setOpacity(opacity);
if (features.testFlag(PDFRenderer::ClipToCropBox))
{
@@ -831,4 +837,224 @@ void PDFPrecompiledPage::finalize(qint64 compilingTimeNS, QList<PDFRenderError>
}
}
/// Walks all instructions of this precompiled page and creates one
/// GraphicPieceInfo for each drawn path, image and mesh (shading). Each info
/// contains a SHA-512 hash of serialized data of the graphic piece. World matrix
/// is tracked using a state stack, so paths and images are hashed in coordinates
/// given by the last set world matrix.
/// \param mediaBox Page's media box, used only for size of the shading test image
/// \param epsilon Numerical precision, coordinates are multiplied by 1 / epsilon
///        and converted to integers before they are hashed
/// \returns Graphic piece infos in the order of drawing instructions
PDFPrecompiledPage::GraphicPieceInfos PDFPrecompiledPage::calculateGraphicPieceInfos(QRectF mediaBox,
PDFReal epsilon) const
{
GraphicPieceInfos infos;
// Only the world matrix is tracked as graphic state
struct State
{
QMatrix matrix;
};
std::stack<State> stateStack;
// Initial state with default-constructed matrix
stateStack.emplace();
// Check, if epsilon is not too small
if (qFuzzyIsNull(epsilon))
{
epsilon = 0.000001;
}
// Coordinates are multiplied by this factor before conversion to integer
PDFReal factor = 1.0 / epsilon;
// Image used for rendering of meshes, created lazily by the first mesh
QImage shadingTestImage;
// Process all instructions
for (const Instruction& instruction : m_instructions)
{
switch (instruction.type)
{
case InstructionType::DrawPath:
{
const PathPaintData& data = m_paths[instruction.dataIndex];
GraphicPieceInfo info;
QByteArray serializedPath;
// Serialize data
if (true)
{
QDataStream stream(&serializedPath, QIODevice::WriteOnly);
stream << data.isText;
stream << data.pen;
stream << data.brush;
// Translate map to page coordinates
QPainterPath pagePath = stateStack.top().matrix.map(data.path);
info.type = data.isText ? GraphicPieceInfo::Type::Text : GraphicPieceInfo::Type::VectorGraphics;
info.boundingRect = pagePath.controlPointRect();
info.pagePath = pagePath;
const int elementCount = pagePath.elementCount();
for (int i = 0; i < elementCount; ++i)
{
QPainterPath::Element element = pagePath.elementAt(i);
// NOTE(review): paths are quantized by qFloor, while image corners
// below use qRound - confirm the difference is intended. Both return
// int, so large coordinate * factor products may not fit.
PDFReal roundedX = qFloor(element.x * factor);
PDFReal roundedY = qFloor(element.y * factor);
stream << roundedX;
stream << roundedY;
stream << element.type;
}
}
QByteArray hash = QCryptographicHash::hash(serializedPath, QCryptographicHash::Sha512);
Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64);
// Copy at most as many bytes, as the hash array can hold
size_t size = qMin<size_t>(hash.length(), info.hash.size());
std::copy(hash.data(), hash.data() + size, info.hash.data());
infos.emplace_back(std::move(info));
break;
}
case InstructionType::DrawImage:
{
const ImageData& data = m_images[instruction.dataIndex];
const QImage& image = data.image;
GraphicPieceInfo info;
// serializedPath holds position and pixel data, serializedImage pixel data only
QByteArray serializedPath;
QByteArray serializedImage;
// Serialize data
if (true)
{
QDataStream stream(&serializedPath, QIODevice::WriteOnly);
QDataStream streamImage(&serializedImage, QIODevice::WriteOnly);
// Jakub Melka: serialize image position
QMatrix worldMatrix = stateStack.top().matrix;
// Image position is described by the unit square mapped by the world matrix
QPainterPath pagePath;
pagePath.addRect(0, 0, 1, 1);
pagePath = worldMatrix.map(pagePath);
info.type = GraphicPieceInfo::Type::Image;
info.boundingRect = pagePath.controlPointRect();
info.pagePath = pagePath;
const int elementCount = pagePath.elementCount();
for (int i = 0; i < elementCount; ++i)
{
QPainterPath::Element element = pagePath.elementAt(i);
PDFReal roundedX = qRound(element.x * factor);
PDFReal roundedY = qRound(element.y * factor);
stream << roundedX;
stream << roundedY;
stream << element.type;
}
// serialize image data
stream.writeBytes(reinterpret_cast<const char*>(image.bits()), image.sizeInBytes());
streamImage.writeBytes(reinterpret_cast<const char*>(image.bits()), image.sizeInBytes());
}
QByteArray hash = QCryptographicHash::hash(serializedPath, QCryptographicHash::Sha512);
Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64);
QByteArray imageHash = QCryptographicHash::hash(serializedImage, QCryptographicHash::Sha512);
size_t size = qMin<size_t>(hash.length(), info.hash.size());
std::copy(hash.data(), hash.data() + size, info.hash.data());
size_t sizeImage = qMin<size_t>(imageHash.length(), info.imageHash.size());
std::copy(imageHash.data(), imageHash.data() + sizeImage, info.imageHash.data());
infos.emplace_back(std::move(info));
break;
}
case InstructionType::DrawMesh:
{
const MeshPaintData& data = m_meshes[instruction.dataIndex];
if (shadingTestImage.isNull())
{
// Test image has aspect ratio of the media box and fits into 256 x 256 pixels
QSizeF mediaBoxSize = mediaBox.size();
mediaBoxSize = mediaBoxSize.scaled(256, 256, Qt::KeepAspectRatio);
QSize imageSize = mediaBoxSize.toSize();
shadingTestImage = QImage(imageSize, QImage::Format_ARGB32);
}
shadingTestImage.fill(Qt::transparent);
// NOTE(review): matrix only scales (with negated y scale) and has no
// translation - confirm the mesh is painted inside the image area.
// An empty media box would lead to division by zero here.
QMatrix pagePointToDevicePointMatrix;
pagePointToDevicePointMatrix.scale(shadingTestImage.width() / mediaBox.width(), -shadingTestImage.height() / mediaBox.height());
{
// Painter is scoped, so painting is finished before pixels are read
QPainter painter(&shadingTestImage);
painter.setWorldMatrix(pagePointToDevicePointMatrix);
data.mesh.paint(&painter, data.alpha);
}
GraphicPieceInfo info;
QByteArray serializedMesh;
// Serialize data
if (true)
{
QDataStream stream(&serializedMesh, QIODevice::WriteOnly);
// serialize image data
stream.writeBytes(reinterpret_cast<const char*>(shadingTestImage.bits()), shadingTestImage.sizeInBytes());
}
QByteArray hash = QCryptographicHash::hash(serializedMesh, QCryptographicHash::Sha512);
Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64);
size_t size = qMin<size_t>(hash.length(), info.hash.size());
std::copy(hash.data(), hash.data() + size, info.hash.data());
// Bounding rectangle and page path are not computed for shadings
info.boundingRect = QRectF();
info.type = GraphicPieceInfo::Type::Shading;
infos.emplace_back(std::move(info));
break;
}
case InstructionType::Clip:
{
// Do nothing, we are just collecting information
break;
}
case InstructionType::SaveGraphicState:
{
// Duplicate current state
stateStack.push(stateStack.top());
break;
}
case InstructionType::RestoreGraphicState:
{
// NOTE(review): assumes balanced save/restore instructions - popping
// the initial state would make the following top() calls invalid.
stateStack.pop();
break;
}
case InstructionType::SetWorldMatrix:
{
// Matrix replaces the current one, it is not multiplied with it
stateStack.top().matrix = m_matrices[instruction.dataIndex];
break;
}
case InstructionType::SetCompositionMode:
{
// Do nothing, we are just collecting information
break;
}
default:
{
Q_ASSERT(false);
break;
}
}
}
return infos;
}
} // namespace pdf

View File

@@ -187,7 +187,12 @@ public:
/// \param cropBox Page's crop box
/// \param pagePointToDevicePointMatrix Page point to device point transformation matrix
/// \param features Renderer features
void draw(QPainter* painter, const QRectF& cropBox, const QMatrix& pagePointToDevicePointMatrix, PDFRenderer::Features features) const;
/// \param opacity Opacity of page graphics
void draw(QPainter* painter,
const QRectF& cropBox,
const QMatrix& pagePointToDevicePointMatrix,
PDFRenderer::Features features,
PDFReal opacity) const;
/// Redact path - remove all content intersecting given path,
/// and fill redact path with given color.
@@ -234,6 +239,45 @@ public:
PDFSnapInfo* getSnapInfo() { return &m_snapInfo; }
const PDFSnapInfo* getSnapInfo() const { return &m_snapInfo; }
/// Information about a single piece of graphics (path, image or shading)
/// of a page. Pieces are ordered by type first and by hash second.
struct GraphicPieceInfo
{
    /// Kind of the graphic piece
    enum class Type
    {
        Unknown,
        Text,
        VectorGraphics,
        Image,
        Shading
    };

    /// Ordering by type, pieces of the same type are ordered
    /// lexicographically by hash. Other members are not compared.
    bool operator<(const GraphicPieceInfo& other) const
    {
        if (type != other.type)
        {
            return type < other.type;
        }

        return hash < other.hash;
    }

    bool isText() const { return Type::Text == type; }
    bool isVectorGraphics() const { return Type::VectorGraphics == type; }
    bool isImage() const { return Type::Image == type; }
    bool isShading() const { return Type::Shading == type; }

    Type type = Type::Unknown;
    QRectF boundingRect;
    std::array<uint8_t, 64> hash = { }; ///< Hash of all data
    std::array<uint8_t, 64> imageHash = { }; ///< Hash of the image only
    QPainterPath pagePath;
};
using GraphicPieceInfos = std::vector<GraphicPieceInfo>;
/// Creates information about pieces of graphics in this page,
/// for example, for comparison purposes. Parameter \p epsilon
/// defines numerical precision - coordinate differences under epsilon
/// are considered as equal.
/// \param mediaBox Page's media box
/// \param epsilon Epsilon
GraphicPieceInfos calculateGraphicPieceInfos(QRectF mediaBox,
PDFReal epsilon) const;
private:
struct PathPaintData
{

View File

@@ -106,7 +106,7 @@ PDFDocument PDFRedact::perform(Options options)
QPainter* painter = contentStreamBuilder.begin(newPageReference);
compiledPage.redact(redactPath, matrix, m_redactFillColor);
compiledPage.draw(painter, QRectF(), matrix, PDFRenderer::None);
compiledPage.draw(painter, QRectF(), matrix, PDFRenderer::None, 1.0);
contentStreamBuilder.end(painter);
}

View File

@@ -244,7 +244,7 @@ QImage PDFRasterizer::render(PDFInteger pageIndex,
QOpenGLPaintDevice device(size);
QPainter painter(&device);
painter.fillRect(QRect(QPoint(0, 0), size), compiledPage->getPaperColor());
compiledPage->draw(&painter, page->getCropBox(), matrix, features);
compiledPage->draw(&painter, page->getCropBox(), matrix, features, 1.0);
if (annotationManager)
{
@@ -276,7 +276,7 @@ QImage PDFRasterizer::render(PDFInteger pageIndex,
image.fill(Qt::white);
QPainter painter(&image);
compiledPage->draw(&painter, page->getCropBox(), matrix, features);
compiledPage->draw(&painter, page->getCropBox(), matrix, features, 1.0);
if (annotationManager)
{

View File

@@ -1176,6 +1176,7 @@ void PDFTextFlow::merge(const PDFTextFlow& next)
m_text += next.m_text;
m_boundingBox = m_boundingBox.united(next.m_boundingBox);
m_characterPointers.insert(m_characterPointers.end(), next.m_characterPointers.cbegin(), next.m_characterPointers.cend());
m_characterBoundingBoxes.insert(m_characterBoundingBoxes.end(), next.m_characterBoundingBoxes.cbegin(), next.m_characterBoundingBoxes.cend());
}
PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags flags, PDFInteger pageIndex)
@@ -1222,6 +1223,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags
{
currentFlow.m_text += QChar(' ');
currentFlow.m_characterPointers.emplace_back();
currentFlow.m_characterBoundingBoxes.emplace_back();
}
}
@@ -1233,6 +1235,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags
pointer.lineIndex = textLineIndex;
pointer.characterIndex = i;
currentFlow.m_characterPointers.emplace_back(qMove(pointer));
currentFlow.m_characterBoundingBoxes.emplace_back(currentCharacter.boundingBox.controlPointRect());
}
// Remove soft hyphen, if it is enabled
@@ -1240,6 +1243,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags
{
currentFlow.m_text.chop(1);
currentFlow.m_characterPointers.pop_back();
currentFlow.m_characterBoundingBoxes.pop_back();
if (!flags.testFlag(AddLineBreaks))
{
@@ -1252,6 +1256,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags
// Add line break
currentFlow.m_text += lineBreak;
currentFlow.m_characterPointers.insert(currentFlow.m_characterPointers.end(), lineBreak.length(), PDFCharacterPointer());
currentFlow.m_characterBoundingBoxes.insert(currentFlow.m_characterBoundingBoxes.end(), lineBreak.length(), QRectF());
++textLineIndex;
}

View File

@@ -297,6 +297,9 @@ public:
/// Returns whole text for this text flow
QString getText() const { return m_text; }
/// Returns character bounding boxes
std::vector<QRectF> getBoundingBoxes() const { return m_characterBoundingBoxes; }
/// Returns text from character pointers
/// \param begin Begin character
/// \param end End character
@@ -330,6 +333,7 @@ private:
QString m_text;
QRectF m_boundingBox;
std::vector<PDFCharacterPointer> m_characterPointers;
std::vector<QRectF> m_characterBoundingBoxes;
};
/// Text layout of single page. Can handle various fonts, various angles of lines

View File

@@ -367,6 +367,15 @@ std::vector<PDFInteger> PDFClosedIntervalSet::unfold() const
return result;
}
/// Shifts both bounds of every stored interval by \p offset.
void PDFClosedIntervalSet::translate(PDFInteger offset)
{
    for (auto it = m_intervals.begin(); it != m_intervals.end(); ++it)
    {
        it->first += offset;
        it->second += offset;
    }
}
PDFClosedIntervalSet PDFClosedIntervalSet::parse(PDFInteger first, PDFInteger last, const QString& text, QString* errorMessage)
{
PDFClosedIntervalSet result;

View File

@@ -694,6 +694,10 @@ public:
/// Returns true, if interval set is empty
bool isEmpty() const { return m_intervals.empty(); }
/// Translates interval set by a given offset
/// \param offset Offset
void translate(PDFInteger offset);
/// Parses text into closed interval set, text should be in form "1,3,4,7,-11,12-,52-53,-",
/// where 1,3,4,7 means single pages, -11 means range from \p first to 11, 12- means range
/// from 12 to \p last, and 52-53 means closed interval [52, 53]. If text is not in this form,