diff --git a/Pdf4QtLib/Pdf4QtLib.pro b/Pdf4QtLib/Pdf4QtLib.pro
index 7dffe03..72743f3 100644
--- a/Pdf4QtLib/Pdf4QtLib.pro
+++ b/Pdf4QtLib/Pdf4QtLib.pro
@@ -110,6 +110,7 @@ SOURCES += \
HEADERS += \
sources/pdfaction.h \
sources/pdfadvancedtools.h \
+ sources/pdfalgorithmlcs.h \
sources/pdfannotation.h \
sources/pdfblendfunction.h \
sources/pdfccittfaxdecoder.h \
diff --git a/Pdf4QtLib/sources/pdfalgorithmlcs.h b/Pdf4QtLib/sources/pdfalgorithmlcs.h
new file mode 100644
index 0000000..90fa190
--- /dev/null
+++ b/Pdf4QtLib/sources/pdfalgorithmlcs.h
@@ -0,0 +1,126 @@
+// Copyright (C) 2021 Jakub Melka
+//
+// This file is part of PDF4QT.
+//
+// PDF4QT is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// with the written consent of the copyright owner, any later version.
+//
+// PDF4QT is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef PDFALGORITHMLCS_H
+#define PDFALGORITHMLCS_H
+
+#include "pdfglobal.h"
+
+namespace pdf
+{
+
+/// Algorithm for computing longest common subsequence, on two sequences
+/// of objects. Objects are compared using the comparator passed to the
+/// constructor (a binary predicate, which returns true for equal objects).
+/// Constructor takes iterators to the sequences, which are required to be bidirectional.
+template<typename Iterator, typename Comparator>
+class PDFAlgorithmLongestCommonSubsequence
+{
+public:
+    /// \param it1 Begin of the first sequence (columns of the table)
+    /// \param it1End End of the first sequence
+    /// \param it2 Begin of the second sequence (rows of the table)
+    /// \param it2End End of the second sequence
+    /// \param comparator Called as comparator(*it1, *it2), returns true on match
+    PDFAlgorithmLongestCommonSubsequence(Iterator it1,
+                                         Iterator it1End,
+                                         Iterator it2,
+                                         Iterator it2End,
+                                         Comparator comparator);
+
+    /// Computes the table of the algorithm (the result is kept in private data)
+    void perform();
+
+private:
+    Iterator m_it1;
+    Iterator m_it1End;
+    Iterator m_it2;
+    Iterator m_it2End;
+
+    size_t m_size1; ///< Length of the first sequence + 1 (width of the table)
+    size_t m_size2; ///< Length of the second sequence + 1 (height of the table)
+    size_t m_matrixSize; ///< m_size1 * m_size2
+
+    Comparator m_comparator;
+
+    std::vector<bool> m_backtrackData; ///< Row-major table, true means "left cell was bigger than upper cell"
+};
+
+template<typename Iterator, typename Comparator>
+PDFAlgorithmLongestCommonSubsequence<Iterator, Comparator>::PDFAlgorithmLongestCommonSubsequence(Iterator it1,
+                                                                                                 Iterator it1End,
+                                                                                                 Iterator it2,
+                                                                                                 Iterator it2End,
+                                                                                                 Comparator comparator) :
+    m_it1(std::move(it1)),
+    m_it1End(std::move(it1End)),
+    m_it2(std::move(it2)),
+    m_it2End(std::move(it2End)),
+    m_size1(0),
+    m_size2(0),
+    m_matrixSize(0),
+    m_comparator(std::move(comparator))
+{
+    // One extra row and column stand for the empty prefix of each sequence
+    m_size1 = static_cast<size_t>(std::distance(m_it1, m_it1End)) + 1;
+    m_size2 = static_cast<size_t>(std::distance(m_it2, m_it2End)) + 1;
+    m_matrixSize = m_size1 * m_size2;
+}
+
+/// Computes lengths of longest common subsequences row by row (only two rows
+/// are kept) and stores, for each cell without a match, whether the left cell
+/// value was bigger than the upper one. Cells with a match are not written.
+template<typename Iterator, typename Comparator>
+void PDFAlgorithmLongestCommonSubsequence<Iterator, Comparator>::perform()
+{
+    m_backtrackData.resize(m_matrixSize);
+    std::vector<size_t> rowTop(m_size1, size_t());
+    std::vector<size_t> rowBottom(m_size1, size_t());
+
+    // Jakub Melka: we will have columns consisting of it1...it1End
+    // and rows consisting of it2...it2End. We iterate through rows,
+    // and for each row, we update longest common subsequence data.
+
+    auto it2 = m_it2;
+    for (size_t i2 = 1; i2 < m_size2; ++i2, ++it2)
+    {
+        auto it1 = m_it1;
+        for (size_t i1 = 1; i1 < m_size1; ++i1, ++it1)
+        {
+            if (m_comparator(*it1, *it2))
+            {
+                // We have match
+                rowBottom[i1] = rowTop[i1 - 1] + 1;
+            }
+            else
+            {
+                const size_t leftCellValue = rowBottom[i1 - 1];
+                const size_t upperCellValue = rowTop[i1];
+                const bool isLeftBigger = leftCellValue > upperCellValue;
+                rowBottom[i1] = isLeftBigger ? leftCellValue : upperCellValue;
+                m_backtrackData[i2 * m_size1 + i1] = isLeftBigger;
+            }
+        }
+
+        // Bottom row will become top row
+        std::swap(rowTop, rowBottom);
+    }
+}
+
+} // namespace pdf
+
+#endif // PDFALGORITHMLCS_H
diff --git a/Pdf4QtLib/sources/pdfdiff.cpp b/Pdf4QtLib/sources/pdfdiff.cpp
index 98e9246..ad7266d 100644
--- a/Pdf4QtLib/sources/pdfdiff.cpp
+++ b/Pdf4QtLib/sources/pdfdiff.cpp
@@ -23,6 +23,7 @@
#include "pdfcms.h"
#include "pdfcompiler.h"
#include "pdfconstants.h"
+#include "pdfalgorithmlcs.h"
#include
@@ -34,7 +35,7 @@ PDFDiff::PDFDiff(QObject* parent) :
m_progress(nullptr),
m_leftDocument(nullptr),
m_rightDocument(nullptr),
- m_options(Asynchronous),
+ m_options(Asynchronous | PC_Text | PC_VectorGraphics | PC_Images),
m_epsilon(0.0001),
m_cancelled(false)
{
@@ -173,6 +174,7 @@ void PDFDiff::stepProgress()
struct PDFDiffPageContext
{
PDFInteger pageIndex = 0;
+ std::array pageHash = { }; ///< Hash of the page content, written by PDFDiff::finalizeGraphicsPieces and compared when pages are matched
PDFPrecompiledPage::GraphicPieceInfos graphicPieces;
};
@@ -208,8 +210,11 @@ void PDFDiff::performSteps(const std::vector& leftPages, const std::
PDFRenderer renderer(m_leftDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings());
renderer.compile(&compiledPage, context.pageIndex);
- PDFReal epsilon = calculateEpsilonForPage(m_leftDocument->getCatalog()->getPage(context.pageIndex));
- context.graphicPieces = compiledPage.calculateGraphicPieceInfos(epsilon);
+ auto page = m_leftDocument->getCatalog()->getPage(context.pageIndex);
+ PDFReal epsilon = calculateEpsilonForPage(page);
+ context.graphicPieces = compiledPage.calculateGraphicPieceInfos(page->getMediaBox(), epsilon);
+
+ finalizeGraphicsPieces(context);
};
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, leftPreparedPages.begin(), leftPreparedPages.end(), fillPageContext);
stepProgress();
@@ -233,14 +238,33 @@ void PDFDiff::performSteps(const std::vector& leftPages, const std::
PDFRenderer renderer(m_rightDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings());
renderer.compile(&compiledPage, context.pageIndex);
- PDFReal epsilon = calculateEpsilonForPage(m_leftDocument->getCatalog()->getPage(context.pageIndex));
- context.graphicPieces = compiledPage.calculateGraphicPieceInfos(epsilon);
+            const PDFPage* page = m_rightDocument->getCatalog()->getPage(context.pageIndex); // pageIndex belongs to the right document, same as the renderer above
+            PDFReal epsilon = calculateEpsilonForPage(page);
+            context.graphicPieces = compiledPage.calculateGraphicPieceInfos(page->getMediaBox(), epsilon);
+
+ finalizeGraphicsPieces(context);
};
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, rightPreparedPages.begin(), rightPreparedPages.end(), fillPageContext);
stepProgress();
}
+ // StepMatchPages
+ if (!m_cancelled)
+ {
+ // Match pages: two pages are considered equal, when their page hashes are equal
+ auto comparePages = [](const PDFDiffPageContext& left, const PDFDiffPageContext& right)
+ {
+ return left.pageHash == right.pageHash;
+ };
+ PDFAlgorithmLongestCommonSubsequence algorithm(leftPreparedPages.cbegin(), leftPreparedPages.cend(),
+ rightPreparedPages.cbegin(), rightPreparedPages.cend(),
+ comparePages);
+ algorithm.perform(); // NOTE(review): algorithm goes out of scope at the end of this block without its result being read
+
+ stepProgress();
+ }
+
// StepExtractTextLeftDocument
if (!m_cancelled)
{
@@ -266,6 +290,43 @@ void PDFDiff::performSteps(const std::vector& leftPages, const std::
}
}
+/// Sorts graphic pieces of the page and computes the page hash (SHA-512)
+/// from the hashes of the pieces. Pieces, whose type is disabled in the
+/// options (PC_Text, PC_VectorGraphics, PC_Images, PC_Mesh), are not hashed.
+/// \param context Page context; graphicPieces are sorted, pageHash is written
+void PDFDiff::finalizeGraphicsPieces(PDFDiffPageContext& context)
+{
+    // Pieces are sorted first, so the page hash does not depend on their order
+    std::sort(context.graphicPieces.begin(), context.graphicPieces.end());
+
+    // Compute page hash using active settings
+    QCryptographicHash hasher(QCryptographicHash::Sha512);
+
+    for (const PDFPrecompiledPage::GraphicPieceInfo& info : context.graphicPieces)
+    {
+        // Pieces of a type disabled in the options are skipped. Pieces of type
+        // Unknown are not tested by any flag, so they are always hashed.
+        const bool isSkipped = (info.isText() && !m_options.testFlag(PC_Text)) ||
+                               (info.isVectorGraphics() && !m_options.testFlag(PC_VectorGraphics)) ||
+                               (info.isImage() && !m_options.testFlag(PC_Images)) ||
+                               (info.isShading() && !m_options.testFlag(PC_Mesh));
+        if (isSkipped)
+        {
+            continue;
+        }
+
+        // QCryptographicHash::addData takes const char*, so the hash bytes are reinterpreted
+        hasher.addData(reinterpret_cast<const char*>(info.hash.data()), int(info.hash.size()));
+    }
+
+    QByteArray hash = hasher.result();
+    Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64);
+
+    // Copy at most as many bytes, as both the digest and pageHash can hold
+    const size_t size = qMin<size_t>(hash.length(), context.pageHash.size());
+    std::copy(hash.data(), hash.data() + size, context.pageHash.data());
+}
+
void PDFDiff::onComparationPerformed()
{
m_cancelled = false;
diff --git a/Pdf4QtLib/sources/pdfdiff.h b/Pdf4QtLib/sources/pdfdiff.h
index ce3a046..9256c4d 100644
--- a/Pdf4QtLib/sources/pdfdiff.h
+++ b/Pdf4QtLib/sources/pdfdiff.h
@@ -31,6 +31,8 @@
namespace pdf
{
+struct PDFDiffPageContext;
+
class PDFDiffResult
{
public:
@@ -57,8 +59,12 @@ public:
enum Option
{
- None = 0x0000,
- Asynchronous = 0x0001, ///< Compare document asynchronously
+ None = 0x0000,
+ Asynchronous = 0x0001, ///< Compare document asynchronously
+ PC_Text = 0x0002, ///< Use text to compare pages (to determine which pages correspond to each other)
+ PC_VectorGraphics = 0x0004, ///< Use vector graphics to compare pages (to determine which pages correspond to each other)
+ PC_Images = 0x0008, ///< Use images to compare pages (to determine which pages correspond to each other)
+ PC_Mesh = 0x0010, ///< Use shadings (meshes) to compare pages (to determine which pages correspond to each other)
};
Q_DECLARE_FLAGS(Options, Option)
@@ -109,6 +115,7 @@ private:
{
StepExtractContentLeftDocument,
StepExtractContentRightDocument,
+ StepMatchPages,
StepExtractTextLeftDocument,
StepExtractTextRightDocument,
StepCompare,
@@ -119,6 +126,7 @@ private:
void stepProgress();
void performSteps(const std::vector& leftPages,
const std::vector& rightPages);
+ void finalizeGraphicsPieces(PDFDiffPageContext& context);
void onComparationPerformed();
diff --git a/Pdf4QtLib/sources/pdfpainter.cpp b/Pdf4QtLib/sources/pdfpainter.cpp
index d1d8d7c..e7fea70 100644
--- a/Pdf4QtLib/sources/pdfpainter.cpp
+++ b/Pdf4QtLib/sources/pdfpainter.cpp
@@ -832,7 +832,8 @@ void PDFPrecompiledPage::finalize(qint64 compilingTimeNS, QList
}
}
-PDFPrecompiledPage::GraphicPieceInfos PDFPrecompiledPage::calculateGraphicPieceInfos(PDFReal epsilon) const
+PDFPrecompiledPage::GraphicPieceInfos PDFPrecompiledPage::calculateGraphicPieceInfos(QRectF mediaBox,
+ PDFReal epsilon) const
{
GraphicPieceInfos infos;
@@ -850,6 +851,8 @@ PDFPrecompiledPage::GraphicPieceInfos PDFPrecompiledPage::calculateGraphicPieceI
}
PDFReal factor = 1.0 / epsilon;
+ QImage shadingTestImage;
+
// Process all instructions
for (const Instruction& instruction : m_instructions)
{
@@ -903,33 +906,98 @@ PDFPrecompiledPage::GraphicPieceInfos PDFPrecompiledPage::calculateGraphicPieceI
case InstructionType::DrawImage:
{
- /*const ImageData& data = m_images[instruction.dataIndex];
+ const ImageData& data = m_images[instruction.dataIndex];
const QImage& image = data.image;
- painter->save();
+ GraphicPieceInfo info;
+ QByteArray serializedPath;
- QMatrix imageTransform(1.0 / image.width(), 0, 0, 1.0 / image.height(), 0, 0);
- QMatrix worldMatrix = imageTransform * painter->worldMatrix();
+ // Serialize data
+ if (true)
+ {
+ QDataStream stream(&serializedPath, QIODevice::WriteOnly);
- // Jakub Melka: Because Qt uses opposite axis direction than PDF, then we must transform the y-axis
- // to the opposite (so the image is then unchanged)
- worldMatrix.translate(0, image.height());
- worldMatrix.scale(1, -1);
+ // Jakub Melka: serialize image position
+ QMatrix worldMatrix = stateStack.top().matrix;
- painter->setWorldMatrix(worldMatrix);
- painter->drawImage(0, 0, image);
- painter->restore();*/
+ QPainterPath pagePath;
+ pagePath.addRect(0, 0, 1, 1);
+ pagePath = worldMatrix.map(pagePath);
+
+ info.type = GraphicPieceInfo::Type::Image;
+ info.boundingRect = pagePath.controlPointRect();
+
+ const int elementCount = pagePath.elementCount();
+ for (int i = 0; i < elementCount; ++i)
+ {
+ QPainterPath::Element element = pagePath.elementAt(i);
+
+ PDFReal roundedX = qRound(element.x * factor);
+ PDFReal roundedY = qRound(element.y * factor);
+
+ stream << roundedX;
+ stream << roundedY;
+ stream << element.type;
+ }
+
+ // serialize image data
+ stream.writeBytes(reinterpret_cast(image.bits()), image.sizeInBytes());
+ }
+
+ QByteArray hash = QCryptographicHash::hash(serializedPath, QCryptographicHash::Sha512);
+ Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64);
+
+ size_t size = qMin(hash.length(), info.hash.size());
+ std::copy(hash.data(), hash.data() + size, info.hash.data());
+
+ infos.emplace_back(std::move(info));
break;
}
case InstructionType::DrawMesh:
{
- /*const MeshPaintData& data = m_meshes[instruction.dataIndex];
+ const MeshPaintData& data = m_meshes[instruction.dataIndex];
- painter->save();
- painter->setWorldMatrix(pagePointToDevicePointMatrix);
- data.mesh.paint(painter, data.alpha);
- painter->restore();*/
+ if (shadingTestImage.isNull())
+ {
+ QSizeF mediaBoxSize = mediaBox.size();
+ mediaBoxSize = mediaBoxSize.scaled(256, 256, Qt::KeepAspectRatio);
+ QSize imageSize = mediaBoxSize.toSize();
+ shadingTestImage = QImage(imageSize, QImage::Format_ARGB32);
+ }
+
+ shadingTestImage.fill(Qt::transparent);
+
+                QMatrix pagePointToDevicePointMatrix; // Qt applies the call made last to a point first: move media box to origin, scale with flipped Y, shift the flipped page back into the image
+                pagePointToDevicePointMatrix.translate(0.0, shadingTestImage.height()).scale(shadingTestImage.width() / mediaBox.width(), -shadingTestImage.height() / mediaBox.height()).translate(-mediaBox.left(), -mediaBox.top());
+
+ {
+ QPainter painter(&shadingTestImage);
+ painter.setWorldMatrix(pagePointToDevicePointMatrix);
+ data.mesh.paint(&painter, data.alpha);
+ }
+
+ GraphicPieceInfo info;
+ QByteArray serializedMesh;
+
+ // Serialize data
+ if (true)
+ {
+ QDataStream stream(&serializedMesh, QIODevice::WriteOnly);
+
+ // serialize image data
+ stream.writeBytes(reinterpret_cast(shadingTestImage.bits()), shadingTestImage.sizeInBytes());
+ }
+
+ QByteArray hash = QCryptographicHash::hash(serializedMesh, QCryptographicHash::Sha512);
+ Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64);
+
+ size_t size = qMin(hash.length(), info.hash.size());
+ std::copy(hash.data(), hash.data() + size, info.hash.data());
+
+ info.boundingRect = QRectF();
+ info.type = GraphicPieceInfo::Type::Shading;
+ infos.emplace_back(std::move(info));
break;
}
diff --git a/Pdf4QtLib/sources/pdfpainter.h b/Pdf4QtLib/sources/pdfpainter.h
index ecf872a..26f85df 100644
--- a/Pdf4QtLib/sources/pdfpainter.h
+++ b/Pdf4QtLib/sources/pdfpainter.h
@@ -241,9 +241,20 @@ public:
Unknown,
Text,
VectorGraphics,
- Image
+ Image,
+ Shading
};
+ bool operator<(const GraphicPieceInfo& other) const // Orders pieces by type first, pieces of the same type by hash
+ {
+ return std::tie(type, hash) < std::tie(other.type, other.hash);
+ }
+
+ bool isText() const { return type == Type::Text; } // True, if piece type is Type::Text
+ bool isVectorGraphics() const { return type == Type::VectorGraphics; } // True, if piece type is Type::VectorGraphics
+ bool isImage() const { return type == Type::Image; } // True, if piece type is Type::Image
+ bool isShading() const { return type == Type::Shading; } // True, if piece type is Type::Shading
+
Type type = Type::Unknown;
QRectF boundingRect;
std::array hash = { };
@@ -255,8 +266,10 @@ public:
/// for example, for comparation reasons. Parameter \p epsilon
/// is for numerical precision - values under epsilon are considered
/// as equal.
+ /// \param mediaBox Page's media box
/// \param epsilon Epsilon
- GraphicPieceInfos calculateGraphicPieceInfos(PDFReal epsilon) const;
+ GraphicPieceInfos calculateGraphicPieceInfos(QRectF mediaBox,
+ PDFReal epsilon) const;
private:
struct PathPaintData