DocDiff application: calculating page graphic piece info

This commit is contained in:
Jakub Melka 2021-09-08 20:33:32 +02:00
parent e354a03564
commit b2a9342047
5 changed files with 295 additions and 22 deletions

View File

@ -16,7 +16,13 @@
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>. // along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#include "pdfdiff.h" #include "pdfdiff.h"
#include "pdfrenderer.h"
#include "pdfdocumenttextflow.h" #include "pdfdocumenttextflow.h"
#include "pdfexecutionpolicy.h"
#include "pdffont.h"
#include "pdfcms.h"
#include "pdfcompiler.h"
#include "pdfconstants.h"
#include <QtConcurrent/QtConcurrent> #include <QtConcurrent/QtConcurrent>
@ -29,6 +35,7 @@ PDFDiff::PDFDiff(QObject* parent) :
m_leftDocument(nullptr), m_leftDocument(nullptr),
m_rightDocument(nullptr), m_rightDocument(nullptr),
m_options(Asynchronous), m_options(Asynchronous),
m_epsilon(0.0001),
m_cancelled(false) m_cancelled(false)
{ {
@ -141,30 +148,11 @@ PDFDiffResult PDFDiff::perform()
{ {
ProgressStartupInfo info; ProgressStartupInfo info;
info.showDialog = false; info.showDialog = false;
info.text = tr(""); info.text = tr("Comparing documents.");
m_progress->start(StepLast, std::move(info)); m_progress->start(StepLast, std::move(info));
} }
// StepExtractContentLeftDocument performSteps(leftPages, rightPages);
stepProgress();
// StepExtractContentRightDocument
stepProgress();
// StepExtractTextLeftDocument
pdf::PDFDocumentTextFlowFactory factoryLeftDocumentTextFlow;
factoryLeftDocumentTextFlow.setCalculateBoundingBoxes(true);
PDFDocumentTextFlow leftTextFlow = factoryLeftDocumentTextFlow.create(m_leftDocument, leftPages, PDFDocumentTextFlowFactory::Algorithm::Auto);
stepProgress();
// StepExtractTextRightDocument
pdf::PDFDocumentTextFlowFactory factoryRightDocumentTextFlow;
factoryRightDocumentTextFlow.setCalculateBoundingBoxes(true);
PDFDocumentTextFlow rightTextFlow = factoryRightDocumentTextFlow.create(m_rightDocument, rightPages, PDFDocumentTextFlowFactory::Algorithm::Auto);
stepProgress();
// StepCompare
stepProgress();
if (m_progress) if (m_progress)
{ {
@ -182,6 +170,102 @@ void PDFDiff::stepProgress()
} }
} }
/// Per-page working data of the document comparison. One context is created
/// for each compared page; the graphic pieces are filled in later, when
/// the page content is extracted.
struct PDFDiffPageContext
{
    /// Index of the page in its document
    PDFInteger pageIndex{0};

    /// Descriptors of graphic pieces found on the page
    PDFPrecompiledPage::GraphicPieceInfos graphicPieces{};
};
void PDFDiff::performSteps(const std::vector<PDFInteger>& leftPages, const std::vector<PDFInteger>& rightPages)
{
std::vector<PDFDiffPageContext> leftPreparedPages;
std::vector<PDFDiffPageContext> rightPreparedPages;
auto createDiffPageContext = [](auto pageIndex)
{
PDFDiffPageContext context;
context.pageIndex = pageIndex;
return context;
};
std::transform(leftPages.cbegin(), leftPages.cend(), std::back_inserter(leftPreparedPages), createDiffPageContext);
std::transform(rightPages.cbegin(), rightPages.cend(), std::back_inserter(rightPreparedPages), createDiffPageContext);
// StepExtractContentLeftDocument
if (!m_cancelled)
{
PDFFontCache fontCache(DEFAULT_FONT_CACHE_LIMIT, DEFAULT_REALIZED_FONT_CACHE_LIMIT);
PDFOptionalContentActivity optionalContentActivity(m_leftDocument, pdf::OCUsage::View, nullptr);
fontCache.setDocument(pdf::PDFModifiedDocument(const_cast<pdf::PDFDocument*>(m_leftDocument), &optionalContentActivity));
PDFCMSManager cmsManager(nullptr);
cmsManager.setDocument(m_leftDocument);
PDFCMSPointer cms = cmsManager.getCurrentCMS();
auto fillPageContext = [&, this](PDFDiffPageContext& context)
{
PDFPrecompiledPage compiledPage;
constexpr PDFRenderer::Features features = PDFRenderer::IgnoreOptionalContent;
PDFRenderer renderer(m_leftDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings());
renderer.compile(&compiledPage, context.pageIndex);
PDFReal epsilon = calculateEpsilonForPage(m_leftDocument->getCatalog()->getPage(context.pageIndex));
context.graphicPieces = compiledPage.calculateGraphicPieceInfos(epsilon);
};
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, leftPreparedPages.begin(), leftPreparedPages.end(), fillPageContext);
stepProgress();
}
// StepExtractContentRightDocument
if (!m_cancelled)
{
PDFFontCache fontCache(DEFAULT_FONT_CACHE_LIMIT, DEFAULT_REALIZED_FONT_CACHE_LIMIT);
PDFOptionalContentActivity optionalContentActivity(m_rightDocument, pdf::OCUsage::View, nullptr);
fontCache.setDocument(pdf::PDFModifiedDocument(const_cast<pdf::PDFDocument*>(m_rightDocument), &optionalContentActivity));
PDFCMSManager cmsManager(nullptr);
cmsManager.setDocument(m_rightDocument);
PDFCMSPointer cms = cmsManager.getCurrentCMS();
auto fillPageContext = [&, this](PDFDiffPageContext& context)
{
PDFPrecompiledPage compiledPage;
constexpr PDFRenderer::Features features = PDFRenderer::IgnoreOptionalContent;
PDFRenderer renderer(m_rightDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings());
renderer.compile(&compiledPage, context.pageIndex);
PDFReal epsilon = calculateEpsilonForPage(m_leftDocument->getCatalog()->getPage(context.pageIndex));
context.graphicPieces = compiledPage.calculateGraphicPieceInfos(epsilon);
};
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, rightPreparedPages.begin(), rightPreparedPages.end(), fillPageContext);
stepProgress();
}
// StepExtractTextLeftDocument
if (!m_cancelled)
{
pdf::PDFDocumentTextFlowFactory factoryLeftDocumentTextFlow;
factoryLeftDocumentTextFlow.setCalculateBoundingBoxes(true);
PDFDocumentTextFlow leftTextFlow = factoryLeftDocumentTextFlow.create(m_leftDocument, leftPages, PDFDocumentTextFlowFactory::Algorithm::Auto);
stepProgress();
}
// StepExtractTextRightDocument
if (!m_cancelled)
{
pdf::PDFDocumentTextFlowFactory factoryRightDocumentTextFlow;
factoryRightDocumentTextFlow.setCalculateBoundingBoxes(true);
PDFDocumentTextFlow rightTextFlow = factoryRightDocumentTextFlow.create(m_rightDocument, rightPages, PDFDocumentTextFlowFactory::Algorithm::Auto);
stepProgress();
}
// StepCompare
if (!m_cancelled)
{
stepProgress();
}
}
void PDFDiff::onComparationPerformed() void PDFDiff::onComparationPerformed()
{ {
m_cancelled = false; m_cancelled = false;
@ -189,6 +273,19 @@ void PDFDiff::onComparationPerformed()
emit comparationFinished(); emit comparationFinished();
} }
/// Returns the epsilon for the given page: the relative epsilon stored
/// in this object, scaled by the larger side of the page's media box.
/// \param page Page (must not be null)
PDFReal PDFDiff::calculateEpsilonForPage(const PDFPage* page) const
{
    Q_ASSERT(page);

    const QRectF pageMediaBox = page->getMediaBox();
    const PDFReal boxWidth = pageMediaBox.width();
    const PDFReal boxHeight = pageMediaBox.height();

    // Scale the relative epsilon by the larger dimension of the media box
    const PDFReal largerSide = qMax(boxWidth, boxHeight);
    return largerSide * m_epsilon;
}
PDFDiffResult::PDFDiffResult() : PDFDiffResult::PDFDiffResult() :
m_result(true) m_result(true)
{ {

View File

@ -117,15 +117,24 @@ private:
PDFDiffResult perform(); PDFDiffResult perform();
void stepProgress(); void stepProgress();
/// Performs the steps of the comparison (content extraction and text
/// extraction for both documents). Steps are skipped, if the operation
/// has been cancelled.
/// \param leftPages Page indices of the left document
/// \param rightPages Page indices of the right document
void performSteps(const std::vector<PDFInteger>& leftPages,
const std::vector<PDFInteger>& rightPages);
void onComparationPerformed(); void onComparationPerformed();
/// Calculates real epsilon for a page. Epsilon is used in the page
/// comparison process, where points closer than epsilon
/// are recognized as equal.
/// \param page Page
/// \returns Relative epsilon of this object multiplied by the larger
///          side of the page's media box
PDFReal calculateEpsilonForPage(const PDFPage* page) const;
PDFProgress* m_progress; PDFProgress* m_progress;
const PDFDocument* m_leftDocument; const PDFDocument* m_leftDocument;
const PDFDocument* m_rightDocument; const PDFDocument* m_rightDocument;
PDFClosedIntervalSet m_pagesForLeftDocument; PDFClosedIntervalSet m_pagesForLeftDocument;
PDFClosedIntervalSet m_pagesForRightDocument; PDFClosedIntervalSet m_pagesForRightDocument;
Options m_options; Options m_options;
PDFReal m_epsilon;
std::atomic_bool m_cancelled; std::atomic_bool m_cancelled;
PDFDiffResult m_result; PDFDiffResult m_result;

View File

@ -103,7 +103,7 @@ public:
void addInputInterface(IDrawWidgetInputInterface* inputInterface); void addInputInterface(IDrawWidgetInputInterface* inputInterface);
signals: signals:
void pageRenderingErrorsChanged(PDFInteger pageIndex, int errorsCount); void pageRenderingErrorsChanged(pdf::PDFInteger pageIndex, int errorsCount);
private: private:
void updateRendererImpl(); void updateRendererImpl();

View File

@ -20,6 +20,7 @@
#include "pdfcms.h" #include "pdfcms.h"
#include <QPainter> #include <QPainter>
#include <QCryptographicHash>
namespace pdf namespace pdf
{ {
@ -831,4 +832,146 @@ void PDFPrecompiledPage::finalize(qint64 compilingTimeNS, QList<PDFRenderError>
} }
} }
/// Walks through the instructions of this precompiled page and creates
/// a descriptor for each drawn path. The descriptor contains the type of the
/// piece (text or vector graphics), its bounding rectangle in page coordinates
/// and a SHA-512 hash of the serialized pen, brush and path elements. Path
/// coordinates are multiplied by 1 / epsilon and rounded to the nearest integer
/// before hashing, so small numerical differences usually do not change the hash
/// (values lying close to a rounding boundary can still end up different).
/// Image and mesh instructions are currently skipped, no info is created for them.
/// \param epsilon Numerical precision; value, which is (fuzzy) zero, is replaced by 0.000001
/// \returns Descriptors of the drawn paths, in the order of the draw instructions
PDFPrecompiledPage::GraphicPieceInfos PDFPrecompiledPage::calculateGraphicPieceInfos(PDFReal epsilon) const
{
    GraphicPieceInfos infos;

    struct State
    {
        QMatrix matrix;
    };
    std::stack<State> stateStack;
    stateStack.emplace();

    // Check, if epsilon is not too small
    if (qFuzzyIsNull(epsilon))
    {
        epsilon = 0.000001;
    }
    const PDFReal factor = 1.0 / epsilon;

    // Process all instructions
    for (const Instruction& instruction : m_instructions)
    {
        switch (instruction.type)
        {
            case InstructionType::DrawPath:
            {
                const PathPaintData& data = m_paths[instruction.dataIndex];

                GraphicPieceInfo info;
                QByteArray serializedPath;

                // Serialize data. The stream lives in its own scope, so it is
                // finished with the byte array before the hash is computed.
                {
                    QDataStream stream(&serializedPath, QIODevice::WriteOnly);
                    stream << data.isText;
                    stream << data.pen;
                    stream << data.brush;

                    // Translate map to page coordinates
                    QPainterPath pagePath = stateStack.top().matrix.map(data.path);
                    info.type = data.isText ? GraphicPieceInfo::Type::Text : GraphicPieceInfo::Type::VectorGraphics;
                    info.boundingRect = pagePath.controlPointRect();

                    const int elementCount = pagePath.elementCount();
                    for (int i = 0; i < elementCount; ++i)
                    {
                        QPainterPath::Element element = pagePath.elementAt(i);

                        // Use 64-bit rounding: the scaled coordinate can exceed
                        // the range of int (returned by qRound), when epsilon is small.
                        const PDFReal roundedX = static_cast<PDFReal>(qRound64(element.x * factor));
                        const PDFReal roundedY = static_cast<PDFReal>(qRound64(element.y * factor));

                        stream << roundedX;
                        stream << roundedY;
                        stream << element.type;
                    }
                }

                QByteArray hash = QCryptographicHash::hash(serializedPath, QCryptographicHash::Sha512);
                Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64);

                // Copy at most as many bytes, as the target array can hold
                size_t size = qMin<size_t>(hash.length(), info.hash.size());
                std::copy(hash.data(), hash.data() + size, info.hash.data());

                infos.emplace_back(std::move(info));
                break;
            }

            case InstructionType::DrawImage:
            {
                // TODO: Images are not processed yet. Reference code from painting:
                /*const ImageData& data = m_images[instruction.dataIndex];
                const QImage& image = data.image;
                painter->save();
                QMatrix imageTransform(1.0 / image.width(), 0, 0, 1.0 / image.height(), 0, 0);
                QMatrix worldMatrix = imageTransform * painter->worldMatrix();
                // Jakub Melka: Because Qt uses opposite axis direction than PDF, then we must transform the y-axis
                // to the opposite (so the image is then unchanged)
                worldMatrix.translate(0, image.height());
                worldMatrix.scale(1, -1);
                painter->setWorldMatrix(worldMatrix);
                painter->drawImage(0, 0, image);
                painter->restore();*/
                break;
            }

            case InstructionType::DrawMesh:
            {
                // TODO: Meshes are not processed yet. Reference code from painting:
                /*const MeshPaintData& data = m_meshes[instruction.dataIndex];
                painter->save();
                painter->setWorldMatrix(pagePointToDevicePointMatrix);
                data.mesh.paint(painter, data.alpha);
                painter->restore();*/
                break;
            }

            case InstructionType::Clip:
            {
                // Do nothing, we are just collecting information
                break;
            }

            case InstructionType::SaveGraphicState:
            {
                stateStack.push(stateStack.top());
                break;
            }

            case InstructionType::RestoreGraphicState:
            {
                // Always keep the initial state on the stack. Unbalanced restore
                // would otherwise empty the stack and following top() call
                // would be undefined behaviour.
                if (stateStack.size() > 1)
                {
                    stateStack.pop();
                }
                break;
            }

            case InstructionType::SetWorldMatrix:
            {
                stateStack.top().matrix = m_matrices[instruction.dataIndex];
                break;
            }

            case InstructionType::SetCompositionMode:
            {
                // Do nothing, we are just collecting information
                break;
            }

            default:
            {
                Q_ASSERT(false);
                break;
            }
        }
    }

    return infos;
}
} // namespace pdf } // namespace pdf

View File

@ -234,6 +234,30 @@ public:
PDFSnapInfo* getSnapInfo() { return &m_snapInfo; } PDFSnapInfo* getSnapInfo() { return &m_snapInfo; }
const PDFSnapInfo* getSnapInfo() const { return &m_snapInfo; } const PDFSnapInfo* getSnapInfo() const { return &m_snapInfo; }
/// Descriptor of a single piece of graphic on the page
struct GraphicPieceInfo
{
    /// Kind of the graphic piece
    enum class Type
    {
        Unknown,
        Text,
        VectorGraphics,
        Image
    };

    /// Type of the piece
    Type type{Type::Unknown};

    /// Bounding rectangle of the piece
    QRectF boundingRect{};

    /// Hash of the piece content (64 bytes, zero-initialized)
    std::array<uint8_t, 64> hash{};
};

/// Collection of graphic piece descriptors
using GraphicPieceInfos = std::vector<GraphicPieceInfo>;
/// Creates information about the pieces of graphic in this page,
/// for example, for comparison purposes. Parameter \p epsilon
/// is for numerical precision - values under epsilon are considered
/// as equal.
/// \param epsilon Epsilon
/// \returns Information about the graphic pieces, in the order in which they are drawn
GraphicPieceInfos calculateGraphicPieceInfos(PDFReal epsilon) const;
private: private:
struct PathPaintData struct PathPaintData
{ {