mirror of https://github.com/JakubMelka/PDF4QT.git
DocDiff application: calculating page graphic piece info
This commit is contained in:
parent
e354a03564
commit
b2a9342047
|
@ -16,7 +16,13 @@
|
||||||
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
#include "pdfdiff.h"
|
#include "pdfdiff.h"
|
||||||
|
#include "pdfrenderer.h"
|
||||||
#include "pdfdocumenttextflow.h"
|
#include "pdfdocumenttextflow.h"
|
||||||
|
#include "pdfexecutionpolicy.h"
|
||||||
|
#include "pdffont.h"
|
||||||
|
#include "pdfcms.h"
|
||||||
|
#include "pdfcompiler.h"
|
||||||
|
#include "pdfconstants.h"
|
||||||
|
|
||||||
#include <QtConcurrent/QtConcurrent>
|
#include <QtConcurrent/QtConcurrent>
|
||||||
|
|
||||||
|
@ -29,6 +35,7 @@ PDFDiff::PDFDiff(QObject* parent) :
|
||||||
m_leftDocument(nullptr),
|
m_leftDocument(nullptr),
|
||||||
m_rightDocument(nullptr),
|
m_rightDocument(nullptr),
|
||||||
m_options(Asynchronous),
|
m_options(Asynchronous),
|
||||||
|
m_epsilon(0.0001),
|
||||||
m_cancelled(false)
|
m_cancelled(false)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -141,30 +148,11 @@ PDFDiffResult PDFDiff::perform()
|
||||||
{
|
{
|
||||||
ProgressStartupInfo info;
|
ProgressStartupInfo info;
|
||||||
info.showDialog = false;
|
info.showDialog = false;
|
||||||
info.text = tr("");
|
info.text = tr("Comparing documents.");
|
||||||
m_progress->start(StepLast, std::move(info));
|
m_progress->start(StepLast, std::move(info));
|
||||||
}
|
}
|
||||||
|
|
||||||
// StepExtractContentLeftDocument
|
performSteps(leftPages, rightPages);
|
||||||
stepProgress();
|
|
||||||
|
|
||||||
// StepExtractContentRightDocument
|
|
||||||
stepProgress();
|
|
||||||
|
|
||||||
// StepExtractTextLeftDocument
|
|
||||||
pdf::PDFDocumentTextFlowFactory factoryLeftDocumentTextFlow;
|
|
||||||
factoryLeftDocumentTextFlow.setCalculateBoundingBoxes(true);
|
|
||||||
PDFDocumentTextFlow leftTextFlow = factoryLeftDocumentTextFlow.create(m_leftDocument, leftPages, PDFDocumentTextFlowFactory::Algorithm::Auto);
|
|
||||||
stepProgress();
|
|
||||||
|
|
||||||
// StepExtractTextRightDocument
|
|
||||||
pdf::PDFDocumentTextFlowFactory factoryRightDocumentTextFlow;
|
|
||||||
factoryRightDocumentTextFlow.setCalculateBoundingBoxes(true);
|
|
||||||
PDFDocumentTextFlow rightTextFlow = factoryRightDocumentTextFlow.create(m_rightDocument, rightPages, PDFDocumentTextFlowFactory::Algorithm::Auto);
|
|
||||||
stepProgress();
|
|
||||||
|
|
||||||
// StepCompare
|
|
||||||
stepProgress();
|
|
||||||
|
|
||||||
if (m_progress)
|
if (m_progress)
|
||||||
{
|
{
|
||||||
|
@ -182,6 +170,102 @@ void PDFDiff::stepProgress()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct PDFDiffPageContext
|
||||||
|
{
|
||||||
|
PDFInteger pageIndex = 0;
|
||||||
|
PDFPrecompiledPage::GraphicPieceInfos graphicPieces;
|
||||||
|
};
|
||||||
|
|
||||||
|
void PDFDiff::performSteps(const std::vector<PDFInteger>& leftPages, const std::vector<PDFInteger>& rightPages)
|
||||||
|
{
|
||||||
|
std::vector<PDFDiffPageContext> leftPreparedPages;
|
||||||
|
std::vector<PDFDiffPageContext> rightPreparedPages;
|
||||||
|
|
||||||
|
auto createDiffPageContext = [](auto pageIndex)
|
||||||
|
{
|
||||||
|
PDFDiffPageContext context;
|
||||||
|
context.pageIndex = pageIndex;
|
||||||
|
return context;
|
||||||
|
};
|
||||||
|
std::transform(leftPages.cbegin(), leftPages.cend(), std::back_inserter(leftPreparedPages), createDiffPageContext);
|
||||||
|
std::transform(rightPages.cbegin(), rightPages.cend(), std::back_inserter(rightPreparedPages), createDiffPageContext);
|
||||||
|
|
||||||
|
// StepExtractContentLeftDocument
|
||||||
|
if (!m_cancelled)
|
||||||
|
{
|
||||||
|
PDFFontCache fontCache(DEFAULT_FONT_CACHE_LIMIT, DEFAULT_REALIZED_FONT_CACHE_LIMIT);
|
||||||
|
PDFOptionalContentActivity optionalContentActivity(m_leftDocument, pdf::OCUsage::View, nullptr);
|
||||||
|
fontCache.setDocument(pdf::PDFModifiedDocument(const_cast<pdf::PDFDocument*>(m_leftDocument), &optionalContentActivity));
|
||||||
|
|
||||||
|
PDFCMSManager cmsManager(nullptr);
|
||||||
|
cmsManager.setDocument(m_leftDocument);
|
||||||
|
PDFCMSPointer cms = cmsManager.getCurrentCMS();
|
||||||
|
|
||||||
|
auto fillPageContext = [&, this](PDFDiffPageContext& context)
|
||||||
|
{
|
||||||
|
PDFPrecompiledPage compiledPage;
|
||||||
|
constexpr PDFRenderer::Features features = PDFRenderer::IgnoreOptionalContent;
|
||||||
|
PDFRenderer renderer(m_leftDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings());
|
||||||
|
renderer.compile(&compiledPage, context.pageIndex);
|
||||||
|
|
||||||
|
PDFReal epsilon = calculateEpsilonForPage(m_leftDocument->getCatalog()->getPage(context.pageIndex));
|
||||||
|
context.graphicPieces = compiledPage.calculateGraphicPieceInfos(epsilon);
|
||||||
|
};
|
||||||
|
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, leftPreparedPages.begin(), leftPreparedPages.end(), fillPageContext);
|
||||||
|
stepProgress();
|
||||||
|
}
|
||||||
|
|
||||||
|
// StepExtractContentRightDocument
|
||||||
|
if (!m_cancelled)
|
||||||
|
{
|
||||||
|
PDFFontCache fontCache(DEFAULT_FONT_CACHE_LIMIT, DEFAULT_REALIZED_FONT_CACHE_LIMIT);
|
||||||
|
PDFOptionalContentActivity optionalContentActivity(m_rightDocument, pdf::OCUsage::View, nullptr);
|
||||||
|
fontCache.setDocument(pdf::PDFModifiedDocument(const_cast<pdf::PDFDocument*>(m_rightDocument), &optionalContentActivity));
|
||||||
|
|
||||||
|
PDFCMSManager cmsManager(nullptr);
|
||||||
|
cmsManager.setDocument(m_rightDocument);
|
||||||
|
PDFCMSPointer cms = cmsManager.getCurrentCMS();
|
||||||
|
|
||||||
|
auto fillPageContext = [&, this](PDFDiffPageContext& context)
|
||||||
|
{
|
||||||
|
PDFPrecompiledPage compiledPage;
|
||||||
|
constexpr PDFRenderer::Features features = PDFRenderer::IgnoreOptionalContent;
|
||||||
|
PDFRenderer renderer(m_rightDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings());
|
||||||
|
renderer.compile(&compiledPage, context.pageIndex);
|
||||||
|
|
||||||
|
PDFReal epsilon = calculateEpsilonForPage(m_leftDocument->getCatalog()->getPage(context.pageIndex));
|
||||||
|
context.graphicPieces = compiledPage.calculateGraphicPieceInfos(epsilon);
|
||||||
|
};
|
||||||
|
|
||||||
|
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, rightPreparedPages.begin(), rightPreparedPages.end(), fillPageContext);
|
||||||
|
stepProgress();
|
||||||
|
}
|
||||||
|
|
||||||
|
// StepExtractTextLeftDocument
|
||||||
|
if (!m_cancelled)
|
||||||
|
{
|
||||||
|
pdf::PDFDocumentTextFlowFactory factoryLeftDocumentTextFlow;
|
||||||
|
factoryLeftDocumentTextFlow.setCalculateBoundingBoxes(true);
|
||||||
|
PDFDocumentTextFlow leftTextFlow = factoryLeftDocumentTextFlow.create(m_leftDocument, leftPages, PDFDocumentTextFlowFactory::Algorithm::Auto);
|
||||||
|
stepProgress();
|
||||||
|
}
|
||||||
|
|
||||||
|
// StepExtractTextRightDocument
|
||||||
|
if (!m_cancelled)
|
||||||
|
{
|
||||||
|
pdf::PDFDocumentTextFlowFactory factoryRightDocumentTextFlow;
|
||||||
|
factoryRightDocumentTextFlow.setCalculateBoundingBoxes(true);
|
||||||
|
PDFDocumentTextFlow rightTextFlow = factoryRightDocumentTextFlow.create(m_rightDocument, rightPages, PDFDocumentTextFlowFactory::Algorithm::Auto);
|
||||||
|
stepProgress();
|
||||||
|
}
|
||||||
|
|
||||||
|
// StepCompare
|
||||||
|
if (!m_cancelled)
|
||||||
|
{
|
||||||
|
stepProgress();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void PDFDiff::onComparationPerformed()
|
void PDFDiff::onComparationPerformed()
|
||||||
{
|
{
|
||||||
m_cancelled = false;
|
m_cancelled = false;
|
||||||
|
@ -189,6 +273,19 @@ void PDFDiff::onComparationPerformed()
|
||||||
emit comparationFinished();
|
emit comparationFinished();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Calculates the absolute epsilon for a page by scaling the relative
/// epsilon (m_epsilon) with the longer side of the page's media box.
/// \param page Page (must not be null)
/// \returns Longer media box side multiplied by m_epsilon
PDFReal PDFDiff::calculateEpsilonForPage(const PDFPage* page) const
{
    Q_ASSERT(page);

    const QRectF pageRect = page->getMediaBox();
    const PDFReal longerSide = qMax<PDFReal>(pageRect.width(), pageRect.height());

    return longerSide * m_epsilon;
}
|
||||||
|
|
||||||
PDFDiffResult::PDFDiffResult() :
|
PDFDiffResult::PDFDiffResult() :
|
||||||
m_result(true)
|
m_result(true)
|
||||||
{
|
{
|
||||||
|
|
|
@ -117,15 +117,24 @@ private:
|
||||||
|
|
||||||
PDFDiffResult perform();
|
PDFDiffResult perform();
|
||||||
void stepProgress();
|
void stepProgress();
|
||||||
|
void performSteps(const std::vector<PDFInteger>& leftPages,
|
||||||
|
const std::vector<PDFInteger>& rightPages);
|
||||||
|
|
||||||
void onComparationPerformed();
|
void onComparationPerformed();
|
||||||
|
|
||||||
|
/// Calculates real epsilon for a page. Epsilon is used in page
|
||||||
|
/// comparison process, where points closer than epsilon
|
||||||
|
/// are recognized as equal.
|
||||||
|
/// \param page Page
|
||||||
|
PDFReal calculateEpsilonForPage(const PDFPage* page) const;
|
||||||
|
|
||||||
PDFProgress* m_progress;
|
PDFProgress* m_progress;
|
||||||
const PDFDocument* m_leftDocument;
|
const PDFDocument* m_leftDocument;
|
||||||
const PDFDocument* m_rightDocument;
|
const PDFDocument* m_rightDocument;
|
||||||
PDFClosedIntervalSet m_pagesForLeftDocument;
|
PDFClosedIntervalSet m_pagesForLeftDocument;
|
||||||
PDFClosedIntervalSet m_pagesForRightDocument;
|
PDFClosedIntervalSet m_pagesForRightDocument;
|
||||||
Options m_options;
|
Options m_options;
|
||||||
|
PDFReal m_epsilon;
|
||||||
std::atomic_bool m_cancelled;
|
std::atomic_bool m_cancelled;
|
||||||
PDFDiffResult m_result;
|
PDFDiffResult m_result;
|
||||||
|
|
||||||
|
|
|
@ -103,7 +103,7 @@ public:
|
||||||
void addInputInterface(IDrawWidgetInputInterface* inputInterface);
|
void addInputInterface(IDrawWidgetInputInterface* inputInterface);
|
||||||
|
|
||||||
signals:
|
signals:
|
||||||
void pageRenderingErrorsChanged(PDFInteger pageIndex, int errorsCount);
|
void pageRenderingErrorsChanged(pdf::PDFInteger pageIndex, int errorsCount);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void updateRendererImpl();
|
void updateRendererImpl();
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include "pdfcms.h"
|
#include "pdfcms.h"
|
||||||
|
|
||||||
#include <QPainter>
|
#include <QPainter>
|
||||||
|
#include <QCryptographicHash>
|
||||||
|
|
||||||
namespace pdf
|
namespace pdf
|
||||||
{
|
{
|
||||||
|
@ -831,4 +832,146 @@ void PDFPrecompiledPage::finalize(qint64 compilingTimeNS, QList<PDFRenderError>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collects information about the graphic pieces drawn by this precompiled
/// page. For every path drawing instruction, the path is mapped by the
/// matrix of the current graphic state, its control point rectangle is
/// stored as the bounding rectangle, and a SHA-512 hash is calculated from
/// the text flag, pen, brush and the path elements. Element coordinates are
/// multiplied by 1 / epsilon and rounded before hashing. Image and mesh
/// instructions currently produce no entries.
/// \param epsilon Numerical precision; a value fuzzy-equal to zero is
///        replaced by 0.000001 to avoid division by zero
/// \returns Graphic piece infos in the order of the drawing instructions
PDFPrecompiledPage::GraphicPieceInfos PDFPrecompiledPage::calculateGraphicPieceInfos(PDFReal epsilon) const
{
    GraphicPieceInfos infos;

    struct State
    {
        QMatrix matrix;
    };
    std::stack<State> stateStack;
    stateStack.emplace();

    // Check, if epsilon is not too small
    if (qFuzzyIsNull(epsilon))
    {
        epsilon = 0.000001;
    }
    const PDFReal factor = 1.0 / epsilon;

    // Process all instructions
    for (const Instruction& instruction : m_instructions)
    {
        switch (instruction.type)
        {
            case InstructionType::DrawPath:
            {
                const PathPaintData& data = m_paths[instruction.dataIndex];

                GraphicPieceInfo info;
                QByteArray serializedPath;

                // Serialize data; the stream lives in its own scope, so it is
                // destroyed before the byte array is hashed.
                {
                    QDataStream stream(&serializedPath, QIODevice::WriteOnly);

                    stream << data.isText;
                    stream << data.pen;
                    stream << data.brush;

                    // Translate map to page coordinates
                    const QPainterPath pagePath = stateStack.top().matrix.map(data.path);

                    info.type = data.isText ? GraphicPieceInfo::Type::Text : GraphicPieceInfo::Type::VectorGraphics;
                    info.boundingRect = pagePath.controlPointRect();

                    const int elementCount = pagePath.elementCount();
                    for (int i = 0; i < elementCount; ++i)
                    {
                        const QPainterPath::Element element = pagePath.elementAt(i);

                        // Use 64-bit rounding: the scaled coordinate can exceed
                        // the range of int when epsilon is very small.
                        const PDFReal roundedX = qRound64(element.x * factor);
                        const PDFReal roundedY = qRound64(element.y * factor);

                        stream << roundedX;
                        stream << roundedY;
                        stream << element.type;
                    }
                }

                const QByteArray hash = QCryptographicHash::hash(serializedPath, QCryptographicHash::Sha512);
                Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64);

                // Never copy more bytes than the target array can hold
                const size_t size = qMin<size_t>(hash.length(), info.hash.size());
                std::copy(hash.data(), hash.data() + size, info.hash.data());

                infos.emplace_back(std::move(info));
                break;
            }

            case InstructionType::DrawImage:
            {
                // TODO: images are not collected yet
                break;
            }

            case InstructionType::DrawMesh:
            {
                // TODO: meshes are not collected yet
                break;
            }

            case InstructionType::Clip:
            {
                // Do nothing, we are just collecting information
                break;
            }

            case InstructionType::SaveGraphicState:
            {
                stateStack.push(stateStack.top());
                break;
            }

            case InstructionType::RestoreGraphicState:
            {
                // Always keep the initial state, so top() stays valid even
                // if the save/restore instructions are unbalanced.
                if (stateStack.size() > 1)
                {
                    stateStack.pop();
                }
                break;
            }

            case InstructionType::SetWorldMatrix:
            {
                stateStack.top().matrix = m_matrices[instruction.dataIndex];
                break;
            }

            case InstructionType::SetCompositionMode:
            {
                // Do nothing, we are just collecting information
                break;
            }

            default:
            {
                Q_ASSERT(false);
                break;
            }
        }
    }

    return infos;
}
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
|
|
@ -234,6 +234,30 @@ public:
|
||||||
PDFSnapInfo* getSnapInfo() { return &m_snapInfo; }
|
PDFSnapInfo* getSnapInfo() { return &m_snapInfo; }
|
||||||
const PDFSnapInfo* getSnapInfo() const { return &m_snapInfo; }
|
const PDFSnapInfo* getSnapInfo() const { return &m_snapInfo; }
|
||||||
|
|
||||||
|
struct GraphicPieceInfo
|
||||||
|
{
|
||||||
|
enum class Type
|
||||||
|
{
|
||||||
|
Unknown,
|
||||||
|
Text,
|
||||||
|
VectorGraphics,
|
||||||
|
Image
|
||||||
|
};
|
||||||
|
|
||||||
|
Type type = Type::Unknown;
|
||||||
|
QRectF boundingRect;
|
||||||
|
std::array<uint8_t, 64> hash = { };
|
||||||
|
};
|
||||||
|
|
||||||
|
using GraphicPieceInfos = std::vector<GraphicPieceInfo>;
|
||||||
|
|
||||||
|
/// Creates information about piece of graphic in this page,
|
||||||
|
/// for example, for comparison purposes. Parameter \p epsilon
|
||||||
|
/// is for numerical precision - values under epsilon are considered
|
||||||
|
/// as equal.
|
||||||
|
/// \param epsilon Epsilon
|
||||||
|
GraphicPieceInfos calculateGraphicPieceInfos(PDFReal epsilon) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct PathPaintData
|
struct PathPaintData
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue