// Copyright (C) 2021 Jakub Melka // // This file is part of PDF4QT. // // PDF4QT is free software: you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation, either version 3 of the License, or // with the written consent of the copyright owner, any later version. // // PDF4QT is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with PDF4QT. If not, see . #include "pdfdiff.h" #include "pdfrenderer.h" #include "pdfdocumenttextflow.h" #include "pdfexecutionpolicy.h" #include "pdffont.h" #include "pdfcms.h" #include "pdfcompiler.h" #include "pdfconstants.h" #include "pdfalgorithmlcs.h" #include namespace pdf { class PDFDiffHelper { public: using GraphicPieceInfo = PDFPrecompiledPage::GraphicPieceInfo; using GraphicPieceInfos = PDFPrecompiledPage::GraphicPieceInfos; using PageSequence = PDFAlgorithmLongestCommonSubsequenceBase::Sequence; struct Differences { GraphicPieceInfos left; GraphicPieceInfos right; bool isEmpty() const { return left.empty() && right.empty(); } }; struct TextFlowDifferences { PDFDocumentTextFlow leftTextFlow; PDFDocumentTextFlow rightTextFlow; QString leftText; QString rightText; }; static Differences calculateDifferences(const GraphicPieceInfos& left, const GraphicPieceInfos& right, PDFReal epsilon); static std::vector getLeftUnmatched(const PageSequence& sequence); static std::vector getRightUnmatched(const PageSequence& sequence); static void matchPage(PageSequence& sequence, size_t leftPage, size_t rightPage); }; PDFDiff::PDFDiff(QObject* parent) : BaseClass(parent), m_progress(nullptr), m_leftDocument(nullptr), m_rightDocument(nullptr), m_options(Asynchronous | PC_Text | PC_VectorGraphics | PC_Images), m_epsilon(0.001), m_cancelled(false), m_textAnalysisAlgorithm(PDFDocumentTextFlowFactory::Algorithm::Layout) { } PDFDiff::~PDFDiff() { stop(); } void PDFDiff::setLeftDocument(const PDFDocument* leftDocument) { if (m_leftDocument != leftDocument) { stop(); m_leftDocument = leftDocument; } } void PDFDiff::setRightDocument(const PDFDocument* rightDocument) { if (m_rightDocument != rightDocument) { stop(); m_rightDocument = rightDocument; } } void PDFDiff::setPagesForLeftDocument(PDFClosedIntervalSet pagesForLeftDocument) { stop(); m_pagesForLeftDocument = std::move(pagesForLeftDocument); } void PDFDiff::setPagesForRightDocument(PDFClosedIntervalSet pagesForRightDocument) { stop(); m_pagesForRightDocument = std::move(pagesForRightDocument); } void PDFDiff::start() { // Jakub Melka: First, we must ensure, that comparation // process is finished, otherwise we must wait for end. // Then, create a new future watcher. stop(); m_cancelled = false; if (m_options.testFlag(Asynchronous)) { m_futureWatcher = std::nullopt; m_futureWatcher.emplace(); m_future = QtConcurrent::run(std::bind(&PDFDiff::perform, this)); connect(&*m_futureWatcher, &QFutureWatcher::finished, this, &PDFDiff::onComparationPerformed); m_futureWatcher->setFuture(m_future); } else { // Just do comparation immediately m_result = perform(); emit comparationFinished(); } } void PDFDiff::stop() { if (m_futureWatcher && !m_futureWatcher->isFinished()) { // Do stop only if process doesn't finished already. // If we are finished, we do not want to set cancelled state. m_cancelled = true; m_futureWatcher->waitForFinished(); } } PDFDiffResult PDFDiff::perform() { PDFDiffResult result; if (!m_leftDocument || !m_rightDocument) { result.setResult(tr("No document to be compared.")); return result; } if (m_pagesForLeftDocument.isEmpty() || m_pagesForRightDocument.isEmpty()) { result.setResult(tr("No page to be compared.")); return result; } auto leftPages = m_pagesForLeftDocument.unfold(); auto rightPages = m_pagesForRightDocument.unfold(); const size_t leftDocumentPageCount = m_leftDocument->getCatalog()->getPageCount(); const size_t rightDocumentPageCount = m_rightDocument->getCatalog()->getPageCount(); if (leftPages.front() < 0 || leftPages.back() >= PDFInteger(leftDocumentPageCount) || rightPages.front() < 0 || rightPages.back() >= PDFInteger(rightDocumentPageCount)) { result.setResult(tr("Invalid page range.")); return result; } if (m_progress) { ProgressStartupInfo info; info.showDialog = false; info.text = tr("Comparing documents."); m_progress->start(StepLast, std::move(info)); } performSteps(leftPages, rightPages); if (m_progress) { m_progress->finish(); } return result; } void PDFDiff::stepProgress() { if (m_progress) { m_progress->step(); } } struct PDFDiffPageContext { PDFInteger pageIndex = 0; std::array pageHash = { }; PDFPrecompiledPage::GraphicPieceInfos graphicPieces; PDFDocumentTextFlow text; }; void PDFDiff::performPageMatching(const std::vector& leftPreparedPages, const std::vector& rightPreparedPages, PDFAlgorithmLongestCommonSubsequenceBase::Sequence& pageSequence, std::map& pageMatches) { // Match pages. We will use following algorithm: exact solution can fail, because // we are using hashes and due to numerical instability, hashes can be different // even for exactly the same page. But if hashes are the same, the page must be the same. // So, we use longest common subsequence algorithm to detect same page ranges, // and then we match the rest. We assume the number of failing pages is relatively small. auto comparePages = [&](const PDFDiffPageContext& left, const PDFDiffPageContext& right) { if (left.pageHash == right.pageHash) { return true; } auto it = pageMatches.find(left.pageIndex); if (it != pageMatches.cend()) { return it->second == right.pageIndex; } return false; }; PDFAlgorithmLongestCommonSubsequence algorithm(leftPreparedPages.cbegin(), leftPreparedPages.cend(), rightPreparedPages.cbegin(), rightPreparedPages.cend(), comparePages); algorithm.perform(); pageSequence = algorithm.getSequence(); std::vector leftUnmatched = PDFDiffHelper::getLeftUnmatched(pageSequence); std::vector rightUnmatched = PDFDiffHelper::getRightUnmatched(pageSequence); // We are matching left pages to the right ones std::map> matchedPages; for (const size_t index : leftUnmatched) { matchedPages[index] = std::vector(); } auto matchLeftPage = [&, this](size_t leftIndex) { const PDFDiffPageContext& leftPageContext = leftPreparedPages[leftIndex]; auto page = m_leftDocument->getCatalog()->getPage(leftPageContext.pageIndex); PDFReal epsilon = calculateEpsilonForPage(page); for (const size_t rightIndex : rightUnmatched) { const PDFDiffPageContext& rightPageContext = rightPreparedPages[rightIndex]; if (leftPageContext.graphicPieces.size() != rightPageContext.graphicPieces.size()) { // Match cannot exist, graphic pieces have different size continue; } PDFDiffHelper::Differences differences = PDFDiffHelper::calculateDifferences(leftPageContext.graphicPieces, rightPageContext.graphicPieces, epsilon); if (differences.isEmpty()) { // Jakub Melka: we have a match matchedPages[leftIndex].push_back(rightIndex); } } }; PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, leftUnmatched.begin(), leftUnmatched.end(), matchLeftPage); std::vector leftPagesMoved; std::vector rightPagesMoved; std::set matchedRightPages; for (const auto& matchedPage : matchedPages) { for (size_t rightContextIndex : matchedPage.second) { if (!matchedRightPages.count(rightContextIndex)) { matchedRightPages.insert(rightContextIndex); const PDFDiffPageContext& leftPageContext = leftPreparedPages[matchedPage.first]; const PDFDiffPageContext& rightPageContext = rightPreparedPages[rightContextIndex]; leftPagesMoved.push_back(leftPageContext.pageIndex); rightPagesMoved.push_back(rightPageContext.pageIndex); pageMatches[leftPageContext.pageIndex] = rightPageContext.pageIndex; } } } if (!pageMatches.empty()) { algorithm.perform(); pageSequence = algorithm.getSequence(); } std::sort(leftPagesMoved.begin(), leftPagesMoved.end()); std::sort(rightPagesMoved.begin(), rightPagesMoved.end()); PDFAlgorithmLongestCommonSubsequenceBase::markSequence(pageSequence, leftPagesMoved, rightPagesMoved); } void PDFDiff::performSteps(const std::vector& leftPages, const std::vector& rightPages) { std::vector leftPreparedPages; std::vector rightPreparedPages; PDFDiffHelper::PageSequence pageSequence; std::map pageMatches; // Indices are real page indices, not indices to page contexts auto createDiffPageContext = [](auto pageIndex) { PDFDiffPageContext context; context.pageIndex = pageIndex; return context; }; std::transform(leftPages.cbegin(), leftPages.cend(), std::back_inserter(leftPreparedPages), createDiffPageContext); std::transform(rightPages.cbegin(), rightPages.cend(), std::back_inserter(rightPreparedPages), createDiffPageContext); // StepExtractContentLeftDocument if (!m_cancelled) { PDFFontCache fontCache(DEFAULT_FONT_CACHE_LIMIT, DEFAULT_REALIZED_FONT_CACHE_LIMIT); PDFOptionalContentActivity optionalContentActivity(m_leftDocument, pdf::OCUsage::View, nullptr); fontCache.setDocument(pdf::PDFModifiedDocument(const_cast(m_leftDocument), &optionalContentActivity)); PDFCMSManager cmsManager(nullptr); cmsManager.setDocument(m_leftDocument); PDFCMSPointer cms = cmsManager.getCurrentCMS(); auto fillPageContext = [&, this](PDFDiffPageContext& context) { PDFPrecompiledPage compiledPage; constexpr PDFRenderer::Features features = PDFRenderer::IgnoreOptionalContent; PDFRenderer renderer(m_leftDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings()); renderer.compile(&compiledPage, context.pageIndex); auto page = m_leftDocument->getCatalog()->getPage(context.pageIndex); PDFReal epsilon = calculateEpsilonForPage(page); context.graphicPieces = compiledPage.calculateGraphicPieceInfos(page->getMediaBox(), epsilon); finalizeGraphicsPieces(context); }; PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, leftPreparedPages.begin(), leftPreparedPages.end(), fillPageContext); stepProgress(); } // StepExtractContentRightDocument if (!m_cancelled) { PDFFontCache fontCache(DEFAULT_FONT_CACHE_LIMIT, DEFAULT_REALIZED_FONT_CACHE_LIMIT); PDFOptionalContentActivity optionalContentActivity(m_rightDocument, pdf::OCUsage::View, nullptr); fontCache.setDocument(pdf::PDFModifiedDocument(const_cast(m_rightDocument), &optionalContentActivity)); PDFCMSManager cmsManager(nullptr); cmsManager.setDocument(m_rightDocument); PDFCMSPointer cms = cmsManager.getCurrentCMS(); auto fillPageContext = [&, this](PDFDiffPageContext& context) { PDFPrecompiledPage compiledPage; constexpr PDFRenderer::Features features = PDFRenderer::IgnoreOptionalContent; PDFRenderer renderer(m_rightDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings()); renderer.compile(&compiledPage, context.pageIndex); const PDFPage* page = m_rightDocument->getCatalog()->getPage(context.pageIndex); PDFReal epsilon = calculateEpsilonForPage(page); context.graphicPieces = compiledPage.calculateGraphicPieceInfos(page->getMediaBox(), epsilon); finalizeGraphicsPieces(context); }; PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, rightPreparedPages.begin(), rightPreparedPages.end(), fillPageContext); stepProgress(); } // StepMatchPages if (!m_cancelled) { performPageMatching(leftPreparedPages, rightPreparedPages, pageSequence, pageMatches); stepProgress(); } // StepExtractTextLeftDocument if (!m_cancelled) { pdf::PDFDocumentTextFlowFactory factoryLeftDocumentTextFlow; factoryLeftDocumentTextFlow.setCalculateBoundingBoxes(true); PDFDocumentTextFlow leftTextFlow = factoryLeftDocumentTextFlow.create(m_leftDocument, leftPages, m_textAnalysisAlgorithm); std::map splittedText = leftTextFlow.split(PDFDocumentTextFlow::Text); for (PDFDiffPageContext& leftContext : leftPreparedPages) { auto it = splittedText.find(leftContext.pageIndex); if (it != splittedText.cend()) { leftContext.text = std::move(it->second); splittedText.erase(it); } } stepProgress(); } // StepExtractTextRightDocument if (!m_cancelled) { pdf::PDFDocumentTextFlowFactory factoryRightDocumentTextFlow; factoryRightDocumentTextFlow.setCalculateBoundingBoxes(true); PDFDocumentTextFlow rightTextFlow = factoryRightDocumentTextFlow.create(m_rightDocument, rightPages, m_textAnalysisAlgorithm); std::map splittedText = rightTextFlow.split(PDFDocumentTextFlow::Text); for (PDFDiffPageContext& rightContext : rightPreparedPages) { auto it = splittedText.find(rightContext.pageIndex); if (it != splittedText.cend()) { rightContext.text = std::move(it->second); splittedText.erase(it); } } stepProgress(); } // StepCompare if (!m_cancelled) { performCompare(leftPreparedPages, rightPreparedPages, pageSequence, pageMatches); stepProgress(); } } void PDFDiff::performCompare(const std::vector& leftPreparedPages, const std::vector& rightPreparedPages, PDFAlgorithmLongestCommonSubsequenceBase::Sequence& pageSequence, const std::map& pageMatches) { using AlgorithmLCS = PDFAlgorithmLongestCommonSubsequenceBase; auto modifiedRanges = AlgorithmLCS::getModifiedRanges(pageSequence); // First find all moved pages for (const AlgorithmLCS::SequenceItem& item : pageSequence) { if (item.isMovedLeft()) { Q_ASSERT(pageMatches.contains(leftPreparedPages.at(item.index1).pageIndex)); const PDFInteger leftIndex = leftPreparedPages[item.index1].pageIndex; const PDFInteger rightIndex = pageMatches.at(leftIndex); m_result.addPageMoved(leftIndex, rightIndex); } if (item.isMoved()) { m_result.addPageMoved(leftPreparedPages[item.index1].pageIndex, rightPreparedPages[item.index2].pageIndex); } } std::vector textFlowDifferences; for (const auto& range : modifiedRanges) { AlgorithmLCS::SequenceItemFlags flags = AlgorithmLCS::collectFlags(range); const bool isAdded = flags.testFlag(AlgorithmLCS::Added); const bool isRemoved = flags.testFlag(AlgorithmLCS::Removed); const bool isReplaced = flags.testFlag(AlgorithmLCS::Replaced); Q_ASSERT(isAdded || isRemoved || isReplaced); // There are two cases. Some page content was replaced, or either // page range was added, or page range was removed. if (isReplaced) { PDFDocumentTextFlow leftTextFlow; PDFDocumentTextFlow rightTextFlow; const bool isTextComparedAsVectorGraphics = m_options.testFlag(CompareTextsAsVector); for (auto it = range.first; it != range.second; ++it) { const AlgorithmLCS::SequenceItem& item = *it; if (item.isReplaced()) { const PDFDiffPageContext& leftPageContext = leftPreparedPages[item.index1]; const PDFDiffPageContext& rightPageContext = rightPreparedPages[item.index2]; if (!isTextComparedAsVectorGraphics) { leftTextFlow.append(leftPageContext.text); rightTextFlow.append(rightPageContext.text); } auto pageLeft = m_leftDocument->getCatalog()->getPage(leftPageContext.pageIndex); auto pageRight = m_rightDocument->getCatalog()->getPage(rightPageContext.pageIndex); PDFReal epsilon = (calculateEpsilonForPage(pageLeft) + calculateEpsilonForPage(pageRight)) * 0.5; PDFDiffHelper::Differences differences = PDFDiffHelper::calculateDifferences(leftPageContext.graphicPieces, rightPageContext.graphicPieces, epsilon); for (const PDFDiffHelper::GraphicPieceInfo& info : differences.left) { switch (info.type) { case PDFDiffHelper::GraphicPieceInfo::Type::Text: if (isTextComparedAsVectorGraphics) { m_result.addRemovedTextCharContent(leftPageContext.pageIndex, info.boundingRect); } break; case PDFDiffHelper::GraphicPieceInfo::Type::VectorGraphics: m_result.addRemovedVectorGraphicContent(leftPageContext.pageIndex, info.boundingRect); break; case PDFDiffHelper::GraphicPieceInfo::Type::Image: m_result.addRemovedImageContent(leftPageContext.pageIndex, info.boundingRect); break; case PDFDiffHelper::GraphicPieceInfo::Type::Shading: m_result.addRemovedShadingContent(leftPageContext.pageIndex, info.boundingRect); break; default: Q_ASSERT(false); break; } } for (const PDFDiffHelper::GraphicPieceInfo& info : differences.right) { switch (info.type) { case PDFDiffHelper::GraphicPieceInfo::Type::Text: if (isTextComparedAsVectorGraphics) { m_result.addAddedTextCharContent(rightPageContext.pageIndex, info.boundingRect); } break; case PDFDiffHelper::GraphicPieceInfo::Type::VectorGraphics: m_result.addAddedVectorGraphicContent(rightPageContext.pageIndex, info.boundingRect); break; case PDFDiffHelper::GraphicPieceInfo::Type::Image: m_result.addAddedImageContent(rightPageContext.pageIndex, info.boundingRect); break; case PDFDiffHelper::GraphicPieceInfo::Type::Shading: m_result.addAddedShadingContent(rightPageContext.pageIndex, info.boundingRect); break; default: Q_ASSERT(false); break; } } } if (item.isAdded()) { const PDFDiffPageContext& rightPageContext = rightPreparedPages[item.index2]; if (!isTextComparedAsVectorGraphics) { rightTextFlow.append(rightPageContext.text); } m_result.addPageAdded(rightPageContext.pageIndex); } if (item.isRemoved()) { const PDFDiffPageContext& leftPageContext = leftPreparedPages[item.index1]; if (!isTextComparedAsVectorGraphics) { leftTextFlow.append(leftPageContext.text); } m_result.addPageRemoved(leftPageContext.pageIndex); } } textFlowDifferences.emplace_back(); PDFDiffHelper::TextFlowDifferences& addedDifferences = textFlowDifferences.back(); addedDifferences.leftText = leftTextFlow.getText(); addedDifferences.rightText = rightTextFlow.getText(); if (addedDifferences.leftText == addedDifferences.rightText) { // Text is the same, no difference is found textFlowDifferences.pop_back(); } else { addedDifferences.leftTextFlow = std::move(leftTextFlow); addedDifferences.rightTextFlow = std::move(rightTextFlow); } } else { for (auto it = range.first; it != range.second; ++it) { const AlgorithmLCS::SequenceItem& item = *it; Q_ASSERT(item.isAdded() || item.isRemoved()); if (item.isAdded()) { m_result.addPageAdded(rightPreparedPages[item.index2].pageIndex); } if (item.isRemoved()) { m_result.addPageRemoved(leftPreparedPages[item.index1].pageIndex); } } } } // Jakub Melka: try to compare text differences auto compareTexts = [this](PDFDiffHelper::TextFlowDifferences& context) { struct CompareItem { size_t index = 0; int charIndex = 0; bool left = false; }; std::vector leftItems; std::vector rightItems; const size_t leftCount = context.leftTextFlow.getSize(); for (size_t i = 0; i < leftCount; ++i) { CompareItem item; item.index = i; item.left = true; const PDFDocumentTextFlow::Item* textFlowItem = context.leftTextFlow.getItem(i); for (int j = 0; j < textFlowItem->text.size(); ++j) { item.charIndex = j; leftItems.push_back(item); } } const size_t rightCount = context.rightTextFlow.getSize(); for (size_t i = 0; i < rightCount; ++i) { CompareItem item; item.index = i; item.left = false; const PDFDocumentTextFlow::Item* textFlowItem = context.rightTextFlow.getItem(i); for (int j = 0; j < textFlowItem->text.size(); ++j) { item.charIndex = j; rightItems.push_back(item); } } auto compareCharacters = [&](const CompareItem& a, const CompareItem& b) { const auto& aItem = a.left ? context.leftTextFlow : context.rightTextFlow; const auto& bItem = b.left ? context.leftTextFlow : context.rightTextFlow; QChar aChar = aItem.getItem(a.index)->text[a.charIndex]; QChar bChar = bItem.getItem(b.index)->text[b.charIndex]; return aChar == bChar; }; PDFAlgorithmLongestCommonSubsequence algorithm(leftItems.cbegin(), leftItems.cend(), rightItems.cbegin(), rightItems.cend(), compareCharacters); algorithm.perform(); PDFAlgorithmLongestCommonSubsequenceBase::Sequence sequence = algorithm.getSequence(); PDFAlgorithmLongestCommonSubsequenceBase::markSequence(sequence, { }, { }); PDFAlgorithmLongestCommonSubsequenceBase::SequenceItemRanges modifiedRanges = PDFAlgorithmLongestCommonSubsequenceBase::getModifiedRanges(sequence); for (const auto& range : modifiedRanges) { } }; PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, textFlowDifferences.begin(), textFlowDifferences.end(), compareTexts); } void PDFDiff::finalizeGraphicsPieces(PDFDiffPageContext& context) { std::sort(context.graphicPieces.begin(), context.graphicPieces.end()); // Compute page hash using active settings QCryptographicHash hasher(QCryptographicHash::Sha512); hasher.reset(); for (const PDFPrecompiledPage::GraphicPieceInfo& info : context.graphicPieces) { if (info.isText() && !m_options.testFlag(PC_Text)) { continue; } if (info.isVectorGraphics() && !m_options.testFlag(PC_VectorGraphics)) { continue; } if (info.isImage() && !m_options.testFlag(PC_Images)) { continue; } if (info.isShading() && !m_options.testFlag(PC_Mesh)) { continue; } hasher.addData(reinterpret_cast(info.hash.data()), int(info.hash.size())); } QByteArray hash = hasher.result(); Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64); size_t size = qMin(hash.length(), context.pageHash.size()); std::copy(hash.data(), hash.data() + size, context.pageHash.data()); } void PDFDiff::onComparationPerformed() { m_cancelled = false; m_result = m_future.result(); emit comparationFinished(); } PDFReal PDFDiff::calculateEpsilonForPage(const PDFPage* page) const { Q_ASSERT(page); QRectF mediaBox = page->getMediaBox(); PDFReal width = mediaBox.width(); PDFReal height = mediaBox.height(); PDFReal factor = qMax(width, height); return factor * m_epsilon; } PDFDocumentTextFlowFactory::Algorithm PDFDiff::getTextAnalysisAlgorithm() const { return m_textAnalysisAlgorithm; } void PDFDiff::setTextAnalysisAlgorithm(PDFDocumentTextFlowFactory::Algorithm textAnalysisAlgorithm) { m_textAnalysisAlgorithm = textAnalysisAlgorithm; } PDFDiffResult::PDFDiffResult() : m_result(true) { } void PDFDiffResult::addPageMoved(PDFInteger pageIndex1, PDFInteger pageIndex2) { Difference difference; difference.type = Type::PageMoved; difference.pageIndex1 = pageIndex1; difference.pageIndex2 = pageIndex2; m_differences.emplace_back(std::move(difference)); } void PDFDiffResult::addPageAdded(PDFInteger pageIndex) { Difference difference; difference.type = Type::PageAdded; difference.pageIndex2 = pageIndex; m_differences.emplace_back(std::move(difference)); } void PDFDiffResult::addPageRemoved(PDFInteger pageIndex) { Difference difference; difference.type = Type::PageRemoved; difference.pageIndex1 = pageIndex; m_differences.emplace_back(std::move(difference)); } void PDFDiffResult::addLeftItem(Type type, PDFInteger pageIndex, QRectF rect) { Difference difference; difference.type = type; difference.pageIndex1 = pageIndex; addRectLeft(difference, rect); m_differences.emplace_back(std::move(difference)); } void PDFDiffResult::addRightItem(Type type, PDFInteger pageIndex, QRectF rect) { Difference difference; difference.type = type; difference.pageIndex2 = pageIndex; addRectRight(difference, rect); m_differences.emplace_back(std::move(difference)); } void PDFDiffResult::addRemovedTextCharContent(PDFInteger pageIndex, QRectF rect) { addLeftItem(Type::RemovedTextCharContent, pageIndex, rect); } void PDFDiffResult::addRemovedVectorGraphicContent(PDFInteger pageIndex, QRectF rect) { addLeftItem(Type::RemovedVectorGraphicContent, pageIndex, rect); } void PDFDiffResult::addRemovedImageContent(PDFInteger pageIndex, QRectF rect) { addLeftItem(Type::RemovedImageContent, pageIndex, rect); } void PDFDiffResult::addRemovedShadingContent(PDFInteger pageIndex, QRectF rect) { addLeftItem(Type::RemovedShadingContent, pageIndex, rect); } void PDFDiffResult::addAddedTextCharContent(PDFInteger pageIndex, QRectF rect) { addRightItem(Type::AddedTextCharContent, pageIndex, rect); } void PDFDiffResult::addAddedVectorGraphicContent(PDFInteger pageIndex, QRectF rect) { addRightItem(Type::AddedVectorGraphicContent, pageIndex, rect); } void PDFDiffResult::addAddedImageContent(PDFInteger pageIndex, QRectF rect) { addRightItem(Type::AddedImageContent, pageIndex, rect); } void PDFDiffResult::addAddedShadingContent(PDFInteger pageIndex, QRectF rect) { addRightItem(Type::AddedShadingContent, pageIndex, rect); } QString PDFDiffResult::getMessage(size_t index) const { if (index >= m_differences.size()) { return QString(); } const Difference& difference = m_differences[index]; switch (difference.type) { case Type::PageMoved: return PDFDiff::tr("Page no. %1 from old document has been moved to a new document at page no. %2.").arg(difference.pageIndex1 + 1).arg(difference.pageIndex2 + 1); case Type::PageAdded: return PDFDiff::tr("Page no. %1 was added.").arg(difference.pageIndex2 + 1); case Type::PageRemoved: return PDFDiff::tr("Page no. %1 was removed.").arg(difference.pageIndex1 + 1); case Type::RemovedTextCharContent: return PDFDiff::tr("Removed text character from page %1.").arg(difference.pageIndex1 + 1); case Type::RemovedVectorGraphicContent: return PDFDiff::tr("Removed vector graphics from page %1.").arg(difference.pageIndex1 + 1); case Type::RemovedImageContent: return PDFDiff::tr("Removed image from page %1.").arg(difference.pageIndex1 + 1); case Type::RemovedShadingContent: return PDFDiff::tr("Removed shading from page %1.").arg(difference.pageIndex1 + 1); case Type::AddedTextCharContent: return PDFDiff::tr("Added text character from page %1.").arg(difference.pageIndex2 + 1); case Type::AddedVectorGraphicContent: return PDFDiff::tr("Added vector graphics from page %1.").arg(difference.pageIndex2 + 1); case Type::AddedImageContent: return PDFDiff::tr("Added image from page %1.").arg(difference.pageIndex2 + 1); case Type::AddedShadingContent: return PDFDiff::tr("Added shading from page %1.").arg(difference.pageIndex2 + 1); default: Q_ASSERT(false); break; } return QString(); } void PDFDiffResult::addRectLeft(Difference& difference, QRectF rect) { difference.leftRectIndex = m_rects.size(); difference.leftRectCount = 1; m_rects.emplace_back(rect); } void PDFDiffResult::addRectRight(Difference& difference, QRectF rect) { difference.rightRectIndex = m_rects.size(); difference.rightRectCount = 1; m_rects.emplace_back(rect); } PDFDiffHelper::Differences PDFDiffHelper::calculateDifferences(const GraphicPieceInfos& left, const GraphicPieceInfos& right, PDFReal epsilon) { Differences differences; Q_ASSERT(std::is_sorted(left.cbegin(), left.cend())); Q_ASSERT(std::is_sorted(right.cbegin(), right.cend())); for (const GraphicPieceInfo& info : left) { if (!std::binary_search(right.cbegin(), right.cend(), info)) { differences.left.push_back(info); } } for (const GraphicPieceInfo& info : right) { if (!std::binary_search(left.cbegin(), left.cend(), info)) { differences.right.push_back(info); } } const PDFReal epsilonSquared = epsilon * epsilon; // If exact match fails, then try to use match with epsilon. For each // item in left, we try to find matching item in right. for (auto it = differences.left.begin(); it != differences.left.end();) { bool hasMatch = false; const GraphicPieceInfo& leftInfo = *it; for (auto it2 = differences.right.begin(); it2 != differences.right.end();) { // Heuristically compare these items const GraphicPieceInfo& rightInfo = *it2; if (leftInfo.type != rightInfo.type || !leftInfo.boundingRect.intersects(rightInfo.boundingRect)) { ++it2; continue; } const int elementCountPath1 = leftInfo.pagePath.elementCount(); const int elementCountPath2 = rightInfo.pagePath.elementCount(); if (elementCountPath1 != elementCountPath2) { ++it2; continue; } hasMatch = (leftInfo.type != GraphicPieceInfo::Type::Image) || (leftInfo.imageHash == rightInfo.imageHash); const int elementCount = leftInfo.pagePath.elementCount(); for (int i = 0; i < elementCount && hasMatch; ++i) { QPainterPath::Element leftElement = leftInfo.pagePath.elementAt(i); QPainterPath::Element rightElement = rightInfo.pagePath.elementAt(i); PDFReal diffX = leftElement.x - rightElement.x; PDFReal diffY = leftElement.y - rightElement.y; PDFReal squaredDistance = diffX * diffX + diffY * diffY; hasMatch = (leftElement.type == rightElement.type) && (squaredDistance < epsilonSquared); } if (hasMatch) { it2 = differences.right.erase(it2); } else { ++it2; } } if (hasMatch) { it = differences.left.erase(it); } else { ++it; } } return differences; } std::vector PDFDiffHelper::getLeftUnmatched(const PageSequence& sequence) { std::vector result; for (const auto& item : sequence) { if (item.isLeft()) { result.push_back(item.index1); } } return result; } std::vector PDFDiffHelper::getRightUnmatched(const PageSequence& sequence) { std::vector result; for (const auto& item : sequence) { if (item.isRight()) { result.push_back(item.index2); } } return result; } void PDFDiffHelper::matchPage(PageSequence& sequence, size_t leftPage, size_t rightPage) { for (auto it = sequence.begin(); it != sequence.end();) { auto& item = *it; if (item.isLeft() && item.index1 == leftPage) { item.index2 = rightPage; } if (item.isRight() && item.index2 == rightPage) { it = sequence.erase(it); } else { ++it; } } } } // namespace pdf