mirror of
				https://github.com/JakubMelka/PDF4QT.git
				synced 2025-06-05 21:59:17 +02:00 
			
		
		
		
	DocDiff application: calculating page graphic piece info
This commit is contained in:
		| @@ -16,7 +16,13 @@ | ||||
| //    along with PDF4QT.  If not, see <https://www.gnu.org/licenses/>. | ||||
|  | ||||
| #include "pdfdiff.h" | ||||
| #include "pdfrenderer.h" | ||||
| #include "pdfdocumenttextflow.h" | ||||
| #include "pdfexecutionpolicy.h" | ||||
| #include "pdffont.h" | ||||
| #include "pdfcms.h" | ||||
| #include "pdfcompiler.h" | ||||
| #include "pdfconstants.h" | ||||
|  | ||||
| #include <QtConcurrent/QtConcurrent> | ||||
|  | ||||
| @@ -29,6 +35,7 @@ PDFDiff::PDFDiff(QObject* parent) : | ||||
|     m_leftDocument(nullptr), | ||||
|     m_rightDocument(nullptr), | ||||
|     m_options(Asynchronous), | ||||
|     m_epsilon(0.0001), | ||||
|     m_cancelled(false) | ||||
| { | ||||
|  | ||||
| @@ -141,30 +148,11 @@ PDFDiffResult PDFDiff::perform() | ||||
|     { | ||||
|         ProgressStartupInfo info; | ||||
|         info.showDialog = false; | ||||
|         info.text = tr(""); | ||||
|         info.text = tr("Comparing documents."); | ||||
|         m_progress->start(StepLast, std::move(info)); | ||||
|     } | ||||
|  | ||||
|     // StepExtractContentLeftDocument | ||||
|     stepProgress(); | ||||
|  | ||||
|     // StepExtractContentRightDocument | ||||
|     stepProgress(); | ||||
|  | ||||
|     // StepExtractTextLeftDocument | ||||
|     pdf::PDFDocumentTextFlowFactory factoryLeftDocumentTextFlow; | ||||
|     factoryLeftDocumentTextFlow.setCalculateBoundingBoxes(true); | ||||
|     PDFDocumentTextFlow leftTextFlow = factoryLeftDocumentTextFlow.create(m_leftDocument, leftPages, PDFDocumentTextFlowFactory::Algorithm::Auto); | ||||
|     stepProgress(); | ||||
|  | ||||
|     // StepExtractTextRightDocument | ||||
|     pdf::PDFDocumentTextFlowFactory factoryRightDocumentTextFlow; | ||||
|     factoryRightDocumentTextFlow.setCalculateBoundingBoxes(true); | ||||
|     PDFDocumentTextFlow rightTextFlow = factoryRightDocumentTextFlow.create(m_rightDocument, rightPages, PDFDocumentTextFlowFactory::Algorithm::Auto); | ||||
|     stepProgress(); | ||||
|  | ||||
|     // StepCompare | ||||
|     stepProgress(); | ||||
|     performSteps(leftPages, rightPages); | ||||
|  | ||||
|     if (m_progress) | ||||
|     { | ||||
| @@ -182,6 +170,102 @@ void PDFDiff::stepProgress() | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct PDFDiffPageContext | ||||
| { | ||||
|     PDFInteger pageIndex = 0; | ||||
|     PDFPrecompiledPage::GraphicPieceInfos graphicPieces; | ||||
| }; | ||||
|  | ||||
| void PDFDiff::performSteps(const std::vector<PDFInteger>& leftPages, const std::vector<PDFInteger>& rightPages) | ||||
| { | ||||
|     std::vector<PDFDiffPageContext> leftPreparedPages; | ||||
|     std::vector<PDFDiffPageContext> rightPreparedPages; | ||||
|  | ||||
|     auto createDiffPageContext = [](auto pageIndex) | ||||
|     { | ||||
|        PDFDiffPageContext context; | ||||
|        context.pageIndex = pageIndex; | ||||
|        return context; | ||||
|     }; | ||||
|     std::transform(leftPages.cbegin(), leftPages.cend(), std::back_inserter(leftPreparedPages), createDiffPageContext); | ||||
|     std::transform(rightPages.cbegin(), rightPages.cend(), std::back_inserter(rightPreparedPages), createDiffPageContext); | ||||
|  | ||||
|     // StepExtractContentLeftDocument | ||||
|     if (!m_cancelled) | ||||
|     { | ||||
|         PDFFontCache fontCache(DEFAULT_FONT_CACHE_LIMIT, DEFAULT_REALIZED_FONT_CACHE_LIMIT); | ||||
|         PDFOptionalContentActivity optionalContentActivity(m_leftDocument, pdf::OCUsage::View, nullptr); | ||||
|         fontCache.setDocument(pdf::PDFModifiedDocument(const_cast<pdf::PDFDocument*>(m_leftDocument), &optionalContentActivity)); | ||||
|  | ||||
|         PDFCMSManager cmsManager(nullptr); | ||||
|         cmsManager.setDocument(m_leftDocument); | ||||
|         PDFCMSPointer cms = cmsManager.getCurrentCMS(); | ||||
|  | ||||
|         auto fillPageContext = [&, this](PDFDiffPageContext& context) | ||||
|         { | ||||
|             PDFPrecompiledPage compiledPage; | ||||
|             constexpr PDFRenderer::Features features = PDFRenderer::IgnoreOptionalContent; | ||||
|             PDFRenderer renderer(m_leftDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings()); | ||||
|             renderer.compile(&compiledPage, context.pageIndex); | ||||
|  | ||||
|             PDFReal epsilon = calculateEpsilonForPage(m_leftDocument->getCatalog()->getPage(context.pageIndex)); | ||||
|             context.graphicPieces = compiledPage.calculateGraphicPieceInfos(epsilon); | ||||
|         }; | ||||
|         PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, leftPreparedPages.begin(), leftPreparedPages.end(), fillPageContext); | ||||
|         stepProgress(); | ||||
|     } | ||||
|  | ||||
|     // StepExtractContentRightDocument | ||||
|     if (!m_cancelled) | ||||
|     { | ||||
|         PDFFontCache fontCache(DEFAULT_FONT_CACHE_LIMIT, DEFAULT_REALIZED_FONT_CACHE_LIMIT); | ||||
|         PDFOptionalContentActivity optionalContentActivity(m_rightDocument, pdf::OCUsage::View, nullptr); | ||||
|         fontCache.setDocument(pdf::PDFModifiedDocument(const_cast<pdf::PDFDocument*>(m_rightDocument), &optionalContentActivity)); | ||||
|  | ||||
|         PDFCMSManager cmsManager(nullptr); | ||||
|         cmsManager.setDocument(m_rightDocument); | ||||
|         PDFCMSPointer cms = cmsManager.getCurrentCMS(); | ||||
|  | ||||
|         auto fillPageContext = [&, this](PDFDiffPageContext& context) | ||||
|         { | ||||
|             PDFPrecompiledPage compiledPage; | ||||
|             constexpr PDFRenderer::Features features = PDFRenderer::IgnoreOptionalContent; | ||||
|             PDFRenderer renderer(m_rightDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings()); | ||||
|             renderer.compile(&compiledPage, context.pageIndex); | ||||
|  | ||||
|             PDFReal epsilon = calculateEpsilonForPage(m_leftDocument->getCatalog()->getPage(context.pageIndex)); | ||||
|             context.graphicPieces = compiledPage.calculateGraphicPieceInfos(epsilon); | ||||
|         }; | ||||
|  | ||||
|         PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, rightPreparedPages.begin(), rightPreparedPages.end(), fillPageContext); | ||||
|         stepProgress(); | ||||
|     } | ||||
|  | ||||
|     // StepExtractTextLeftDocument | ||||
|     if (!m_cancelled) | ||||
|     { | ||||
|         pdf::PDFDocumentTextFlowFactory factoryLeftDocumentTextFlow; | ||||
|         factoryLeftDocumentTextFlow.setCalculateBoundingBoxes(true); | ||||
|         PDFDocumentTextFlow leftTextFlow = factoryLeftDocumentTextFlow.create(m_leftDocument, leftPages, PDFDocumentTextFlowFactory::Algorithm::Auto); | ||||
|         stepProgress(); | ||||
|     } | ||||
|  | ||||
|     // StepExtractTextRightDocument | ||||
|     if (!m_cancelled) | ||||
|     { | ||||
|         pdf::PDFDocumentTextFlowFactory factoryRightDocumentTextFlow; | ||||
|         factoryRightDocumentTextFlow.setCalculateBoundingBoxes(true); | ||||
|         PDFDocumentTextFlow rightTextFlow = factoryRightDocumentTextFlow.create(m_rightDocument, rightPages, PDFDocumentTextFlowFactory::Algorithm::Auto); | ||||
|         stepProgress(); | ||||
|     } | ||||
|  | ||||
|     // StepCompare | ||||
|     if (!m_cancelled) | ||||
|     { | ||||
|         stepProgress(); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void PDFDiff::onComparationPerformed() | ||||
| { | ||||
|     m_cancelled = false; | ||||
| @@ -189,6 +273,19 @@ void PDFDiff::onComparationPerformed() | ||||
|     emit comparationFinished(); | ||||
| } | ||||
|  | ||||
| PDFReal PDFDiff::calculateEpsilonForPage(const PDFPage* page) const | ||||
| { | ||||
|     Q_ASSERT(page); | ||||
|  | ||||
|     QRectF mediaBox = page->getMediaBox(); | ||||
|  | ||||
|     PDFReal width = mediaBox.width(); | ||||
|     PDFReal height = mediaBox.height(); | ||||
|     PDFReal factor = qMax(width, height); | ||||
|  | ||||
|     return factor * m_epsilon; | ||||
| } | ||||
|  | ||||
| PDFDiffResult::PDFDiffResult() : | ||||
|     m_result(true) | ||||
| { | ||||
|   | ||||
| @@ -117,15 +117,24 @@ private: | ||||
|  | ||||
|     PDFDiffResult perform(); | ||||
|     void stepProgress(); | ||||
|     void performSteps(const std::vector<PDFInteger>& leftPages, | ||||
|                       const std::vector<PDFInteger>& rightPages); | ||||
|  | ||||
|     void onComparationPerformed(); | ||||
|  | ||||
|     /// Calculates real epsilon for a page. Epsilon is used in page | ||||
|     /// comparison process, where points closer than epsilon | ||||
|     /// are recognized as equal. | ||||
|     /// \param page Page | ||||
|     PDFReal calculateEpsilonForPage(const PDFPage* page) const; | ||||
|  | ||||
|     PDFProgress* m_progress; | ||||
|     const PDFDocument* m_leftDocument; | ||||
|     const PDFDocument* m_rightDocument; | ||||
|     PDFClosedIntervalSet m_pagesForLeftDocument; | ||||
|     PDFClosedIntervalSet m_pagesForRightDocument; | ||||
|     Options m_options; | ||||
|     PDFReal m_epsilon; | ||||
|     std::atomic_bool m_cancelled; | ||||
|     PDFDiffResult m_result; | ||||
|  | ||||
|   | ||||
| @@ -103,7 +103,7 @@ public: | ||||
|     void addInputInterface(IDrawWidgetInputInterface* inputInterface); | ||||
|  | ||||
| signals: | ||||
|     void pageRenderingErrorsChanged(PDFInteger pageIndex, int errorsCount); | ||||
|     void pageRenderingErrorsChanged(pdf::PDFInteger pageIndex, int errorsCount); | ||||
|  | ||||
| private: | ||||
|     void updateRendererImpl(); | ||||
|   | ||||
| @@ -20,6 +20,7 @@ | ||||
| #include "pdfcms.h" | ||||
|  | ||||
| #include <QPainter> | ||||
| #include <QCryptographicHash> | ||||
|  | ||||
| namespace pdf | ||||
| { | ||||
| @@ -831,4 +832,146 @@ void PDFPrecompiledPage::finalize(qint64 compilingTimeNS, QList<PDFRenderError> | ||||
|     } | ||||
| } | ||||
|  | ||||
| PDFPrecompiledPage::GraphicPieceInfos PDFPrecompiledPage::calculateGraphicPieceInfos(PDFReal epsilon) const | ||||
| { | ||||
|     GraphicPieceInfos infos; | ||||
|  | ||||
|     struct State | ||||
|     { | ||||
|         QMatrix matrix; | ||||
|     }; | ||||
|     std::stack<State> stateStack; | ||||
|     stateStack.emplace(); | ||||
|  | ||||
|     // Check, if epsilon is not too small | ||||
|     if (qFuzzyIsNull(epsilon)) | ||||
|     { | ||||
|         epsilon = 0.000001; | ||||
|     } | ||||
|     PDFReal factor = 1.0 / epsilon; | ||||
|  | ||||
|     // Process all instructions | ||||
|     for (const Instruction& instruction : m_instructions) | ||||
|     { | ||||
|         switch (instruction.type) | ||||
|         { | ||||
|             case InstructionType::DrawPath: | ||||
|             { | ||||
|                 const PathPaintData& data = m_paths[instruction.dataIndex]; | ||||
|  | ||||
|                 GraphicPieceInfo info; | ||||
|                 QByteArray serializedPath; | ||||
|  | ||||
|                 // Serialize data | ||||
|                 if (true) | ||||
|                 { | ||||
|                     QDataStream stream(&serializedPath, QIODevice::WriteOnly); | ||||
|  | ||||
|                     stream << data.isText; | ||||
|                     stream << data.pen; | ||||
|                     stream << data.brush; | ||||
|  | ||||
|                     // Translate map to page coordinates | ||||
|                     QPainterPath pagePath = stateStack.top().matrix.map(data.path); | ||||
|  | ||||
|                     info.type = data.isText ? GraphicPieceInfo::Type::Text : GraphicPieceInfo::Type::VectorGraphics; | ||||
|                     info.boundingRect = pagePath.controlPointRect(); | ||||
|  | ||||
|                     const int elementCount = pagePath.elementCount(); | ||||
|                     for (int i = 0; i < elementCount; ++i) | ||||
|                     { | ||||
|                         QPainterPath::Element element = pagePath.elementAt(i); | ||||
|  | ||||
|                         PDFReal roundedX = qRound(element.x * factor); | ||||
|                         PDFReal roundedY = qRound(element.y * factor); | ||||
|  | ||||
|                         stream << roundedX; | ||||
|                         stream << roundedY; | ||||
|                         stream << element.type; | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 QByteArray hash = QCryptographicHash::hash(serializedPath, QCryptographicHash::Sha512); | ||||
|                 Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64); | ||||
|  | ||||
|                 size_t size = qMin<size_t>(hash.length(), info.hash.size()); | ||||
|                 std::copy(hash.data(), hash.data() + size, info.hash.data()); | ||||
|  | ||||
|                 infos.emplace_back(std::move(info)); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case InstructionType::DrawImage: | ||||
|             { | ||||
|                 /*const ImageData& data = m_images[instruction.dataIndex]; | ||||
|                 const QImage& image = data.image; | ||||
|  | ||||
|                 painter->save(); | ||||
|  | ||||
|                 QMatrix imageTransform(1.0 / image.width(), 0, 0, 1.0 / image.height(), 0, 0); | ||||
|                 QMatrix worldMatrix = imageTransform * painter->worldMatrix(); | ||||
|  | ||||
|                 // Jakub Melka: Because Qt uses opposite axis direction than PDF, then we must transform the y-axis | ||||
|                 // to the opposite (so the image is then unchanged) | ||||
|                 worldMatrix.translate(0, image.height()); | ||||
|                 worldMatrix.scale(1, -1); | ||||
|  | ||||
|                 painter->setWorldMatrix(worldMatrix); | ||||
|                 painter->drawImage(0, 0, image); | ||||
|                 painter->restore();*/ | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case InstructionType::DrawMesh: | ||||
|             { | ||||
|                 /*const MeshPaintData& data = m_meshes[instruction.dataIndex]; | ||||
|  | ||||
|                 painter->save(); | ||||
|                 painter->setWorldMatrix(pagePointToDevicePointMatrix); | ||||
|                 data.mesh.paint(painter, data.alpha); | ||||
|                 painter->restore();*/ | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case InstructionType::Clip: | ||||
|             { | ||||
|                 // Do nothing, we are just collecting information | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case InstructionType::SaveGraphicState: | ||||
|             { | ||||
|                 stateStack.push(stateStack.top()); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case InstructionType::RestoreGraphicState: | ||||
|             { | ||||
|                 stateStack.pop(); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case InstructionType::SetWorldMatrix: | ||||
|             { | ||||
|                 stateStack.top().matrix = m_matrices[instruction.dataIndex]; | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case InstructionType::SetCompositionMode: | ||||
|             { | ||||
|                 // Do nothing, we are just collecting information | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             default: | ||||
|             { | ||||
|                 Q_ASSERT(false); | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     return infos; | ||||
| } | ||||
|  | ||||
| }   // namespace pdf | ||||
|   | ||||
| @@ -234,6 +234,30 @@ public: | ||||
|     PDFSnapInfo* getSnapInfo() { return &m_snapInfo; } | ||||
|     const PDFSnapInfo* getSnapInfo() const { return &m_snapInfo; } | ||||
|  | ||||
|     struct GraphicPieceInfo | ||||
|     { | ||||
|         enum class Type | ||||
|         { | ||||
|             Unknown, | ||||
|             Text, | ||||
|             VectorGraphics, | ||||
|             Image | ||||
|         }; | ||||
|  | ||||
|         Type type = Type::Unknown; | ||||
|         QRectF boundingRect; | ||||
|         std::array<uint8_t, 64> hash = { }; | ||||
|     }; | ||||
|  | ||||
|     using GraphicPieceInfos = std::vector<GraphicPieceInfo>; | ||||
|  | ||||
|     /// Creates information about the pieces of graphic in this page, | ||||
|     /// for example, for comparison purposes. Parameter \p epsilon | ||||
|     /// is for numerical precision - values differing by less than | ||||
|     /// epsilon are considered equal. | ||||
|     /// \param epsilon Epsilon | ||||
|     GraphicPieceInfos calculateGraphicPieceInfos(PDFReal epsilon) const; | ||||
|  | ||||
| private: | ||||
|     struct PathPaintData | ||||
|     { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user