mirror of https://github.com/JakubMelka/PDF4QT.git
DocDiff application: finish page matching algorithm
This commit is contained in:
parent
050ba869f3
commit
3e327f8201
|
@ -44,6 +44,7 @@ DESTDIR = $$OUT_PWD/..
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
sources/pdfaction.cpp \
|
sources/pdfaction.cpp \
|
||||||
sources/pdfadvancedtools.cpp \
|
sources/pdfadvancedtools.cpp \
|
||||||
|
sources/pdfalgorithmlcs.cpp \
|
||||||
sources/pdfannotation.cpp \
|
sources/pdfannotation.cpp \
|
||||||
sources/pdfblendfunction.cpp \
|
sources/pdfblendfunction.cpp \
|
||||||
sources/pdfccittfaxdecoder.cpp \
|
sources/pdfccittfaxdecoder.cpp \
|
||||||
|
|
|
@ -0,0 +1,127 @@
|
||||||
|
// Copyright (C) 2021 Jakub Melka
|
||||||
|
//
|
||||||
|
// This file is part of PDF4QT.
|
||||||
|
//
|
||||||
|
// PDF4QT is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// with the written consent of the copyright owner, any later version.
|
||||||
|
//
|
||||||
|
// PDF4QT is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Lesser General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public License
|
||||||
|
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#include "pdfalgorithmlcs.h"
|
||||||
|
|
||||||
|
namespace pdf
|
||||||
|
{
|
||||||
|
|
||||||
|
/// Rebuilds \p sequence so that every non-matching item carries a flag
/// describing what happened to it. Matching items are copied unchanged.
/// Each maximal run of non-matching items is processed as follows:
///  - left-only items whose index1 is in \p movedItemsLeft are flagged MovedLeft,
///    right-only items whose index2 is in \p movedItemsRight are flagged MovedRight;
///    both are emitted immediately, in the order they are encountered,
///  - the remaining left-only and right-only items are paired in order, each pair
///    is emitted as a single item (index1 from left, index2 from right) flagged Replaced,
///  - leftover left-only items are flagged Removed, leftover right-only items are
///    flagged Added; if the run produced at least one pair, they are flagged
///    Replaced as well.
/// \param sequence Sequence to be marked (replaced by the marked sequence)
/// \param movedItemsLeft Sorted sequence of left indices, which have been moved
/// \param movedItemsRight Sorted sequence of right indices, which have been moved
void PDFAlgorithmLongestCommonSubsequenceBase::markSequence(Sequence& sequence,
                                                            const std::vector<size_t>& movedItemsLeft,
                                                            const std::vector<size_t>& movedItemsRight)
{
    // Binary search below requires sorted inputs
    Q_ASSERT(std::is_sorted(movedItemsLeft.cbegin(), movedItemsLeft.cend()));
    Q_ASSERT(std::is_sorted(movedItemsRight.cbegin(), movedItemsRight.cend()));

    Sequence markedSequence;

    auto cursor = sequence.cbegin();
    const auto sequenceEnd = sequence.cend();

    while (cursor != sequenceEnd)
    {
        if (cursor->isMatch())
        {
            // Matched items pass through untouched
            markedSequence.push_back(*cursor);
            ++cursor;
            continue;
        }

        // Collect the whole run of non-matching items; moved items are
        // emitted right away, the others wait for pairing.
        Sequence pendingLeft;
        Sequence pendingRight;

        for (; cursor != sequenceEnd && !cursor->isMatch(); ++cursor)
        {
            const SequenceItem& entry = *cursor;
            Q_ASSERT(entry.isLeft() || entry.isRight());

            if (entry.isLeft())
            {
                const bool wasMoved = std::binary_search(movedItemsLeft.cbegin(), movedItemsLeft.cend(), entry.index1);
                if (!wasMoved)
                {
                    pendingLeft.push_back(entry);
                }
                else
                {
                    SequenceItem movedItem = entry;
                    movedItem.markMovedLeft();
                    markedSequence.push_back(movedItem);
                }
            }
            else if (entry.isRight())
            {
                const bool wasMoved = std::binary_search(movedItemsRight.cbegin(), movedItemsRight.cend(), entry.index2);
                if (!wasMoved)
                {
                    pendingRight.push_back(entry);
                }
                else
                {
                    SequenceItem movedItem = entry;
                    movedItem.markMovedRight();
                    markedSequence.push_back(movedItem);
                }
            }
        }

        const size_t leftCount = pendingLeft.size();
        const size_t rightCount = pendingRight.size();
        const size_t pairCount = std::min(leftCount, rightCount);
        const bool hasReplacement = pairCount > 0;

        // Pair left and right items front to front; each pair is one replaced item
        for (size_t i = 0; i < pairCount; ++i)
        {
            SequenceItem replacedItem;
            replacedItem.index1 = pendingLeft[i].index1;
            replacedItem.index2 = pendingRight[i].index2;
            replacedItem.markReplaced();
            markedSequence.push_back(replacedItem);
        }

        // Unpaired left items have been removed from sequence no. 1
        for (size_t i = pairCount; i < leftCount; ++i)
        {
            SequenceItem removedItem = pendingLeft[i];
            removedItem.markRemoved();

            if (hasReplacement)
            {
                removedItem.markReplaced();
            }

            markedSequence.push_back(removedItem);
        }

        // Unpaired right items have been added to sequence no. 2
        for (size_t i = pairCount; i < rightCount; ++i)
        {
            SequenceItem addedItem = pendingRight[i];
            addedItem.markAdded();

            if (hasReplacement)
            {
                addedItem.markReplaced();
            }

            markedSequence.push_back(addedItem);
        }
    }

    sequence = qMove(markedSequence);
}
|
||||||
|
|
||||||
|
} // namespace pdf
|
|
@ -26,18 +26,54 @@ namespace pdf
|
||||||
class PDFAlgorithmLongestCommonSubsequenceBase
|
class PDFAlgorithmLongestCommonSubsequenceBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
enum SequenceItemFlag
|
||||||
|
{
|
||||||
|
None = 0x0000,
|
||||||
|
MovedLeft = 0x0001, ///< Item has been moved from this position (is present in sequence no. 1)
|
||||||
|
MovedRight = 0x0002, ///< Item has been moved to this position (is present in a sequence no. 2)
|
||||||
|
Moved = 0x0004, ///< Index of item has been changed
|
||||||
|
Added = 0x0008, ///< Item has been added to a sequence no. 2
|
||||||
|
Removed = 0x0010, ///< Item has been removed from a sequence no. 1
|
||||||
|
Replaced = 0x0020, ///< Item has been replaced (or sequence of items has been replaced)
|
||||||
|
};
|
||||||
|
Q_DECLARE_FLAGS(SequenceItemFlags, SequenceItemFlag)
|
||||||
|
|
||||||
struct SequenceItem
|
struct SequenceItem
|
||||||
{
|
{
|
||||||
size_t index1 = std::numeric_limits<size_t>::max();
|
size_t index1 = std::numeric_limits<size_t>::max();
|
||||||
size_t index2 = std::numeric_limits<size_t>::max();
|
size_t index2 = std::numeric_limits<size_t>::max();
|
||||||
|
SequenceItemFlags flags = None;
|
||||||
|
|
||||||
bool isLeftValid() const { return index1 != std::numeric_limits<size_t>::max(); }
|
bool isLeftValid() const { return index1 != std::numeric_limits<size_t>::max(); }
|
||||||
bool isRightValid() const { return index2 != std::numeric_limits<size_t>::max(); }
|
bool isRightValid() const { return index2 != std::numeric_limits<size_t>::max(); }
|
||||||
bool isLeft() const { return isLeftValid() && !isRightValid(); }
|
bool isLeft() const { return isLeftValid() && !isRightValid(); }
|
||||||
bool isRight() const { return isRightValid() && !isLeftValid(); }
|
bool isRight() const { return isRightValid() && !isLeftValid(); }
|
||||||
bool isMatch() const { return isLeftValid() && isRightValid(); }
|
bool isMatch() const { return isLeftValid() && isRightValid(); }
|
||||||
|
bool isMovedLeft() const { return flags.testFlag(MovedLeft); }
|
||||||
|
bool isMovedRight() const { return flags.testFlag(MovedRight); }
|
||||||
|
bool isMoved() const { return flags.testFlag(Moved); }
|
||||||
|
bool isAdded() const { return flags.testFlag(Added); }
|
||||||
|
bool isRemoved() const { return flags.testFlag(Removed); }
|
||||||
|
bool isReplaced() const { return flags.testFlag(Replaced); }
|
||||||
|
|
||||||
|
void markMovedLeft() { flags.setFlag(MovedLeft); }
|
||||||
|
void markMovedRight() { flags.setFlag(MovedRight); }
|
||||||
|
void markMoved() { flags.setFlag(Moved); }
|
||||||
|
void markAdded() { flags.setFlag(Added); }
|
||||||
|
void markRemoved() { flags.setFlag(Removed); }
|
||||||
|
void markReplaced() { flags.setFlag(Replaced); }
|
||||||
};
|
};
|
||||||
using Sequence = std::vector<SequenceItem>;
|
using Sequence = std::vector<SequenceItem>;
|
||||||
|
|
||||||
|
/// Marks a sequence with set of flags representing added/removed/replaced/moved
|
||||||
|
/// items. Moved items sequences must be sorted.
|
||||||
|
/// \param sequence Sequence to be marked
|
||||||
|
/// \param movedItemsLeft Sorted sequence of left indices, which have been moved
|
||||||
|
/// \param movedItemsRight sorted sequence of right indices, which have been moved
|
||||||
|
static void markSequence(Sequence& sequence,
|
||||||
|
const std::vector<size_t>& movedItemsLeft,
|
||||||
|
const std::vector<size_t>& movedItemsRight);
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Algorithm for computing longest common subsequence, on two sequences
|
/// Algorithm for computing longest common subsequence, on two sequences
|
||||||
|
|
|
@ -271,6 +271,9 @@ void PDFDiff::performPageMatching(const std::vector<PDFDiffPageContext>& leftPre
|
||||||
|
|
||||||
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, leftUnmatched.begin(), leftUnmatched.end(), matchLeftPage);
|
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, leftUnmatched.begin(), leftUnmatched.end(), matchLeftPage);
|
||||||
|
|
||||||
|
std::vector<size_t> leftPagesMoved;
|
||||||
|
std::vector<size_t> rightPagesMoved;
|
||||||
|
|
||||||
std::set<size_t> matchedRightPages;
|
std::set<size_t> matchedRightPages;
|
||||||
for (const auto& matchedPage : matchedPages)
|
for (const auto& matchedPage : matchedPages)
|
||||||
{
|
{
|
||||||
|
@ -282,6 +285,9 @@ void PDFDiff::performPageMatching(const std::vector<PDFDiffPageContext>& leftPre
|
||||||
const PDFDiffPageContext& leftPageContext = leftPreparedPages[matchedPage.first];
|
const PDFDiffPageContext& leftPageContext = leftPreparedPages[matchedPage.first];
|
||||||
const PDFDiffPageContext& rightPageContext = rightPreparedPages[rightContextIndex];
|
const PDFDiffPageContext& rightPageContext = rightPreparedPages[rightContextIndex];
|
||||||
|
|
||||||
|
leftPagesMoved.push_back(leftPageContext.pageIndex);
|
||||||
|
rightPagesMoved.push_back(rightPageContext.pageIndex);
|
||||||
|
|
||||||
pageMatches[leftPageContext.pageIndex] = rightPageContext.pageIndex;
|
pageMatches[leftPageContext.pageIndex] = rightPageContext.pageIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -292,6 +298,11 @@ void PDFDiff::performPageMatching(const std::vector<PDFDiffPageContext>& leftPre
|
||||||
algorithm.perform();
|
algorithm.perform();
|
||||||
pageSequence = algorithm.getSequence();
|
pageSequence = algorithm.getSequence();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::sort(leftPagesMoved.begin(), leftPagesMoved.end());
|
||||||
|
std::sort(rightPagesMoved.begin(), rightPagesMoved.end());
|
||||||
|
|
||||||
|
PDFAlgorithmLongestCommonSubsequenceBase::markSequence(pageSequence, leftPagesMoved, rightPagesMoved);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PDFDiff::performSteps(const std::vector<PDFInteger>& leftPages, const std::vector<PDFInteger>& rightPages)
|
void PDFDiff::performSteps(const std::vector<PDFInteger>& leftPages, const std::vector<PDFInteger>& rightPages)
|
||||||
|
@ -356,7 +367,7 @@ void PDFDiff::performSteps(const std::vector<PDFInteger>& leftPages, const std::
|
||||||
PDFRenderer renderer(m_rightDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings());
|
PDFRenderer renderer(m_rightDocument, &fontCache, cms.data(), &optionalContentActivity, features, pdf::PDFMeshQualitySettings());
|
||||||
renderer.compile(&compiledPage, context.pageIndex);
|
renderer.compile(&compiledPage, context.pageIndex);
|
||||||
|
|
||||||
const PDFPage* page = m_leftDocument->getCatalog()->getPage(context.pageIndex);
|
const PDFPage* page = m_rightDocument->getCatalog()->getPage(context.pageIndex);
|
||||||
PDFReal epsilon = calculateEpsilonForPage(page);
|
PDFReal epsilon = calculateEpsilonForPage(page);
|
||||||
context.graphicPieces = compiledPage.calculateGraphicPieceInfos(page->getMediaBox(), epsilon);
|
context.graphicPieces = compiledPage.calculateGraphicPieceInfos(page->getMediaBox(), epsilon);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue