mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
Merge remote-tracking branch 'remotes/origin/branch/pdfdiff-REBASED'
This commit is contained in:
@@ -44,11 +44,13 @@ DESTDIR = $$OUT_PWD/..
|
||||
SOURCES += \
|
||||
sources/pdfaction.cpp \
|
||||
sources/pdfadvancedtools.cpp \
|
||||
sources/pdfalgorithmlcs.cpp \
|
||||
sources/pdfannotation.cpp \
|
||||
sources/pdfblendfunction.cpp \
|
||||
sources/pdfccittfaxdecoder.cpp \
|
||||
sources/pdfcms.cpp \
|
||||
sources/pdfcompiler.cpp \
|
||||
sources/pdfdiff.cpp \
|
||||
sources/pdfdocumentbuilder.cpp \
|
||||
sources/pdfdocumentmanipulator.cpp \
|
||||
sources/pdfdocumenttextflow.cpp \
|
||||
@@ -110,11 +112,13 @@ SOURCES += \
|
||||
HEADERS += \
|
||||
sources/pdfaction.h \
|
||||
sources/pdfadvancedtools.h \
|
||||
sources/pdfalgorithmlcs.h \
|
||||
sources/pdfannotation.h \
|
||||
sources/pdfblendfunction.h \
|
||||
sources/pdfccittfaxdecoder.h \
|
||||
sources/pdfcms.h \
|
||||
sources/pdfcompiler.h \
|
||||
sources/pdfdiff.h \
|
||||
sources/pdfdocumentbuilder.h \
|
||||
sources/pdfdocumentdrawinterface.h \
|
||||
sources/pdfdocumentmanipulator.h \
|
||||
|
177
Pdf4QtLib/sources/pdfalgorithmlcs.cpp
Normal file
177
Pdf4QtLib/sources/pdfalgorithmlcs.cpp
Normal file
@@ -0,0 +1,177 @@
|
||||
// Copyright (C) 2021 Jakub Melka
|
||||
//
|
||||
// This file is part of PDF4QT.
|
||||
//
|
||||
// PDF4QT is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// with the written consent of the copyright owner, any later version.
|
||||
//
|
||||
// PDF4QT is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdfalgorithmlcs.h"
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
/// Rewrites \p sequence so that every item carries flags describing how it
/// differs between the left and the right sequence.
///
/// Matched items are copied unchanged. Each run of non-matching items between
/// two matches is processed as a group:
///  - items whose index is listed in \p movedItemsLeft / \p movedItemsRight are
///    flagged MovedLeft / MovedRight and emitted immediately,
///  - the remaining left-only and right-only items are paired in order; every
///    pair becomes a single item flagged Replaced,
///  - unpaired left-only items are flagged Removed, unpaired right-only items
///    are flagged Added (and additionally Replaced if the group produced at
///    least one pair).
/// Finally, matched items whose two indices differ are flagged Moved.
///
/// \param sequence Sequence to be marked (replaced by the marked sequence)
/// \param movedItemsLeft Sorted indices of moved items of the left sequence
/// \param movedItemsRight Sorted indices of moved items of the right sequence
void PDFAlgorithmLongestCommonSubsequenceBase::markSequence(Sequence& sequence,
                                                            const std::vector<size_t>& movedItemsLeft,
                                                            const std::vector<size_t>& movedItemsRight)
{
    // Binary search below is valid only on sorted input.
    Q_ASSERT(std::is_sorted(movedItemsLeft.cbegin(), movedItemsLeft.cend()));
    Q_ASSERT(std::is_sorted(movedItemsRight.cbegin(), movedItemsRight.cend()));

    // The marked sequence never contains more items than the input: matched and
    // moved items map 1:1, paired items collapse two inputs into one output.
    Sequence updatedSequence;
    updatedSequence.reserve(sequence.size());

    for (auto it = sequence.cbegin(); it != sequence.cend();)
    {
        if (it->isMatch())
        {
            updatedSequence.push_back(*it);
            ++it;
            continue;
        }

        // Collect one run of non-matching items. Items are kept in their
        // original order, so they can be paired front-to-front afterwards.
        Sequence leftItems;
        Sequence rightItems;

        for (; it != sequence.cend() && !it->isMatch(); ++it)
        {
            const SequenceItem& currentItem = *it;
            Q_ASSERT(currentItem.isLeft() || currentItem.isRight());

            if (currentItem.isLeft())
            {
                if (std::binary_search(movedItemsLeft.cbegin(), movedItemsLeft.cend(), currentItem.index1))
                {
                    SequenceItem item = currentItem;
                    item.markMovedLeft();
                    updatedSequence.push_back(item);
                }
                else
                {
                    leftItems.push_back(currentItem);
                }
            }

            if (currentItem.isRight())
            {
                if (std::binary_search(movedItemsRight.cbegin(), movedItemsRight.cend(), currentItem.index2))
                {
                    SequenceItem item = currentItem;
                    item.markMovedRight();
                    updatedSequence.push_back(item);
                }
                else
                {
                    rightItems.push_back(currentItem);
                }
            }
        }

        const size_t pairedCount = std::min(leftItems.size(), rightItems.size());
        const bool isReplaced = pairedCount > 0;

        // Pair i-th left-only item with i-th right-only item.
        for (size_t i = 0; i < pairedCount; ++i)
        {
            SequenceItem item;
            item.index1 = leftItems[i].index1;
            item.index2 = rightItems[i].index2;
            item.markReplaced();
            updatedSequence.push_back(item);
        }

        // Left-only items without a partner were removed.
        for (size_t i = pairedCount; i < leftItems.size(); ++i)
        {
            SequenceItem item = leftItems[i];
            item.markRemoved();

            if (isReplaced)
            {
                item.markReplaced();
            }

            updatedSequence.push_back(item);
        }

        // Right-only items without a partner were added.
        for (size_t i = pairedCount; i < rightItems.size(); ++i)
        {
            SequenceItem item = rightItems[i];
            item.markAdded();

            if (isReplaced)
            {
                item.markReplaced();
            }

            updatedSequence.push_back(item);
        }
    }

    // Replaced items have both indices set too, so they must be excluded
    // explicitly when looking for genuinely matched items that changed position.
    for (SequenceItem& item : updatedSequence)
    {
        if (item.isMatch() && !item.isRemoved() && !item.isReplaced() && !item.isAdded() && item.index1 != item.index2)
        {
            item.markMoved();
        }
    }

    sequence = std::move(updatedSequence);
}
|
||||
|
||||
/// Splits \p sequence into maximal runs of consecutive modified items
/// (see SequenceItem::isModified) and returns each run as a half-open
/// iterator pair [first, last) pointing into \p sequence.
/// \param sequence Sequence
PDFAlgorithmLongestCommonSubsequenceBase::SequenceItemRanges PDFAlgorithmLongestCommonSubsequenceBase::getModifiedRanges(Sequence& sequence)
{
    SequenceItemRanges ranges;

    const auto last = sequence.end();
    auto cursor = sequence.begin();

    while (cursor != last)
    {
        if (cursor->isModified())
        {
            // Cursor is the first modified item of a run; extend the run
            // until the first unmodified item (or the end of the sequence).
            auto rangeEnd = cursor + 1;
            while (rangeEnd != last && rangeEnd->isModified())
            {
                ++rangeEnd;
            }

            ranges.emplace_back(cursor, rangeEnd);
            cursor = rangeEnd;
        }
        else
        {
            ++cursor;
        }
    }

    return ranges;
}
|
||||
|
||||
/// Returns the union (bitwise or) of the flags of all items in the
/// half-open iterator range \p range. An empty range yields no flags.
/// \param range Range
PDFAlgorithmLongestCommonSubsequenceBase::SequenceItemFlags PDFAlgorithmLongestCommonSubsequenceBase::collectFlags(const SequenceItemRange& range)
{
    // Start from the explicit "no flags" enumerator instead of a literal zero.
    SequenceItemFlags flags = None;

    for (auto it = range.first; it != range.second; ++it)
    {
        flags |= it->flags;
    }

    return flags;
}
|
||||
|
||||
} // namespace pdf
|
260
Pdf4QtLib/sources/pdfalgorithmlcs.h
Normal file
260
Pdf4QtLib/sources/pdfalgorithmlcs.h
Normal file
@@ -0,0 +1,260 @@
|
||||
// Copyright (C) 2021 Jakub Melka
|
||||
//
|
||||
// This file is part of PDF4QT.
|
||||
//
|
||||
// PDF4QT is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// with the written consent of the copyright owner, any later version.
|
||||
//
|
||||
// PDF4QT is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef PDFALGORITHMLCS_H
|
||||
#define PDFALGORITHMLCS_H
|
||||
|
||||
#include "pdfglobal.h"
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
class PDFAlgorithmLongestCommonSubsequenceBase
|
||||
{
|
||||
public:
|
||||
|
||||
enum SequenceItemFlag
|
||||
{
|
||||
None = 0x0000,
|
||||
MovedLeft = 0x0001, ///< Item has been moved from this position (is present in a sequence no. 1)
|
||||
MovedRight = 0x0002, ///< Item has been moved to this position (is present in a sequence no. 2)
|
||||
Moved = 0x0004, ///< Index of item has been changed
|
||||
Added = 0x0008, ///< Item has been added to a sequence no. 2
|
||||
Removed = 0x0010, ///< Item has been removed from a sequence no. 1
|
||||
Replaced = 0x0020, ///< Item has been replaced (or sequence of items has been replaced)
|
||||
};
|
||||
Q_DECLARE_FLAGS(SequenceItemFlags, SequenceItemFlag)
|
||||
|
||||
struct SequenceItem
|
||||
{
|
||||
size_t index1 = std::numeric_limits<size_t>::max();
|
||||
size_t index2 = std::numeric_limits<size_t>::max();
|
||||
SequenceItemFlags flags = None;
|
||||
|
||||
bool isLeftValid() const { return index1 != std::numeric_limits<size_t>::max(); }
|
||||
bool isRightValid() const { return index2 != std::numeric_limits<size_t>::max(); }
|
||||
bool isLeft() const { return isLeftValid() && !isRightValid(); }
|
||||
bool isRight() const { return isRightValid() && !isLeftValid(); }
|
||||
bool isMatch() const { return isLeftValid() && isRightValid(); }
|
||||
bool isMovedLeft() const { return flags.testFlag(MovedLeft); }
|
||||
bool isMovedRight() const { return flags.testFlag(MovedRight); }
|
||||
bool isMoved() const { return flags.testFlag(Moved); }
|
||||
bool isAdded() const { return flags.testFlag(Added); }
|
||||
bool isRemoved() const { return flags.testFlag(Removed); }
|
||||
bool isReplaced() const { return flags.testFlag(Replaced); }
|
||||
bool isModified() const { return isAdded() || isRemoved() || isReplaced(); }
|
||||
|
||||
void markMovedLeft() { flags.setFlag(MovedLeft); }
|
||||
void markMovedRight() { flags.setFlag(MovedRight); }
|
||||
void markMoved() { flags.setFlag(Moved); }
|
||||
void markAdded() { flags.setFlag(Added); }
|
||||
void markRemoved() { flags.setFlag(Removed); }
|
||||
void markReplaced() { flags.setFlag(Replaced); }
|
||||
};
|
||||
|
||||
using Sequence = typename std::vector<SequenceItem>;
|
||||
using SequenceIterator = typename Sequence::iterator;
|
||||
using SequenceItemRange = typename std::pair<SequenceIterator, SequenceIterator>;
|
||||
using SequenceItemRanges = typename std::vector<SequenceItemRange>;
|
||||
|
||||
/// Marks a sequence with set of flags representing added/removed/replaced/moved
|
||||
/// items. Moved items sequences must be sorted.
|
||||
/// \param sequence Sequence to be marked
|
||||
/// \param movedItemsLeft Sorted sequence of left indices, which have been moved
|
||||
/// \param movedItemsRight sorted sequence of right indices, which have been moved
|
||||
static void markSequence(Sequence& sequence,
|
||||
const std::vector<size_t>& movedItemsLeft,
|
||||
const std::vector<size_t>& movedItemsRight);
|
||||
|
||||
/// Returns item ranges, which should be checked - for example,
|
||||
/// for text modification.
|
||||
/// \param sequence Sequence
|
||||
static SequenceItemRanges getModifiedRanges(Sequence& sequence);
|
||||
|
||||
/// Collect flags from given item range
|
||||
/// \param range Range
|
||||
static SequenceItemFlags collectFlags(const SequenceItemRange& range);
|
||||
};
|
||||
|
||||
/// Algorithm for computing longest common subsequence, on two sequences
|
||||
/// of objects, which are implementing operator "==" (equal operator).
|
||||
/// Constructor takes bidirectional iterators to the sequence. So, iterators
|
||||
/// are requred to be bidirectional.
|
||||
template<typename Iterator, typename Comparator>
|
||||
class PDFAlgorithmLongestCommonSubsequence : public PDFAlgorithmLongestCommonSubsequenceBase
|
||||
{
|
||||
public:
|
||||
PDFAlgorithmLongestCommonSubsequence(Iterator it1,
|
||||
Iterator it1End,
|
||||
Iterator it2,
|
||||
Iterator it2End,
|
||||
Comparator comparator);
|
||||
|
||||
|
||||
void perform();
|
||||
|
||||
const Sequence& getSequence() const { return m_sequence; }
|
||||
|
||||
private:
|
||||
Iterator m_it1;
|
||||
Iterator m_it1End;
|
||||
Iterator m_it2;
|
||||
Iterator m_it2End;
|
||||
|
||||
size_t m_size1;
|
||||
size_t m_size2;
|
||||
size_t m_matrixSize;
|
||||
|
||||
Comparator m_comparator;
|
||||
|
||||
std::vector<bool> m_backtrackData;
|
||||
Sequence m_sequence;
|
||||
};
|
||||
|
||||
template<typename Iterator, typename Comparator>
|
||||
PDFAlgorithmLongestCommonSubsequence<Iterator, Comparator>::PDFAlgorithmLongestCommonSubsequence(Iterator it1,
|
||||
Iterator it1End,
|
||||
Iterator it2,
|
||||
Iterator it2End,
|
||||
Comparator comparator) :
|
||||
m_it1(std::move(it1)),
|
||||
m_it1End(std::move(it1End)),
|
||||
m_it2(std::move(it2)),
|
||||
m_it2End(std::move(it2End)),
|
||||
m_size1(0),
|
||||
m_size2(0),
|
||||
m_matrixSize(0),
|
||||
m_comparator(std::move(comparator))
|
||||
{
|
||||
m_size1 = std::distance(m_it1, m_it1End) + 1;
|
||||
m_size2 = std::distance(m_it2, m_it2End) + 1;
|
||||
m_matrixSize = m_size1 * m_size2;
|
||||
}
|
||||
|
||||
template<typename Iterator, typename Comparator>
|
||||
void PDFAlgorithmLongestCommonSubsequence<Iterator, Comparator>::perform()
|
||||
{
|
||||
m_backtrackData.resize(m_matrixSize);
|
||||
m_sequence.clear();
|
||||
|
||||
std::vector<size_t> rowTop(m_size1, size_t());
|
||||
std::vector<size_t> rowBottom(m_size1, size_t());
|
||||
|
||||
// Jakub Melka: we will have columns consisting of it1...it1End
|
||||
// and rows consisting of it2...it2End. We iterate trough rows,
|
||||
// and for each row, we update longest common subsequence data.
|
||||
|
||||
auto it2 = m_it2;
|
||||
for (size_t i2 = 1; i2 < m_size2; ++i2, ++it2)
|
||||
{
|
||||
auto it1 = m_it1;
|
||||
for (size_t i1 = 1; i1 < m_size1; ++i1, ++it1)
|
||||
{
|
||||
if (m_comparator(*it1, *it2))
|
||||
{
|
||||
// We have match
|
||||
rowBottom[i1] = rowTop[i1 - 1] + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
const size_t leftCellValue = rowBottom[i1 - 1];
|
||||
const size_t upperCellValue = rowTop[i1];
|
||||
bool isLeftBigger = leftCellValue > upperCellValue;
|
||||
|
||||
if (isLeftBigger)
|
||||
{
|
||||
rowBottom[i1] = leftCellValue;
|
||||
m_backtrackData[i2 * m_size1 + i1] = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
rowBottom[i1] = upperCellValue;
|
||||
m_backtrackData[i2 * m_size1 + i1] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bottom row will become top row
|
||||
std::swap(rowTop, rowBottom);
|
||||
}
|
||||
|
||||
size_t i1 = m_size1 - 1;
|
||||
size_t i2 = m_size2 - 1;
|
||||
|
||||
while (i1 > 0 && i2 > 0)
|
||||
{
|
||||
SequenceItem item;
|
||||
|
||||
const size_t index1 = i1 - 1;
|
||||
const size_t index2 = i2 - 1;
|
||||
|
||||
auto it1 = std::next(m_it1, index1);
|
||||
auto it2 = std::next(m_it2, index2);
|
||||
|
||||
if (m_comparator(*it1, *it2))
|
||||
{
|
||||
item.index1 = index1;
|
||||
item.index2 = index2;
|
||||
|
||||
--i1;
|
||||
--i2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_backtrackData[i2 * m_size1 + i1])
|
||||
{
|
||||
item.index1 = index1;
|
||||
--i1;
|
||||
}
|
||||
else
|
||||
{
|
||||
item.index2 = index2;
|
||||
--i2;
|
||||
}
|
||||
}
|
||||
|
||||
m_sequence.push_back(item);
|
||||
}
|
||||
|
||||
while (i1 > 0)
|
||||
{
|
||||
SequenceItem item;
|
||||
|
||||
const size_t index1 = i1 - 1;
|
||||
item.index1 = index1;
|
||||
--i1;
|
||||
|
||||
m_sequence.push_back(item);
|
||||
}
|
||||
|
||||
while (i2 > 0)
|
||||
{
|
||||
SequenceItem item;
|
||||
|
||||
const size_t index2 = i2 - 1;
|
||||
item.index2 = index2;
|
||||
--i2;
|
||||
|
||||
m_sequence.push_back(item);
|
||||
}
|
||||
|
||||
std::reverse(m_sequence.begin(), m_sequence.end());
|
||||
}
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFALGORITHMLCS_H
|
@@ -39,12 +39,13 @@ class PDFDocument;
|
||||
/// to be used in viewer application.
|
||||
enum class PageLayout
{
    SinglePage,     ///< Display one page at time (single page on screen)
    OneColumn,      ///< Displays pages in one column (continuous mode)
    TwoColumnLeft,  ///< Display pages in two continuous columns, odd numbered pages are on the left
    TwoColumnRight, ///< Display pages in two continuous columns, even numbered pages are on the left
    TwoPagesLeft,   ///< Display two pages on the screen, odd numbered pages are on the left
    TwoPagesRight,  ///< Display two pages on the screen, even numbered pages are on the left
    Custom          ///< Custom layout, multiple columns can be used, -1 as page index means page is omitted
};
|
||||
|
||||
/// Specifies, how the document should be displayed in the viewer application.
|
||||
|
1869
Pdf4QtLib/sources/pdfdiff.cpp
Normal file
1869
Pdf4QtLib/sources/pdfdiff.cpp
Normal file
File diff suppressed because it is too large
Load Diff
408
Pdf4QtLib/sources/pdfdiff.h
Normal file
408
Pdf4QtLib/sources/pdfdiff.h
Normal file
@@ -0,0 +1,408 @@
|
||||
// Copyright (C) 2021 Jakub Melka
|
||||
//
|
||||
// This file is part of PDF4QT.
|
||||
//
|
||||
// PDF4QT is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// with the written consent of the copyright owner, any later version.
|
||||
//
|
||||
// PDF4QT is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef PDFDIFF_H
|
||||
#define PDFDIFF_H
|
||||
|
||||
#include "pdfdocument.h"
|
||||
#include "pdfprogress.h"
|
||||
#include "pdfutils.h"
|
||||
#include "pdfalgorithmlcs.h"
|
||||
#include "pdfdocumenttextflow.h"
|
||||
|
||||
#include <QObject>
|
||||
#include <QFuture>
|
||||
#include <QFutureWatcher>
|
||||
|
||||
#include <atomic>
|
||||
|
||||
class QIODevice;
|
||||
class QXmlStreamWriter;
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
struct PDFDiffPageContext;
|
||||
|
||||
class PDF4QTLIBSHARED_EXPORT PDFDiffResult
|
||||
{
|
||||
public:
|
||||
explicit PDFDiffResult();
|
||||
|
||||
enum class Type : uint32_t
|
||||
{
|
||||
Invalid = 0x0000,
|
||||
PageMoved = 0x0001,
|
||||
PageAdded = 0x0002,
|
||||
PageRemoved = 0x0004,
|
||||
RemovedTextCharContent = 0x0008,
|
||||
RemovedVectorGraphicContent = 0x0010,
|
||||
RemovedImageContent = 0x0020,
|
||||
RemovedShadingContent = 0x0040,
|
||||
AddedTextCharContent = 0x0080,
|
||||
AddedVectorGraphicContent = 0x0100,
|
||||
AddedImageContent = 0x0200,
|
||||
AddedShadingContent = 0x0400,
|
||||
TextReplaced = 0x0800,
|
||||
TextAdded = 0x1000,
|
||||
TextRemoved = 0x2000,
|
||||
};
|
||||
|
||||
struct PageSequenceItem
|
||||
{
|
||||
PDFInteger leftPage = -1;
|
||||
PDFInteger rightPage = -1;
|
||||
};
|
||||
|
||||
using PageSequence = std::vector<PageSequenceItem>;
|
||||
|
||||
using RectInfos = std::vector<std::pair<PDFInteger, QRectF>>;
|
||||
using RectInfosIt = typename RectInfos::const_iterator;
|
||||
|
||||
void setResult(PDFOperationResult result) { m_result = std::move(result); }
|
||||
const PDFOperationResult& getResult() const { return m_result; }
|
||||
|
||||
/// Returns true, if some difference was found
|
||||
bool isChanged() const { return getDifferencesCount() > 0; }
|
||||
|
||||
/// Returns true, if no difference was found
|
||||
bool isSame() const { return !isChanged(); }
|
||||
|
||||
/// Returns number of detected changes
|
||||
size_t getDifferencesCount() const { return m_differences.size(); }
|
||||
|
||||
/// Returns message describing difference in a page content
|
||||
/// \param index Index
|
||||
QString getMessage(size_t index) const;
|
||||
|
||||
/// Returns index of left page (or -1, if difference occured
|
||||
/// only on a right page)
|
||||
/// \param index Index
|
||||
PDFInteger getLeftPage(size_t index) const;
|
||||
|
||||
/// Returns index of right page (or -1, if difference occured
|
||||
/// only on a left page)
|
||||
/// \param index Index
|
||||
PDFInteger getRightPage(size_t index) const;
|
||||
|
||||
/// Return type of difference
|
||||
/// \param index Index
|
||||
Type getType(size_t index) const;
|
||||
|
||||
/// Returns text description of type
|
||||
/// \param index Index
|
||||
QString getTypeDescription(size_t index) const;
|
||||
|
||||
/// Returns iterator range for rectangles of "left" pages of an item
|
||||
std::pair<RectInfosIt, RectInfosIt> getLeftRectangles(size_t index) const;
|
||||
|
||||
/// Returns iterator range for rectangles of "right" pages of an item
|
||||
std::pair<RectInfosIt, RectInfosIt> getRightRectangles(size_t index) const;
|
||||
|
||||
bool isPageMoveAddRemoveDifference(size_t index) const;
|
||||
bool isPageMoveDifference(size_t index) const;
|
||||
bool isAddDifference(size_t index) const;
|
||||
bool isRemoveDifference(size_t index) const;
|
||||
bool isReplaceDifference(size_t index) const;
|
||||
|
||||
bool hasPageMoveDifferences() const { return m_typeFlags & FLAGS_PAGE_MOVE; }
|
||||
bool hasTextDifferences() const { return m_typeFlags & FLAGS_TEXT; }
|
||||
bool hasVectorGraphicsDifferences() const { return m_typeFlags & FLAGS_VECTOR_GRAPHICS; }
|
||||
bool hasImageDifferences() const { return m_typeFlags & FLAGS_IMAGE; }
|
||||
bool hasShadingDifferences() const { return m_typeFlags & FLAGS_SHADING; }
|
||||
|
||||
/// Returns sorted changed page indices from left document
|
||||
std::vector<PDFInteger> getChangedLeftPageIndices() const;
|
||||
|
||||
/// Returns sorted changed page indices from right document
|
||||
std::vector<PDFInteger> getChangedRightPageIndices() const;
|
||||
|
||||
/// Filters results using given critera
|
||||
/// \param filterPageMoveDifferences Filter page move differences?
|
||||
/// \param filterTextDifferences Filter text diffferences?
|
||||
/// \param filterVectorGraphicsDifferences Filter vector graphics differences?
|
||||
/// \param filterImageDifferences Filter image differences?
|
||||
/// \param filterShadingDifferences Filter shading differences?
|
||||
PDFDiffResult filter(bool filterPageMoveDifferences,
|
||||
bool filterTextDifferences,
|
||||
bool filterVectorGraphicsDifferences,
|
||||
bool filterImageDifferences,
|
||||
bool filterShadingDifferences);
|
||||
|
||||
const PageSequence& getPageSequence() const;
|
||||
void setPageSequence(PageSequence pageSequence);
|
||||
|
||||
/// Saves all differences to a XML stream
|
||||
/// represented by device
|
||||
/// \param device Output device
|
||||
void saveToXML(QIODevice* device) const;
|
||||
|
||||
/// Saves all differences to a byte array
|
||||
/// \param byteArray Output byte array
|
||||
void saveToXML(QByteArray* byteArray) const;
|
||||
|
||||
/// Saves all differences to a string
|
||||
/// \param string Output string
|
||||
void saveToXML(QString* string) const;
|
||||
|
||||
private:
|
||||
friend class PDFDiff;
|
||||
|
||||
static constexpr uint32_t FLAGS_PAGE_MOVE = uint32_t(Type::PageMoved) | uint32_t(Type::PageAdded) | uint32_t(Type::PageRemoved);
|
||||
static constexpr uint32_t FLAGS_TEXT = uint32_t(Type::RemovedTextCharContent) | uint32_t(Type::AddedTextCharContent) | uint32_t(Type::TextReplaced) | uint32_t(Type::TextAdded) | uint32_t(Type::TextRemoved);
|
||||
static constexpr uint32_t FLAGS_VECTOR_GRAPHICS = uint32_t(Type::RemovedVectorGraphicContent) | uint32_t(Type::AddedVectorGraphicContent);
|
||||
static constexpr uint32_t FLAGS_IMAGE = uint32_t(Type::RemovedImageContent) | uint32_t(Type::AddedImageContent);
|
||||
static constexpr uint32_t FLAGS_SHADING = uint32_t(Type::RemovedShadingContent) | uint32_t(Type::AddedShadingContent);
|
||||
|
||||
static constexpr uint32_t FLAGS_TYPE_PAGE_MOVE = uint32_t(Type::PageMoved);
|
||||
static constexpr uint32_t FLAGS_TYPE_PAGE_MOVE_ADD_REMOVE = uint32_t(Type::PageMoved) | uint32_t(Type::PageAdded) | uint32_t(Type::PageRemoved);
|
||||
static constexpr uint32_t FLAGS_TYPE_ADD = uint32_t(Type::PageAdded) | uint32_t(Type::AddedTextCharContent) | uint32_t(Type::AddedVectorGraphicContent) | uint32_t(Type::AddedImageContent) | uint32_t(Type::AddedShadingContent) | uint32_t(Type::TextAdded);
|
||||
static constexpr uint32_t FLAGS_TYPE_REMOVE = uint32_t(Type::PageRemoved) | uint32_t(Type::RemovedTextCharContent) | uint32_t(Type::RemovedVectorGraphicContent) | uint32_t(Type::RemovedImageContent) | uint32_t(Type::RemovedShadingContent) | uint32_t(Type::TextRemoved);
|
||||
static constexpr uint32_t FLAGS_TYPE_REPLACE = uint32_t(Type::TextReplaced);
|
||||
|
||||
void addPageMoved(PDFInteger pageIndex1, PDFInteger pageIndex2);
|
||||
void addPageAdded(PDFInteger pageIndex);
|
||||
void addPageRemoved(PDFInteger pageIndex);
|
||||
|
||||
void addRemovedTextCharContent(PDFInteger pageIndex, QRectF rect);
|
||||
void addRemovedVectorGraphicContent(PDFInteger pageIndex, QRectF rect);
|
||||
void addRemovedImageContent(PDFInteger pageIndex, QRectF rect);
|
||||
void addRemovedShadingContent(PDFInteger pageIndex, QRectF rect);
|
||||
void addAddedTextCharContent(PDFInteger pageIndex, QRectF rect);
|
||||
void addAddedVectorGraphicContent(PDFInteger pageIndex, QRectF rect);
|
||||
void addAddedImageContent(PDFInteger pageIndex, QRectF rect);
|
||||
void addAddedShadingContent(PDFInteger pageIndex, QRectF rect);
|
||||
|
||||
void addTextAdded(PDFInteger pageIndex, QString text, const RectInfos& rectInfos);
|
||||
void addTextRemoved(PDFInteger pageIndex, QString text, const RectInfos& rectInfos);
|
||||
|
||||
void addTextReplaced(PDFInteger pageIndex1,
|
||||
PDFInteger pageIndex2,
|
||||
QString textRemoved,
|
||||
QString textAdded,
|
||||
const RectInfos& rectInfos1,
|
||||
const RectInfos& rectInfos2);
|
||||
|
||||
void saveToStream(QXmlStreamWriter* stream) const;
|
||||
|
||||
void finalize();
|
||||
|
||||
uint32_t getTypeFlags(size_t index) const;
|
||||
|
||||
/// Single content difference descriptor. It describes type
|
||||
/// of difference (such as graphics, image, text change) on a page
|
||||
/// or on a list of multiple pages.
|
||||
struct Difference
|
||||
{
|
||||
Type type = Type::Invalid;
|
||||
PDFInteger pageIndex1 = -1;
|
||||
PDFInteger pageIndex2 = -1;
|
||||
size_t leftRectIndex = 0;
|
||||
size_t leftRectCount = 0;
|
||||
size_t rightRectIndex = 0;
|
||||
size_t rightRectCount = 0;
|
||||
int textAddedIndex = -1;
|
||||
int textRemovedIndex = -1;
|
||||
};
|
||||
|
||||
using Differences = std::vector<Difference>;
|
||||
|
||||
void addLeftItem(Type type, PDFInteger pageIndex, QRectF rect);
|
||||
void addRightItem(Type type, PDFInteger pageIndex, QRectF rect);
|
||||
|
||||
void addRectLeft(Difference& difference, QRectF rect);
|
||||
void addRectRight(Difference& difference, QRectF rect);
|
||||
|
||||
Differences m_differences;
|
||||
RectInfos m_rects; ///< Rectangles with page indices
|
||||
PDFOperationResult m_result;
|
||||
QStringList m_strings;
|
||||
uint32_t m_typeFlags = 0;
|
||||
PageSequence m_pageSequence;
|
||||
};
|
||||
|
||||
/// Class for result navigation, can go to next, or previous result.
|
||||
class PDF4QTLIBSHARED_EXPORT PDFDiffResultNavigator : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
explicit PDFDiffResultNavigator(QObject* parent);
|
||||
virtual ~PDFDiffResultNavigator() override;
|
||||
|
||||
void setResult(const PDFDiffResult* diffResult);
|
||||
|
||||
/// Returns true, if valid result is selected
|
||||
bool isSelected() const;
|
||||
|
||||
/// Returns true if action go to next result can be performed,
|
||||
/// otherwise false is returned.
|
||||
bool canGoNext() const;
|
||||
|
||||
/// Returns true if action go to previous result can be performed,
|
||||
/// otherwise false is returned.
|
||||
bool canGoPrevious() const;
|
||||
|
||||
/// Goes to next result. If action cannot be performed,
|
||||
/// nothing happens and signal is not emitted.
|
||||
void goNext();
|
||||
|
||||
/// Goes to previous result. If action cannot be performed,
|
||||
/// nothing happens and signal is not emitted.
|
||||
void goPrevious();
|
||||
|
||||
/// Updates selection, if difference result was changed
|
||||
void update();
|
||||
|
||||
/// Selects current index
|
||||
/// \param currentIndex
|
||||
void select(size_t currentIndex);
|
||||
|
||||
signals:
|
||||
void selectionChanged(size_t currentIndex);
|
||||
|
||||
private:
|
||||
size_t getLimit() const { return m_diffResult ? m_diffResult->getDifferencesCount() : 0; }
|
||||
|
||||
const PDFDiffResult* m_diffResult;
|
||||
size_t m_currentIndex;
|
||||
};
|
||||
|
||||
/// Diff engine for comparing two pdf documents.
|
||||
class PDF4QTLIBSHARED_EXPORT PDFDiff : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
private:
|
||||
using BaseClass = QObject;
|
||||
|
||||
public:
|
||||
explicit PDFDiff(QObject* parent);
|
||||
virtual ~PDFDiff() override;
|
||||
|
||||
enum Option
|
||||
{
|
||||
None = 0x0000,
|
||||
Asynchronous = 0x0001, ///< Compare document asynchronously
|
||||
PC_Text = 0x0002, ///< Use text to compare pages (determine, which pages correspond to each other)
|
||||
PC_VectorGraphics = 0x0004, ///< Use vector graphics to compare pages (determine, which pages correspond to each other)
|
||||
PC_Images = 0x0008, ///< Use images to compare pages (determine, which pages correspond to each other)
|
||||
PC_Mesh = 0x0010, ///< Use mesh to compare pages (determine, which pages correspond to each other)
|
||||
CompareTextsAsVector = 0x0020, ///< Compare texts as vector graphics
|
||||
CompareWords = 0x0040, ///< Compare words, not just characters
|
||||
};
|
||||
Q_DECLARE_FLAGS(Options, Option)
|
||||
|
||||
/// Source document (left)
|
||||
/// \param leftDocument Document
|
||||
void setLeftDocument(const PDFDocument* leftDocument);
|
||||
|
||||
/// Source document (right)(
|
||||
/// \param rightDocument Document
|
||||
void setRightDocument(const PDFDocument* rightDocument);
|
||||
|
||||
/// Source pages to be compared (left document)
|
||||
/// \param pagesForLeftDocument Page indices
|
||||
void setPagesForLeftDocument(PDFClosedIntervalSet pagesForLeftDocument);
|
||||
|
||||
/// Source pages to be compared (right document)
|
||||
/// \param pagesForRightDocument Page indices
|
||||
void setPagesForRightDocument(PDFClosedIntervalSet pagesForRightDocument);
|
||||
|
||||
/// Sets progress object
|
||||
/// \param progress Progress object
|
||||
void setProgress(PDFProgress* progress) { m_progress = progress; }
|
||||
|
||||
/// Enables or disables comparator engine option
|
||||
/// \param option Option
|
||||
/// \param enable Enable or disable option?
|
||||
void setOption(Option option, bool enable) { m_options.setFlag(option, enable); }
|
||||
|
||||
/// Starts comparator engine. If asynchronous engine option
|
||||
/// is enabled, then separate thread is started, in which two
|
||||
/// document is compared, and then signal \p comparationFinished,
|
||||
/// otherwise this function is blocking until comparation process
|
||||
/// is finished.
|
||||
void start();
|
||||
|
||||
/// Stops comparator engine. Result data are cleared.
|
||||
void stop();
|
||||
|
||||
/// Returns result of a comparation process
|
||||
const PDFDiffResult& getResult() const { return m_result; }
|
||||
|
||||
PDFDocumentTextFlowFactory::Algorithm getTextAnalysisAlgorithm() const;
|
||||
void setTextAnalysisAlgorithm(PDFDocumentTextFlowFactory::Algorithm textAnalysisAlgorithm);
|
||||
|
||||
signals:
|
||||
void comparationFinished();
|
||||
|
||||
private:
|
||||
|
||||
enum Steps
|
||||
{
|
||||
StepExtractContentLeftDocument,
|
||||
StepExtractContentRightDocument,
|
||||
StepMatchPages,
|
||||
StepExtractTextLeftDocument,
|
||||
StepExtractTextRightDocument,
|
||||
StepCompare,
|
||||
StepLast
|
||||
};
|
||||
|
||||
PDFDiffResult perform();
|
||||
void stepProgress();
|
||||
void performSteps(const std::vector<PDFInteger>& leftPages,
|
||||
const std::vector<PDFInteger>& rightPages,
|
||||
PDFDiffResult& result);
|
||||
void performPageMatching(const std::vector<PDFDiffPageContext>& leftPreparedPages,
|
||||
const std::vector<PDFDiffPageContext>& rightPreparedPages,
|
||||
PDFAlgorithmLongestCommonSubsequenceBase::Sequence& pageSequence,
|
||||
std::map<size_t, size_t>& pageMatches);
|
||||
void performCompare(const std::vector<PDFDiffPageContext>& leftPreparedPages,
|
||||
const std::vector<PDFDiffPageContext>& rightPreparedPages,
|
||||
PDFAlgorithmLongestCommonSubsequenceBase::Sequence& pageSequence,
|
||||
const std::map<size_t, size_t>& pageMatches,
|
||||
PDFDiffResult& result);
|
||||
void finalizeGraphicsPieces(PDFDiffPageContext& context);
|
||||
|
||||
void onComparationPerformed();
|
||||
|
||||
/// Calculates real epsilon for a page. Epsilon is used in page
|
||||
/// comparation process, where points closer than epsilon
|
||||
/// are recognized as equal.
|
||||
/// \param page Page
|
||||
PDFReal calculateEpsilonForPage(const PDFPage* page) const;
|
||||
|
||||
PDFProgress* m_progress;
|
||||
const PDFDocument* m_leftDocument;
|
||||
const PDFDocument* m_rightDocument;
|
||||
PDFClosedIntervalSet m_pagesForLeftDocument;
|
||||
PDFClosedIntervalSet m_pagesForRightDocument;
|
||||
Options m_options;
|
||||
PDFReal m_epsilon;
|
||||
std::atomic_bool m_cancelled;
|
||||
PDFDiffResult m_result;
|
||||
PDFDocumentTextFlowFactory::Algorithm m_textAnalysisAlgorithm;
|
||||
|
||||
QFuture<PDFDiffResult> m_future;
|
||||
std::optional<QFutureWatcher<PDFDiffResult>> m_futureWatcher;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFDIFF_H
|
@@ -99,6 +99,31 @@ PDFOperationResult PDFDocumentManipulator::assemble(const AssembledPages& pages)
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Creates an assembled-page description for every page of a document.
/// Each description keeps the page's own rotation and the size of its
/// media box.
/// \param documentIndex Document index stored into each assembled page
/// \param document Source document; it is dereferenced without a check,
///        so it must not be null
/// \returns Assembled pages, one per page of the catalog, in page order
PDFDocumentManipulator::AssembledPages PDFDocumentManipulator::createAllDocumentPages(int documentIndex, const PDFDocument* document)
{
    AssembledPages result;

    const size_t count = document->getCatalog()->getPageCount();
    for (size_t currentPage = 0; currentPage != count; ++currentPage)
    {
        const pdf::PDFPage* sourcePage = document->getCatalog()->getPage(currentPage);

        pdf::PDFDocumentManipulator::AssembledPage entry;
        entry.documentIndex = documentIndex;
        entry.imageIndex = -1;
        entry.pageIndex = currentPage;

        // Rotation and size are taken over from the source page unchanged
        entry.pageRotation = sourcePage->getPageRotation();
        entry.pageSize = sourcePage->getMediaBox().size();

        result.emplace_back(entry);
    }

    return result;
}
|
||||
|
||||
PDFDocumentManipulator::ProcessedPages PDFDocumentManipulator::processPages(PDFDocumentBuilder& documentBuilder, const AssembledPages& pages)
|
||||
{
|
||||
ProcessedPages processedPages;
|
||||
|
@@ -91,6 +91,8 @@ public:
|
||||
/// \returns Assembled document
|
||||
PDFDocument&& takeAssembledDocument() { return std::move(m_assembledDocument); }
|
||||
|
||||
static AssembledPages createAllDocumentPages(int documentIndex, const PDFDocument* document);
|
||||
|
||||
static constexpr AssembledPage createDocumentPage(int documentIndex, int pageIndex, QSizeF pageSize, PageRotation pageRotation) { return AssembledPage{ documentIndex, -1, pageIndex, pageSize, pageRotation}; }
|
||||
static constexpr AssembledPage createImagePage(int imageIndex, QSizeF pageSize, PageRotation pageRotation) { return AssembledPage{ -1, imageIndex, -1, pageSize, pageRotation}; }
|
||||
static constexpr AssembledPage createBlankPage(QSizeF pageSize, PageRotation pageRotation) { return AssembledPage{ -1, -1, -1, pageSize, pageRotation}; }
|
||||
|
@@ -89,21 +89,22 @@ struct PDFStructureTreeTextItem
|
||||
};
|
||||
|
||||
PDFStructureTreeTextItem() = default;
|
||||
PDFStructureTreeTextItem(Type type, const PDFStructureItem* item, QString text, PDFInteger pageIndex, QRectF boundingRect) :
|
||||
type(type), item(item), text(qMove(text)), pageIndex(pageIndex), boundingRect(boundingRect)
|
||||
PDFStructureTreeTextItem(Type type, const PDFStructureItem* item, QString text, PDFInteger pageIndex, QRectF boundingRect, std::vector<QRectF> characterBoundingRects) :
|
||||
type(type), item(item), text(qMove(text)), pageIndex(pageIndex), boundingRect(boundingRect), characterBoundingRects(std::move(characterBoundingRects))
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static PDFStructureTreeTextItem createText(QString text, PDFInteger pageIndex, QRectF boundingRect) { return PDFStructureTreeTextItem(Type::Text, nullptr, qMove(text), pageIndex, boundingRect); }
|
||||
static PDFStructureTreeTextItem createStartTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::StartTag, item, QString(), -1, QRectF()); }
|
||||
static PDFStructureTreeTextItem createEndTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::EndTag, item, QString(), -1, QRectF()); }
|
||||
static PDFStructureTreeTextItem createText(QString text, PDFInteger pageIndex, QRectF boundingRect, std::vector<QRectF> characterBoundingRects) { return PDFStructureTreeTextItem(Type::Text, nullptr, qMove(text), pageIndex, boundingRect, std::move(characterBoundingRects)); }
|
||||
static PDFStructureTreeTextItem createStartTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::StartTag, item, QString(), -1, QRectF(), { }); }
|
||||
static PDFStructureTreeTextItem createEndTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::EndTag, item, QString(), -1, QRectF(), { }); }
|
||||
|
||||
Type type = Type::Text;
|
||||
const PDFStructureItem* item = nullptr;
|
||||
QString text;
|
||||
PDFInteger pageIndex = -1;
|
||||
QRectF boundingRect;
|
||||
std::vector<QRectF> characterBoundingRects;
|
||||
};
|
||||
|
||||
using PDFStructureTreeTextSequence = std::vector<PDFStructureTreeTextItem>;
|
||||
@@ -147,6 +148,7 @@ public:
|
||||
QRectF boundingRect;
|
||||
PDFInteger pageIndex = -1;
|
||||
QString text;
|
||||
std::vector<QRectF> characterBoundingRects;
|
||||
};
|
||||
|
||||
using TextItems = std::vector<TextItem>;
|
||||
@@ -204,7 +206,6 @@ protected:
|
||||
virtual void performOutputCharacter(const PDFTextCharacterInfo& info) override;
|
||||
virtual void performMarkedContentBegin(const QByteArray& tag, const PDFObject& properties) override;
|
||||
virtual void performMarkedContentEnd() override;
|
||||
virtual void performPathPainting(const QPainterPath& path, bool stroke, bool fill, bool text, Qt::FillRule fillRule) override;
|
||||
|
||||
private:
|
||||
const PDFStructureItem* getStructureTreeItemFromMCID(PDFInteger mcid) const;
|
||||
@@ -232,33 +233,25 @@ private:
|
||||
QStringList m_unmatchedText;
|
||||
PDFStructureTreeTextExtractor::Options m_extractorOptions;
|
||||
PDFInteger m_pageIndex;
|
||||
std::vector<QRectF> m_characterBoundingRects;
|
||||
};
|
||||
|
||||
void PDFStructureTreeTextContentProcessor::performPathPainting(const QPainterPath& path, bool stroke, bool fill, bool text, Qt::FillRule fillRule)
|
||||
{
|
||||
if (!text)
|
||||
{
|
||||
// Jakub Melka: This should not occur
|
||||
return;
|
||||
}
|
||||
|
||||
if (!m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::BoundingBoxes))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
Q_UNUSED(stroke);
|
||||
Q_UNUSED(fill);
|
||||
Q_UNUSED(fillRule);
|
||||
|
||||
QMatrix matrix = getCurrentWorldMatrix();
|
||||
QPainterPath worldPath = matrix.map(path);
|
||||
m_currentBoundingBox = m_currentBoundingBox.united(worldPath.controlPointRect());
|
||||
}
|
||||
|
||||
void PDFStructureTreeTextContentProcessor::finishText()
|
||||
{
|
||||
m_currentText = m_currentText.trimmed();
|
||||
QString trimmedText = m_currentText.trimmed();
|
||||
const int index = m_currentText.indexOf(trimmedText);
|
||||
Q_ASSERT(index != -1);
|
||||
if (trimmedText.size() < m_currentText.size())
|
||||
{
|
||||
// Fix character bounding boxes...
|
||||
if (m_characterBoundingRects.size() == m_currentText.size())
|
||||
{
|
||||
std::vector<QRectF> boundingRects(std::next(m_characterBoundingRects.cbegin(), index), std::next(m_characterBoundingRects.cbegin(), index + trimmedText.length()));
|
||||
m_characterBoundingRects = std::move(boundingRects);
|
||||
}
|
||||
m_currentText = std::move(trimmedText);
|
||||
}
|
||||
|
||||
if (!m_currentText.isEmpty() && (!m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::SkipArtifact) || !isArtifact()))
|
||||
{
|
||||
if (m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::AdjustReversedText) && isReversedText())
|
||||
@@ -270,11 +263,14 @@ void PDFStructureTreeTextContentProcessor::finishText()
|
||||
reversed.push_back(*it);
|
||||
}
|
||||
m_currentText = qMove(reversed);
|
||||
std::reverse(m_characterBoundingRects.begin(), m_characterBoundingRects.end());
|
||||
}
|
||||
m_textSequence.emplace_back(PDFStructureTreeTextItem::createText(qMove(m_currentText), m_pageIndex, m_currentBoundingBox));
|
||||
Q_ASSERT(m_currentText.size() == m_characterBoundingRects.size() || m_characterBoundingRects.empty());
|
||||
m_textSequence.emplace_back(PDFStructureTreeTextItem::createText(std::move(m_currentText), m_pageIndex, m_currentBoundingBox, std::move(m_characterBoundingRects)));
|
||||
}
|
||||
m_currentText = QString();
|
||||
m_currentBoundingBox = QRectF();
|
||||
m_characterBoundingRects.clear();
|
||||
}
|
||||
|
||||
bool PDFStructureTreeTextContentProcessor::isArtifact() const
|
||||
@@ -346,6 +342,7 @@ void PDFStructureTreeTextContentProcessor::performMarkedContentEnd()
|
||||
m_unmatchedText << qMove(m_currentText);
|
||||
}
|
||||
m_currentBoundingBox = QRectF();
|
||||
m_characterBoundingRects.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -374,8 +371,6 @@ bool PDFStructureTreeTextContentProcessor::isContentKindSuppressed(ContentKind k
|
||||
switch (kind)
|
||||
{
|
||||
case ContentKind::Text:
|
||||
return !m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::BoundingBoxes);
|
||||
|
||||
case ContentKind::Shapes:
|
||||
case ContentKind::Images:
|
||||
case ContentKind::Shading:
|
||||
@@ -401,6 +396,18 @@ void PDFStructureTreeTextContentProcessor::performOutputCharacter(const PDFTextC
|
||||
if (!info.character.isNull() && info.character != QChar(QChar::SoftHyphen))
|
||||
{
|
||||
m_currentText.push_back(info.character);
|
||||
|
||||
QPainterPath worldPath = info.matrix.map(info.outline);
|
||||
if (!worldPath.isEmpty())
|
||||
{
|
||||
QRectF boundingRect = worldPath.controlPointRect();
|
||||
m_currentBoundingBox = m_currentBoundingBox.united(boundingRect);
|
||||
m_characterBoundingRects.push_back(boundingRect);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_characterBoundingRects.push_back(QRectF());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -464,17 +471,26 @@ void PDFStructureTreeTextExtractor::perform(const std::vector<PDFInteger>& pageI
|
||||
switch (sequenceItem.type)
|
||||
{
|
||||
case PDFStructureTreeTextItem::Type::StartTag:
|
||||
{
|
||||
stack.push(sequenceItem.item);
|
||||
break;
|
||||
}
|
||||
case PDFStructureTreeTextItem::Type::EndTag:
|
||||
{
|
||||
stack.pop();
|
||||
break;
|
||||
}
|
||||
case PDFStructureTreeTextItem::Type::Text:
|
||||
{
|
||||
if (!stack.empty())
|
||||
{
|
||||
m_textForItems[stack.top()].emplace_back(TextItem{ sequenceItem.boundingRect, sequenceItem.pageIndex, sequenceItem.text });
|
||||
m_textForItems[stack.top()].emplace_back(TextItem{ sequenceItem.boundingRect, sequenceItem.pageIndex, sequenceItem.text, sequenceItem.characterBoundingRects });
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -598,7 +614,7 @@ void PDFStructureTreeTextFlowCollector::visitStructureElement(const PDFStructure
|
||||
for (const auto& textItem : m_extractor->getText(structureElement))
|
||||
{
|
||||
markHasContent();
|
||||
m_items->push_back(PDFDocumentTextFlow::Item{ textItem.boundingRect, textItem.pageIndex, textItem.text, PDFDocumentTextFlow::Text });
|
||||
m_items->push_back(PDFDocumentTextFlow::Item{ textItem.boundingRect, textItem.pageIndex, textItem.text, PDFDocumentTextFlow::Text, textItem.characterBoundingRects });
|
||||
}
|
||||
|
||||
acceptChildren(structureElement);
|
||||
@@ -688,7 +704,7 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume
|
||||
flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1), PDFDocumentTextFlow::PageStart });
|
||||
for (const PDFTextFlow& textFlow : textFlows)
|
||||
{
|
||||
flowItems.emplace_back(PDFDocumentTextFlow::Item{ textFlow.getBoundingBox(), pageIndex, textFlow.getText(), PDFDocumentTextFlow::Text });
|
||||
flowItems.emplace_back(PDFDocumentTextFlow::Item{ textFlow.getBoundingBox(), pageIndex, textFlow.getText(), PDFDocumentTextFlow::Text, textFlow.getBoundingBoxes() });
|
||||
}
|
||||
flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, QString(), PDFDocumentTextFlow::PageEnd });
|
||||
|
||||
@@ -748,7 +764,7 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume
|
||||
{
|
||||
if (sequenceItem.type == PDFStructureTreeTextItem::Type::Text)
|
||||
{
|
||||
flowItems.emplace_back(PDFDocumentTextFlow::Item{ sequenceItem.boundingRect, pageIndex, sequenceItem.text, PDFDocumentTextFlow::Text });
|
||||
flowItems.emplace_back(PDFDocumentTextFlow::Item{ sequenceItem.boundingRect, pageIndex, sequenceItem.text, PDFDocumentTextFlow::Text, sequenceItem.characterBoundingRects });
|
||||
}
|
||||
}
|
||||
flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, QString(), PDFDocumentTextFlow::PageEnd });
|
||||
@@ -1040,4 +1056,36 @@ void PDFDocumentTextFlowEditor::updateModifiedFlag(size_t index)
|
||||
item->editedItemFlags.setFlag(Modified, isModified);
|
||||
}
|
||||
|
||||
/// Splits this text flow into separate text flows keyed by page index.
/// Only items which have at least one flag in common with \p mask are
/// copied; the relative order of items within a page is preserved.
/// \param mask Flags selecting the items to keep
/// \returns Map from page index to the text flow of that page
std::map<PDFInteger, PDFDocumentTextFlow> PDFDocumentTextFlow::split(Flags mask) const
{
    std::map<PDFInteger, PDFDocumentTextFlow> flowsByPage;

    for (const Item& flowItem : m_items)
    {
        if (!(flowItem.flags & mask))
        {
            // Item is masked out
            continue;
        }

        flowsByPage[flowItem.pageIndex].addItem(flowItem);
    }

    return flowsByPage;
}
|
||||
|
||||
void PDFDocumentTextFlow::append(const PDFDocumentTextFlow& textFlow)
|
||||
{
|
||||
m_items.insert(m_items.end(), textFlow.m_items.cbegin(), textFlow.m_items.cend());
|
||||
}
|
||||
|
||||
/// Returns the text of all items joined into a single string. The text
/// of each item is trimmed first and the pieces are separated by a single
/// space.
QString PDFDocumentTextFlow::getText() const
{
    QStringList trimmedTexts;

    for (const Item& flowItem : m_items)
    {
        trimmedTexts.append(flowItem.text.trimmed());
    }

    return trimmedTexts.join(" ");
}
|
||||
|
||||
} // namespace pdf
|
||||
|
@@ -56,6 +56,7 @@ public:
|
||||
PDFInteger pageIndex = 0;
|
||||
QString text;
|
||||
Flags flags = None;
|
||||
std::vector<QRectF> characterBoundingRects;
|
||||
|
||||
bool isText() const { return flags.testFlag(Text); }
|
||||
bool isSpecial() const { return !isText(); }
|
||||
@@ -71,6 +72,9 @@ public:
|
||||
|
||||
}
|
||||
|
||||
/// Add text item
|
||||
void addItem(Item item) { m_items.emplace_back(std::move(item)); }
|
||||
|
||||
const Items& getItems() const { return m_items; }
|
||||
|
||||
/// Returns item at a given index
|
||||
@@ -83,6 +87,18 @@ public:
|
||||
/// Returns true, if text flow is empty
|
||||
bool isEmpty() const { return m_items.empty(); }
|
||||
|
||||
/// Split text flow to pages using given mask. Items, which
|
||||
/// are masked out, are not added.
|
||||
/// \param mask Mask
|
||||
std::map<PDFInteger, PDFDocumentTextFlow> split(Flags mask) const;
|
||||
|
||||
/// Appends document text flow to this one
|
||||
/// \param textFlow Text flow
|
||||
void append(const PDFDocumentTextFlow& textFlow);
|
||||
|
||||
/// Returns text concatenated from all items
|
||||
QString getText() const;
|
||||
|
||||
private:
|
||||
Items m_items;
|
||||
};
|
||||
|
@@ -154,6 +154,20 @@ void PDFDrawSpaceController::setPageRotation(PageRotation pageRotation)
|
||||
}
|
||||
}
|
||||
|
||||
/// Stores new custom layout items. Nothing happens when the items are
/// equal to the ones already stored. The layout is recalculated only
/// when the custom page layout mode is currently active.
/// \param customLayoutItems New custom layout items
void PDFDrawSpaceController::setCustomLayout(LayoutItems customLayoutItems)
{
    if (m_customLayoutItems == customLayoutItems)
    {
        // No change
        return;
    }

    m_customLayoutItems = std::move(customLayoutItems);

    if (m_pageLayoutMode != PageLayout::Custom)
    {
        // Items are only stored; they are not used by the active layout mode
        return;
    }

    recalculate();
}
|
||||
|
||||
void PDFDrawSpaceController::recalculate()
|
||||
{
|
||||
if (!m_document)
|
||||
@@ -181,7 +195,7 @@ void PDFDrawSpaceController::recalculate()
|
||||
QSizeF pageSize = PDFPage::getRotatedBox(catalog->getPage(leftIndex)->getRotatedMediaBoxMM(), m_pageRotation).size();
|
||||
PDFReal xPos = -pageSize.width() - m_horizontalSpacingMM * 0.5;
|
||||
QRectF rect(xPos, yPos, pageSize.width(), pageSize.height());
|
||||
m_layoutItems.emplace_back(blockIndex, leftIndex, rect);
|
||||
m_layoutItems.emplace_back(blockIndex, leftIndex, -1, rect);
|
||||
yPosAdvance = qMax(yPosAdvance, pageSize.height());
|
||||
boundingRect = boundingRect.united(rect);
|
||||
}
|
||||
@@ -191,7 +205,7 @@ void PDFDrawSpaceController::recalculate()
|
||||
QSizeF pageSize = PDFPage::getRotatedBox(catalog->getPage(rightIndex)->getRotatedMediaBoxMM(), m_pageRotation).size();
|
||||
PDFReal xPos = m_horizontalSpacingMM * 0.5;
|
||||
QRectF rect(xPos, yPos, pageSize.width(), pageSize.height());
|
||||
m_layoutItems.emplace_back(blockIndex, rightIndex, rect);
|
||||
m_layoutItems.emplace_back(blockIndex, rightIndex, -1, rect);
|
||||
yPosAdvance = qMax(yPosAdvance, pageSize.height());
|
||||
boundingRect = boundingRect.united(rect);
|
||||
}
|
||||
@@ -253,7 +267,7 @@ void PDFDrawSpaceController::recalculate()
|
||||
{
|
||||
QSizeF pageSize = PDFPage::getRotatedBox(catalog->getPage(i)->getRotatedMediaBoxMM(), m_pageRotation).size();
|
||||
QRectF rect(-pageSize.width() * 0.5, -pageSize.height() * 0.5, pageSize.width(), pageSize.height());
|
||||
m_layoutItems.emplace_back(i, i, rect);
|
||||
m_layoutItems.emplace_back(i, i, -1, rect);
|
||||
m_blockItems.emplace_back(rect);
|
||||
}
|
||||
|
||||
@@ -274,7 +288,7 @@ void PDFDrawSpaceController::recalculate()
|
||||
// Top of current page is at yPos.
|
||||
QSizeF pageSize = PDFPage::getRotatedBox(catalog->getPage(i)->getRotatedMediaBoxMM(), m_pageRotation).size();
|
||||
QRectF rect(-pageSize.width() * 0.5, yPos, pageSize.width(), pageSize.height());
|
||||
m_layoutItems.emplace_back(0, i, rect);
|
||||
m_layoutItems.emplace_back(0, i, -1, rect);
|
||||
yPos += pageSize.height() + m_verticalSpacingMM;
|
||||
boundingRectangle = boundingRectangle.united(rect);
|
||||
}
|
||||
@@ -361,6 +375,54 @@ void PDFDrawSpaceController::recalculate()
|
||||
break;
|
||||
}
|
||||
|
||||
case PageLayout::Custom:
|
||||
{
|
||||
m_layoutItems = m_customLayoutItems;
|
||||
|
||||
// We do not support page rotation for custom layout
|
||||
Q_ASSERT(m_pageRotation == PageRotation::None);
|
||||
|
||||
// Assure, that layout items are sorted by block and page group
|
||||
auto comparator = [](const LayoutItem& l, const LayoutItem& r)
|
||||
{
|
||||
return std::tie(l.blockIndex, l.groupIndex) < std::tie(r.blockIndex, r.groupIndex);
|
||||
};
|
||||
std::stable_sort(m_layoutItems.begin(), m_layoutItems.end(), comparator);
|
||||
|
||||
// Now, compute blocks
|
||||
if (!m_layoutItems.empty())
|
||||
{
|
||||
m_blockItems.reserve(m_layoutItems.back().blockIndex + 1);
|
||||
|
||||
QRectF currentBoundingRect;
|
||||
PDFInteger blockIndex = -1;
|
||||
|
||||
for (const LayoutItem& layoutItem : m_layoutItems)
|
||||
{
|
||||
if (blockIndex != layoutItem.blockIndex)
|
||||
{
|
||||
blockIndex = layoutItem.blockIndex;
|
||||
|
||||
if (currentBoundingRect.isValid())
|
||||
{
|
||||
m_blockItems.push_back(LayoutBlock(currentBoundingRect));
|
||||
currentBoundingRect = QRectF();
|
||||
}
|
||||
}
|
||||
|
||||
currentBoundingRect = currentBoundingRect.united(layoutItem.pageRectMM);
|
||||
}
|
||||
|
||||
if (currentBoundingRect.isValid())
|
||||
{
|
||||
m_blockItems.push_back(LayoutBlock(currentBoundingRect));
|
||||
currentBoundingRect = QRectF();
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
Q_ASSERT(false);
|
||||
@@ -504,7 +566,7 @@ void PDFDrawWidgetProxy::update()
|
||||
m_layout.items.reserve(items.size());
|
||||
for (const PDFDrawSpaceController::LayoutItem& item : items)
|
||||
{
|
||||
m_layout.items.emplace_back(item.pageIndex, fromDeviceSpace(item.pageRectMM).toRect());
|
||||
m_layout.items.emplace_back(item.pageIndex, item.groupIndex, fromDeviceSpace(item.pageRectMM).toRect());
|
||||
}
|
||||
|
||||
m_layout.blockRect = fromDeviceSpace(rectangle).toRect();
|
||||
@@ -700,8 +762,13 @@ void PDFDrawWidgetProxy::drawPages(QPainter* painter, QRect rect, PDFRenderer::F
|
||||
QRect placedRect = item.pageRect.translated(m_horizontalOffset - m_layout.blockRect.left(), m_verticalOffset - m_layout.blockRect.top());
|
||||
if (placedRect.intersects(rect))
|
||||
{
|
||||
GroupInfo groupInfo = getGroupInfo(item.groupIndex);
|
||||
|
||||
// Clear the page space by paper color
|
||||
painter->fillRect(placedRect, paperColor);
|
||||
if (groupInfo.drawPaper)
|
||||
{
|
||||
painter->fillRect(placedRect, paperColor);
|
||||
}
|
||||
|
||||
const PDFPrecompiledPage* compiledPage = m_compiler->getCompiledPage(item.pageIndex, true);
|
||||
if (compiledPage && compiledPage->isValid())
|
||||
@@ -711,7 +778,7 @@ void PDFDrawWidgetProxy::drawPages(QPainter* painter, QRect rect, PDFRenderer::F
|
||||
|
||||
const PDFPage* page = m_controller->getDocument()->getCatalog()->getPage(item.pageIndex);
|
||||
QMatrix matrix = createPagePointToDevicePointMatrix(page, placedRect) * baseMatrix;
|
||||
compiledPage->draw(painter, page->getCropBox(), matrix, features);
|
||||
compiledPage->draw(painter, page->getCropBox(), matrix, features, groupInfo.transparency);
|
||||
PDFTextLayoutGetter layoutGetter = m_textLayoutCompiler->getTextLayoutLazy(item.pageIndex);
|
||||
|
||||
// Draw text blocks/text lines, if it is enabled
|
||||
@@ -939,6 +1006,22 @@ PDFWidgetSnapshot PDFDrawWidgetProxy::getSnapshot() const
|
||||
return snapshot;
|
||||
}
|
||||
|
||||
/// Sets the rendering settings of a page group. Settings equal to a
/// default-constructed GroupInfo are not stored; any existing entry for
/// the group is removed instead.
/// \param groupIndex Group index
/// \param drawPaper Draw background paper
/// \param transparency Page graphics transparency
void PDFDrawWidgetProxy::setGroupTransparency(PDFInteger groupIndex, bool drawPaper, PDFReal transparency)
{
    GroupInfo requested;
    requested.drawPaper = drawPaper;
    requested.transparency = transparency;

    const bool isDefault = (requested == GroupInfo());
    if (!isDefault)
    {
        m_groupInfos[groupIndex] = requested;
        return;
    }

    // Default settings are represented by a missing entry
    m_groupInfos.erase(groupIndex);
}
|
||||
|
||||
QRect PDFDrawWidgetProxy::getPagesIntersectingRectBoundingBox(QRect rect) const
|
||||
{
|
||||
QRect resultRect;
|
||||
@@ -1156,6 +1239,15 @@ void PDFDrawWidgetProxy::setPageLayout(PageLayout pageLayout)
|
||||
}
|
||||
}
|
||||
|
||||
/// Passes custom layout items to the draw space controller and emits
/// pageLayoutChanged(). Does nothing when the controller already holds
/// equal items.
/// \param layoutItems Layout items
void PDFDrawWidgetProxy::setCustomPageLayout(PDFDrawSpaceController::LayoutItems layoutItems)
{
    if (m_controller->getCustomLayout() == layoutItems)
    {
        return;
    }

    m_controller->setCustomLayout(std::move(layoutItems));
    emit pageLayoutChanged();
}
|
||||
|
||||
QRectF PDFDrawWidgetProxy::fromDeviceSpace(const QRectF& rect) const
|
||||
{
|
||||
Q_ASSERT(rect.isValid());
|
||||
@@ -1185,6 +1277,9 @@ bool PDFDrawWidgetProxy::isBlockMode() const
|
||||
case PageLayout::TwoPagesLeft:
|
||||
case PageLayout::TwoPagesRight:
|
||||
return true;
|
||||
|
||||
case PageLayout::Custom:
|
||||
return m_controller->getBlockCount() > 1;
|
||||
}
|
||||
|
||||
Q_ASSERT(false);
|
||||
@@ -1217,6 +1312,10 @@ void PDFDrawWidgetProxy::prefetchPages(PDFInteger pageIndex)
|
||||
prefetchCount = 2;
|
||||
break;
|
||||
|
||||
case PageLayout::Custom:
|
||||
prefetchCount = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
Q_ASSERT(false);
|
||||
break;
|
||||
@@ -1307,6 +1406,17 @@ void PDFDrawWidgetProxy::updateVerticalScrollbarFromOffset()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the rendering settings stored for a page group, or
/// default-constructed settings when the group has no entry.
/// NOTE(review): the parameter is int, while m_groupInfos is keyed by
/// PDFInteger - confirm that group indices always fit into int.
/// \param groupIndex Group index
PDFDrawWidgetProxy::GroupInfo PDFDrawWidgetProxy::getGroupInfo(int groupIndex) const
{
    const auto found = m_groupInfos.find(groupIndex);
    return (found == m_groupInfos.cend()) ? GroupInfo() : found->second;
}
|
||||
|
||||
PDFWidgetAnnotationManager* PDFDrawWidgetProxy::getAnnotationManager() const
|
||||
{
|
||||
return m_widget->getAnnotationManager();
|
||||
|
@@ -77,14 +77,17 @@ public:
|
||||
/// page and page rectangle, in which the page is contained.
|
||||
struct LayoutItem
|
||||
{
|
||||
constexpr inline explicit LayoutItem() : blockIndex(-1), pageIndex(-1) { }
|
||||
constexpr inline explicit LayoutItem(PDFInteger blockIndex, PDFInteger pageIndex, const QRectF& pageRectMM) :
|
||||
blockIndex(blockIndex), pageIndex(pageIndex), pageRectMM(pageRectMM) { }
|
||||
constexpr inline explicit LayoutItem() : blockIndex(-1), pageIndex(-1), groupIndex(-1) { }
|
||||
constexpr inline explicit LayoutItem(PDFInteger blockIndex, PDFInteger pageIndex, PDFInteger groupIndex, const QRectF& pageRectMM) :
|
||||
blockIndex(blockIndex), pageIndex(pageIndex), groupIndex(groupIndex), pageRectMM(pageRectMM) { }
|
||||
|
||||
bool operator ==(const LayoutItem&) const = default;
|
||||
|
||||
bool isValid() const { return pageIndex != -1; }
|
||||
|
||||
PDFInteger blockIndex;
|
||||
PDFInteger pageIndex;
|
||||
PDFInteger groupIndex; ///< Page group index
|
||||
QRectF pageRectMM;
|
||||
};
|
||||
|
||||
@@ -123,6 +126,15 @@ public:
|
||||
/// Sets page rotation
|
||||
void setPageRotation(PageRotation pageRotation);
|
||||
|
||||
/// Set custom layout. Custom layout provides a way how to define
|
||||
/// custom page layout, including blocks. Block indices must be properly defined,
|
||||
/// that means block indices must start at zero and must be contiguous. If these
|
||||
/// criteria are not fulfilled, behaviour is undefined.
|
||||
void setCustomLayout(LayoutItems customLayoutItems);
|
||||
|
||||
/// Returns custom layout
|
||||
const LayoutItems& getCustomLayout() const { return m_customLayoutItems; }
|
||||
|
||||
signals:
|
||||
void drawSpaceChanged();
|
||||
void repaintNeeded();
|
||||
@@ -155,6 +167,7 @@ private:
|
||||
PDFReal m_verticalSpacingMM;
|
||||
PDFReal m_horizontalSpacingMM;
|
||||
PageRotation m_pageRotation;
|
||||
LayoutItems m_customLayoutItems;
|
||||
|
||||
/// Font cache
|
||||
PDFFontCache m_fontCache;
|
||||
@@ -282,6 +295,11 @@ public:
|
||||
/// \param pageLayout Page layout
|
||||
void setPageLayout(PageLayout pageLayout);
|
||||
|
||||
/// Sets custom page layout. If this function is used, page layout mode
|
||||
/// must be set to 'Custom'.
|
||||
/// \param layoutItems Layout items
|
||||
void setCustomPageLayout(PDFDrawSpaceController::LayoutItems layoutItems);
|
||||
|
||||
/// Returns the page layout
|
||||
PageLayout getPageLayout() const { return m_controller->getPageLayout(); }
|
||||
|
||||
@@ -354,13 +372,20 @@ public:
|
||||
|
||||
/// Returns snapshot of current view area
|
||||
PDFWidgetSnapshot getSnapshot() const;
|
||||
|
||||
/// Sets page group transparency settings. All pages with a given group index
|
||||
/// will be displayed with these transparency settings.
|
||||
/// \param groupIndex Group index
|
||||
/// \param drawPaper Draw background paper
|
||||
/// \param transparency Page graphics transparency
|
||||
void setGroupTransparency(PDFInteger groupIndex, bool drawPaper = true, PDFReal transparency = 1.0);
|
||||
|
||||
PDFWidgetAnnotationManager* getAnnotationManager() const;
|
||||
|
||||
signals:
|
||||
void drawSpaceChanged();
|
||||
void pageLayoutChanged();
|
||||
void renderingError(PDFInteger pageIndex, const QList<PDFRenderError>& errors);
|
||||
void renderingError(pdf::PDFInteger pageIndex, const QList<pdf::PDFRenderError>& errors);
|
||||
void repaintNeeded();
|
||||
void pageImageChanged(bool all, const std::vector<PDFInteger>& pages);
|
||||
void textLayoutChanged();
|
||||
@@ -368,12 +393,13 @@ signals:
|
||||
private:
|
||||
struct LayoutItem
|
||||
{
|
||||
constexpr inline explicit LayoutItem() : pageIndex(-1) { }
|
||||
constexpr inline explicit LayoutItem(PDFInteger pageIndex, const QRect& pageRect) :
|
||||
pageIndex(pageIndex), pageRect(pageRect) { }
|
||||
constexpr inline explicit LayoutItem() : pageIndex(-1), groupIndex(-1) { }
|
||||
constexpr inline explicit LayoutItem(PDFInteger pageIndex, PDFInteger groupIndex, const QRect& pageRect) :
|
||||
pageIndex(pageIndex), groupIndex(groupIndex), pageRect(pageRect) { }
|
||||
|
||||
|
||||
PDFInteger pageIndex;
|
||||
PDFInteger groupIndex; ///< Used to create group of pages (for transparency and overlay)
|
||||
QRect pageRect;
|
||||
};
|
||||
|
||||
@@ -389,6 +415,14 @@ private:
|
||||
QRect blockRect;
|
||||
};
|
||||
|
||||
struct GroupInfo
|
||||
{
|
||||
bool operator==(const GroupInfo&) const = default;
|
||||
|
||||
bool drawPaper = true;
|
||||
PDFReal transparency = 1.0;
|
||||
};
|
||||
|
||||
static constexpr size_t INVALID_BLOCK_INDEX = std::numeric_limits<size_t>::max();
|
||||
|
||||
// Minimal/maximal zoom is from 8% to 6400 %, according to the PDF 1.7 Reference,
|
||||
@@ -413,6 +447,8 @@ private:
|
||||
void updateHorizontalScrollbarFromOffset();
|
||||
void updateVerticalScrollbarFromOffset();
|
||||
|
||||
GroupInfo getGroupInfo(int groupIndex) const;
|
||||
|
||||
template<typename T>
|
||||
struct Range
|
||||
{
|
||||
@@ -501,6 +537,11 @@ private:
|
||||
|
||||
/// Surface format for OpenGL
|
||||
QSurfaceFormat m_surfaceFormat;
|
||||
|
||||
/// Page group info for rendering. Group of pages
|
||||
/// can be rendered with transparency or without paper
|
||||
/// as overlay.
|
||||
std::map<PDFInteger, GroupInfo> m_groupInfos;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
@@ -103,7 +103,7 @@ public:
|
||||
void addInputInterface(IDrawWidgetInputInterface* inputInterface);
|
||||
|
||||
signals:
|
||||
void pageRenderingErrorsChanged(PDFInteger pageIndex, int errorsCount);
|
||||
void pageRenderingErrorsChanged(pdf::PDFInteger pageIndex, int errorsCount);
|
||||
|
||||
private:
|
||||
void updateRendererImpl();
|
||||
|
@@ -107,7 +107,7 @@ public:
|
||||
// into buckets of appropriate size.
|
||||
if (scope != Scope::Page)
|
||||
{
|
||||
const int buckets = 32 * QThread::idealThreadCount();
|
||||
const int buckets = 8 * QThread::idealThreadCount();
|
||||
bucketSize = qMax(1, count / buckets);
|
||||
}
|
||||
|
||||
|
@@ -20,6 +20,7 @@
|
||||
#include "pdfcms.h"
|
||||
|
||||
#include <QPainter>
|
||||
#include <QCryptographicHash>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
@@ -500,13 +501,18 @@ void PDFPrecompiledPageGenerator::setCompositionMode(QPainter::CompositionMode m
|
||||
m_precompiledPage->addSetCompositionMode(mode);
|
||||
}
|
||||
|
||||
void PDFPrecompiledPage::draw(QPainter* painter, const QRectF& cropBox, const QMatrix& pagePointToDevicePointMatrix, PDFRenderer::Features features) const
|
||||
void PDFPrecompiledPage::draw(QPainter* painter,
|
||||
const QRectF& cropBox,
|
||||
const QMatrix& pagePointToDevicePointMatrix,
|
||||
PDFRenderer::Features features,
|
||||
PDFReal opacity) const
|
||||
{
|
||||
Q_ASSERT(painter);
|
||||
Q_ASSERT(pagePointToDevicePointMatrix.isInvertible());
|
||||
|
||||
painter->save();
|
||||
painter->setWorldMatrix(QMatrix());
|
||||
painter->setOpacity(opacity);
|
||||
|
||||
if (features.testFlag(PDFRenderer::ClipToCropBox))
|
||||
{
|
||||
@@ -831,4 +837,224 @@ void PDFPrecompiledPage::finalize(qint64 compilingTimeNS, QList<PDFRenderError>
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a descriptor (type, bounding rectangle, page path and SHA-512
/// hashes) for every path, image and mesh drawing instruction of this page,
/// in instruction order. Coordinates are quantized by \p epsilon before they
/// are hashed, so that hashes of two pieces can be compared for equality.
/// \param mediaBox Page's media box, used to size the raster on which meshes are painted
/// \param epsilon Quantization step of coordinates; a (fuzzy) zero value is replaced by 1e-6
/// \returns One info per DrawPath / DrawImage / DrawMesh instruction
PDFPrecompiledPage::GraphicPieceInfos PDFPrecompiledPage::calculateGraphicPieceInfos(QRectF mediaBox,
                                                                                     PDFReal epsilon) const
{
    GraphicPieceInfos infos;

    // Only the world matrix must be tracked across save/restore instructions
    struct State
    {
        QMatrix matrix;
    };
    std::stack<State> stateStack;
    stateStack.emplace();

    // Check, if epsilon is not too small (we divide by it below)
    if (qFuzzyIsNull(epsilon))
    {
        epsilon = 0.000001;
    }
    const PDFReal factor = 1.0 / epsilon;

    // NOTE(review): qFloor() and qRound() return int, so (coordinate * factor)
    // must fit into the int range - with very small epsilon and large page
    // coordinates it may not. Kept as is to preserve the hashed values.
    const auto quantizeByFloor = [](PDFReal value) -> PDFReal { return qFloor(value); };
    const auto quantizeByRound = [](PDFReal value) -> PDFReal { return qRound(value); };

    // Writes quantized coordinates and type of each path element to the stream
    const auto serializePathElements = [factor](QDataStream& stream, const QPainterPath& path, auto quantize)
    {
        const int elementCount = path.elementCount();
        for (int i = 0; i < elementCount; ++i)
        {
            QPainterPath::Element element = path.elementAt(i);

            PDFReal roundedX = quantize(element.x * factor);
            PDFReal roundedY = quantize(element.y * factor);

            stream << roundedX;
            stream << roundedY;
            stream << element.type;
        }
    };

    // Computes SHA-512 of serialized data and copies it to the target array
    const auto storeHash = [](const QByteArray& serializedData, auto& target)
    {
        const QByteArray digest = QCryptographicHash::hash(serializedData, QCryptographicHash::Sha512);
        Q_ASSERT(QCryptographicHash::hashLength(QCryptographicHash::Sha512) == 64);

        const size_t size = qMin<size_t>(digest.length(), target.size());
        std::copy(digest.data(), digest.data() + size, target.data());
    };

    // Raster reused for all meshes, created lazily on first mesh instruction
    QImage shadingTestImage;

    // Process all instructions
    for (const Instruction& instruction : m_instructions)
    {
        switch (instruction.type)
        {
            case InstructionType::DrawPath:
            {
                const PathPaintData& data = m_paths[instruction.dataIndex];

                // Translate path to page coordinates
                const QPainterPath pagePath = stateStack.top().matrix.map(data.path);

                GraphicPieceInfo info;
                info.type = data.isText ? GraphicPieceInfo::Type::Text : GraphicPieceInfo::Type::VectorGraphics;
                info.boundingRect = pagePath.controlPointRect();
                info.pagePath = pagePath;

                // Serialize data; stream is scoped, so it is closed before hashing
                QByteArray serializedPath;
                {
                    QDataStream stream(&serializedPath, QIODevice::WriteOnly);

                    stream << data.isText;
                    stream << data.pen;
                    stream << data.brush;

                    serializePathElements(stream, pagePath, quantizeByFloor);
                }

                storeHash(serializedPath, info.hash);
                infos.emplace_back(std::move(info));
                break;
            }

            case InstructionType::DrawImage:
            {
                const ImageData& data = m_images[instruction.dataIndex];
                const QImage& image = data.image;

                // Image position: unit square mapped by the current world matrix
                QPainterPath pagePath;
                pagePath.addRect(0, 0, 1, 1);
                pagePath = stateStack.top().matrix.map(pagePath);

                GraphicPieceInfo info;
                info.type = GraphicPieceInfo::Type::Image;
                info.boundingRect = pagePath.controlPointRect();
                info.pagePath = pagePath;

                // Serialize data - position together with pixels for the main
                // hash, pixels alone for the image hash.
                QByteArray serializedPath;
                QByteArray serializedImage;
                {
                    QDataStream stream(&serializedPath, QIODevice::WriteOnly);
                    QDataStream streamImage(&serializedImage, QIODevice::WriteOnly);

                    serializePathElements(stream, pagePath, quantizeByRound);

                    // serialize image data
                    stream.writeBytes(reinterpret_cast<const char*>(image.bits()), image.sizeInBytes());
                    streamImage.writeBytes(reinterpret_cast<const char*>(image.bits()), image.sizeInBytes());
                }

                storeHash(serializedPath, info.hash);
                storeHash(serializedImage, info.imageHash);
                infos.emplace_back(std::move(info));
                break;
            }

            case InstructionType::DrawMesh:
            {
                const MeshPaintData& data = m_meshes[instruction.dataIndex];

                if (shadingTestImage.isNull())
                {
                    QSizeF mediaBoxSize = mediaBox.size();
                    mediaBoxSize = mediaBoxSize.scaled(256, 256, Qt::KeepAspectRatio);
                    QSize imageSize = mediaBoxSize.toSize();
                    shadingTestImage = QImage(imageSize, QImage::Format_ARGB32);
                }

                shadingTestImage.fill(Qt::transparent);

                // Paint only if the page-to-image scale is well defined - a
                // degenerate media box would cause division by zero, and
                // painter cannot be opened on a null image.
                const bool isMediaBoxDegenerate = qFuzzyIsNull(mediaBox.width()) || qFuzzyIsNull(mediaBox.height());
                if (!isMediaBoxDegenerate && !shadingTestImage.isNull())
                {
                    // NOTE(review): the y scale is negative and no translation
                    // is applied - confirm, that the mesh really lands inside
                    // the image.
                    QMatrix pagePointToDevicePointMatrix;
                    pagePointToDevicePointMatrix.scale(shadingTestImage.width() / mediaBox.width(), -shadingTestImage.height() / mediaBox.height());

                    QPainter painter(&shadingTestImage);
                    painter.setWorldMatrix(pagePointToDevicePointMatrix);
                    data.mesh.paint(&painter, data.alpha);
                }

                // Serialize data - the mesh is identified by its rasterized pixels
                QByteArray serializedMesh;
                {
                    QDataStream stream(&serializedMesh, QIODevice::WriteOnly);

                    // serialize image data
                    stream.writeBytes(reinterpret_cast<const char*>(shadingTestImage.bits()), shadingTestImage.sizeInBytes());
                }

                GraphicPieceInfo info;
                storeHash(serializedMesh, info.hash);
                info.boundingRect = QRectF();
                info.type = GraphicPieceInfo::Type::Shading;
                infos.emplace_back(std::move(info));
                break;
            }

            case InstructionType::Clip:
            {
                // Do nothing, we are just collecting information
                break;
            }

            case InstructionType::SaveGraphicState:
            {
                stateStack.push(stateStack.top());
                break;
            }

            case InstructionType::RestoreGraphicState:
            {
                // Never remove the base state - an unbalanced restore would
                // otherwise leave the stack empty and break later top() calls.
                if (stateStack.size() > 1)
                {
                    stateStack.pop();
                }
                break;
            }

            case InstructionType::SetWorldMatrix:
            {
                stateStack.top().matrix = m_matrices[instruction.dataIndex];
                break;
            }

            case InstructionType::SetCompositionMode:
            {
                // Do nothing, we are just collecting information
                break;
            }

            default:
            {
                Q_ASSERT(false);
                break;
            }
        }
    }

    return infos;
}
|
||||
|
||||
} // namespace pdf
|
||||
|
@@ -187,7 +187,12 @@ public:
|
||||
/// \param cropBox Page's crop box
|
||||
/// \param pagePointToDevicePointMatrix Page point to device point transformation matrix
|
||||
/// \param features Renderer features
|
||||
void draw(QPainter* painter, const QRectF& cropBox, const QMatrix& pagePointToDevicePointMatrix, PDFRenderer::Features features) const;
|
||||
/// \param opacity Opacity of page graphics
|
||||
void draw(QPainter* painter,
|
||||
const QRectF& cropBox,
|
||||
const QMatrix& pagePointToDevicePointMatrix,
|
||||
PDFRenderer::Features features,
|
||||
PDFReal opacity) const;
|
||||
|
||||
/// Redact path - remove all content intersecting given path,
|
||||
/// and fill redact path with given color.
|
||||
@@ -234,6 +239,45 @@ public:
|
||||
PDFSnapInfo* getSnapInfo() { return &m_snapInfo; }
|
||||
const PDFSnapInfo* getSnapInfo() const { return &m_snapInfo; }
|
||||
|
||||
struct GraphicPieceInfo
|
||||
{
|
||||
enum class Type
|
||||
{
|
||||
Unknown,
|
||||
Text,
|
||||
VectorGraphics,
|
||||
Image,
|
||||
Shading
|
||||
};
|
||||
|
||||
bool operator<(const GraphicPieceInfo& other) const
|
||||
{
|
||||
return std::tie(type, hash) < std::tie(other.type, other.hash);
|
||||
}
|
||||
|
||||
bool isText() const { return type == Type::Text; }
|
||||
bool isVectorGraphics() const { return type == Type::VectorGraphics; }
|
||||
bool isImage() const { return type == Type::Image; }
|
||||
bool isShading() const { return type == Type::Shading; }
|
||||
|
||||
Type type = Type::Unknown;
|
||||
QRectF boundingRect;
|
||||
std::array<uint8_t, 64> hash = { }; ///< Hash of all data
|
||||
std::array<uint8_t, 64> imageHash = { }; ///< Hash of the image only
|
||||
QPainterPath pagePath;
|
||||
};
|
||||
|
||||
using GraphicPieceInfos = std::vector<GraphicPieceInfo>;
|
||||
|
||||
/// Creates information about piece of graphic in this page,
|
||||
/// for example, for comparison purposes. Parameter \p epsilon
|
||||
/// is for numerical precision - values under epsilon are considered
|
||||
/// as equal.
|
||||
/// \param mediaBox Page's media box
|
||||
/// \param epsilon Epsilon
|
||||
GraphicPieceInfos calculateGraphicPieceInfos(QRectF mediaBox,
|
||||
PDFReal epsilon) const;
|
||||
|
||||
private:
|
||||
struct PathPaintData
|
||||
{
|
||||
|
@@ -106,7 +106,7 @@ PDFDocument PDFRedact::perform(Options options)
|
||||
|
||||
QPainter* painter = contentStreamBuilder.begin(newPageReference);
|
||||
compiledPage.redact(redactPath, matrix, m_redactFillColor);
|
||||
compiledPage.draw(painter, QRectF(), matrix, PDFRenderer::None);
|
||||
compiledPage.draw(painter, QRectF(), matrix, PDFRenderer::None, 1.0);
|
||||
contentStreamBuilder.end(painter);
|
||||
}
|
||||
|
||||
|
@@ -244,7 +244,7 @@ QImage PDFRasterizer::render(PDFInteger pageIndex,
|
||||
QOpenGLPaintDevice device(size);
|
||||
QPainter painter(&device);
|
||||
painter.fillRect(QRect(QPoint(0, 0), size), compiledPage->getPaperColor());
|
||||
compiledPage->draw(&painter, page->getCropBox(), matrix, features);
|
||||
compiledPage->draw(&painter, page->getCropBox(), matrix, features, 1.0);
|
||||
|
||||
if (annotationManager)
|
||||
{
|
||||
@@ -276,7 +276,7 @@ QImage PDFRasterizer::render(PDFInteger pageIndex,
|
||||
image.fill(Qt::white);
|
||||
|
||||
QPainter painter(&image);
|
||||
compiledPage->draw(&painter, page->getCropBox(), matrix, features);
|
||||
compiledPage->draw(&painter, page->getCropBox(), matrix, features, 1.0);
|
||||
|
||||
if (annotationManager)
|
||||
{
|
||||
|
@@ -1176,6 +1176,7 @@ void PDFTextFlow::merge(const PDFTextFlow& next)
|
||||
m_text += next.m_text;
|
||||
m_boundingBox = m_boundingBox.united(next.m_boundingBox);
|
||||
m_characterPointers.insert(m_characterPointers.end(), next.m_characterPointers.cbegin(), next.m_characterPointers.cend());
|
||||
m_characterBoundingBoxes.insert(m_characterBoundingBoxes.end(), next.m_characterBoundingBoxes.cbegin(), next.m_characterBoundingBoxes.cend());
|
||||
}
|
||||
|
||||
PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags flags, PDFInteger pageIndex)
|
||||
@@ -1222,6 +1223,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags
|
||||
{
|
||||
currentFlow.m_text += QChar(' ');
|
||||
currentFlow.m_characterPointers.emplace_back();
|
||||
currentFlow.m_characterBoundingBoxes.emplace_back();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1233,6 +1235,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags
|
||||
pointer.lineIndex = textLineIndex;
|
||||
pointer.characterIndex = i;
|
||||
currentFlow.m_characterPointers.emplace_back(qMove(pointer));
|
||||
currentFlow.m_characterBoundingBoxes.emplace_back(currentCharacter.boundingBox.controlPointRect());
|
||||
}
|
||||
|
||||
// Remove soft hyphen, if it is enabled
|
||||
@@ -1240,6 +1243,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags
|
||||
{
|
||||
currentFlow.m_text.chop(1);
|
||||
currentFlow.m_characterPointers.pop_back();
|
||||
currentFlow.m_characterBoundingBoxes.pop_back();
|
||||
|
||||
if (!flags.testFlag(AddLineBreaks))
|
||||
{
|
||||
@@ -1252,6 +1256,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags
|
||||
// Add line break
|
||||
currentFlow.m_text += lineBreak;
|
||||
currentFlow.m_characterPointers.insert(currentFlow.m_characterPointers.end(), lineBreak.length(), PDFCharacterPointer());
|
||||
currentFlow.m_characterBoundingBoxes.insert(currentFlow.m_characterBoundingBoxes.end(), lineBreak.length(), QRectF());
|
||||
|
||||
++textLineIndex;
|
||||
}
|
||||
|
@@ -297,6 +297,9 @@ public:
|
||||
/// Returns whole text for this text flow
|
||||
QString getText() const { return m_text; }
|
||||
|
||||
/// Returns character bounding boxes (a copy of the stored vector).
/// Bounding boxes are stored alongside the character pointers; entries
/// added for inserted spaces and line breaks are default-constructed
/// (null) rectangles.
|
||||
std::vector<QRectF> getBoundingBoxes() const { return m_characterBoundingBoxes; }
|
||||
|
||||
/// Returns text from character pointers
|
||||
/// \param begin Begin character
|
||||
/// \param end End character
|
||||
@@ -330,6 +333,7 @@ private:
|
||||
QString m_text;
|
||||
QRectF m_boundingBox;
|
||||
std::vector<PDFCharacterPointer> m_characterPointers;
|
||||
std::vector<QRectF> m_characterBoundingBoxes;
|
||||
};
|
||||
|
||||
/// Text layout of single page. Can handle various fonts, various angles of lines
|
||||
|
@@ -367,6 +367,15 @@ std::vector<PDFInteger> PDFClosedIntervalSet::unfold() const
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Shifts lower and upper bound of every stored interval by \p offset.
void PDFClosedIntervalSet::translate(PDFInteger offset)
{
    for (auto it = m_intervals.begin(); it != m_intervals.end(); ++it)
    {
        it->first += offset;
        it->second += offset;
    }
}
|
||||
|
||||
PDFClosedIntervalSet PDFClosedIntervalSet::parse(PDFInteger first, PDFInteger last, const QString& text, QString* errorMessage)
|
||||
{
|
||||
PDFClosedIntervalSet result;
|
||||
|
@@ -694,6 +694,10 @@ public:
|
||||
/// Returns true, if interval set is empty
|
||||
bool isEmpty() const { return m_intervals.empty(); }
|
||||
|
||||
/// Translates interval set by a given offset
|
||||
/// \param offset Offset
|
||||
void translate(PDFInteger offset);
|
||||
|
||||
/// Parses text into closed interval set, text should be in form "1,3,4,7,-11,12-,52-53,-",
|
||||
/// where 1,3,4,7 means single pages, -11 means range from \p first to 11, 12- means range
|
||||
/// from 12 to \p last, and 52-53 means closed interval [52, 53]. If text is not in this form,
|
||||
|
Reference in New Issue
Block a user