mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-02-28 17:37:46 +01:00
Advanced search (second part)
This commit is contained in:
parent
54ea7dcb7d
commit
bf4ede1574
@ -133,6 +133,9 @@ public:
|
||||
/// Returns true, if text layout is ready
|
||||
bool isTextLayoutReady() const { return m_textLayouts.has_value(); }
|
||||
|
||||
/// Returns text layout storage (if it is ready), or nullptr
|
||||
const PDFTextLayoutStorage* getTextLayoutStorage() const { return isTextLayoutReady() ? &m_textLayouts.value() : nullptr; }
|
||||
|
||||
signals:
|
||||
void textLayoutChanged();
|
||||
|
||||
|
@ -571,6 +571,64 @@ void PDFTextLayoutStorage::setTextLayout(PDFInteger pageIndex, const PDFTextLayo
|
||||
layoutStream << result;
|
||||
}
|
||||
|
||||
/// Searches the text layouts of all stored pages for \p text. Pages are processed
/// by a parallel std::for_each; matches from every text flow are merged into one
/// container, which is sorted before it is returned.
/// \param text Text to be found
/// \param caseSensitivity Case sensitivity
/// \param flowFlags Text flow flags used to create text flows from page layouts
PDFFindResults PDFTextLayoutStorage::find(const QString& text, Qt::CaseSensitivity caseSensitivity, PDFTextFlow::FlowFlags flowFlags) const
{
    PDFFindResults mergedResults;
    QMutex mergeMutex;

    auto searchPage = [this, &text, caseSensitivity, flowFlags, &mergedResults, &mergeMutex](size_t pageIndex)
    {
        PDFTextLayout pageLayout = getTextLayout(pageIndex);
        PDFTextFlows pageFlows = PDFTextFlow::createTextFlows(pageLayout, flowFlags, pageIndex);

        for (const PDFTextFlow& flow : pageFlows)
        {
            PDFFindResults flowMatches = flow.find(text, caseSensitivity);

            // Nothing found in this flow - continue without touching the mutex.
            if (flowMatches.empty())
            {
                continue;
            }

            // Only the merge into the shared container is serialized.
            QMutexLocker mergeLock(&mergeMutex);
            mergedResults.insert(mergedResults.end(), flowMatches.begin(), flowMatches.end());
        }
    };

    // One work item per page index, 0 .. m_offsets.size() - 1
    PDFIntegerRange<size_t> pageRange(0, m_offsets.size());
    std::for_each(std::execution::par, pageRange.begin(), pageRange.end(), searchPage);

    // Worker threads append in arbitrary order, so establish the final order here.
    std::sort(mergedResults.begin(), mergedResults.end());
    return mergedResults;
}
|
||||
|
||||
/// Searches the text layouts of all stored pages for matches of \p expression.
/// Pages are processed by a parallel std::for_each; matches from every text flow
/// are merged into one container, which is sorted before it is returned.
/// \param expression Regular expression to be matched
/// \param flowFlags Text flow flags used to create text flows from page layouts
PDFFindResults PDFTextLayoutStorage::find(const QRegularExpression& expression, PDFTextFlow::FlowFlags flowFlags) const
{
    PDFFindResults mergedResults;
    QMutex mergeMutex;

    auto searchPage = [this, &expression, flowFlags, &mergedResults, &mergeMutex](size_t pageIndex)
    {
        PDFTextLayout pageLayout = getTextLayout(pageIndex);
        PDFTextFlows pageFlows = PDFTextFlow::createTextFlows(pageLayout, flowFlags, pageIndex);

        for (const PDFTextFlow& flow : pageFlows)
        {
            PDFFindResults flowMatches = flow.find(expression);

            // Nothing found in this flow - continue without touching the mutex.
            if (flowMatches.empty())
            {
                continue;
            }

            // Only the merge into the shared container is serialized.
            QMutexLocker mergeLock(&mergeMutex);
            mergedResults.insert(mergedResults.end(), flowMatches.begin(), flowMatches.end());
        }
    };

    // One work item per page index, 0 .. m_offsets.size() - 1
    PDFIntegerRange<size_t> pageRange(0, m_offsets.size());
    std::for_each(std::execution::par, pageRange.begin(), pageRange.end(), searchPage);

    // Worker threads append in arbitrary order, so establish the final order here.
    std::sort(mergedResults.begin(), mergedResults.end());
    return mergedResults;
}
|
||||
|
||||
QDataStream& operator<<(QDataStream& stream, const PDFTextLayoutSettings& settings)
|
||||
{
|
||||
stream << settings.samples;
|
||||
@ -622,6 +680,33 @@ PDFFindResults PDFTextFlow::find(const QString& text, Qt::CaseSensitivity caseSe
|
||||
return results;
|
||||
}
|
||||
|
||||
/// Finds all matches of \p expression in the text of this flow. A match is
/// reported only if it maps to at least one text selection item.
/// \param expression Regular expression to be matched
PDFFindResults PDFTextFlow::find(const QRegularExpression& expression) const
{
    PDFFindResults matches;

    for (QRegularExpressionMatchIterator matchIt = expression.globalMatch(m_text, 0, QRegularExpression::NormalMatch, QRegularExpression::NoMatchOption); matchIt.hasNext();)
    {
        QRegularExpressionMatch currentMatch = matchIt.next();
        Q_ASSERT(currentMatch.hasMatch());

        // Position and size of the whole match within m_text
        const int start = currentMatch.capturedStart();
        const int size = currentMatch.capturedLength();

        PDFFindResult candidate;
        candidate.matched = currentMatch.captured();
        candidate.textSelectionItems = getTextSelectionItems(start, size);
        candidate.context = getContext(start, size);

        // Matches without any selection item are dropped
        if (candidate.textSelectionItems.empty())
        {
            continue;
        }

        matches.emplace_back(qMove(candidate));
    }

    return matches;
}
|
||||
|
||||
void PDFTextFlow::merge(const PDFTextFlow& next)
|
||||
{
|
||||
m_text += next.m_text;
|
||||
@ -728,8 +813,25 @@ PDFTextSelectionItems PDFTextFlow::getTextSelectionItems(size_t index, size_t le
|
||||
|
||||
auto it = std::next(m_characterPointers.cbegin(), index);
|
||||
auto itEnd = std::next(m_characterPointers.cbegin(), index + length);
|
||||
s
|
||||
while (it != itEnd)
|
||||
{
|
||||
// Skip invalid items, find first valid
|
||||
if (!it->isValid())
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto itSelectionStart = it;
|
||||
while (it != itEnd && it->isValid() && it->hasSameBlock(*itSelectionStart))
|
||||
{
|
||||
++it;
|
||||
}
|
||||
auto itSelectionEnd = std::prev(it);
|
||||
items.emplace_back(*itSelectionStart, *itSelectionEnd);
|
||||
}
|
||||
|
||||
std::sort(items.begin(), items.end());
|
||||
return items;
|
||||
}
|
||||
|
||||
@ -754,9 +856,22 @@ QString PDFTextFlow::getContext(size_t index, size_t length) const
|
||||
return m_text.mid(int(index), int(length));
|
||||
}
|
||||
|
||||
bool PDFCharacterPointer::hasSameBlock(const PDFCharacterPointer& other) const
|
||||
{
|
||||
return pageIndex == other.pageIndex && blockIndex == other.blockIndex;
|
||||
}
|
||||
|
||||
bool PDFCharacterPointer::hasSameLine(const PDFCharacterPointer& other) const
|
||||
{
|
||||
return pageIndex == other.pageIndex && blockIndex == other.blockIndex && lineIndex == other.lineIndex;
|
||||
return hasSameBlock(other) && lineIndex == other.lineIndex;
|
||||
}
|
||||
|
||||
bool PDFFindResult::operator<(const PDFFindResult& other) const
|
||||
{
|
||||
Q_ASSERT(!textSelectionItems.empty());
|
||||
Q_ASSERT(!other.textSelectionItems.empty());
|
||||
|
||||
return textSelectionItems.front() < other.textSelectionItems.front();
|
||||
}
|
||||
|
||||
} // namespace pdf
|
||||
|
@ -163,6 +163,9 @@ struct PDFCharacterPointer
|
||||
/// Returns true, if character pointer is valid and points to the correct location
|
||||
bool isValid() const { return pageIndex > -1; }
|
||||
|
||||
/// Returns true, if character belongs to same block
|
||||
bool hasSameBlock(const PDFCharacterPointer& other) const;
|
||||
|
||||
/// Returns true, if character belongs to same line
|
||||
bool hasSameLine(const PDFCharacterPointer& other) const;
|
||||
|
||||
@ -187,6 +190,8 @@ private:
|
||||
|
||||
struct PDFFindResult
|
||||
{
|
||||
bool operator<(const PDFFindResult& other) const;
|
||||
|
||||
/// Matched string during search
|
||||
QString matched;
|
||||
|
||||
@ -222,6 +227,10 @@ public:
|
||||
/// \param caseSensitivity Case sensitivity
|
||||
PDFFindResults find(const QString& text, Qt::CaseSensitivity caseSensitivity) const;
|
||||
|
||||
/// Finds regular expression matches in current text flow. All text occurrences are returned.
|
||||
/// \param expression Regular expression to be matched
|
||||
PDFFindResults find(const QRegularExpression& expression) const;
|
||||
|
||||
/// Merge data from \p next flow (i.e. connect two consecutive flows)
|
||||
void merge(const PDFTextFlow& next);
|
||||
|
||||
@ -297,7 +306,7 @@ private:
|
||||
/// For writing, mutex is used to synchronize asynchronous writes, for reading
|
||||
/// no mutex is used at all. For this reason, both reading/writing at the same time
|
||||
/// is prohibited, it is not thread safe.
|
||||
class PDFTextLayoutStorage
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFTextLayoutStorage
|
||||
{
|
||||
public:
|
||||
explicit inline PDFTextLayoutStorage() = default;
|
||||
@ -320,6 +329,17 @@ public:
|
||||
/// \param mutex Mutex for locking (calls of setTextLayout from multiple threads)
|
||||
void setTextLayout(PDFInteger pageIndex, const PDFTextLayout& layout, QMutex* mutex);
|
||||
|
||||
/// Finds simple text in all pages. All text occurrences are returned.
|
||||
/// \param text Text to be found
|
||||
/// \param caseSensitivity Case sensitivity
|
||||
/// \param flowFlags Text flow flags
|
||||
PDFFindResults find(const QString& text, Qt::CaseSensitivity caseSensitivity, PDFTextFlow::FlowFlags flowFlags) const;
|
||||
|
||||
/// Finds regular expression matches in all pages. All text occurrences are returned.
|
||||
/// \param expression Regular expression to be matched
|
||||
/// \param flowFlags Text flow flags
|
||||
PDFFindResults find(const QRegularExpression& expression, PDFTextFlow::FlowFlags flowFlags) const;
|
||||
|
||||
private:
|
||||
std::vector<int> m_offsets;
|
||||
QByteArray m_textLayouts;
|
||||
|
@ -35,6 +35,8 @@ PDFAdvancedFindWidget::PDFAdvancedFindWidget(pdf::PDFDrawWidgetProxy* proxy, QWi
|
||||
{
|
||||
ui->setupUi(this);
|
||||
|
||||
ui->resultsTableWidget->setHorizontalHeaderLabels({ tr("Page No."), tr("Phrase"), tr("Context") });
|
||||
|
||||
connect(ui->regularExpressionsCheckbox, &QCheckBox::clicked, this, &PDFAdvancedFindWidget::updateUI);
|
||||
connect(m_proxy, &pdf::PDFDrawWidgetProxy::textLayoutChanged, this, &PDFAdvancedFindWidget::performSearch);
|
||||
updateUI();
|
||||
@ -62,6 +64,7 @@ void PDFAdvancedFindWidget::on_searchButton_clicked()
|
||||
m_parameters.isRegularExpression = ui->regularExpressionsCheckbox->isChecked();
|
||||
m_parameters.isDotMatchingEverything = ui->dotMatchesEverythingCheckBox->isChecked();
|
||||
m_parameters.isMultiline = ui->multilineMatchingCheckBox->isChecked();
|
||||
m_parameters.isSoftHyphenRemoved = ui->removeSoftHyphenCheckBox->isChecked();
|
||||
m_parameters.isSearchFinished = m_parameters.phrase.isEmpty();
|
||||
|
||||
if (m_parameters.isSearchFinished)
|
||||
@ -85,6 +88,9 @@ void PDFAdvancedFindWidget::on_searchButton_clicked()
|
||||
}
|
||||
}
|
||||
|
||||
m_findResults.clear();
|
||||
updateResultsUI();
|
||||
|
||||
pdf::PDFAsynchronousTextLayoutCompiler* compiler = m_proxy->getTextLayoutCompiler();
|
||||
if (compiler->isTextLayoutReady())
|
||||
{
|
||||
@ -104,6 +110,25 @@ void PDFAdvancedFindWidget::updateUI()
|
||||
ui->regularExpressionSettingsGroupBox->setEnabled(enableRegularExpressionUI);
|
||||
}
|
||||
|
||||
void PDFAdvancedFindWidget::updateResultsUI()
|
||||
{
|
||||
ui->tabWidget->setTabText(ui->tabWidget->indexOf(ui->resultsTab), !m_findResults.empty() ? tr("Results (%1)").arg(m_findResults.size()) : tr("Results"));
|
||||
ui->resultsTableWidget->setRowCount(static_cast<int>(m_findResults.size()));
|
||||
|
||||
for (int i = 0, rowCount = int(m_findResults.size()); i < rowCount; ++i)
|
||||
{
|
||||
const pdf::PDFFindResult& findResult = m_findResults[i];
|
||||
ui->resultsTableWidget->setItem(i, 0, new QTableWidgetItem(QString::number(findResult.textSelectionItems.front().first.pageIndex + 1)));
|
||||
ui->resultsTableWidget->setItem(i, 1, new QTableWidgetItem(findResult.matched));
|
||||
ui->resultsTableWidget->setItem(i, 2, new QTableWidgetItem(findResult.context));
|
||||
}
|
||||
|
||||
if (!m_findResults.empty())
|
||||
{
|
||||
ui->tabWidget->setCurrentWidget(ui->resultsTab);
|
||||
}
|
||||
}
|
||||
|
||||
void PDFAdvancedFindWidget::performSearch()
|
||||
{
|
||||
if (m_parameters.isSearchFinished)
|
||||
@ -120,7 +145,62 @@ void PDFAdvancedFindWidget::performSearch()
|
||||
return;
|
||||
}
|
||||
|
||||
// Prepare string to search
|
||||
bool useRegularExpression = m_parameters.isRegularExpression;
|
||||
QString expression = m_parameters.phrase;
|
||||
|
||||
if (m_parameters.isWholeWordsOnly)
|
||||
{
|
||||
if (useRegularExpression)
|
||||
{
|
||||
expression = QString("\\b%1\\b").arg(expression);
|
||||
}
|
||||
else
|
||||
{
|
||||
expression = QString("\\b%1\\b").arg(QRegularExpression::escape(expression));
|
||||
}
|
||||
useRegularExpression = true;
|
||||
}
|
||||
|
||||
pdf::PDFTextFlow::FlowFlags flowFlags = pdf::PDFTextFlow::SeparateBlocks;
|
||||
if (m_parameters.isSoftHyphenRemoved)
|
||||
{
|
||||
flowFlags |= pdf::PDFTextFlow::RemoveSoftHyphen;
|
||||
}
|
||||
if (m_parameters.isRegularExpression)
|
||||
{
|
||||
flowFlags |= pdf::PDFTextFlow::AddLineBreaks;
|
||||
}
|
||||
|
||||
const pdf::PDFTextLayoutStorage* textLayoutStorage = compiler->getTextLayoutStorage();
|
||||
if (!useRegularExpression)
|
||||
{
|
||||
// Use simple text search
|
||||
Qt::CaseSensitivity caseSensitivity = m_parameters.isCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive;
|
||||
m_findResults = textLayoutStorage->find(expression, caseSensitivity, flowFlags);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use regular expression search
|
||||
QRegularExpression::PatternOptions patternOptions = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::OptimizeOnFirstUsageOption;
|
||||
if (!m_parameters.isCaseSensitive)
|
||||
{
|
||||
patternOptions |= QRegularExpression::CaseInsensitiveOption;
|
||||
}
|
||||
if (m_parameters.isDotMatchingEverything)
|
||||
{
|
||||
patternOptions |= QRegularExpression::DotMatchesEverythingOption;
|
||||
}
|
||||
if (m_parameters.isMultiline)
|
||||
{
|
||||
patternOptions |= QRegularExpression::MultilineOption;
|
||||
}
|
||||
|
||||
QRegularExpression regularExpression(expression, patternOptions);
|
||||
m_findResults = textLayoutStorage->find(regularExpression, flowFlags);
|
||||
}
|
||||
|
||||
updateResultsUI();
|
||||
}
|
||||
|
||||
} // namespace pdfviewer
|
||||
|
@ -19,6 +19,7 @@
|
||||
#define PDFADVANCEDFINDWIDGET_H
|
||||
|
||||
#include "pdfglobal.h"
|
||||
#include "pdftextlayout.h"
|
||||
|
||||
#include <QWidget>
|
||||
|
||||
@ -51,6 +52,7 @@ private slots:
|
||||
|
||||
private:
|
||||
void updateUI();
|
||||
void updateResultsUI();
|
||||
void performSearch();
|
||||
|
||||
struct SearchParameters
|
||||
@ -62,6 +64,7 @@ private:
|
||||
bool isDotMatchingEverything = false;
|
||||
bool isMultiline = false;
|
||||
bool isSearchFinished = false;
|
||||
bool isSoftHyphenRemoved = false;
|
||||
};
|
||||
|
||||
Ui::PDFAdvancedFindWidget* ui;
|
||||
@ -69,6 +72,7 @@ private:
|
||||
pdf::PDFDrawWidgetProxy* m_proxy;
|
||||
const pdf::PDFDocument* m_document;
|
||||
SearchParameters m_parameters;
|
||||
pdf::PDFFindResults m_findResults;
|
||||
};
|
||||
|
||||
} // namespace pdfviewer
|
||||
|
@ -30,27 +30,13 @@
|
||||
<string>Search Settings</string>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="searchSettingsGroupBoxLayout" columnstretch="0,1,0">
|
||||
<item row="0" column="0">
|
||||
<widget class="QLabel" name="searchLabel">
|
||||
<property name="text">
|
||||
<string>Search for:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="1" colspan="2">
|
||||
<widget class="QLineEdit" name="searchPhraseEdit"/>
|
||||
</item>
|
||||
<item row="4" column="2">
|
||||
<widget class="QPushButton" name="searchButton">
|
||||
<item row="3" column="1" colspan="2">
|
||||
<widget class="QCheckBox" name="regularExpressionsCheckbox">
|
||||
<property name="text">
|
||||
<string>Search</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1" colspan="2">
|
||||
<widget class="QCheckBox" name="caseSensitiveCheckBox">
|
||||
<property name="text">
|
||||
<string>Case sensitive</string>
|
||||
<string>Use regular expressions</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@ -61,10 +47,31 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="1" colspan="2">
|
||||
<widget class="QCheckBox" name="regularExpressionsCheckbox">
|
||||
<item row="0" column="0">
|
||||
<widget class="QLabel" name="searchLabel">
|
||||
<property name="text">
|
||||
<string>Use regular expressions</string>
|
||||
<string>Search for:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1" colspan="2">
|
||||
<widget class="QCheckBox" name="caseSensitiveCheckBox">
|
||||
<property name="text">
|
||||
<string>Case sensitive</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="4" column="1">
|
||||
<widget class="QCheckBox" name="removeSoftHyphenCheckBox">
|
||||
<property name="text">
|
||||
<string>Remove soft hyphen at end of line</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="2">
|
||||
<widget class="QPushButton" name="searchButton">
|
||||
<property name="text">
|
||||
<string>Search</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@ -118,12 +125,28 @@
|
||||
</attribute>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_3">
|
||||
<item>
|
||||
<widget class="QTreeWidget" name="treeWidget">
|
||||
<column>
|
||||
<property name="text">
|
||||
<string notr="true">1</string>
|
||||
</property>
|
||||
</column>
|
||||
<widget class="QTableWidget" name="resultsTableWidget">
|
||||
<property name="editTriggers">
|
||||
<set>QAbstractItemView::NoEditTriggers</set>
|
||||
</property>
|
||||
<property name="alternatingRowColors">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="selectionBehavior">
|
||||
<enum>QAbstractItemView::SelectRows</enum>
|
||||
</property>
|
||||
<property name="sortingEnabled">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="columnCount">
|
||||
<number>3</number>
|
||||
</property>
|
||||
<attribute name="horizontalHeaderStretchLastSection">
|
||||
<bool>true</bool>
|
||||
</attribute>
|
||||
<column/>
|
||||
<column/>
|
||||
<column/>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
|
Loading…
x
Reference in New Issue
Block a user