diff --git a/PdfForQtLib/sources/pdfcompiler.cpp b/PdfForQtLib/sources/pdfcompiler.cpp index 63e86c0..330f424 100644 --- a/PdfForQtLib/sources/pdfcompiler.cpp +++ b/PdfForQtLib/sources/pdfcompiler.cpp @@ -338,6 +338,22 @@ PDFTextLayout PDFAsynchronousTextLayoutCompiler::getTextLayout(PDFInteger pageIn return PDFTextLayout(); } +PDFTextLayoutGetter PDFAsynchronousTextLayoutCompiler::getTextLayoutLazy(PDFInteger pageIndex) +{ + if (m_state != State::Active || !m_proxy->getDocument()) + { + // Engine is not active, always return empty layout + return PDFTextLayoutGetter(nullptr, pageIndex); + } + + if (m_textLayouts) + { + return m_textLayouts->getTextLayoutLazy(pageIndex); + } + + return PDFTextLayoutGetter(nullptr, pageIndex); +} + void PDFAsynchronousTextLayoutCompiler::makeTextLayout() { if (m_state != State::Active || !m_proxy->getDocument()) diff --git a/PdfForQtLib/sources/pdfcompiler.h b/PdfForQtLib/sources/pdfcompiler.h index f5a90b7..166d452 100644 --- a/PdfForQtLib/sources/pdfcompiler.h +++ b/PdfForQtLib/sources/pdfcompiler.h @@ -125,6 +125,11 @@ public: /// \param pageIndex Page index PDFTextLayout getTextLayout(PDFInteger pageIndex); + /// Returns getter for text layout of the page. If page index is invalid, + /// then empty text layout getter is returned. + /// \param pageIndex Page index + PDFTextLayoutGetter getTextLayoutLazy(PDFInteger pageIndex); + /// Create text layout for the document. Function is asynchronous, /// it returns immediately. After text layout is created, signal /// \p textLayoutChanged is emitted. 
diff --git a/PdfForQtLib/sources/pdfdrawspacecontroller.cpp b/PdfForQtLib/sources/pdfdrawspacecontroller.cpp index 3fa43a8..5cbae41 100644 --- a/PdfForQtLib/sources/pdfdrawspacecontroller.cpp +++ b/PdfForQtLib/sources/pdfdrawspacecontroller.cpp @@ -622,12 +622,13 @@ void PDFDrawWidgetProxy::draw(QPainter* painter, QRect rect) const PDFPage* page = m_controller->getDocument()->getCatalog()->getPage(item.pageIndex); QMatrix matrix = PDFRenderer::createPagePointToDevicePointMatrix(page, placedRect); compiledPage->draw(painter, page->getCropBox(), matrix, m_features); + PDFTextLayoutGetter layoutGetter = m_textLayoutCompiler->getTextLayoutLazy(item.pageIndex); // Draw text blocks/text lines, if it is enabled if (m_features.testFlag(PDFRenderer::DebugTextBlocks)) { m_textLayoutCompiler->makeTextLayout(); - PDFTextLayout layout = m_textLayoutCompiler->getTextLayout(item.pageIndex); + const PDFTextLayout& layout = layoutGetter; const PDFTextBlocks& textBlocks = layout.getTextBlocks(); painter->save(); @@ -650,7 +651,7 @@ void PDFDrawWidgetProxy::draw(QPainter* painter, QRect rect) if (m_features.testFlag(PDFRenderer::DebugTextLines)) { m_textLayoutCompiler->makeTextLayout(); - PDFTextLayout layout = m_textLayoutCompiler->getTextLayout(item.pageIndex); + const PDFTextLayout& layout = layoutGetter; const PDFTextBlocks& textBlocks = layout.getTextBlocks(); painter->save(); diff --git a/PdfForQtLib/sources/pdftextlayout.cpp b/PdfForQtLib/sources/pdftextlayout.cpp index e73621d..fdd4c72 100644 --- a/PdfForQtLib/sources/pdftextlayout.cpp +++ b/PdfForQtLib/sources/pdftextlayout.cpp @@ -18,6 +18,8 @@ #include "pdftextlayout.h" #include "pdfutils.h" +#include + #include namespace pdf @@ -431,6 +433,16 @@ PDFTextLine::PDFTextLine(TextCharacters characters) : m_topLeft = boundingBox.topLeft(); } +PDFReal PDFTextLine::getAngle() const +{ + if (!m_characters.empty()) + { + return m_characters.front().angle; + } + + return 0.0; +} + void PDFTextLine::applyTransform(const QMatrix& 
matrix) { m_boundingBox = matrix.map(m_boundingBox); @@ -481,6 +493,16 @@ PDFTextBlock::PDFTextBlock(PDFTextLines textLines) : m_topLeft = boundingBox.topLeft(); } +PDFReal PDFTextBlock::getAngle() const +{ + if (!m_lines.empty()) + { + return m_lines.front().getAngle(); + } + + return 0.0; +} + void PDFTextBlock::applyTransform(const QMatrix& matrix) { m_boundingBox = matrix.map(m_boundingBox); @@ -661,6 +683,40 @@ void PDFTextSelection::build() std::sort(m_items.begin(), m_items.end()); } +PDFTextSelection::iterator PDFTextSelection::begin(PDFInteger pageIndex) const +{ + Q_ASSERT(std::is_sorted(m_items.cbegin(), m_items.end())); + + PDFCharacterPointer pointer; + pointer.pageIndex = pageIndex; + pointer.blockIndex = 0; + pointer.lineIndex = 0; + pointer.characterIndex = 0; + + PDFTextSelectionColoredItem item; + item.start = pointer; + item.end = pointer; + + return std::lower_bound(m_items.cbegin(), m_items.end(), item); +} + +PDFTextSelection::iterator PDFTextSelection::end(PDFInteger pageIndex) const +{ + Q_ASSERT(std::is_sorted(m_items.cbegin(), m_items.end())); + + PDFCharacterPointer pointer; + pointer.pageIndex = pageIndex; + pointer.blockIndex = std::numeric_limits::max(); + pointer.lineIndex = std::numeric_limits::max(); + pointer.characterIndex = std::numeric_limits::max(); + + PDFTextSelectionColoredItem item; + item.start = pointer; + item.end = pointer; + + return std::upper_bound(m_items.cbegin(), m_items.end(), item); +} + PDFFindResults PDFTextFlow::find(const QString& text, Qt::CaseSensitivity caseSensitivity) const { PDFFindResults results; @@ -880,4 +936,121 @@ bool PDFFindResult::operator<(const PDFFindResult& other) const return textSelectionItems.front() < other.textSelectionItems.front(); } +PDFTextLayout PDFTextLayoutGetter::getTextLayoutImpl() const +{ + return m_storage ? 
m_storage->getTextLayout(m_pageIndex) : PDFTextLayout(); +} + +void PDFTextSelectionPainter::draw(QPainter* painter, PDFInteger pageIndex, PDFTextLayoutGetter& textLayoutGetter, const QMatrix& matrix) +{ + Q_ASSERT(painter); + + auto it = m_selection->begin(pageIndex); + auto itEnd = m_selection->end(pageIndex); + + if (it == itEnd) + { + // Jakub Melka: no text is selected on current page; do nothing + return; + } + + painter->save(); + + const PDFTextLayout& layout = textLayoutGetter; + const PDFTextBlocks& blocks = layout.getTextBlocks(); + for (; it != itEnd; ++it) + { + const PDFTextSelectionColoredItem& item = *it; + const PDFCharacterPointer& start = item.start; + const PDFCharacterPointer& end = item.end; + + Q_ASSERT(start.pageIndex == end.pageIndex); + Q_ASSERT(start.blockIndex == end.blockIndex); + + if (start.blockIndex >= blocks.size()) + { + // Selection is invalid, do nothing + continue; + } + + PDFTextBlock block = blocks[start.blockIndex]; + + // Fix angle of block, so we will get correct selection rectangles (parallel to lines) + QMatrix angleMatrix; + angleMatrix.rotate(block.getAngle()); + block.applyTransform(angleMatrix); + + QPainterPath path; + + const size_t lineStart = start.lineIndex; + const size_t lineEnd = end.lineIndex; + Q_ASSERT(lineEnd >= lineStart); + + const PDFTextLines& lines = block.getLines(); + for (size_t lineIndex = lineStart; lineIndex <= lineEnd; ++lineIndex) + { + if (lineIndex >= lines.size()) + { + // Selection is invalid, do nothing + continue; + } + + const PDFTextLine& line = lines[lineIndex]; + const TextCharacters& characters = line.getCharacters(); + + if (characters.empty()) + { + // Selection is invalid, do nothing + continue; + } + + // First determine, which characters will be selected + size_t characterStart = 0; + size_t characterEnd = characters.size() - 1; + + if (lineIndex == lineStart) + { + characterStart = start.characterIndex; + } + if (lineIndex == lineEnd) + { + characterEnd = end.characterIndex; 
+ } + + // Validate indices, then calculate bounding box + if (!(characterStart <= characterEnd && characterEnd < characters.size())) + { + continue; + } + + QRectF boundingBox; + for (size_t i = characterStart; i <= characterEnd; ++i) + { + boundingBox = boundingBox.united(characters[i].boundingBox.boundingRect()); + } + + if (boundingBox.isValid()) + { + // Enlarge height by some percent + PDFReal heightAdvance = boundingBox.height() * HEIGHT_INCREASE_FACTOR * 0.5; + boundingBox.adjust(0, -heightAdvance, 0, heightAdvance); + path.addRect(boundingBox); + } + } + + QMatrix transformMatrix = angleMatrix.inverted() * matrix; + path = transformMatrix.map(path); + + QColor penColor = item.color; + QColor brushColor = item.color; + brushColor.setAlphaF(SELECTION_ALPHA); + + painter->setPen(penColor); + painter->setBrush(QBrush(brushColor, Qt::SolidPattern)); + painter->drawPath(path); + } + + painter->restore(); +} + } // namespace pdf diff --git a/PdfForQtLib/sources/pdftextlayout.h b/PdfForQtLib/sources/pdftextlayout.h index 1afa566..3724541 100644 --- a/PdfForQtLib/sources/pdftextlayout.h +++ b/PdfForQtLib/sources/pdftextlayout.h @@ -19,6 +19,7 @@ #define PDFTEXTLAYOUT_H #include "pdfglobal.h" +#include "pdfutils.h" #include #include @@ -30,6 +31,7 @@ namespace pdf { class PDFTextLayout; +class PDFTextLayoutStorage; struct PDFTextCharacterInfo { @@ -118,6 +120,9 @@ public: const QPainterPath& getBoundingBox() const { return m_boundingBox; } const QPointF& getTopLeft() const { return m_topLeft; } + /// Get angle inclination of line + PDFReal getAngle() const; + void applyTransform(const QMatrix& matrix); friend QDataStream& operator<<(QDataStream& stream, const PDFTextLine& line); @@ -142,6 +147,9 @@ public: const QPainterPath& getBoundingBox() const { return m_boundingBox; } const QPointF& getTopLeft() const { return m_topLeft; } + /// Get angle inclination of block + PDFReal getAngle() const; + void applyTransform(const QMatrix& matrix); friend QDataStream& 
operator<<(QDataStream& stream, const PDFTextBlock& block); @@ -205,6 +213,8 @@ class PDFTextSelection public: explicit PDFTextSelection() = default; + using iterator = PDFTextSelectionColoredItems::const_iterator; + /// Adds text selection items to selection /// \param items Items /// \param color Color for items (must include alpha channel) @@ -214,6 +224,12 @@ public: /// which is not build, can't be used for rendering. void build(); + /// Returns iterator to start of page range + iterator begin(PDFInteger pageIndex) const; + + /// Returns iterator to end of page range + iterator end(PDFInteger pageIndex) const; + private: PDFTextSelectionColoredItems m_items; }; @@ -332,6 +348,58 @@ private: PDFTextBlocks m_blocks; }; +/// Lazy getter for text layouts from storage. It is used when we do not want to +/// fetch the text layout every time, because fetching it is expensive. If the text layout is not needed, +/// nothing happens. The text layout is fetched only when the conversion operator is used. +class PDFTextLayoutGetter +{ +public: + explicit PDFTextLayoutGetter(const PDFTextLayoutStorage* storage, PDFInteger pageIndex) : + m_storage(storage), + m_pageIndex(pageIndex) + { + + } + + /// Cast operator, casts to constant reference to PDFTextLayout + operator const PDFTextLayout&() + { + return m_textLayout.get(this, &PDFTextLayoutGetter::getTextLayoutImpl); + } + +private: + PDFTextLayout getTextLayoutImpl() const; + + const PDFTextLayoutStorage* m_storage; + PDFInteger m_pageIndex; + PDFCachedItem m_textLayout; +}; + +/// Paints text selection on various pages using page to device point matrix +class PDFTextSelectionPainter +{ +public: + explicit inline PDFTextSelectionPainter(const PDFTextSelection* selection) : + m_selection(selection) + { + + } + + /// Draws text selection on the painter, using text layout and matrix. If current text selection + /// doesn't contain items from active page, then text layout is not accessed. 
+ /// \param painter Painter + /// \param pageIndex Page index + /// \param textLayoutGetter Text layout getter + /// \param matrix Matrix which translates from page space to device space + void draw(QPainter* painter, PDFInteger pageIndex, PDFTextLayoutGetter& textLayoutGetter, const QMatrix& matrix); + +private: + static constexpr const PDFReal HEIGHT_INCREASE_FACTOR = 0.25; + static constexpr const PDFReal SELECTION_ALPHA = 0.25; + + const PDFTextSelection* m_selection; +}; + /// Storage for text layouts. For reading and writing, this object is thread safe. /// For writing, mutex is used to synchronize asynchronous writes, for reading /// no mutex is used at all. For this reason, both reading/writing at the same time @@ -352,6 +420,12 @@ public: /// \param pageIndex Page index PDFTextLayout getTextLayout(PDFInteger pageIndex) const; + /// Returns lazy getter of text layout for particular page. If page index is invalid, + /// then the getter provides empty text layout. Function is not thread safe, if + /// function \p setTextLayout is called from another thread. + /// \param pageIndex Page index + PDFTextLayoutGetter getTextLayoutLazy(PDFInteger pageIndex) const { return PDFTextLayoutGetter(this, pageIndex); } + /// Sets text layout to the particular index. Index must be valid and from /// range 0 to \p pageCount - 1. Function is not thread safe. /// \param pageIndex Page index