From 354a93f4d6db81a5a2736f1d3539f493c43bbd1a Mon Sep 17 00:00:00 2001 From: Jakub Melka Date: Sun, 15 Aug 2021 17:34:05 +0200 Subject: [PATCH] AudioBook Plugin: Initial commit, document text flow adjustments --- Pdf4QtLib/sources/pdfdocumenttextflow.cpp | 185 ++++++++++++++---- Pdf4QtLib/sources/pdfdocumenttextflow.h | 89 ++++++++- Pdf4QtLib/sources/pdftextlayout.cpp | 2 + Pdf4QtLib/sources/pdftextlayout.h | 4 + .../AudioBookPlugin/AudioBookPlugin.json | 7 + .../AudioBookPlugin/AudioBookPlugin.pro | 48 +++++ .../AudioBookPlugin/audiobookplugin.cpp | 81 ++++++++ .../AudioBookPlugin/audiobookplugin.h | 56 ++++++ .../AudioBookPlugin/create-text-stream.svg | 100 ++++++++++ Pdf4QtViewerPlugins/AudioBookPlugin/icons.qrc | 5 + Pdf4QtViewerPlugins/Pdf4QtViewerPlugins.pro | 3 +- README.txt | 2 + 12 files changed, 547 insertions(+), 35 deletions(-) create mode 100644 Pdf4QtViewerPlugins/AudioBookPlugin/AudioBookPlugin.json create mode 100644 Pdf4QtViewerPlugins/AudioBookPlugin/AudioBookPlugin.pro create mode 100644 Pdf4QtViewerPlugins/AudioBookPlugin/audiobookplugin.cpp create mode 100644 Pdf4QtViewerPlugins/AudioBookPlugin/audiobookplugin.h create mode 100644 Pdf4QtViewerPlugins/AudioBookPlugin/create-text-stream.svg create mode 100644 Pdf4QtViewerPlugins/AudioBookPlugin/icons.qrc diff --git a/Pdf4QtLib/sources/pdfdocumenttextflow.cpp b/Pdf4QtLib/sources/pdfdocumenttextflow.cpp index 391ae4e..2f7f1b9 100644 --- a/Pdf4QtLib/sources/pdfdocumenttextflow.cpp +++ b/Pdf4QtLib/sources/pdfdocumenttextflow.cpp @@ -89,19 +89,21 @@ struct PDFStructureTreeTextItem }; PDFStructureTreeTextItem() = default; - PDFStructureTreeTextItem(Type type, const PDFStructureItem* item, QString text) : - type(type), item(item), text(qMove(text)) + PDFStructureTreeTextItem(Type type, const PDFStructureItem* item, QString text, PDFInteger pageIndex, QRectF boundingRect) : + type(type), item(item), text(qMove(text)), pageIndex(pageIndex), boundingRect(boundingRect) { } - static PDFStructureTreeTextItem createText(QString text) { return PDFStructureTreeTextItem(Type::Text, nullptr, qMove(text)); } - static PDFStructureTreeTextItem createStartTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::StartTag, item, QString()); } - static PDFStructureTreeTextItem createEndTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::EndTag, item, QString()); } + static PDFStructureTreeTextItem createText(QString text, PDFInteger pageIndex, QRectF boundingRect) { return PDFStructureTreeTextItem(Type::Text, nullptr, qMove(text), pageIndex, boundingRect); } + static PDFStructureTreeTextItem createStartTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::StartTag, item, QString(), -1, QRectF()); } + static PDFStructureTreeTextItem createEndTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::EndTag, item, QString(), -1, QRectF()); } Type type = Type::Text; const PDFStructureItem* item = nullptr; QString text; + PDFInteger pageIndex = -1; + QRectF boundingRect; }; using PDFStructureTreeTextSequence = std::vector; @@ -118,6 +120,7 @@ public: SkipArtifact = 0x0001, ///< Skip content marked as 'Artifact' AdjustReversedText = 0x0002, ///< Adjust reversed text CreateTreeMapping = 0x0004, ///< Create text mapping to structure tree item + BoundingBoxes = 0x0008, ///< Compute bounding boxes of the texts }; Q_DECLARE_FLAGS(Options, Option) @@ -139,11 +142,20 @@ public: /// \param pageNumber Page number const PDFStructureTreeTextSequence& getTextSequence(PDFInteger pageNumber) const; + struct TextItem + { + QRectF boundingRect; + PDFInteger pageIndex = -1; + QString text; + }; + + using TextItems = std::vector; + /// Returns text for given structure tree item. If structure tree item /// is not found, then empty list is returned. This functionality /// requires, that \p CreateTreeMapping flag is being set. /// \param item Item - const QStringList& getText(const PDFStructureItem* item) const; + const TextItems& getText(const PDFStructureItem* item) const; private: QList m_errors; @@ -151,7 +163,7 @@ private: const PDFStructureTree* m_tree; QStringList m_unmatchedText; std::map m_textSequences; - std::map m_textForItems; + std::map m_textForItems; Options m_options; }; @@ -177,7 +189,8 @@ public: m_features(features), m_tree(tree), m_mapping(mapping), - m_extractorOptions(extractorOptions) + m_extractorOptions(extractorOptions), + m_pageIndex(document->getCatalog()->getPageIndexFromPageReference(page->getPageReference())) { } @@ -191,6 +204,7 @@ protected: virtual void performOutputCharacter(const PDFTextCharacterInfo& info) override; virtual void performMarkedContentBegin(const QByteArray& tag, const PDFObject& properties) override; virtual void performMarkedContentEnd() override; + virtual void performPathPainting(const QPainterPath& path, bool stroke, bool fill, bool text, Qt::FillRule fillRule) override; private: const PDFStructureItem* getStructureTreeItemFromMCID(PDFInteger mcid) const; @@ -213,11 +227,35 @@ private: const std::map* m_mapping; std::vector m_markedContentInfoStack; QString m_currentText; + QRectF m_currentBoundingBox; PDFStructureTreeTextSequence m_textSequence; QStringList m_unmatchedText; PDFStructureTreeTextExtractor::Options m_extractorOptions; + PDFInteger m_pageIndex; }; +void PDFStructureTreeTextContentProcessor::performPathPainting(const QPainterPath& path, bool stroke, bool fill, bool text, Qt::FillRule fillRule) +{ + if (!text) + { + // Jakub Melka: This should not occur + return; + } + + if (!m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::BoundingBoxes)) + { + return; + } + + Q_UNUSED(stroke); + Q_UNUSED(fill); + Q_UNUSED(fillRule); + + QMatrix matrix = getCurrentWorldMatrix(); + QPainterPath worldPath = matrix.map(path); + m_currentBoundingBox = m_currentBoundingBox.united(worldPath.controlPointRect()); +} + void PDFStructureTreeTextContentProcessor::finishText() { m_currentText = m_currentText.trimmed(); @@ -233,9 +271,10 @@ void PDFStructureTreeTextContentProcessor::finishText() } m_currentText = qMove(reversed); } - m_textSequence.emplace_back(PDFStructureTreeTextItem::createText(qMove(m_currentText))); + m_textSequence.emplace_back(PDFStructureTreeTextItem::createText(qMove(m_currentText), m_pageIndex, m_currentBoundingBox)); } m_currentText = QString(); + m_currentBoundingBox = QRectF(); } bool PDFStructureTreeTextContentProcessor::isArtifact() const @@ -306,6 +345,7 @@ void PDFStructureTreeTextContentProcessor::performMarkedContentEnd() { m_unmatchedText << qMove(m_currentText); } + m_currentBoundingBox = QRectF(); } } @@ -333,8 +373,10 @@ bool PDFStructureTreeTextContentProcessor::isContentKindSuppressed(ContentKind k { switch (kind) { - case ContentKind::Shapes: case ContentKind::Text: + return !m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::BoundingBoxes); + + case ContentKind::Shapes: case ContentKind::Images: case ContentKind::Shading: return true; @@ -430,7 +472,7 @@ void PDFStructureTreeTextExtractor::perform(const std::vector& pageI case PDFStructureTreeTextItem::Type::Text: if (!stack.empty()) { - m_textForItems[stack.top()] << sequenceItem.text; + m_textForItems[stack.top()].emplace_back(TextItem{ sequenceItem.boundingRect, sequenceItem.pageIndex, sequenceItem.text }); } break; } @@ -451,7 +493,7 @@ const PDFStructureTreeTextSequence& PDFStructureTreeTextExtractor::getTextSequen return dummy; } -const QStringList& PDFStructureTreeTextExtractor::getText(const PDFStructureItem* item) const +const PDFStructureTreeTextExtractor::TextItems& PDFStructureTreeTextExtractor::getText(const PDFStructureItem* item) const { auto it = m_textForItems.find(item); if (it != m_textForItems.cend()) @@ -459,7 +501,7 @@ const QStringList& PDFStructureTreeTextExtractor::getText(const PDFStructureItem return it->second; } - static const QStringList dummy; + static const TextItems dummy; return dummy; } @@ -489,9 +531,9 @@ private: void PDFStructureTreeTextFlowCollector::visitStructureTree(const PDFStructureTree* structureTree) { - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureItemStart, -1, QString()}); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureItemStart}); acceptChildren(structureTree); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureItemEnd, -1, QString()}); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureItemEnd}); } void PDFStructureTreeTextFlowCollector::markHasContent() @@ -505,7 +547,7 @@ void PDFStructureTreeTextFlowCollector::markHasContent() void PDFStructureTreeTextFlowCollector::visitStructureElement(const PDFStructureElement* structureElement) { size_t index = m_items->size(); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureItemStart, -1, QString()}); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureItemStart}); // Mark stack so we can delete unused items m_hasContentStack.push_back(false); @@ -520,43 +562,43 @@ void PDFStructureTreeTextFlowCollector::visitStructureElement(const PDFStructure if (!title.isEmpty()) { markHasContent(); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureTitle, -1, }); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureTitle}); } if (!language.isEmpty()) { markHasContent(); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureLanguage, -1, language }); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, language, PDFDocumentTextFlow::StructureLanguage }); } if (!alternativeDescription.isEmpty()) { markHasContent(); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureAlternativeDescription, -1, alternativeDescription }); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, alternativeDescription, PDFDocumentTextFlow::StructureAlternativeDescription }); } if (!expandedForm.isEmpty()) { markHasContent(); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureExpandedForm, -1, expandedForm }); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, expandedForm, PDFDocumentTextFlow::StructureExpandedForm }); } if (!actualText.isEmpty()) { markHasContent(); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureActualText, -1, actualText }); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, actualText, PDFDocumentTextFlow::StructureActualText }); } if (!phoneme.isEmpty()) { markHasContent(); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructurePhoneme, -1, phoneme }); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, phoneme, PDFDocumentTextFlow::StructurePhoneme }); } - for (const QString& string : m_extractor->getText(structureElement)) + for (const auto& textItem : m_extractor->getText(structureElement)) { markHasContent(); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::Text, -1, string}); + m_items->push_back(PDFDocumentTextFlow::Item{ textItem.boundingRect, textItem.pageIndex, textItem.text, PDFDocumentTextFlow::Text }); } acceptChildren(structureElement); @@ -564,7 +606,7 @@ void PDFStructureTreeTextFlowCollector::visitStructureElement(const PDFStructure const bool hasContent = m_hasContentStack.back(); m_hasContentStack.pop_back(); - m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureItemEnd, -1, QString()}); + m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureItemEnd }); if (!hasContent) { @@ -643,12 +685,12 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume PDFTextFlows textFlows = PDFTextFlow::createTextFlows(textLayout, PDFTextFlow::FlowFlags(PDFTextFlow::SeparateBlocks) | PDFTextFlow::RemoveSoftHyphen, pageIndex); PDFDocumentTextFlow::Items flowItems; - flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::PageStart, pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1) }); + flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1), PDFDocumentTextFlow::PageStart }); for (const PDFTextFlow& textFlow : textFlows) { - flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::Text, pageIndex, textFlow.getText() }); + flowItems.emplace_back(PDFDocumentTextFlow::Item{ textFlow.getBoundingBox(), pageIndex, textFlow.getText(), PDFDocumentTextFlow::Text }); } - flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::PageEnd, pageIndex, QString() }); + flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, QString(), PDFDocumentTextFlow::PageEnd }); QMutexLocker lock(&mutex); items[pageIndex] = qMove(flowItems); @@ -677,7 +719,9 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume break; } - PDFStructureTreeTextExtractor extractor(document, &structureTree, PDFStructureTreeTextExtractor::SkipArtifact | PDFStructureTreeTextExtractor::AdjustReversedText | PDFStructureTreeTextExtractor::CreateTreeMapping); + PDFStructureTreeTextExtractor::Options options = PDFStructureTreeTextExtractor::SkipArtifact | PDFStructureTreeTextExtractor::AdjustReversedText | PDFStructureTreeTextExtractor::CreateTreeMapping; + options.setFlag(PDFStructureTreeTextExtractor::BoundingBoxes, m_calculateBoundingBoxes); + PDFStructureTreeTextExtractor extractor(document, &structureTree, options); extractor.perform(pageIndices); PDFDocumentTextFlow::Items flowItems; @@ -691,21 +735,23 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume case Algorithm::Content: { - PDFStructureTreeTextExtractor extractor(document, &structureTree, PDFStructureTreeTextExtractor::None); + PDFStructureTreeTextExtractor::Options options = PDFStructureTreeTextExtractor::None; + options.setFlag(PDFStructureTreeTextExtractor::BoundingBoxes, m_calculateBoundingBoxes); + PDFStructureTreeTextExtractor extractor(document, &structureTree, options); extractor.perform(pageIndices); PDFDocumentTextFlow::Items flowItems; for (PDFInteger pageIndex : pageIndices) { - flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::PageStart, pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1) }); + flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1), PDFDocumentTextFlow::PageStart }); for (const PDFStructureTreeTextItem& sequenceItem : extractor.getTextSequence(pageIndex)) { if (sequenceItem.type == PDFStructureTreeTextItem::Type::Text) { - flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::Text, pageIndex, sequenceItem.text }); + flowItems.emplace_back(PDFDocumentTextFlow::Item{ sequenceItem.boundingRect, pageIndex, sequenceItem.text, PDFDocumentTextFlow::Text }); } } - flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::PageEnd, pageIndex, QString() }); + flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, QString(), PDFDocumentTextFlow::PageEnd }); } result = PDFDocumentTextFlow(qMove(flowItems)); @@ -721,4 +767,77 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume return result; } +PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* document, Algorithm algorithm) +{ + std::vector pageIndices; + pageIndices.resize(document->getCatalog()->getPageCount(), 0); + std::iota(pageIndices.begin(), pageIndices.end(), 0); + + return create(document, pageIndices, algorithm); +} + +void PDFDocumentTextFlowFactory::setCalculateBoundingBoxes(bool calculateBoundingBoxes) +{ + m_calculateBoundingBoxes = calculateBoundingBoxes; +} + +void PDFDocumentTextFlowEditor::setTextFlow(PDFDocumentTextFlow textFlow) +{ + m_originalTextFlow = std::move(textFlow); + createEditedFromOriginalTextFlow(); +} + +void PDFDocumentTextFlowEditor::removeItem(size_t index) +{ + getEditedItem(index)->editedItemFlags.setFlag(Removed, true); +} + +void PDFDocumentTextFlowEditor::addItem(size_t index) +{ + getEditedItem(index)->editedItemFlags.setFlag(Removed, false); +} + +void PDFDocumentTextFlowEditor::clear() +{ + m_originalTextFlow = PDFDocumentTextFlow(); + m_editedTextFlow.clear(); +} + +void PDFDocumentTextFlowEditor::setText(const QString& text, size_t index) +{ + EditedItem* item = getEditedItem(index); + item->text = text; + updateModifiedFlag(index); +} + +void PDFDocumentTextFlowEditor::createEditedFromOriginalTextFlow() +{ + const size_t count = m_originalTextFlow.getSize(); + m_editedTextFlow.reserve(count); + + for (size_t i = 0; i < count; ++i) + { + const PDFDocumentTextFlow::Item* originalItem = getOriginalItem(i); + + if (originalItem->text.isEmpty()) + { + continue; + } + + EditedItem editedItem; + static_cast(editedItem) = *originalItem; + editedItem.originalIndex = i; + editedItem.editedItemFlags = None; + m_editedTextFlow.emplace_back(std::move(editedItem)); + } +} + +void PDFDocumentTextFlowEditor::updateModifiedFlag(size_t index) +{ + const bool isModified = getText(index) != getOriginalItem(index)->text; + + EditedItem* item = getEditedItem(index); + item->editedItemFlags.setFlag(Modified, isModified); +} + } // namespace pdf diff --git a/Pdf4QtLib/sources/pdfdocumenttextflow.h b/Pdf4QtLib/sources/pdfdocumenttextflow.h index 3b577dc..15e8d93 100644 --- a/Pdf4QtLib/sources/pdfdocumenttextflow.h +++ b/Pdf4QtLib/sources/pdfdocumenttextflow.h @@ -51,9 +51,10 @@ public: struct Item { - Flags flags = None; + QRectF boundingRect; ///< Bounding rect in page coordinates PDFInteger pageIndex = 0; QString text; + Flags flags = None; }; using Items = std::vector; @@ -66,6 +67,13 @@ public: const Items& getItems() const { return m_items; } + /// Returns item at a given index + /// \param index Index + const Item* getItem(size_t index) const { return &m_items.at(index); } + + /// Returns text flow item count + size_t getSize() const { return m_items.size(); } + /// Returns true, if text flow is empty bool isEmpty() const { return m_items.empty(); } @@ -96,14 +104,93 @@ public: const std::vector& pageIndices, Algorithm algorithm); + /// Performs document text flow analysis using given algorithm. Text flow + /// is created for all pages. + /// \param document Document + /// \param algorithm Algorithm + PDFDocumentTextFlow create(const PDFDocument* document, Algorithm algorithm); + /// Has some error/warning occured during text layout creation? bool hasError() const { return !m_errors.isEmpty(); } /// Returns a list of errors/warnings const QList& getErrors() const { return m_errors; } + /// Sets if bounding boxes for text blocks should be calculated + /// \param calculateBoundingBoxes Perform bounding box calculation? + void setCalculateBoundingBoxes(bool calculateBoundingBoxes); + private: QList m_errors; + bool m_calculateBoundingBoxes = false; +}; + +/// Editor which can edit document text flow, modify user text, +/// change order of text items, restore original state of a text flow, +/// and many other features. +class PDF4QTLIBSHARED_EXPORT PDFDocumentTextFlowEditor +{ +public: + inline PDFDocumentTextFlowEditor() = default; + + /// Sets a text flow and initializes edited text flow + /// \param textFlow Text flow + void setTextFlow(PDFDocumentTextFlow textFlow); + + void removeItem(size_t index); + void addItem(size_t index); + + void clear(); + + enum EditedItemFlag + { + None = 0x0000, + Removed = 0x0001, + Modified = 0x0002 + }; + Q_DECLARE_FLAGS(EditedItemFlags, EditedItemFlag) + + struct EditedItem : public PDFDocumentTextFlow::Item + { + size_t originalIndex = 0; ///< Index of original item + EditedItemFlags editedItemFlags = None; + }; + + using EditedItems = std::vector; + + /// Returns true, if item is active + /// \param index Index + bool isActive(size_t index) const { return !getEditedItem(index)->editedItemFlags.testFlag(Removed); } + + /// Returns true, if item is removed + /// \param index Index + bool isRemoved(size_t index) const { return !isActive(index); } + + /// Returns true, if item is modified + /// \param index Index + bool isModified(size_t index) const { return getEditedItem(index)->editedItemFlags.testFlag(Modified); } + + /// Returns edited text (or original, if edited text is not modified) + /// for a given index. + /// \param index Index + const QString& getText(size_t index) const { return getEditedItem(index)->text; } + + /// Sets edited text for a given index + void setText(const QString& text, size_t index); + + /// Returns true, if text flow is empty + bool isEmpty() const { return m_originalTextFlow.isEmpty(); } + +private: + void createEditedFromOriginalTextFlow(); + void updateModifiedFlag(size_t index); + + const PDFDocumentTextFlow::Item* getOriginalItem(size_t index) const { return m_originalTextFlow.getItem(index); } + EditedItem* getEditedItem(size_t index) { return &m_editedTextFlow.at(index); } + const EditedItem* getEditedItem(size_t index) const { return &m_editedTextFlow.at(index); } + + PDFDocumentTextFlow m_originalTextFlow; + EditedItems m_editedTextFlow; }; } // namespace pdf diff --git a/Pdf4QtLib/sources/pdftextlayout.cpp b/Pdf4QtLib/sources/pdftextlayout.cpp index 1c5a784..46ca667 100644 --- a/Pdf4QtLib/sources/pdftextlayout.cpp +++ b/Pdf4QtLib/sources/pdftextlayout.cpp @@ -1174,6 +1174,7 @@ QString PDFTextFlow::getText(const PDFCharacterPointer& begin, const PDFCharacte void PDFTextFlow::merge(const PDFTextFlow& next) { m_text += next.m_text; + m_boundingBox = m_boundingBox.united(next.m_boundingBox); m_characterPointers.insert(m_characterPointers.end(), next.m_characterPointers.cbegin(), next.m_characterPointers.cend()); } @@ -1204,6 +1205,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags for (const PDFTextBlock& textBlock : layout.getTextBlocks()) { PDFTextFlow currentFlow; + currentFlow.m_boundingBox = textBlock.getBoundingBox().controlPointRect(); size_t textLineIndex = 0; for (const PDFTextLine& textLine : textBlock.getLines()) diff --git a/Pdf4QtLib/sources/pdftextlayout.h b/Pdf4QtLib/sources/pdftextlayout.h index 52ba44b..56c0779 100644 --- a/Pdf4QtLib/sources/pdftextlayout.h +++ b/Pdf4QtLib/sources/pdftextlayout.h @@ -305,6 +305,9 @@ public: /// Merge data from \p next flow (i.e. connect two consecutive flows) void merge(const PDFTextFlow& next); + /// Returns bounding box of a text flow on the page + QRectF getBoundingBox() const { return m_boundingBox; } + /// Creates text flows from text layout, according to creation flags. /// \param layout Layout, from which is text flow created /// \param flags Flow creation flags @@ -325,6 +328,7 @@ private: QString getContext(size_t index, size_t length) const; QString m_text; + QRectF m_boundingBox; std::vector m_characterPointers; }; diff --git a/Pdf4QtViewerPlugins/AudioBookPlugin/AudioBookPlugin.json b/Pdf4QtViewerPlugins/AudioBookPlugin/AudioBookPlugin.json new file mode 100644 index 0000000..15fe577 --- /dev/null +++ b/Pdf4QtViewerPlugins/AudioBookPlugin/AudioBookPlugin.json @@ -0,0 +1,7 @@ +{ + "Name" : "AudioBook", + "Author" : "Jakub Melka", + "Version" : "1.0.0", + "License" : "LGPL v3", + "Description" : "Convert document to an audio book." +} diff --git a/Pdf4QtViewerPlugins/AudioBookPlugin/AudioBookPlugin.pro b/Pdf4QtViewerPlugins/AudioBookPlugin/AudioBookPlugin.pro new file mode 100644 index 0000000..f7b404c --- /dev/null +++ b/Pdf4QtViewerPlugins/AudioBookPlugin/AudioBookPlugin.pro @@ -0,0 +1,48 @@ +# Copyright (C) 2021 Jakub Melka +# +# This file is part of PDF4QT. +# +# PDF4QT is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# with the written consent of the copyright owner, any later version. +# +# PDF4QT is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with PDF4QT. If not, see . + +TEMPLATE = lib +DEFINES += AUDIOBOOKPLUGIN_LIBRARY + +QT += gui widgets + +LIBS += -L$$OUT_PWD/../.. + +LIBS += -lPdf4QtLib + +QMAKE_CXXFLAGS += /std:c++latest /utf-8 + +INCLUDEPATH += $$PWD/../../Pdf4QtLib/Sources + +DESTDIR = $$OUT_PWD/../../pdfplugins + +CONFIG += c++11 + +SOURCES += \ + audiobookplugin.cpp + +HEADERS += \ + audiobookplugin.h + +CONFIG += force_debug_info + +DISTFILES += \ + AudioBookPlugin.json + +RESOURCES += \ + icons.qrc + diff --git a/Pdf4QtViewerPlugins/AudioBookPlugin/audiobookplugin.cpp b/Pdf4QtViewerPlugins/AudioBookPlugin/audiobookplugin.cpp new file mode 100644 index 0000000..0743d34 --- /dev/null +++ b/Pdf4QtViewerPlugins/AudioBookPlugin/audiobookplugin.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2021 Jakub Melka +// +// This file is part of PDF4QT. +// +// PDF4QT is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// with the written consent of the copyright owner, any later version. +// +// PDF4QT is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with PDF4QT. If not, see . + +#include "audiobookplugin.h" + +#include + +namespace pdfplugin +{ + +AudioBookPlugin::AudioBookPlugin() : + pdf::PDFPlugin(nullptr), + m_createTextStreamAction(nullptr) +{ + +} + +void AudioBookPlugin::setWidget(pdf::PDFWidget* widget) +{ + Q_ASSERT(!m_widget); + + BaseClass::setWidget(widget); + + m_createTextStreamAction = new QAction(QIcon(":/pdfplugins/audiobook/create-text-stream.svg"), tr("Create Text Stream for Audio Book"), this); + m_createTextStreamAction->setObjectName("actionAudioBook_CreateTextStream"); + + connect(m_createTextStreamAction, &QAction::triggered, this, &AudioBookPlugin::onCreateTextStreamTriggered); + + updateActions(); +} + +void AudioBookPlugin::setDocument(const pdf::PDFModifiedDocument& document) +{ + BaseClass::setDocument(document); + + if (document.hasReset()) + { + m_textFlowEditor.clear(); + updateActions(); + } +} + +std::vector AudioBookPlugin::getActions() const +{ + return { m_createTextStreamAction }; +} + +void AudioBookPlugin::onCreateTextStreamTriggered() +{ + Q_ASSERT(m_document); + + if (!m_textFlowEditor.isEmpty()) + { + return; + } + + pdf::PDFDocumentTextFlowFactory factory; + pdf::PDFDocumentTextFlow textFlow = factory.create(m_document, pdf::PDFDocumentTextFlowFactory::Algorithm::Auto); + m_textFlowEditor.setTextFlow(std::move(textFlow)); +} + +void AudioBookPlugin::updateActions() +{ + m_createTextStreamAction->setEnabled(m_document); +} + +} // namespace pdfplugin diff --git a/Pdf4QtViewerPlugins/AudioBookPlugin/audiobookplugin.h b/Pdf4QtViewerPlugins/AudioBookPlugin/audiobookplugin.h new file mode 100644 index 0000000..917c1a4 --- /dev/null +++ b/Pdf4QtViewerPlugins/AudioBookPlugin/audiobookplugin.h @@ -0,0 +1,56 @@ +// Copyright (C) 2021 Jakub Melka +// +// This file is part of PDF4QT. +// +// PDF4QT is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// with the written consent of the copyright owner, any later version. +// +// PDF4QT is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with PDF4QT. If not, see . + +#ifndef AUDIOBOOKPLUGIN_H +#define AUDIOBOOKPLUGIN_H + +#include "pdfplugin.h" +#include "pdfdocumenttextflow.h" + +#include + +namespace pdfplugin +{ + +class AudioBookPlugin : public pdf::PDFPlugin +{ + Q_OBJECT + Q_PLUGIN_METADATA(IID "PDF4QT.AudioBookPlugin" FILE "AudioBookPlugin.json") + +private: + using BaseClass = pdf::PDFPlugin; + +public: + AudioBookPlugin(); + + virtual void setWidget(pdf::PDFWidget* widget) override; + virtual void setDocument(const pdf::PDFModifiedDocument& document) override; + virtual std::vector getActions() const override; + +private: + void onCreateTextStreamTriggered(); + + void updateActions(); + + QAction* m_createTextStreamAction; + + pdf::PDFDocumentTextFlowEditor m_textFlowEditor; +}; + +} // namespace pdfplugin + +#endif // AUDIOBOOKPLUGIN_H diff --git a/Pdf4QtViewerPlugins/AudioBookPlugin/create-text-stream.svg b/Pdf4QtViewerPlugins/AudioBookPlugin/create-text-stream.svg new file mode 100644 index 0000000..4d77c0b --- /dev/null +++ b/Pdf4QtViewerPlugins/AudioBookPlugin/create-text-stream.svg @@ -0,0 +1,100 @@ + + + + + + + + + + + + image/svg+xml + + + + + + Jakub Melka + + + + + + + + + + + + + + + + + + + + diff --git a/Pdf4QtViewerPlugins/AudioBookPlugin/icons.qrc b/Pdf4QtViewerPlugins/AudioBookPlugin/icons.qrc new file mode 100644 index 0000000..436de28 --- /dev/null +++ b/Pdf4QtViewerPlugins/AudioBookPlugin/icons.qrc @@ -0,0 +1,5 @@ + + + create-text-stream.svg + + diff --git a/Pdf4QtViewerPlugins/Pdf4QtViewerPlugins.pro b/Pdf4QtViewerPlugins/Pdf4QtViewerPlugins.pro index bafb7e3..10ecb5d 100644 --- a/Pdf4QtViewerPlugins/Pdf4QtViewerPlugins.pro +++ b/Pdf4QtViewerPlugins/Pdf4QtViewerPlugins.pro @@ -23,6 +23,7 @@ SUBDIRS += \ SoftProofingPlugin \ RedactPlugin \ OutputPreviewPlugin \ - ObjectInspectorPlugin + ObjectInspectorPlugin \ + AudioBookPlugin diff --git a/README.txt b/README.txt index 49ab4c5..355bfe2 100644 --- a/README.txt +++ b/README.txt @@ -43,6 +43,8 @@ Software have following features (the list is not complete): - file attachments - optimalization (compressing documents) - command line tool + - audio book conversion + - internal structure inspector 4. THIRD PARTY LIBRARIES