AudioBook Plugin: Initial commit, document text flow adjustments

This commit is contained in:
Jakub Melka
2021-08-15 17:34:05 +02:00
parent 0559cd7c50
commit 354a93f4d6
12 changed files with 547 additions and 35 deletions

View File

@ -89,19 +89,21 @@ struct PDFStructureTreeTextItem
}; };
PDFStructureTreeTextItem() = default; PDFStructureTreeTextItem() = default;
PDFStructureTreeTextItem(Type type, const PDFStructureItem* item, QString text) : PDFStructureTreeTextItem(Type type, const PDFStructureItem* item, QString text, PDFInteger pageIndex, QRectF boundingRect) :
type(type), item(item), text(qMove(text)) type(type), item(item), text(qMove(text)), pageIndex(pageIndex), boundingRect(boundingRect)
{ {
} }
static PDFStructureTreeTextItem createText(QString text) { return PDFStructureTreeTextItem(Type::Text, nullptr, qMove(text)); } static PDFStructureTreeTextItem createText(QString text, PDFInteger pageIndex, QRectF boundingRect) { return PDFStructureTreeTextItem(Type::Text, nullptr, qMove(text), pageIndex, boundingRect); }
static PDFStructureTreeTextItem createStartTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::StartTag, item, QString()); } static PDFStructureTreeTextItem createStartTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::StartTag, item, QString(), -1, QRectF()); }
static PDFStructureTreeTextItem createEndTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::EndTag, item, QString()); } static PDFStructureTreeTextItem createEndTag(const PDFStructureItem* item) { return PDFStructureTreeTextItem(Type::EndTag, item, QString(), -1, QRectF()); }
Type type = Type::Text; Type type = Type::Text;
const PDFStructureItem* item = nullptr; const PDFStructureItem* item = nullptr;
QString text; QString text;
PDFInteger pageIndex = -1;
QRectF boundingRect;
}; };
using PDFStructureTreeTextSequence = std::vector<PDFStructureTreeTextItem>; using PDFStructureTreeTextSequence = std::vector<PDFStructureTreeTextItem>;
@ -118,6 +120,7 @@ public:
SkipArtifact = 0x0001, ///< Skip content marked as 'Artifact' SkipArtifact = 0x0001, ///< Skip content marked as 'Artifact'
AdjustReversedText = 0x0002, ///< Adjust reversed text AdjustReversedText = 0x0002, ///< Adjust reversed text
CreateTreeMapping = 0x0004, ///< Create text mapping to structure tree item CreateTreeMapping = 0x0004, ///< Create text mapping to structure tree item
BoundingBoxes = 0x0008, ///< Compute bounding boxes of the texts
}; };
Q_DECLARE_FLAGS(Options, Option) Q_DECLARE_FLAGS(Options, Option)
@ -139,11 +142,20 @@ public:
/// \param pageNumber Page number /// \param pageNumber Page number
const PDFStructureTreeTextSequence& getTextSequence(PDFInteger pageNumber) const; const PDFStructureTreeTextSequence& getTextSequence(PDFInteger pageNumber) const;
struct TextItem
{
QRectF boundingRect;
PDFInteger pageIndex = -1;
QString text;
};
using TextItems = std::vector<TextItem>;
/// Returns text for given structure tree item. If structure tree item /// Returns text for given structure tree item. If structure tree item
/// is not found, then empty list is returned. This functionality /// is not found, then empty list is returned. This functionality
/// requires, that \p CreateTreeMapping flag is being set. /// requires, that \p CreateTreeMapping flag is being set.
/// \param item Item /// \param item Item
const QStringList& getText(const PDFStructureItem* item) const; const TextItems& getText(const PDFStructureItem* item) const;
private: private:
QList<PDFRenderError> m_errors; QList<PDFRenderError> m_errors;
@ -151,7 +163,7 @@ private:
const PDFStructureTree* m_tree; const PDFStructureTree* m_tree;
QStringList m_unmatchedText; QStringList m_unmatchedText;
std::map<PDFInteger, PDFStructureTreeTextSequence> m_textSequences; std::map<PDFInteger, PDFStructureTreeTextSequence> m_textSequences;
std::map<const PDFStructureItem*, QStringList> m_textForItems; std::map<const PDFStructureItem*, TextItems> m_textForItems;
Options m_options; Options m_options;
}; };
@ -177,7 +189,8 @@ public:
m_features(features), m_features(features),
m_tree(tree), m_tree(tree),
m_mapping(mapping), m_mapping(mapping),
m_extractorOptions(extractorOptions) m_extractorOptions(extractorOptions),
m_pageIndex(document->getCatalog()->getPageIndexFromPageReference(page->getPageReference()))
{ {
} }
@ -191,6 +204,7 @@ protected:
virtual void performOutputCharacter(const PDFTextCharacterInfo& info) override; virtual void performOutputCharacter(const PDFTextCharacterInfo& info) override;
virtual void performMarkedContentBegin(const QByteArray& tag, const PDFObject& properties) override; virtual void performMarkedContentBegin(const QByteArray& tag, const PDFObject& properties) override;
virtual void performMarkedContentEnd() override; virtual void performMarkedContentEnd() override;
virtual void performPathPainting(const QPainterPath& path, bool stroke, bool fill, bool text, Qt::FillRule fillRule) override;
private: private:
const PDFStructureItem* getStructureTreeItemFromMCID(PDFInteger mcid) const; const PDFStructureItem* getStructureTreeItemFromMCID(PDFInteger mcid) const;
@ -213,11 +227,35 @@ private:
const std::map<PDFObjectReference, const PDFStructureItem*>* m_mapping; const std::map<PDFObjectReference, const PDFStructureItem*>* m_mapping;
std::vector<MarkedContentInfo> m_markedContentInfoStack; std::vector<MarkedContentInfo> m_markedContentInfoStack;
QString m_currentText; QString m_currentText;
QRectF m_currentBoundingBox;
PDFStructureTreeTextSequence m_textSequence; PDFStructureTreeTextSequence m_textSequence;
QStringList m_unmatchedText; QStringList m_unmatchedText;
PDFStructureTreeTextExtractor::Options m_extractorOptions; PDFStructureTreeTextExtractor::Options m_extractorOptions;
PDFInteger m_pageIndex;
}; };
void PDFStructureTreeTextContentProcessor::performPathPainting(const QPainterPath& path, bool stroke, bool fill, bool text, Qt::FillRule fillRule)
{
if (!text)
{
// Jakub Melka: This should not occur
return;
}
if (!m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::BoundingBoxes))
{
return;
}
Q_UNUSED(stroke);
Q_UNUSED(fill);
Q_UNUSED(fillRule);
QMatrix matrix = getCurrentWorldMatrix();
QPainterPath worldPath = matrix.map(path);
m_currentBoundingBox = m_currentBoundingBox.united(worldPath.controlPointRect());
}
void PDFStructureTreeTextContentProcessor::finishText() void PDFStructureTreeTextContentProcessor::finishText()
{ {
m_currentText = m_currentText.trimmed(); m_currentText = m_currentText.trimmed();
@ -233,9 +271,10 @@ void PDFStructureTreeTextContentProcessor::finishText()
} }
m_currentText = qMove(reversed); m_currentText = qMove(reversed);
} }
m_textSequence.emplace_back(PDFStructureTreeTextItem::createText(qMove(m_currentText))); m_textSequence.emplace_back(PDFStructureTreeTextItem::createText(qMove(m_currentText), m_pageIndex, m_currentBoundingBox));
} }
m_currentText = QString(); m_currentText = QString();
m_currentBoundingBox = QRectF();
} }
bool PDFStructureTreeTextContentProcessor::isArtifact() const bool PDFStructureTreeTextContentProcessor::isArtifact() const
@ -306,6 +345,7 @@ void PDFStructureTreeTextContentProcessor::performMarkedContentEnd()
{ {
m_unmatchedText << qMove(m_currentText); m_unmatchedText << qMove(m_currentText);
} }
m_currentBoundingBox = QRectF();
} }
} }
@ -333,8 +373,10 @@ bool PDFStructureTreeTextContentProcessor::isContentKindSuppressed(ContentKind k
{ {
switch (kind) switch (kind)
{ {
case ContentKind::Shapes:
case ContentKind::Text: case ContentKind::Text:
return !m_extractorOptions.testFlag(PDFStructureTreeTextExtractor::BoundingBoxes);
case ContentKind::Shapes:
case ContentKind::Images: case ContentKind::Images:
case ContentKind::Shading: case ContentKind::Shading:
return true; return true;
@ -430,7 +472,7 @@ void PDFStructureTreeTextExtractor::perform(const std::vector<PDFInteger>& pageI
case PDFStructureTreeTextItem::Type::Text: case PDFStructureTreeTextItem::Type::Text:
if (!stack.empty()) if (!stack.empty())
{ {
m_textForItems[stack.top()] << sequenceItem.text; m_textForItems[stack.top()].emplace_back(TextItem{ sequenceItem.boundingRect, sequenceItem.pageIndex, sequenceItem.text });
} }
break; break;
} }
@ -451,7 +493,7 @@ const PDFStructureTreeTextSequence& PDFStructureTreeTextExtractor::getTextSequen
return dummy; return dummy;
} }
const QStringList& PDFStructureTreeTextExtractor::getText(const PDFStructureItem* item) const const PDFStructureTreeTextExtractor::TextItems& PDFStructureTreeTextExtractor::getText(const PDFStructureItem* item) const
{ {
auto it = m_textForItems.find(item); auto it = m_textForItems.find(item);
if (it != m_textForItems.cend()) if (it != m_textForItems.cend())
@ -459,7 +501,7 @@ const QStringList& PDFStructureTreeTextExtractor::getText(const PDFStructureItem
return it->second; return it->second;
} }
static const QStringList dummy; static const TextItems dummy;
return dummy; return dummy;
} }
@ -489,9 +531,9 @@ private:
void PDFStructureTreeTextFlowCollector::visitStructureTree(const PDFStructureTree* structureTree) void PDFStructureTreeTextFlowCollector::visitStructureTree(const PDFStructureTree* structureTree)
{ {
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureItemStart, -1, QString()}); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureItemStart});
acceptChildren(structureTree); acceptChildren(structureTree);
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureItemEnd, -1, QString()}); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureItemEnd});
} }
void PDFStructureTreeTextFlowCollector::markHasContent() void PDFStructureTreeTextFlowCollector::markHasContent()
@ -505,7 +547,7 @@ void PDFStructureTreeTextFlowCollector::markHasContent()
void PDFStructureTreeTextFlowCollector::visitStructureElement(const PDFStructureElement* structureElement) void PDFStructureTreeTextFlowCollector::visitStructureElement(const PDFStructureElement* structureElement)
{ {
size_t index = m_items->size(); size_t index = m_items->size();
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureItemStart, -1, QString()}); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureItemStart});
// Mark stack so we can delete unused items // Mark stack so we can delete unused items
m_hasContentStack.push_back(false); m_hasContentStack.push_back(false);
@ -520,43 +562,43 @@ void PDFStructureTreeTextFlowCollector::visitStructureElement(const PDFStructure
if (!title.isEmpty()) if (!title.isEmpty())
{ {
markHasContent(); markHasContent();
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureTitle, -1, }); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureTitle});
} }
if (!language.isEmpty()) if (!language.isEmpty())
{ {
markHasContent(); markHasContent();
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureLanguage, -1, language }); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, language, PDFDocumentTextFlow::StructureLanguage });
} }
if (!alternativeDescription.isEmpty()) if (!alternativeDescription.isEmpty())
{ {
markHasContent(); markHasContent();
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureAlternativeDescription, -1, alternativeDescription }); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, alternativeDescription, PDFDocumentTextFlow::StructureAlternativeDescription });
} }
if (!expandedForm.isEmpty()) if (!expandedForm.isEmpty())
{ {
markHasContent(); markHasContent();
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureExpandedForm, -1, expandedForm }); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, expandedForm, PDFDocumentTextFlow::StructureExpandedForm });
} }
if (!actualText.isEmpty()) if (!actualText.isEmpty())
{ {
markHasContent(); markHasContent();
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureActualText, -1, actualText }); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, actualText, PDFDocumentTextFlow::StructureActualText });
} }
if (!phoneme.isEmpty()) if (!phoneme.isEmpty())
{ {
markHasContent(); markHasContent();
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructurePhoneme, -1, phoneme }); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, phoneme, PDFDocumentTextFlow::StructurePhoneme });
} }
for (const QString& string : m_extractor->getText(structureElement)) for (const auto& textItem : m_extractor->getText(structureElement))
{ {
markHasContent(); markHasContent();
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::Text, -1, string}); m_items->push_back(PDFDocumentTextFlow::Item{ textItem.boundingRect, textItem.pageIndex, textItem.text, PDFDocumentTextFlow::Text });
} }
acceptChildren(structureElement); acceptChildren(structureElement);
@ -564,7 +606,7 @@ void PDFStructureTreeTextFlowCollector::visitStructureElement(const PDFStructure
const bool hasContent = m_hasContentStack.back(); const bool hasContent = m_hasContentStack.back();
m_hasContentStack.pop_back(); m_hasContentStack.pop_back();
m_items->push_back(PDFDocumentTextFlow::Item{PDFDocumentTextFlow::StructureItemEnd, -1, QString()}); m_items->push_back(PDFDocumentTextFlow::Item{ QRectF(), -1, QString(), PDFDocumentTextFlow::StructureItemEnd });
if (!hasContent) if (!hasContent)
{ {
@ -643,12 +685,12 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume
PDFTextFlows textFlows = PDFTextFlow::createTextFlows(textLayout, PDFTextFlow::FlowFlags(PDFTextFlow::SeparateBlocks) | PDFTextFlow::RemoveSoftHyphen, pageIndex); PDFTextFlows textFlows = PDFTextFlow::createTextFlows(textLayout, PDFTextFlow::FlowFlags(PDFTextFlow::SeparateBlocks) | PDFTextFlow::RemoveSoftHyphen, pageIndex);
PDFDocumentTextFlow::Items flowItems; PDFDocumentTextFlow::Items flowItems;
flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::PageStart, pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1) }); flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1), PDFDocumentTextFlow::PageStart });
for (const PDFTextFlow& textFlow : textFlows) for (const PDFTextFlow& textFlow : textFlows)
{ {
flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::Text, pageIndex, textFlow.getText() }); flowItems.emplace_back(PDFDocumentTextFlow::Item{ textFlow.getBoundingBox(), pageIndex, textFlow.getText(), PDFDocumentTextFlow::Text });
} }
flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::PageEnd, pageIndex, QString() }); flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, QString(), PDFDocumentTextFlow::PageEnd });
QMutexLocker lock(&mutex); QMutexLocker lock(&mutex);
items[pageIndex] = qMove(flowItems); items[pageIndex] = qMove(flowItems);
@ -677,7 +719,9 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume
break; break;
} }
PDFStructureTreeTextExtractor extractor(document, &structureTree, PDFStructureTreeTextExtractor::SkipArtifact | PDFStructureTreeTextExtractor::AdjustReversedText | PDFStructureTreeTextExtractor::CreateTreeMapping); PDFStructureTreeTextExtractor::Options options = PDFStructureTreeTextExtractor::SkipArtifact | PDFStructureTreeTextExtractor::AdjustReversedText | PDFStructureTreeTextExtractor::CreateTreeMapping;
options.setFlag(PDFStructureTreeTextExtractor::BoundingBoxes, m_calculateBoundingBoxes);
PDFStructureTreeTextExtractor extractor(document, &structureTree, options);
extractor.perform(pageIndices); extractor.perform(pageIndices);
PDFDocumentTextFlow::Items flowItems; PDFDocumentTextFlow::Items flowItems;
@ -691,21 +735,23 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume
case Algorithm::Content: case Algorithm::Content:
{ {
PDFStructureTreeTextExtractor extractor(document, &structureTree, PDFStructureTreeTextExtractor::None); PDFStructureTreeTextExtractor::Options options = PDFStructureTreeTextExtractor::None;
options.setFlag(PDFStructureTreeTextExtractor::BoundingBoxes, m_calculateBoundingBoxes);
PDFStructureTreeTextExtractor extractor(document, &structureTree, options);
extractor.perform(pageIndices); extractor.perform(pageIndices);
PDFDocumentTextFlow::Items flowItems; PDFDocumentTextFlow::Items flowItems;
for (PDFInteger pageIndex : pageIndices) for (PDFInteger pageIndex : pageIndices)
{ {
flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::PageStart, pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1) }); flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, PDFTranslationContext::tr("Page %1").arg(pageIndex + 1), PDFDocumentTextFlow::PageStart });
for (const PDFStructureTreeTextItem& sequenceItem : extractor.getTextSequence(pageIndex)) for (const PDFStructureTreeTextItem& sequenceItem : extractor.getTextSequence(pageIndex))
{ {
if (sequenceItem.type == PDFStructureTreeTextItem::Type::Text) if (sequenceItem.type == PDFStructureTreeTextItem::Type::Text)
{ {
flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::Text, pageIndex, sequenceItem.text }); flowItems.emplace_back(PDFDocumentTextFlow::Item{ sequenceItem.boundingRect, pageIndex, sequenceItem.text, PDFDocumentTextFlow::Text });
} }
} }
flowItems.emplace_back(PDFDocumentTextFlow::Item{ PDFDocumentTextFlow::PageEnd, pageIndex, QString() }); flowItems.emplace_back(PDFDocumentTextFlow::Item{ QRectF(), pageIndex, QString(), PDFDocumentTextFlow::PageEnd });
} }
result = PDFDocumentTextFlow(qMove(flowItems)); result = PDFDocumentTextFlow(qMove(flowItems));
@ -721,4 +767,77 @@ PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* docume
return result; return result;
} }
/// Convenience overload, which creates the text flow for every page of the
/// document. It builds the page index list 0, 1, ..., pageCount - 1 and
/// forwards to the overload taking explicit page indices.
/// \param document Document
/// \param algorithm Algorithm
PDFDocumentTextFlow PDFDocumentTextFlowFactory::create(const PDFDocument* document, Algorithm algorithm)
{
    const auto pageCount = document->getCatalog()->getPageCount();

    // Value-initialized to zero, then filled with consecutive indices
    std::vector<pdf::PDFInteger> allPageIndices(pageCount);
    std::iota(allPageIndices.begin(), allPageIndices.end(), 0);

    return create(document, allPageIndices, algorithm);
}
/// Stores the flag telling whether bounding boxes should be calculated.
/// The stored value is used by create() to set the BoundingBoxes option
/// of the structure tree text extractor.
/// \param calculateBoundingBoxes Perform bounding box calculation?
void PDFDocumentTextFlowFactory::setCalculateBoundingBoxes(bool calculateBoundingBoxes)
{
m_calculateBoundingBoxes = calculateBoundingBoxes;
}
/// Stores the given text flow as the original text flow (the argument is
/// moved in) and then builds the edited items from it.
/// \param textFlow Text flow
void PDFDocumentTextFlowEditor::setTextFlow(PDFDocumentTextFlow textFlow)
{
m_originalTextFlow = std::move(textFlow);
// Must run after the original flow is stored, because it reads from it
createEditedFromOriginalTextFlow();
}
/// Marks the edited item at the given index as removed by setting its
/// Removed flag. The item itself stays in the edited item list.
/// \param index Index into the edited items
void PDFDocumentTextFlowEditor::removeItem(size_t index)
{
getEditedItem(index)->editedItemFlags.setFlag(Removed, true);
}
/// Makes the edited item at the given index active again by clearing its
/// Removed flag. No new item is inserted into the edited item list.
/// \param index Index into the edited items
void PDFDocumentTextFlowEditor::addItem(size_t index)
{
getEditedItem(index)->editedItemFlags.setFlag(Removed, false);
}
/// Resets the editor: the original text flow is replaced by a default
/// constructed one and all edited items are dropped.
void PDFDocumentTextFlowEditor::clear()
{
m_originalTextFlow = PDFDocumentTextFlow();
m_editedTextFlow.clear();
}
/// Replaces the text of the edited item at the given index and refreshes
/// its Modified flag afterwards.
/// \param text New text
/// \param index Index into the edited items
void PDFDocumentTextFlowEditor::setText(const QString& text, size_t index)
{
    getEditedItem(index)->text = text;

    // The flag is derived from the text just stored, so update it last
    updateModifiedFlag(index);
}
/// Rebuilds the edited items from the original text flow. Every original
/// item with non-empty text is copied into an edited item, which remembers
/// the index of its original item and starts with no edit flags set.
/// Original items with empty text are skipped, so positions in the edited
/// items can differ from positions in the original text flow.
void PDFDocumentTextFlowEditor::createEditedFromOriginalTextFlow()
{
    // Drop items from a previously set text flow, otherwise calling
    // setTextFlow() repeatedly would append to stale edited items.
    m_editedTextFlow.clear();

    const size_t count = m_originalTextFlow.getSize();
    m_editedTextFlow.reserve(count);

    for (size_t i = 0; i < count; ++i)
    {
        const PDFDocumentTextFlow::Item* originalItem = getOriginalItem(i);

        if (originalItem->text.isEmpty())
        {
            continue;
        }

        EditedItem editedItem;
        // Copy only the base class part (rect, page index, text, flags)
        static_cast<PDFDocumentTextFlow::Item&>(editedItem) = *originalItem;
        editedItem.originalIndex = i;
        editedItem.editedItemFlags = None;
        m_editedTextFlow.emplace_back(std::move(editedItem));
    }
}
void PDFDocumentTextFlowEditor::updateModifiedFlag(size_t index)
{
const bool isModified = getText(index) != getOriginalItem(index)->text;
EditedItem* item = getEditedItem(index);
item->editedItemFlags.setFlag(Modified, isModified);
}
} // namespace pdf } // namespace pdf

View File

@ -51,9 +51,10 @@ public:
struct Item struct Item
{ {
Flags flags = None; QRectF boundingRect; ///< Bounding rect in page coordinates
PDFInteger pageIndex = 0; PDFInteger pageIndex = 0;
QString text; QString text;
Flags flags = None;
}; };
using Items = std::vector<Item>; using Items = std::vector<Item>;
@ -66,6 +67,13 @@ public:
const Items& getItems() const { return m_items; } const Items& getItems() const { return m_items; }
/// Returns item at a given index
/// \param index Index
const Item* getItem(size_t index) const { return &m_items.at(index); }
/// Returns text flow item count
size_t getSize() const { return m_items.size(); }
/// Returns true, if text flow is empty /// Returns true, if text flow is empty
bool isEmpty() const { return m_items.empty(); } bool isEmpty() const { return m_items.empty(); }
@ -96,14 +104,93 @@ public:
const std::vector<PDFInteger>& pageIndices, const std::vector<PDFInteger>& pageIndices,
Algorithm algorithm); Algorithm algorithm);
/// Performs document text flow analysis using given algorithm. Text flow
/// is created for all pages.
/// \param document Document
/// \param algorithm Algorithm
PDFDocumentTextFlow create(const PDFDocument* document, Algorithm algorithm);
/// Has some error/warning occured during text layout creation? /// Has some error/warning occured during text layout creation?
bool hasError() const { return !m_errors.isEmpty(); } bool hasError() const { return !m_errors.isEmpty(); }
/// Returns a list of errors/warnings /// Returns a list of errors/warnings
const QList<PDFRenderError>& getErrors() const { return m_errors; } const QList<PDFRenderError>& getErrors() const { return m_errors; }
/// Sets if bounding boxes for text blocks should be calculated
/// \param calculateBoundingBoxes Perform bounding box calculation?
void setCalculateBoundingBoxes(bool calculateBoundingBoxes);
private: private:
QList<PDFRenderError> m_errors; QList<PDFRenderError> m_errors;
bool m_calculateBoundingBoxes = false;
};
/// Editor which can edit document text flow, modify user text,
/// change order of text items, restore original state of a text flow,
/// and many other features.
class PDF4QTLIBSHARED_EXPORT PDFDocumentTextFlowEditor
{
public:
inline PDFDocumentTextFlowEditor() = default;
/// Sets a text flow and initializes edited text flow
/// \param textFlow Text flow
void setTextFlow(PDFDocumentTextFlow textFlow);
/// Marks edited item as removed (sets the Removed flag)
/// \param index Index of the edited item
void removeItem(size_t index);
/// Marks edited item as active again (clears the Removed flag)
/// \param index Index of the edited item
void addItem(size_t index);
/// Clears both the original text flow and the edited items
void clear();
/// Edit state flags of a single edited item
enum EditedItemFlag
{
None = 0x0000,
Removed = 0x0001, ///< Item is marked as removed
Modified = 0x0002 ///< Item text differs from the original text
};
Q_DECLARE_FLAGS(EditedItemFlags, EditedItemFlag)
/// Text flow item extended with editing state
struct EditedItem : public PDFDocumentTextFlow::Item
{
size_t originalIndex = 0; ///< Index of original item
EditedItemFlags editedItemFlags = None;
};
using EditedItems = std::vector<EditedItem>;
/// Returns true, if item is active
/// \param index Index
bool isActive(size_t index) const { return !getEditedItem(index)->editedItemFlags.testFlag(Removed); }
/// Returns true, if item is removed
/// \param index Index
bool isRemoved(size_t index) const { return !isActive(index); }
/// Returns true, if item is modified
/// \param index Index
bool isModified(size_t index) const { return getEditedItem(index)->editedItemFlags.testFlag(Modified); }
/// Returns edited text (or original, if edited text is not modified)
/// for a given index.
/// \param index Index
const QString& getText(size_t index) const { return getEditedItem(index)->text; }
/// Sets edited text for a given index
/// \param text New text
/// \param index Index
void setText(const QString& text, size_t index);
/// Returns true, if text flow is empty (tests the original text flow)
bool isEmpty() const { return m_originalTextFlow.isEmpty(); }
private:
/// Fills edited items from the original text flow
void createEditedFromOriginalTextFlow();
/// Updates the Modified flag of the edited item
/// \param index Index
void updateModifiedFlag(size_t index);
// Item accessors; all of them use at() for element access
const PDFDocumentTextFlow::Item* getOriginalItem(size_t index) const { return m_originalTextFlow.getItem(index); }
EditedItem* getEditedItem(size_t index) { return &m_editedTextFlow.at(index); }
const EditedItem* getEditedItem(size_t index) const { return &m_editedTextFlow.at(index); }
PDFDocumentTextFlow m_originalTextFlow; ///< Text flow as passed to setTextFlow
EditedItems m_editedTextFlow; ///< Editable copies of the original items
};
} // namespace pdf } // namespace pdf

View File

@ -1174,6 +1174,7 @@ QString PDFTextFlow::getText(const PDFCharacterPointer& begin, const PDFCharacte
void PDFTextFlow::merge(const PDFTextFlow& next) void PDFTextFlow::merge(const PDFTextFlow& next)
{ {
m_text += next.m_text; m_text += next.m_text;
m_boundingBox = m_boundingBox.united(next.m_boundingBox);
m_characterPointers.insert(m_characterPointers.end(), next.m_characterPointers.cbegin(), next.m_characterPointers.cend()); m_characterPointers.insert(m_characterPointers.end(), next.m_characterPointers.cbegin(), next.m_characterPointers.cend());
} }
@ -1204,6 +1205,7 @@ PDFTextFlows PDFTextFlow::createTextFlows(const PDFTextLayout& layout, FlowFlags
for (const PDFTextBlock& textBlock : layout.getTextBlocks()) for (const PDFTextBlock& textBlock : layout.getTextBlocks())
{ {
PDFTextFlow currentFlow; PDFTextFlow currentFlow;
currentFlow.m_boundingBox = textBlock.getBoundingBox().controlPointRect();
size_t textLineIndex = 0; size_t textLineIndex = 0;
for (const PDFTextLine& textLine : textBlock.getLines()) for (const PDFTextLine& textLine : textBlock.getLines())

View File

@ -305,6 +305,9 @@ public:
/// Merge data from \p next flow (i.e. connect two consecutive flows) /// Merge data from \p next flow (i.e. connect two consecutive flows)
void merge(const PDFTextFlow& next); void merge(const PDFTextFlow& next);
/// Returns bounding box of a text flow on the page
QRectF getBoundingBox() const { return m_boundingBox; }
/// Creates text flows from text layout, according to creation flags. /// Creates text flows from text layout, according to creation flags.
/// \param layout Layout, from which is text flow created /// \param layout Layout, from which is text flow created
/// \param flags Flow creation flags /// \param flags Flow creation flags
@ -325,6 +328,7 @@ private:
QString getContext(size_t index, size_t length) const; QString getContext(size_t index, size_t length) const;
QString m_text; QString m_text;
QRectF m_boundingBox;
std::vector<PDFCharacterPointer> m_characterPointers; std::vector<PDFCharacterPointer> m_characterPointers;
}; };

View File

@ -0,0 +1,7 @@
{
"Name" : "AudioBook",
"Author" : "Jakub Melka",
"Version" : "1.0.0",
"License" : "LGPL v3",
"Description" : "Convert document to an audio book."
}

View File

@ -0,0 +1,48 @@
# Copyright (C) 2021 Jakub Melka
#
# This file is part of PDF4QT.
#
# PDF4QT is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# with the written consent of the copyright owner, any later version.
#
# PDF4QT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
TEMPLATE = lib
DEFINES += AUDIOBOOKPLUGIN_LIBRARY
QT += gui widgets
LIBS += -L$$OUT_PWD/../..
LIBS += -lPdf4QtLib
QMAKE_CXXFLAGS += /std:c++latest /utf-8
INCLUDEPATH += $$PWD/../../Pdf4QtLib/Sources
DESTDIR = $$OUT_PWD/../../pdfplugins
CONFIG += c++11
SOURCES += \
audiobookplugin.cpp
HEADERS += \
audiobookplugin.h
CONFIG += force_debug_info
DISTFILES += \
AudioBookPlugin.json
RESOURCES += \
icons.qrc

View File

@ -0,0 +1,81 @@
// Copyright (C) 2021 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#include "audiobookplugin.h"
#include <QAction>
namespace pdfplugin
{
/// Constructs the plugin with no parent object. The action pointer starts
/// as nullptr; the action itself is created later in setWidget().
AudioBookPlugin::AudioBookPlugin() :
pdf::PDFPlugin(nullptr),
m_createTextStreamAction(nullptr)
{
}
/// Attaches the plugin to the widget and creates the plugin's action.
/// The widget must not have been set before.
/// \param widget Widget
void AudioBookPlugin::setWidget(pdf::PDFWidget* widget)
{
    Q_ASSERT(!m_widget);

    BaseClass::setWidget(widget);

    // Build and wire the action first, then publish it to the member
    QAction* createAction = new QAction(QIcon(":/pdfplugins/audiobook/create-text-stream.svg"), tr("Create Text Stream for Audio Book"), this);
    createAction->setObjectName("actionAudioBook_CreateTextStream");
    connect(createAction, &QAction::triggered, this, &AudioBookPlugin::onCreateTextStreamTriggered);
    m_createTextStreamAction = createAction;

    updateActions();
}
/// Passes the document to the base class. When the document has been
/// reset, the text flow editor is cleared and the actions are refreshed.
/// \param document Modified document
void AudioBookPlugin::setDocument(const pdf::PDFModifiedDocument& document)
{
    BaseClass::setDocument(document);

    if (!document.hasReset())
    {
        // Nothing to invalidate
        return;
    }

    m_textFlowEditor.clear();
    updateActions();
}
/// Returns the actions provided by this plugin, which is the single
/// "create text stream" action. The pointer is nullptr until setWidget()
/// has created the action.
std::vector<QAction*> AudioBookPlugin::getActions() const
{
return { m_createTextStreamAction };
}
void AudioBookPlugin::onCreateTextStreamTriggered()
{
Q_ASSERT(m_document);
if (!m_textFlowEditor.isEmpty())
{
return;
}
pdf::PDFDocumentTextFlowFactory factory;
pdf::PDFDocumentTextFlow textFlow = factory.create(m_document, pdf::PDFDocumentTextFlowFactory::Algorithm::Auto);
m_textFlowEditor.setTextFlow(std::move(textFlow));
}
/// Enables the "create text stream" action only when a document is set.
void AudioBookPlugin::updateActions()
{
    // The action is created in setWidget(); setDocument() can call this
    // function too, so do not dereference the action before it exists.
    if (m_createTextStreamAction)
    {
        m_createTextStreamAction->setEnabled(m_document != nullptr);
    }
}
} // namespace pdfplugin

View File

@ -0,0 +1,56 @@
// Copyright (C) 2021 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
#ifndef AUDIOBOOKPLUGIN_H
#define AUDIOBOOKPLUGIN_H
#include "pdfplugin.h"
#include "pdfdocumenttextflow.h"
#include <QObject>
namespace pdfplugin
{
/// Plugin providing an action which creates a document text flow
/// (text stream) for audio book conversion and keeps it in a text flow editor.
class AudioBookPlugin : public pdf::PDFPlugin
{
Q_OBJECT
Q_PLUGIN_METADATA(IID "PDF4QT.AudioBookPlugin" FILE "AudioBookPlugin.json")
private:
using BaseClass = pdf::PDFPlugin;
public:
AudioBookPlugin();
/// Sets the widget and creates the plugin's action
virtual void setWidget(pdf::PDFWidget* widget) override;
/// Sets the document; clears the text flow editor when the document has been reset
virtual void setDocument(const pdf::PDFModifiedDocument& document) override;
/// Returns the actions provided by this plugin
virtual std::vector<QAction*> getActions() const override;
private:
/// Creates the text flow for the current document, if none is held yet
void onCreateTextStreamTriggered();
/// Enables or disables the action according to document presence
void updateActions();
QAction* m_createTextStreamAction; ///< Created in setWidget(), nullptr before that
pdf::PDFDocumentTextFlowEditor m_textFlowEditor; ///< Holds the created text flow
};
} // namespace pdfplugin
#endif // AUDIOBOOKPLUGIN_H

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 8.4 KiB

View File

@ -0,0 +1,5 @@
<RCC>
<qresource prefix="/pdfplugins/audiobook">
<file>create-text-stream.svg</file>
</qresource>
</RCC>

View File

@ -23,6 +23,7 @@ SUBDIRS += \
SoftProofingPlugin \ SoftProofingPlugin \
RedactPlugin \ RedactPlugin \
OutputPreviewPlugin \ OutputPreviewPlugin \
ObjectInspectorPlugin ObjectInspectorPlugin \
AudioBookPlugin

View File

@ -43,6 +43,8 @@ Software have following features (the list is not complete):
- file attachments - file attachments
- optimization (compressing documents) - optimization (compressing documents)
- command line tool - command line tool
- audio book conversion
- internal structure inspector
4. THIRD PARTY LIBRARIES 4. THIRD PARTY LIBRARIES