Text layouts for every page

This commit is contained in:
Jakub Melka 2019-12-31 17:39:31 +01:00
parent c803317b6b
commit c832c4ecef
11 changed files with 509 additions and 125 deletions

View File

@ -21,6 +21,8 @@
#include <QtConcurrent/QtConcurrent>
#include <execution>
namespace pdf
{
@ -72,6 +74,7 @@ void PDFAsynchronousPageCompiler::stop()
}
m_tasks.clear();
m_cache.clear();
m_textLayouts.dirty();
m_state = State::Inactive;
break;
@ -129,6 +132,17 @@ const PDFPrecompiledPage* PDFAsynchronousPageCompiler::getCompiledPage(PDFIntege
return page;
}
/// Returns the text layout of page \p pageIndex. An empty layout is returned
/// when the compiler is not in the active state or the proxy has no document.
/// Otherwise the layout is fetched from the cached storage of all page
/// layouts, which is filled by getTextLayoutsImpl.
PDFTextLayout PDFAsynchronousPageCompiler::getTextLayout(PDFInteger pageIndex)
{
    const bool isEngineReady = (m_state == State::Active) && m_proxy->getDocument();
    if (isEngineReady)
    {
        const auto& layoutStorage = m_textLayouts.get(this, &PDFAsynchronousPageCompiler::getTextLayoutsImpl);
        return layoutStorage.getTextLayout(pageIndex);
    }

    // Engine is not active, there is nothing to lay out
    return PDFTextLayout();
}
void PDFAsynchronousPageCompiler::onPageCompiled()
{
std::vector<PDFInteger> compiledPages;
@ -177,4 +191,115 @@ void PDFAsynchronousPageCompiler::onPageCompiled()
}
}
/// Page content processor which only collects the characters printed on
/// a page and turns them into a text layout. Painting of shapes, text
/// outlines, images and shadings is suppressed; tiling patterns are still
/// processed, because they can contain text.
class PDFTextLayoutGenerator : public PDFPageContentProcessor
{
    using BaseClass = PDFPageContentProcessor;

public:
    /// Constructs the generator. All arguments except \p features are
    /// forwarded unchanged to the base content processor.
    /// \param features Renderer features (used for optional content handling)
    explicit PDFTextLayoutGenerator(PDFRenderer::Features features,
                                    const PDFPage* page,
                                    const PDFDocument* document,
                                    const PDFFontCache* fontCache,
                                    const PDFCMS* cms,
                                    const PDFOptionalContentActivity* optionalContentActivity,
                                    QMatrix pagePointToDevicePointMatrix,
                                    const PDFMeshQualitySettings& meshQualitySettings) :
        BaseClass(page, document, fontCache, cms, optionalContentActivity, pagePointToDevicePointMatrix, meshQualitySettings),
        m_features(features)
    {

    }

    /// Builds the text layout from the characters collected so far
    /// and hands it over to the caller.
    PDFTextLayout createTextLayout();

protected:
    virtual void performOutputCharacter(const PDFTextCharacterInfo& info) override;
    virtual bool isContentKindSuppressed(ContentKind kind) const override;
    virtual bool isContentSuppressedByOC(PDFObjectReference ocgOrOcmd) override;

private:
    PDFRenderer::Features m_features; ///< Renderer features passed to the constructor
    PDFTextLayout m_textLayout;       ///< Layout being filled with characters
};
/// Performs the layout on the collected characters, optimizes it and
/// returns it. The internal layout is moved out, so it should not be
/// relied upon after this call.
PDFTextLayout PDFTextLayoutGenerator::createTextLayout()
{
    PDFTextLayout& collectedLayout = m_textLayout;
    collectedLayout.perform();
    collectedLayout.optimize();
    return qMove(collectedLayout);
}
/// Decides whether content guarded by the optional content group/membership
/// dictionary \p ocgOrOcmd is suppressed. When the IgnoreOptionalContent
/// feature is set, nothing is suppressed; otherwise the decision is left
/// to the base processor.
bool PDFTextLayoutGenerator::isContentSuppressedByOC(PDFObjectReference ocgOrOcmd)
{
    const bool ignoreOptionalContent = m_features.testFlag(PDFRenderer::IgnoreOptionalContent);
    return !ignoreOptionalContent && PDFPageContentProcessor::isContentSuppressedByOC(ocgOrOcmd);
}
/// Tells the base processor which kinds of content should not be painted.
/// Everything except tiling is suppressed; tiling stays enabled, because
/// a tiling pattern can contain text.
bool PDFTextLayoutGenerator::isContentKindSuppressed(ContentKind kind) const
{
    if (kind == ContentKind::Tiling)
    {
        // Tiling can have text
        return false;
    }

    const bool isPaintedContent = kind == ContentKind::Shapes ||
                                  kind == ContentKind::Text ||
                                  kind == ContentKind::Images ||
                                  kind == ContentKind::Shading;
    if (isPaintedContent)
    {
        return true;
    }

    // Unknown content kind
    Q_ASSERT(false);
    return false;
}
void PDFTextLayoutGenerator::performOutputCharacter(const PDFTextCharacterInfo& info)
{
m_textLayout.addCharacter(info);
}
/// Creates text layouts of all pages of the document. Pages are processed
/// in parallel; each finished layout is written to the storage under a local
/// mutex. Font cache shrinking is switched off for the duration of the
/// parallel processing.
PDFTextLayoutStorage PDFAsynchronousPageCompiler::getTextLayoutsImpl()
{
    m_proxy->getFontCache()->setCacheShrinkEnabled(false);

    const PDFCatalog* catalog = m_proxy->getDocument()->getCatalog();
    PDFCMSPointer cms = m_proxy->getCMSManager()->getCurrentCMS();

    PDFTextLayoutStorage storage(catalog->getPageCount());
    QMutex storageMutex;

    auto processPage = [this, &storage, &storageMutex, &cms, catalog](PDFInteger pageIndex)
    {
        const PDFPage* page = catalog->getPage(pageIndex);
        if (page)
        {
            PDFTextLayoutGenerator generator(m_proxy->getFeatures(),
                                             page,
                                             m_proxy->getDocument(),
                                             m_proxy->getFontCache(),
                                             cms.data(),
                                             m_proxy->getOptionalContentActivity(),
                                             QMatrix(),
                                             m_proxy->getMeshQualitySettings());
            generator.processContents();
            storage.setTextLayout(pageIndex, generator.createTextLayout(), &storageMutex);
        }
        else
        {
            // Invalid page index, store an empty layout
            storage.setTextLayout(pageIndex, PDFTextLayout(), &storageMutex);
        }
    };

    auto pages = PDFIntegerRange<PDFInteger>(0, catalog->getPageCount());
    std::for_each(std::execution::par, pages.begin(), pages.end(), processPage);

    // We allow font cache shrinking, when we aren't doing something in parallel.
    m_proxy->getFontCache()->setCacheShrinkEnabled(m_tasks.empty());

    return storage;
}
} // namespace pdf

View File

@ -20,6 +20,7 @@
#include "pdfrenderer.h"
#include "pdfpainter.h"
#include "pdftextlayout.h"
#include <QCache>
#include <QFuture>
@ -72,6 +73,11 @@ public:
/// \param compile Compile the page, if it is not found in the cache
const PDFPrecompiledPage* getCompiledPage(PDFInteger pageIndex, bool compile);
/// Returns text layout of the page. If page index is invalid,
/// then empty text layout is returned.
/// \param pageIndex Page index
PDFTextLayout getTextLayout(PDFInteger pageIndex);
signals:
void pageImageChanged(bool all, const std::vector<PDFInteger>& pages);
void renderingError(PDFInteger pageIndex, const QList<PDFRenderError>& errors);
@ -79,6 +85,9 @@ signals:
private:
void onPageCompiled();
/// Returns text layouts for all pages
PDFTextLayoutStorage getTextLayoutsImpl();
struct CompileTask
{
QFuture<PDFPrecompiledPage> taskFuture;
@ -89,6 +98,7 @@ private:
State m_state = State::Inactive;
QCache<PDFInteger, PDFPrecompiledPage> m_cache;
std::map<PDFInteger, CompileTask> m_tasks;
PDFCachedItem<PDFTextLayoutStorage> m_textLayouts;
};
} // namespace pdf

View File

@ -626,7 +626,7 @@ void PDFDrawWidgetProxy::draw(QPainter* painter, QRect rect)
// Draw text blocks/text lines, if it is enabled
if (m_features.testFlag(PDFRenderer::DebugTextBlocks))
{
const PDFTextLayout& layout = compiledPage->getTextLayout();
PDFTextLayout layout = m_compiler->getTextLayout(item.pageIndex);
const PDFTextBlocks& textBlocks = layout.getTextBlocks();
painter->save();
@ -648,7 +648,7 @@ void PDFDrawWidgetProxy::draw(QPainter* painter, QRect rect)
}
if (m_features.testFlag(PDFRenderer::DebugTextLines))
{
const PDFTextLayout& layout = compiledPage->getTextLayout();
PDFTextLayout layout = m_compiler->getTextLayout(item.pageIndex);
const PDFTextBlocks& textBlocks = layout.getTextBlocks();
painter->save();

View File

@ -392,6 +392,12 @@ void PDFPageContentProcessor::performOutputCharacter(const PDFTextCharacterInfo&
Q_UNUSED(info);
}
/// Default implementation: no content kind is suppressed. Derived
/// processors override this to switch off particular kinds of content.
/// \param kind Content kind being queried (ignored here)
bool PDFPageContentProcessor::isContentKindSuppressed(PDFPageContentProcessor::ContentKind kind) const
{
    // Base processor paints everything
    Q_UNUSED(kind);

    return false;
}
bool PDFPageContentProcessor::isContentSuppressed() const
{
return std::any_of(m_markedContentStack.cbegin(), m_markedContentStack.cend(), [](const MarkedContentState& state) { return state.contentSuppressed; });
@ -630,7 +636,7 @@ void PDFPageContentProcessor::processForm(const QMatrix& matrix,
void PDFPageContentProcessor::processPathPainting(const QPainterPath& path, bool stroke, bool fill, bool text, Qt::FillRule fillRule)
{
if (isContentSuppressed())
if (isContentSuppressed() || (text && isContentKindSuppressed(ContentKind::Text)) || (!text && isContentKindSuppressed(ContentKind::Shapes)))
{
// Content is suppressed, do not paint anything
return;
@ -651,51 +657,59 @@ void PDFPageContentProcessor::processPathPainting(const QPainterPath& path, bool
{
case PatternType::Tiling:
{
const PDFTilingPattern* tilingPattern = pattern->getTilingPattern();
processTillingPatternPainting(tilingPattern, path, patternColorSpace->getUncoloredPatternColorSpace(), patternColorSpace->getUncoloredPatternColor());
if (!isContentKindSuppressed(ContentKind::Tiling))
{
// Tiling is enabled
const PDFTilingPattern* tilingPattern = pattern->getTilingPattern();
processTillingPatternPainting(tilingPattern, path, patternColorSpace->getUncoloredPatternColorSpace(), patternColorSpace->getUncoloredPatternColor());
}
break;
}
case PatternType::Shading:
{
PDFPageContentProcessorGraphicStateSaveRestoreGuard guard(this);
const PDFShadingPattern* shadingPattern = pattern->getShadingPattern();
// Apply pattern graphic state
const PDFObject& patternGraphicState = m_document->getObject(shadingPattern->getPatternGraphicState());
if (!patternGraphicState.isNull())
if (!isContentKindSuppressed(ContentKind::Shading))
{
if (patternGraphicState.isDictionary())
// Shading is enabled
PDFPageContentProcessorGraphicStateSaveRestoreGuard guard(this);
const PDFShadingPattern* shadingPattern = pattern->getShadingPattern();
// Apply pattern graphic state
const PDFObject& patternGraphicState = m_document->getObject(shadingPattern->getPatternGraphicState());
if (!patternGraphicState.isNull())
{
processApplyGraphicState(patternGraphicState.getDictionary());
if (patternGraphicState.isDictionary())
{
processApplyGraphicState(patternGraphicState.getDictionary());
}
else
{
throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Shading pattern graphic state is invalid."));
}
}
// We must create a mesh and then draw pattern
PDFMeshQualitySettings settings = m_meshQualitySettings;
settings.deviceSpaceMeshingArea = getPageBoundingRectDeviceSpace();
settings.userSpaceToDeviceSpaceMatrix = getPatternBaseMatrix();
settings.initResolution();
PDFMesh mesh = shadingPattern->createMesh(settings, m_CMS, m_graphicState.getRenderingIntent(), this);
// Now, merge the current path to the mesh clipping path
QPainterPath boundingPath = mesh.getBoundingPath();
if (boundingPath.isEmpty())
{
boundingPath = getCurrentWorldMatrix().map(path);
}
else
{
throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Shading pattern graphic state is invalid."));
boundingPath = boundingPath.intersected(path);
}
mesh.setBoundingPath(boundingPath);
performMeshPainting(mesh);
}
// We must create a mesh and then draw pattern
PDFMeshQualitySettings settings = m_meshQualitySettings;
settings.deviceSpaceMeshingArea = getPageBoundingRectDeviceSpace();
settings.userSpaceToDeviceSpaceMatrix = getPatternBaseMatrix();
settings.initResolution();
PDFMesh mesh = shadingPattern->createMesh(settings, m_CMS, m_graphicState.getRenderingIntent(), this);
// Now, merge the current path to the mesh clipping path
QPainterPath boundingPath = mesh.getBoundingPath();
if (boundingPath.isEmpty())
{
boundingPath = getCurrentWorldMatrix().map(path);
}
else
{
boundingPath = boundingPath.intersected(path);
}
mesh.setBoundingPath(boundingPath);
performMeshPainting(mesh);
break;
}
@ -725,80 +739,88 @@ void PDFPageContentProcessor::processPathPainting(const QPainterPath& path, bool
{
case PatternType::Tiling:
{
const PDFTilingPattern* tilingPattern = pattern->getTilingPattern();
// We must stroke the path.
QPainterPathStroker stroker;
stroker.setCapStyle(m_graphicState.getLineCapStyle());
stroker.setWidth(m_graphicState.getLineWidth());
stroker.setMiterLimit(m_graphicState.getMitterLimit());
stroker.setJoinStyle(m_graphicState.getLineJoinStyle());
const PDFLineDashPattern& lineDashPattern = m_graphicState.getLineDashPattern();
if (!lineDashPattern.isSolid())
if (!isContentKindSuppressed(ContentKind::Tiling))
{
stroker.setDashPattern(QVector<PDFReal>::fromStdVector(lineDashPattern.getDashArray()));
stroker.setDashOffset(lineDashPattern.getDashOffset());
// Tiling is enabled
const PDFTilingPattern* tilingPattern = pattern->getTilingPattern();
// We must stroke the path.
QPainterPathStroker stroker;
stroker.setCapStyle(m_graphicState.getLineCapStyle());
stroker.setWidth(m_graphicState.getLineWidth());
stroker.setMiterLimit(m_graphicState.getMitterLimit());
stroker.setJoinStyle(m_graphicState.getLineJoinStyle());
const PDFLineDashPattern& lineDashPattern = m_graphicState.getLineDashPattern();
if (!lineDashPattern.isSolid())
{
stroker.setDashPattern(QVector<PDFReal>::fromStdVector(lineDashPattern.getDashArray()));
stroker.setDashOffset(lineDashPattern.getDashOffset());
}
QPainterPath strokedPath = stroker.createStroke(path);
processTillingPatternPainting(tilingPattern, strokedPath, patternColorSpace->getUncoloredPatternColorSpace(), patternColorSpace->getUncoloredPatternColor());
}
QPainterPath strokedPath = stroker.createStroke(path);
processTillingPatternPainting(tilingPattern, strokedPath, patternColorSpace->getUncoloredPatternColorSpace(), patternColorSpace->getUncoloredPatternColor());
break;
}
case PatternType::Shading:
{
PDFPageContentProcessorGraphicStateSaveRestoreGuard guard(this);
const PDFShadingPattern* shadingPattern = pattern->getShadingPattern();
// Apply pattern graphic state
const PDFObject& patternGraphicState = m_document->getObject(shadingPattern->getPatternGraphicState());
if (!patternGraphicState.isNull())
if (!isContentKindSuppressed(ContentKind::Shading))
{
if (patternGraphicState.isDictionary())
// Shading is enabled
PDFPageContentProcessorGraphicStateSaveRestoreGuard guard(this);
const PDFShadingPattern* shadingPattern = pattern->getShadingPattern();
// Apply pattern graphic state
const PDFObject& patternGraphicState = m_document->getObject(shadingPattern->getPatternGraphicState());
if (!patternGraphicState.isNull())
{
processApplyGraphicState(patternGraphicState.getDictionary());
if (patternGraphicState.isDictionary())
{
processApplyGraphicState(patternGraphicState.getDictionary());
}
else
{
throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Shading pattern graphic state is invalid."));
}
}
// We must create a mesh and then draw pattern
PDFMeshQualitySettings settings = m_meshQualitySettings;
settings.deviceSpaceMeshingArea = getPageBoundingRectDeviceSpace();
settings.userSpaceToDeviceSpaceMatrix = getPatternBaseMatrix();
settings.initResolution();
PDFMesh mesh = shadingPattern->createMesh(settings, m_CMS, m_graphicState.getRenderingIntent(), this);
// We must stroke the path.
QPainterPathStroker stroker;
stroker.setCapStyle(m_graphicState.getLineCapStyle());
stroker.setWidth(m_graphicState.getLineWidth());
stroker.setMiterLimit(m_graphicState.getMitterLimit());
stroker.setJoinStyle(m_graphicState.getLineJoinStyle());
const PDFLineDashPattern& lineDashPattern = m_graphicState.getLineDashPattern();
if (!lineDashPattern.isSolid())
{
stroker.setDashPattern(QVector<PDFReal>::fromStdVector(lineDashPattern.getDashArray()));
stroker.setDashOffset(lineDashPattern.getDashOffset());
}
QPainterPath strokedPath = stroker.createStroke(path);
QPainterPath boundingPath = mesh.getBoundingPath();
if (boundingPath.isEmpty())
{
boundingPath = getCurrentWorldMatrix().map(strokedPath);
}
else
{
throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Shading pattern graphic state is invalid."));
boundingPath = boundingPath.intersected(strokedPath);
}
mesh.setBoundingPath(boundingPath);
performMeshPainting(mesh);
}
// We must create a mesh and then draw pattern
PDFMeshQualitySettings settings = m_meshQualitySettings;
settings.deviceSpaceMeshingArea = getPageBoundingRectDeviceSpace();
settings.userSpaceToDeviceSpaceMatrix = getPatternBaseMatrix();
settings.initResolution();
PDFMesh mesh = shadingPattern->createMesh(settings, m_CMS, m_graphicState.getRenderingIntent(), this);
// We must stroke the path.
QPainterPathStroker stroker;
stroker.setCapStyle(m_graphicState.getLineCapStyle());
stroker.setWidth(m_graphicState.getLineWidth());
stroker.setMiterLimit(m_graphicState.getMitterLimit());
stroker.setJoinStyle(m_graphicState.getLineJoinStyle());
const PDFLineDashPattern& lineDashPattern = m_graphicState.getLineDashPattern();
if (!lineDashPattern.isSolid())
{
stroker.setDashPattern(QVector<PDFReal>::fromStdVector(lineDashPattern.getDashArray()));
stroker.setDashOffset(lineDashPattern.getDashOffset());
}
QPainterPath strokedPath = stroker.createStroke(path);
QPainterPath boundingPath = mesh.getBoundingPath();
if (boundingPath.isEmpty())
{
boundingPath = getCurrentWorldMatrix().map(strokedPath);
}
else
{
boundingPath = boundingPath.intersected(strokedPath);
}
mesh.setBoundingPath(boundingPath);
performMeshPainting(mesh);
break;
}
@ -2626,6 +2648,12 @@ void PDFPageContentProcessor::operatorTextSetSpacingAndShowText(PDFReal t_w, PDF
void PDFPageContentProcessor::operatorShadingPaintShape(PDFPageContentProcessor::PDFOperandName name)
{
if (isContentKindSuppressed(ContentKind::Shading))
{
// Shading is suppressed
return;
}
QMatrix matrix = getCurrentWorldMatrix();
PDFPageContentProcessorStateGuard guard(this);
PDFTemporaryValueChange guard2(&m_patternBaseMatrix, matrix);
@ -2654,6 +2682,12 @@ void PDFPageContentProcessor::operatorShadingPaintShape(PDFPageContentProcessor:
void PDFPageContentProcessor::paintXObjectImage(const PDFStream* stream)
{
if (isContentKindSuppressed(ContentKind::Images))
{
// Images are suppressed
return;
}
PDFColorSpacePointer colorSpace;
const PDFDictionary* streamDictionary = stream->getDictionary();

View File

@ -493,6 +493,19 @@ protected:
/// Implement to react on character printing
virtual void performOutputCharacter(const PDFTextCharacterInfo& info);
/// Kinds of page content, which can be individually switched off by
/// overriding function isContentKindSuppressed in a derived processor.
enum class ContentKind
{
Shapes, ///< General shapes, i.e. painted paths which are not text (they can be also shaded / tiled)
Text, ///< Text outlines, i.e. paths painted as text (they can be also shaded / tiled)
Images, ///< Images (image XObjects)
Shading, ///< Shading (shading patterns and the shading paint operator)
Tiling, ///< Tiling (tiling patterns)
};
/// Override this function to disable particular content type (for example
/// shading, images, ...)
virtual bool isContentKindSuppressed(ContentKind kind) const;
/// Returns current graphic state
const PDFPageContentProcessorState* getGraphicState() const { return &m_graphicState; }

View File

@ -447,11 +447,6 @@ void PDFPrecompiledPageGenerator::setCompositionMode(QPainter::CompositionMode m
m_precompiledPage->addSetCompositionMode(mode);
}
void PDFPrecompiledPageGenerator::performOutputCharacter(const PDFTextCharacterInfo& info)
{
m_precompiledPage->addCharacter(info);
}
void PDFPrecompiledPage::draw(QPainter* painter, const QRectF& cropBox, const QMatrix& pagePointToDevicePointMatrix, PDFRenderer::Features features) const
{
Q_ASSERT(painter);
@ -605,16 +600,6 @@ void PDFPrecompiledPage::addSetCompositionMode(QPainter::CompositionMode composi
m_compositionModes.push_back(compositionMode);
}
void PDFPrecompiledPage::addCharacter(const PDFTextCharacterInfo& info)
{
m_textLayout.addCharacter(info);
}
void PDFPrecompiledPage::createTextLayout()
{
m_textLayout.perform();
}
void PDFPrecompiledPage::optimize()
{
m_instructions.shrink_to_fit();
@ -624,7 +609,6 @@ void PDFPrecompiledPage::optimize()
m_meshes.shrink_to_fit();
m_matrices.shrink_to_fit();
m_compositionModes.shrink_to_fit();
m_textLayout.optimize();
}
void PDFPrecompiledPage::finalize(qint64 compilingTimeNS, QList<PDFRenderError> errors)
@ -667,8 +651,6 @@ void PDFPrecompiledPage::finalize(qint64 compilingTimeNS, QList<PDFRenderError>
{
m_memoryConsumptionEstimate += data.mesh.getMemoryConsumptionEstimate();
}
m_memoryConsumptionEstimate += m_textLayout.getMemoryConsumptionEstimate();
}
} // namespace pdf

View File

@ -192,10 +192,6 @@ public:
void addRestoreGraphicState() { m_instructions.emplace_back(InstructionType::RestoreGraphicState, 0); }
void addSetWorldMatrix(const QMatrix& matrix);
void addSetCompositionMode(QPainter::CompositionMode compositionMode);
void addCharacter(const PDFTextCharacterInfo& info);
/// Creates text layout for the page
void createTextLayout();
/// Optimizes page memory allocation to contain less space
void optimize();
@ -217,9 +213,6 @@ public:
/// Returns memory consumption estimate
qint64 getMemoryConsumptionEstimate() const { return m_memoryConsumptionEstimate; }
/// Returns text layout of the page
const PDFTextLayout& getTextLayout() const { return m_textLayout; }
private:
struct PathPaintData
{
@ -287,7 +280,6 @@ private:
std::vector<QMatrix> m_matrices;
std::vector<QPainter::CompositionMode> m_compositionModes;
QList<PDFRenderError> m_errors;
PDFTextLayout m_textLayout;
};
/// Processor, which processes PDF's page commands and writes them to the precompiled page.
@ -320,7 +312,6 @@ protected:
virtual void performRestoreGraphicState(ProcessOrder order) override;
virtual void setWorldMatrix(const QMatrix& matrix) override;
virtual void setCompositionMode(QPainter::CompositionMode mode) override;
virtual void performOutputCharacter(const PDFTextCharacterInfo& info) override;
private:
PDFPrecompiledPage* m_precompiledPage;

View File

@ -145,7 +145,6 @@ void PDFRenderer::compile(PDFPrecompiledPage* precompiledPage, size_t pageIndex)
PDFPrecompiledPageGenerator generator(precompiledPage, m_features, page, m_document, m_fontCache, m_cms, m_optionalContentActivity, m_meshQualitySettings);
QList<PDFRenderError> errors = generator.processContents();
precompiledPage->optimize();
precompiledPage->createTextLayout();
precompiledPage->finalize(timer.nsecsElapsed(), qMove(errors));
timer.invalidate();
}

View File

@ -50,7 +50,7 @@ public:
ClipToCropBox = 0x0010, ///< Clip page content to crop box (items outside crop box will not be visible)
DisplayTimes = 0x0020, ///< Display page compile/draw time
DebugTextBlocks = 0x0040, ///< Debug text block layout algorithm
DebugTextLines = 0x0080, ///< Debug text line layout algorithm
DebugTextLines = 0x0080 ///< Debug text line layout algorithm
};
Q_DECLARE_FLAGS(Features, Feature)

View File

@ -23,6 +23,55 @@
namespace pdf
{
/// Reads a vector from the data stream. The expected format is a 64-bit
/// unsigned element count followed by the elements themselves, as written
/// by the matching operator<<. Previous content of \p vector is replaced.
/// \param stream Stream to read from
/// \param vector Vector to be filled
template<typename T>
QDataStream& operator>>(QDataStream& stream, std::vector<T>& vector)
{
    // Fixed width count: QDataStream has no overload for std::size_t on
    // platforms, where it is a distinct type from quint32/quint64.
    quint64 size = 0;
    stream >> size;

    // Dependent type name requires 'typename'
    using size_type = typename std::vector<T>::size_type;
    vector.resize(static_cast<size_type>(size));

    for (T& item : vector)
    {
        stream >> item;
    }
    return stream;
}

/// Writes a vector to the data stream as a 64-bit unsigned element count
/// followed by the elements.
/// \param stream Stream to write to
/// \param vector Vector to be written
template<typename T>
QDataStream& operator<<(QDataStream& stream, const std::vector<T>& vector)
{
    // Explicit width, so the overload is unambiguous and matches the reader
    stream << static_cast<quint64>(vector.size());
    for (const T& item : vector)
    {
        stream << item;
    }
    return stream;
}
/// Reads a set from the data stream. The expected format is a 64-bit
/// unsigned element count followed by the elements, as written by the
/// matching operator<<. Previous content of \p set is replaced.
/// \param stream Stream to read from
/// \param set Set to be filled
template<typename T>
QDataStream& operator>>(QDataStream& stream, std::set<T>& set)
{
    // Fixed width count: QDataStream has no overload for std::size_t on
    // platforms, where it is a distinct type from quint32/quint64.
    quint64 size = 0;
    stream >> size;

    // Replace the content, same as the vector reader does
    set.clear();

    for (quint64 i = 0; i < size; ++i)
    {
        T item;
        stream >> item;

        // Items were written in ascending order, so the end is the best hint
        set.insert(set.end(), qMove(item));
    }
    return stream;
}

/// Writes a set to the data stream as a 64-bit unsigned element count
/// followed by the elements in iteration (ascending) order.
/// \param stream Stream to write to
/// \param set Set to be written
template<typename T>
QDataStream& operator<<(QDataStream& stream, const std::set<T>& set)
{
    // Explicit width, so the overload is unambiguous and matches the reader
    stream << static_cast<quint64>(set.size());
    for (const T& item : set)
    {
        stream << item;
    }
    return stream;
}
PDFTextLayout::PDFTextLayout()
{
@ -75,6 +124,24 @@ qint64 PDFTextLayout::getMemoryConsumptionEstimate() const
return estimate;
}
/// Reads the text layout from the stream: characters, angles, settings
/// and text blocks, in this order.
QDataStream& operator>>(QDataStream& stream, PDFTextLayout& layout)
{
    return stream >> layout.m_characters
                  >> layout.m_angles
                  >> layout.m_settings
                  >> layout.m_blocks;
}
/// Writes the text layout to the stream: characters, angles, settings
/// and text blocks, in this order.
QDataStream& operator<<(QDataStream& stream, const PDFTextLayout& layout)
{
    return stream << layout.m_characters
                  << layout.m_angles
                  << layout.m_settings
                  << layout.m_blocks;
}
struct NearestCharacterInfo
{
size_t index = std::numeric_limits<size_t>::max();
@ -374,6 +441,22 @@ void PDFTextLine::applyTransform(const QMatrix& matrix)
}
}
/// Reads the text line from the stream: characters, bounding box
/// and top left point, in this order.
QDataStream& operator>>(QDataStream& stream, PDFTextLine& line)
{
    return stream >> line.m_characters
                  >> line.m_boundingBox
                  >> line.m_topLeft;
}
/// Writes the text line to the stream: characters, bounding box
/// and top left point, in this order.
QDataStream& operator<<(QDataStream& stream, const PDFTextLine& line)
{
    return stream << line.m_characters
                  << line.m_boundingBox
                  << line.m_topLeft;
}
PDFTextBlock::PDFTextBlock(PDFTextLines textLines) :
m_lines(qMove(textLines))
{
@ -408,10 +491,106 @@ void PDFTextBlock::applyTransform(const QMatrix& matrix)
}
}
/// Reads the text block from the stream: lines, bounding box
/// and top left point, in this order.
QDataStream& operator>>(QDataStream& stream, PDFTextBlock& block)
{
    return stream >> block.m_lines
                  >> block.m_boundingBox
                  >> block.m_topLeft;
}
/// Writes the text block to the stream: lines, bounding box
/// and top left point, in this order.
QDataStream& operator<<(QDataStream& stream, const PDFTextBlock& block)
{
    return stream << block.m_lines
                  << block.m_boundingBox
                  << block.m_topLeft;
}
/// Maps the bounding box and the position of the character
/// by the given matrix.
/// \param matrix Transformation matrix
void TextCharacter::applyTransform(const QMatrix& matrix)
{
    boundingBox = matrix.map(boundingBox);
    position = matrix.map(position);
}
/// Writes the character to the stream: character, position, angle,
/// font size, advance and bounding box, in this order.
QDataStream& operator<<(QDataStream& stream, const TextCharacter& character)
{
    return stream << character.character
                  << character.position
                  << character.angle
                  << character.fontSize
                  << character.advance
                  << character.boundingBox;
}
/// Reads the character from the stream: character, position, angle,
/// font size, advance and bounding box, in this order.
QDataStream& operator>>(QDataStream& stream, TextCharacter& character)
{
    return stream >> character.character
                  >> character.position
                  >> character.angle
                  >> character.fontSize
                  >> character.advance
                  >> character.boundingBox;
}
/// Returns text layout stored for page \p pageIndex. The layout is located
/// by its offset in the storage, uncompressed and deserialized. An empty
/// (default constructed) layout is returned, if the page index is out of
/// range, or if no valid record can be read at the page's offset.
/// Takes no lock, so it must not run concurrently with setTextLayout.
/// \param pageIndex Page index
PDFTextLayout PDFTextLayoutStorage::getTextLayout(PDFInteger pageIndex) const
{
    PDFTextLayout result;

    if (pageIndex < 0 || pageIndex >= static_cast<PDFInteger>(m_offsets.size()))
    {
        // Invalid page index
        return result;
    }

    // Read-only stream over the storage; this constructor takes
    // a const byte array, so no const_cast is needed.
    QDataStream layoutStream(m_textLayouts);

    const int offset = m_offsets[pageIndex];
    if (layoutStream.skipRawData(offset) != offset)
    {
        // Offset lies beyond the stored data
        return result;
    }

    QByteArray compressed;
    layoutStream >> compressed;
    if (compressed.isEmpty())
    {
        // Nothing stored at this offset; do not deserialize from an empty
        // stream, it would overwrite the default settings of the layout.
        return result;
    }

    QByteArray buffer = qUncompress(compressed);
    if (buffer.isEmpty())
    {
        // Corrupted record
        return result;
    }

    QDataStream stream(&buffer, QIODevice::ReadOnly);
    stream >> result;
    return result;
}
/// Stores text layout of page \p pageIndex. The layout is serialized and
/// compressed first, without holding the lock; then the mutex is locked,
/// the current end of the storage is recorded as the offset of the page
/// and the compressed record is appended to the storage.
/// \param pageIndex Page index (not range checked)
/// \param layout Text layout
/// \param mutex Mutex guarding the storage against concurrent writes
void PDFTextLayoutStorage::setTextLayout(PDFInteger pageIndex, const PDFTextLayout& layout, QMutex* mutex)
{
    QByteArray serialized;
    {
        QDataStream serializer(&serialized, QIODevice::WriteOnly);
        serializer << layout;
    }
    const QByteArray compressed = qCompress(serialized, 9);

    // Only modification of the shared storage is done under the lock
    QMutexLocker guard(mutex);
    m_offsets[pageIndex] = m_textLayouts.size();

    QDataStream storageStream(&m_textLayouts, QIODevice::Append | QIODevice::WriteOnly);
    storageStream << compressed;
}
/// Writes the layout settings to the stream, field by field
/// in declaration order used by the matching operator>>.
QDataStream& operator<<(QDataStream& stream, const PDFTextLayoutSettings& settings)
{
    return stream << settings.samples
                  << settings.distanceSensitivity
                  << settings.charactersOnLineSensitivity
                  << settings.fontSensitivity
                  << settings.blockVerticalSensitivity
                  << settings.blockOverlapSensitivity;
}
/// Reads the layout settings from the stream, field by field
/// in the order used by the matching operator<<.
QDataStream& operator>>(QDataStream& stream, PDFTextLayoutSettings& settings)
{
    return stream >> settings.samples
                  >> settings.distanceSensitivity
                  >> settings.charactersOnLineSensitivity
                  >> settings.fontSensitivity
                  >> settings.blockVerticalSensitivity
                  >> settings.blockOverlapSensitivity;
}
} // namespace pdf

View File

@ -20,6 +20,7 @@
#include "pdfglobal.h"
#include <QDataStream>
#include <QPainterPath>
#include <set>
@ -75,6 +76,9 @@ struct PDFTextLayoutSettings
/// Minimal horizontal overlap for two lines considered to be in one block
PDFReal blockOverlapSensitivity = 0.3;
friend QDataStream& operator<<(QDataStream& stream, const PDFTextLayoutSettings& settings);
friend QDataStream& operator>>(QDataStream& stream, PDFTextLayoutSettings& settings);
};
/// Represents character in device space coordinates. All values (dimensions,
@ -89,6 +93,9 @@ struct TextCharacter
QPainterPath boundingBox;
void applyTransform(const QMatrix& matrix);
friend QDataStream& operator<<(QDataStream& stream, const TextCharacter& character);
friend QDataStream& operator>>(QDataStream& stream, TextCharacter& character);
};
using TextCharacters = std::vector<TextCharacter>;
@ -97,6 +104,8 @@ using TextCharacters = std::vector<TextCharacter>;
class PDFTextLine
{
public:
explicit inline PDFTextLine() = default;
/// Construct new line from characters. Characters are sorted in x-coordinate
/// and bounding box is computed.
/// \param characters
@ -108,6 +117,9 @@ public:
void applyTransform(const QMatrix& matrix);
friend QDataStream& operator<<(QDataStream& stream, const PDFTextLine& line);
friend QDataStream& operator>>(QDataStream& stream, PDFTextLine& line);
private:
TextCharacters m_characters;
QPainterPath m_boundingBox;
@ -120,6 +132,7 @@ using PDFTextLines = std::vector<PDFTextLine>;
class PDFTextBlock
{
public:
explicit inline PDFTextBlock() = default;
explicit inline PDFTextBlock(PDFTextLines textLines);
const PDFTextLines& getLines() const { return m_lines; }
@ -128,6 +141,9 @@ public:
void applyTransform(const QMatrix& matrix);
friend QDataStream& operator<<(QDataStream& stream, const PDFTextBlock& block);
friend QDataStream& operator>>(QDataStream& stream, PDFTextBlock& block);
private:
PDFTextLines m_lines;
QPainterPath m_boundingBox;
@ -158,6 +174,9 @@ public:
/// Returns recognized text blocks
const PDFTextBlocks& getTextBlocks() const { return m_blocks; }
friend QDataStream& operator<<(QDataStream& stream, const PDFTextLayout& layout);
friend QDataStream& operator>>(QDataStream& stream, PDFTextLayout& layout);
private:
/// Makes layout for particular angle
void performDoLayout(PDFReal angle);
@ -178,6 +197,38 @@ private:
PDFTextBlocks m_blocks;
};
/// Storage for text layouts of pages. Each layout is kept serialized and
/// compressed in one shared byte array and is located by a per-page offset.
/// Writes from multiple threads are synchronized by the mutex passed
/// to \p setTextLayout (all writers must pass the same mutex). Reading uses
/// no mutex at all. For this reason, reading and writing at the same time
/// is prohibited, it is not thread safe.
class PDFTextLayoutStorage
{
public:
/// Constructs empty storage with no pages
explicit inline PDFTextLayoutStorage() = default;
/// Constructs storage for \p pageCount pages; offsets of all pages
/// are initialized to zero.
explicit inline PDFTextLayoutStorage(PDFInteger pageCount) :
m_offsets(pageCount, 0)
{
}
/// Returns text layout for particular page. If page index is invalid,
/// then empty text layout is returned. Function is not thread safe, if
/// function \p setTextLayout is called from another thread.
/// \param pageIndex Page index
PDFTextLayout getTextLayout(PDFInteger pageIndex) const;
/// Sets text layout to the particular index. Index must be valid and from
/// range 0 to \p pageCount - 1 (it is not checked). Concurrent calls of this
/// function are synchronized by \p mutex; the function is not thread safe
/// with respect to \p getTextLayout.
/// \param pageIndex Page index
/// \param layout Text layout
/// \param mutex Mutex for locking (calls of setTextLayout from multiple threads)
void setTextLayout(PDFInteger pageIndex, const PDFTextLayout& layout, QMutex* mutex);
private:
/// Offset of each page's record in \p m_textLayouts
std::vector<int> m_offsets;
/// Compressed serialized layouts, appended one after another
QByteArray m_textLayouts;
};
} // namespace pdf
#endif // PDFTEXTLAYOUT_H