2021-04-30 20:12:10 +02:00
// Copyright (C) 2019-2021 Jakub Melka
2019-12-14 19:09:34 +01:00
//
2020-12-20 19:03:58 +01:00
// This file is part of Pdf4Qt.
2019-12-14 19:09:34 +01:00
//
2020-12-20 19:03:58 +01:00
// Pdf4Qt is free software: you can redistribute it and/or modify
2019-12-14 19:09:34 +01:00
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
2021-04-30 20:12:10 +02:00
// with the written consent of the copyright owner, any later version.
2019-12-14 19:09:34 +01:00
//
2020-12-20 19:03:58 +01:00
// Pdf4Qt is distributed in the hope that it will be useful,
2019-12-14 19:09:34 +01:00
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
2020-12-20 19:03:58 +01:00
// along with Pdf4Qt. If not, see <https://www.gnu.org/licenses/>.
2019-12-14 19:09:34 +01:00
# include "pdfcompiler.h"
2019-12-25 17:56:17 +01:00
# include "pdfcms.h"
2019-12-14 19:09:34 +01:00
# include "pdfdrawspacecontroller.h"
2020-01-01 18:23:18 +01:00
# include "pdfprogress.h"
2020-01-18 14:55:26 +01:00
# include "pdfexecutionpolicy.h"
2019-12-14 19:09:34 +01:00
# include <QtConcurrent/QtConcurrent>
2019-12-31 17:39:31 +01:00
# include <execution>
2019-12-14 19:09:34 +01:00
namespace pdf
{
/// Creates the asynchronous page compiler attached to the given draw widget proxy.
/// The proxy is handed to the base class and also kept in m_proxy. The compiled
/// page cache is given an initial maximal cost of 128 * 1024 * 1024.
/// \param proxy Draw widget proxy
PDFAsynchronousPageCompiler::PDFAsynchronousPageCompiler(PDFDrawWidgetProxy* proxy) :
    BaseClass(proxy),
    m_proxy(proxy)
{
    constexpr int initialCacheLimit = 128 * 1024 * 1024;
    m_cache.setMaxCost(initialCacheLimit);
}
void PDFAsynchronousPageCompiler : : start ( )
{
switch ( m_state )
{
case State : : Inactive :
{
m_state = State : : Active ;
break ;
}
case State : : Active :
break ; // We have nothing to do...
case State : : Stopping :
{
// We shouldn't call this function while stopping!
Q_ASSERT ( false ) ;
break ;
}
}
}
2020-04-25 14:21:06 +02:00
void PDFAsynchronousPageCompiler : : stop ( bool clearCache )
2019-12-14 19:09:34 +01:00
{
switch ( m_state )
{
case State : : Inactive :
break ; // We have nothing to do...
case State : : Active :
{
// Stop the engine
m_state = State : : Stopping ;
for ( const auto & taskItem : m_tasks )
{
disconnect ( taskItem . second . taskWatcher , & QFutureWatcher < PDFPrecompiledPage > : : finished , this , & PDFAsynchronousPageCompiler : : onPageCompiled ) ;
taskItem . second . taskWatcher - > waitForFinished ( ) ;
}
m_tasks . clear ( ) ;
2020-04-25 14:21:06 +02:00
if ( clearCache )
{
m_cache . clear ( ) ;
}
2019-12-14 19:09:34 +01:00
m_state = State : : Inactive ;
break ;
}
case State : : Stopping :
{
// We shouldn't call this function while stopping!
Q_ASSERT ( false ) ;
break ;
}
}
}
void PDFAsynchronousPageCompiler : : reset ( )
{
2020-04-25 14:21:06 +02:00
stop ( true ) ;
2019-12-14 19:09:34 +01:00
start ( ) ;
}
2019-12-15 19:28:25 +01:00
void PDFAsynchronousPageCompiler : : setCacheLimit ( int limit )
{
m_cache . setMaxCost ( limit ) ;
}
2019-12-15 17:46:58 +01:00
/// Returns the compiled page from the cache, or nullptr if the page is not
/// in the cache (or the engine is not active / no document is set). If the page
/// is not cached, \p compile is true and no compile task exists for the page yet,
/// then asynchronous compilation of the page is started; nullptr is still returned
/// in that case and the result is processed later in onPageCompiled().
/// \param pageIndex Index of the page
/// \param compile Start compilation of the page, if it is not available
const PDFPrecompiledPage* PDFAsynchronousPageCompiler::getCompiledPage(PDFInteger pageIndex, bool compile)
{
    const bool isEngineUsable = (m_state == State::Active) && m_proxy->getDocument();
    if (!isEngineUsable)
    {
        // Engine is not active, always return nullptr
        return nullptr;
    }

    const PDFPrecompiledPage* cachedPage = m_cache.object(pageIndex);

    const bool needsNewTask = !cachedPage && compile && m_tasks.count(pageIndex) == 0;
    if (!needsNewTask)
    {
        return cachedPage;
    }

    // Job executed in the background, which compiles the page
    auto compileJob = [this, pageIndex]() -> PDFPrecompiledPage
    {
        PDFPrecompiledPage output;
        PDFCMSPointer cms = m_proxy->getCMSManager()->getCurrentCMS();
        PDFRenderer renderer(m_proxy->getDocument(),
                             m_proxy->getFontCache(),
                             cms.data(),
                             m_proxy->getOptionalContentActivity(),
                             m_proxy->getFeatures(),
                             m_proxy->getMeshQualitySettings());
        renderer.compile(&output, pageIndex);
        return output;
    };

    // Font cache shrinking is switched off for this object while a task is pending;
    // it is switched on again in onPageCompiled(), when no tasks remain.
    m_proxy->getFontCache()->setCacheShrinkEnabled(this, false);

    CompileTask& newTask = m_tasks[pageIndex];
    newTask.taskFuture = QtConcurrent::run(compileJob);
    newTask.taskWatcher = new QFutureWatcher<PDFPrecompiledPage>(this);

    // Connect first, then attach the future to the watcher
    connect(newTask.taskWatcher, &QFutureWatcher<PDFPrecompiledPage>::finished, this, &PDFAsynchronousPageCompiler::onPageCompiled);
    newTask.taskWatcher->setFuture(newTask.taskFuture);

    return cachedPage;
}
void PDFAsynchronousPageCompiler : : onPageCompiled ( )
{
std : : vector < PDFInteger > compiledPages ;
// Search all tasks for finished tasks
for ( auto it = m_tasks . begin ( ) ; it ! = m_tasks . end ( ) ; )
{
CompileTask & task = it - > second ;
if ( task . taskWatcher - > isFinished ( ) )
{
if ( m_state = = State : : Active )
{
// If we are in active state, try to store precompiled page
PDFPrecompiledPage * page = new PDFPrecompiledPage ( task . taskWatcher - > result ( ) ) ;
qint64 memoryConsumptionEstimate = page - > getMemoryConsumptionEstimate ( ) ;
if ( m_cache . insert ( it - > first , page , memoryConsumptionEstimate ) )
{
compiledPages . push_back ( it - > first ) ;
}
else
{
// We can't insert page to the cache, because cache size is too small. We will
// emit error string to inform the user, that cache is too small.
QString message = PDFTranslationContext : : tr ( " Precompiled page size is too high (%1 kB). Cache size is %2 kB. Increase the cache size! " ) . arg ( memoryConsumptionEstimate / 1024 ) . arg ( m_cache . maxCost ( ) / 1024 ) ;
emit renderingError ( it - > first , { PDFRenderError ( RenderErrorType : : Error , message ) } ) ;
}
}
task . taskWatcher - > deleteLater ( ) ;
it = m_tasks . erase ( it ) ;
}
else
{
// Just increment the counter
+ + it ;
}
}
2019-12-15 19:28:25 +01:00
// We allow font cache shrinking, when we aren't doing something in parallel.
2020-01-01 18:23:18 +01:00
m_proxy - > getFontCache ( ) - > setCacheShrinkEnabled ( this , m_tasks . empty ( ) ) ;
2019-12-15 19:28:25 +01:00
2019-12-14 19:09:34 +01:00
if ( ! compiledPages . empty ( ) )
{
Q_ASSERT ( std : : is_sorted ( compiledPages . cbegin ( ) , compiledPages . cend ( ) ) ) ;
emit pageImageChanged ( false , compiledPages ) ;
}
}
2019-12-31 17:39:31 +01:00
/// Finalizes the collected text layout (perform() followed by optimize())
/// and returns it. The layout is moved out of the generator.
PDFTextLayout PDFTextLayoutGenerator::createTextLayout()
{
    m_textLayout.perform();
    m_textLayout.optimize();

    PDFTextLayout finishedLayout = qMove(m_textLayout);
    return finishedLayout;
}
bool PDFTextLayoutGenerator : : isContentSuppressedByOC ( PDFObjectReference ocgOrOcmd )
{
if ( m_features . testFlag ( PDFRenderer : : IgnoreOptionalContent ) )
{
return false ;
}
return PDFPageContentProcessor : : isContentSuppressedByOC ( ocgOrOcmd ) ;
}
bool PDFTextLayoutGenerator : : isContentKindSuppressed ( ContentKind kind ) const
{
switch ( kind )
{
case ContentKind : : Shapes :
case ContentKind : : Text :
case ContentKind : : Images :
case ContentKind : : Shading :
return true ;
case ContentKind : : Tiling :
return false ; // Tiling can have text
default :
{
Q_ASSERT ( false ) ;
break ;
}
}
return false ;
}
void PDFTextLayoutGenerator : : performOutputCharacter ( const PDFTextCharacterInfo & info )
{
2020-10-17 16:56:39 +02:00
if ( ! isContentSuppressed ( ) & & ! info . character . isSpace ( ) )
2020-01-02 12:06:09 +01:00
{
m_textLayout . addCharacter ( info ) ;
}
2019-12-31 17:39:31 +01:00
}
2020-01-01 18:23:18 +01:00
PDFAsynchronousTextLayoutCompiler : : PDFAsynchronousTextLayoutCompiler ( PDFDrawWidgetProxy * proxy ) :
BaseClass ( proxy ) ,
2020-01-02 12:06:09 +01:00
m_proxy ( proxy ) ,
2020-12-11 18:59:39 +01:00
m_isRunning ( false ) ,
m_cache ( std : : bind ( & PDFAsynchronousTextLayoutCompiler : : createTextLayout , this , std : : placeholders : : _1 ) )
2020-01-01 18:23:18 +01:00
{
connect ( & m_textLayoutCompileFutureWatcher , & QFutureWatcher < PDFTextLayoutStorage > : : finished , this , & PDFAsynchronousTextLayoutCompiler : : onTextLayoutCreated ) ;
}
void PDFAsynchronousTextLayoutCompiler : : start ( )
{
switch ( m_state )
{
case State : : Inactive :
{
m_state = State : : Active ;
break ;
}
case State : : Active :
break ; // We have nothing to do...
case State : : Stopping :
{
// We shouldn't call this function while stopping!
Q_ASSERT ( false ) ;
break ;
}
}
}
2020-04-25 14:21:06 +02:00
void PDFAsynchronousTextLayoutCompiler : : stop ( bool clearCache )
2019-12-31 17:39:31 +01:00
{
2020-01-01 18:23:18 +01:00
switch ( m_state )
{
case State : : Inactive :
break ; // We have nothing to do...
case State : : Active :
{
// Stop the engine
m_state = State : : Stopping ;
m_textLayoutCompileFutureWatcher . waitForFinished ( ) ;
2020-04-25 14:21:06 +02:00
if ( clearCache )
{
m_textLayouts = std : : nullopt ;
2020-12-11 18:59:39 +01:00
m_cache . clear ( ) ;
2020-04-25 14:21:06 +02:00
}
2020-01-01 18:23:18 +01:00
m_state = State : : Inactive ;
break ;
}
case State : : Stopping :
{
// We shouldn't call this function while stopping!
Q_ASSERT ( false ) ;
break ;
}
}
}
void PDFAsynchronousTextLayoutCompiler : : reset ( )
{
2020-04-25 14:21:06 +02:00
stop ( true ) ;
2020-01-01 18:23:18 +01:00
start ( ) ;
}
2020-12-11 18:59:39 +01:00
/// Creates the text layout for a single page. If the text layouts of the whole
/// document are already available, the layout is taken from them. Otherwise
/// the layout is computed synchronously from the page contents. An empty layout
/// is returned, if the engine is not active, no document is set, or the page
/// index is invalid.
/// \param pageIndex Index of the page
PDFTextLayout PDFAsynchronousTextLayoutCompiler::createTextLayout(PDFInteger pageIndex)
{
    if (isTextLayoutReady())
    {
        return getTextLayout(pageIndex);
    }

    PDFTextLayout result;

    if (m_state != State::Active || !m_proxy->getDocument())
    {
        // Engine is not active, do not calculate layout
        return result;
    }

    const PDFCatalog* catalog = m_proxy->getDocument()->getCatalog();
    if (pageIndex < 0 || pageIndex >= PDFInteger(catalog->getPageCount()))
    {
        // Page index is out of range
        return result;
    }

    // Look the page up just once and reuse the pointer
    const PDFPage* page = catalog->getPage(pageIndex);
    if (!page)
    {
        // Invalid page index
        return result;
    }

    // Font cache shrinking is disabled while the generator works. Address of
    // the local variable serves as the unique key of this request.
    bool guard = false;
    m_proxy->getFontCache()->setCacheShrinkEnabled(&guard, false);

    PDFCMSPointer cms = m_proxy->getCMSManager()->getCurrentCMS();
    PDFTextLayoutGenerator generator(m_proxy->getFeatures(),
                                     page,
                                     m_proxy->getDocument(),
                                     m_proxy->getFontCache(),
                                     cms.data(),
                                     m_proxy->getOptionalContentActivity(),
                                     QMatrix(),
                                     m_proxy->getMeshQualitySettings());
    generator.processContents();
    result = generator.createTextLayout();

    m_proxy->getFontCache()->setCacheShrinkEnabled(&guard, true);

    return result;
}
2020-12-11 18:59:39 +01:00
/// Returns the text layout of the page from the already computed text layouts.
/// An empty layout is returned, if the engine is not active, no document is
/// set, or the text layouts have not been computed yet.
/// \param pageIndex Index of the page
PDFTextLayout PDFAsynchronousTextLayoutCompiler::getTextLayout(PDFInteger pageIndex)
{
    const bool isEngineUsable = (m_state == State::Active) && m_proxy->getDocument();

    if (isEngineUsable && m_textLayouts)
    {
        return m_textLayouts->getTextLayout(pageIndex);
    }

    // Engine is not active, or layouts are not available - return empty layout
    return PDFTextLayout();
}
/// Returns a getter object for the text layout of the page. The getter is
/// constructed from the layout cache of this object and the page index.
/// \param pageIndex Index of the page
PDFTextLayoutGetter PDFAsynchronousTextLayoutCompiler::getTextLayoutLazy(PDFInteger pageIndex)
{
    PDFTextLayoutGetter getter(&m_cache, pageIndex);
    return getter;
}
2020-01-26 17:06:50 +01:00
/// Creates a text selection covering all text blocks of all pages. For each
/// text block with at least one line, one selection item is created, spanning
/// from the first character of the first line to the last character of the
/// last line. If the text layouts are not computed, the selection has no items.
/// \param color Color of the selection
PDFTextSelection PDFAsynchronousTextLayoutCompiler::getTextSelectionAll(QColor color) const
{
    PDFTextSelection selection;

    if (m_textLayouts)
    {
        const PDFTextLayoutStorage& storage = *m_textLayouts;

        // Protects the selection, which is shared by all page workers
        QMutex selectionMutex;

        auto collectPageItems = [&selectionMutex, &storage, &selection, color](PDFInteger pageIndex)
        {
            PDFTextLayout pageLayout = storage.getTextLayout(pageIndex);
            const PDFTextBlocks& pageBlocks = pageLayout.getTextBlocks();

            // Builds a character pointer on the processed page
            auto makePointer = [pageIndex](auto blockIndex, auto lineIndex, auto characterIndex)
            {
                PDFCharacterPointer pointer;
                pointer.pageIndex = pageIndex;
                pointer.blockIndex = blockIndex;
                pointer.lineIndex = lineIndex;
                pointer.characterIndex = characterIndex;
                return pointer;
            };

            PDFTextSelectionItems pageItems;
            const size_t blockCount = pageBlocks.size();
            for (size_t blockId = 0; blockId < blockCount; ++blockId)
            {
                const PDFTextLines& blockLines = pageBlocks[blockId].getLines();
                if (blockLines.empty())
                {
                    // Block without lines has nothing to select
                    continue;
                }

                const PDFTextLine& finalLine = blockLines.back();
                Q_ASSERT(!finalLine.getCharacters().empty());

                PDFCharacterPointer ptrStart = makePointer(blockId, 0, 0);
                PDFCharacterPointer ptrEnd = makePointer(blockId, blockLines.size() - 1, finalLine.getCharacters().size() - 1);
                pageItems.emplace_back(ptrStart, ptrEnd);
            }

            // Items of the page are collected without the lock, only merging is guarded
            QMutexLocker lock(&selectionMutex);
            selection.addItems(qMove(pageItems), color);
        };

        PDFIntegerRange<size_t> pageRange(0, storage.getCount());
        PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Page, pageRange.begin(), pageRange.end(), collectPageItems);
    }

    selection.build();
    return selection;
}
2020-01-01 18:23:18 +01:00
void PDFAsynchronousTextLayoutCompiler : : makeTextLayout ( )
{
if ( m_state ! = State : : Active | | ! m_proxy - > getDocument ( ) )
{
// Engine is not active, do not calculate layout
return ;
}
if ( m_textLayouts . has_value ( ) )
{
// Value is computed already
return ;
}
2020-01-02 12:06:09 +01:00
if ( m_isRunning )
2020-01-01 18:23:18 +01:00
{
// Text layout is already being processed
return ;
}
2020-01-02 12:06:09 +01:00
// Jakub Melka: Mark, that we are running (test for future is not enough,
// because future can finish before this function exits, for example)
m_isRunning = true ;
2020-01-01 18:23:18 +01:00
ProgressStartupInfo info ;
info . showDialog = true ;
2020-01-02 12:06:09 +01:00
info . text = tr ( " Indexing document contents... " ) ;
2020-01-01 18:23:18 +01:00
m_proxy - > getFontCache ( ) - > setCacheShrinkEnabled ( this , false ) ;
2019-12-31 17:39:31 +01:00
const PDFCatalog * catalog = m_proxy - > getDocument ( ) - > getCatalog ( ) ;
2020-01-01 18:23:18 +01:00
m_proxy - > getProgress ( ) - > start ( catalog - > getPageCount ( ) , qMove ( info ) ) ;
2019-12-31 17:39:31 +01:00
PDFCMSPointer cms = m_proxy - > getCMSManager ( ) - > getCurrentCMS ( ) ;
2020-01-01 18:23:18 +01:00
auto createTextLayout = [ this , cms , catalog ] ( ) - > PDFTextLayoutStorage
2019-12-31 17:39:31 +01:00
{
2020-01-01 18:23:18 +01:00
PDFTextLayoutStorage result ( catalog - > getPageCount ( ) ) ;
QMutex mutex ;
auto generateTextLayout = [ this , & result , & mutex , cms , catalog ] ( PDFInteger pageIndex )
2019-12-31 17:39:31 +01:00
{
2020-01-01 18:23:18 +01:00
if ( ! catalog - > getPage ( pageIndex ) )
{
// Invalid page index
result . setTextLayout ( pageIndex , PDFTextLayout ( ) , & mutex ) ;
return ;
}
2019-12-31 17:39:31 +01:00
2020-01-01 18:23:18 +01:00
const PDFPage * page = catalog - > getPage ( pageIndex ) ;
Q_ASSERT ( page ) ;
2019-12-31 17:39:31 +01:00
2020-01-01 18:23:18 +01:00
PDFTextLayoutGenerator generator ( m_proxy - > getFeatures ( ) , page , m_proxy - > getDocument ( ) , m_proxy - > getFontCache ( ) , cms . data ( ) , m_proxy - > getOptionalContentActivity ( ) , QMatrix ( ) , m_proxy - > getMeshQualitySettings ( ) ) ;
generator . processContents ( ) ;
result . setTextLayout ( pageIndex , generator . createTextLayout ( ) , & mutex ) ;
m_proxy - > getProgress ( ) - > step ( ) ;
} ;
auto pageRange = PDFIntegerRange < PDFInteger > ( 0 , catalog - > getPageCount ( ) ) ;
2020-01-18 14:55:26 +01:00
PDFExecutionPolicy : : execute ( PDFExecutionPolicy : : Scope : : Page , pageRange . begin ( ) , pageRange . end ( ) , generateTextLayout ) ;
2020-01-01 18:23:18 +01:00
return result ;
2019-12-31 17:39:31 +01:00
} ;
2020-01-01 18:23:18 +01:00
Q_ASSERT ( ! m_textLayoutCompileFuture . isRunning ( ) ) ;
m_textLayoutCompileFuture = QtConcurrent : : run ( createTextLayout ) ;
m_textLayoutCompileFutureWatcher . setFuture ( m_textLayoutCompileFuture ) ;
}
2019-12-31 17:39:31 +01:00
2020-01-01 18:23:18 +01:00
void PDFAsynchronousTextLayoutCompiler : : onTextLayoutCreated ( )
{
m_proxy - > getFontCache ( ) - > setCacheShrinkEnabled ( this , true ) ;
m_proxy - > getProgress ( ) - > finish ( ) ;
2020-12-11 18:59:39 +01:00
m_cache . clear ( ) ;
2019-12-31 17:39:31 +01:00
2020-01-01 18:23:18 +01:00
m_textLayouts = m_textLayoutCompileFuture . result ( ) ;
2020-01-02 12:06:09 +01:00
m_isRunning = false ;
2020-01-01 18:23:18 +01:00
emit textLayoutChanged ( ) ;
2019-12-31 17:39:31 +01:00
}
2019-12-14 19:09:34 +01:00
} // namespace pdf