2019-07-01 12:35:53 +02:00
// Copyright (C) 2018-2019 Jakub Melka
2018-11-17 16:48:30 +01:00
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
# include "pdfdocument.h"
2018-12-02 17:53:19 +01:00
# include "pdfencoding.h"
2019-04-29 17:03:19 +02:00
# include "pdfexception.h"
2019-02-09 18:40:56 +01:00
# include "pdfstreamfilters.h"
# include "pdfconstants.h"
2018-11-17 16:48:30 +01:00
2018-12-02 17:53:19 +01:00
namespace pdf
{
// Keys of the "Info" entry in the trailer dictionary, followed by the values
// recognised for its "Trapped" item. These literals are compared verbatim with
// dictionary keys and name objects, so they must not carry padding whitespace:
// a padded key such as " Info " can never match the real entry.
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY = "Info";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TITLE = "Title";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_AUTHOR = "Author";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_SUBJECT = "Subject";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_KEYWORDS = "Keywords";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_CREATOR = "Creator";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_PRODUCER = "Producer";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_CREATION_DATE = "CreationDate";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_MODIFIED_DATE = "ModDate";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED = "Trapped";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_TRUE = "True";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_FALSE = "False";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN = "Unknown";
2019-02-09 18:40:56 +01:00
// Returns the decoded data of the given stream. The work is delegated to
// PDFStreamFilterStorage, which receives a callable bound to this document's
// getObject() together with the security handler kept by the object storage.
QByteArray PDFDocument::getDecodedStream(const PDFStream* stream) const
{
    auto objectFetcher = std::bind(&PDFDocument::getObject, this, std::placeholders::_1);
    return PDFStreamFilterStorage::getDecodedStream(stream, objectFetcher, m_pdfObjectStorage.getSecurityHandler());
}
2019-02-09 18:40:56 +01:00
2019-06-28 18:11:05 +02:00
// Returns the dictionary of the trailer object held by the object storage.
// If the trailer is a dictionary, that dictionary is returned; if it is a stream,
// the dictionary of the stream is returned. For any other object type the result
// is nullptr.
const PDFDictionary* PDFDocument::getTrailerDictionary() const
{
    const PDFObject& trailer = m_pdfObjectStorage.getTrailerDictionary();

    // Trailer object should be dictionary/stream here. It is verified in the document reader.
    Q_ASSERT(trailer.isDictionary() || trailer.isStream());

    if (trailer.isDictionary())
    {
        return trailer.getDictionary();
    }

    if (trailer.isStream())
    {
        return trailer.getStream()->getDictionary();
    }

    return nullptr;
}
2018-12-02 17:53:19 +01:00
void PDFDocument : : init ( )
{
initInfo ( ) ;
2018-12-24 17:09:23 +01:00
2019-06-28 18:11:05 +02:00
const PDFDictionary * dictionary = getTrailerDictionary ( ) ;
2018-12-24 17:09:23 +01:00
Q_ASSERT ( dictionary ) ;
m_catalog = PDFCatalog : : parse ( getObject ( dictionary - > get ( " Root " ) ) , this ) ;
2018-12-02 17:53:19 +01:00
}
void PDFDocument : : initInfo ( )
{
// Trailer object should be dictionary here. It is verified in the document reader.
2019-06-28 18:11:05 +02:00
const PDFDictionary * dictionary = getTrailerDictionary ( ) ;
2018-12-02 17:53:19 +01:00
Q_ASSERT ( dictionary ) ;
if ( dictionary - > hasKey ( PDF_DOCUMENT_INFO_ENTRY ) )
{
const PDFObject & info = getObject ( dictionary - > get ( PDF_DOCUMENT_INFO_ENTRY ) ) ;
if ( info . isDictionary ( ) )
{
const PDFDictionary * infoDictionary = info . getDictionary ( ) ;
Q_ASSERT ( infoDictionary ) ;
auto readTextString = [ this , infoDictionary ] ( const char * entry , QString & fillEntry )
{
if ( infoDictionary - > hasKey ( entry ) )
{
const PDFObject & stringObject = getObject ( infoDictionary - > get ( entry ) ) ;
if ( stringObject . isString ( ) )
{
// We have succesfully read the string, convert it according to encoding
fillEntry = PDFEncoding : : convertTextString ( stringObject . getString ( ) ) ;
}
else if ( ! stringObject . isNull ( ) )
{
2019-09-27 18:41:56 +02:00
throw PDFException ( tr ( " Bad format of document info entry in trailer dictionary. String expected. " ) ) ;
2018-12-02 17:53:19 +01:00
}
}
} ;
readTextString ( PDF_DOCUMENT_INFO_ENTRY_TITLE , m_info . title ) ;
readTextString ( PDF_DOCUMENT_INFO_ENTRY_AUTHOR , m_info . author ) ;
readTextString ( PDF_DOCUMENT_INFO_ENTRY_SUBJECT , m_info . subject ) ;
readTextString ( PDF_DOCUMENT_INFO_ENTRY_KEYWORDS , m_info . keywords ) ;
readTextString ( PDF_DOCUMENT_INFO_ENTRY_CREATOR , m_info . creator ) ;
readTextString ( PDF_DOCUMENT_INFO_ENTRY_PRODUCER , m_info . producer ) ;
auto readDate = [ this , infoDictionary ] ( const char * entry , QDateTime & fillEntry )
{
if ( infoDictionary - > hasKey ( entry ) )
{
const PDFObject & stringObject = getObject ( infoDictionary - > get ( entry ) ) ;
if ( stringObject . isString ( ) )
{
// We have succesfully read the string, convert it to date time
fillEntry = PDFEncoding : : convertToDateTime ( stringObject . getString ( ) ) ;
if ( ! fillEntry . isValid ( ) )
{
2019-09-27 18:41:56 +02:00
throw PDFException ( tr ( " Bad format of document info entry in trailer dictionary. String with date time format expected. " ) ) ;
2018-12-02 17:53:19 +01:00
}
}
else if ( ! stringObject . isNull ( ) )
{
2019-09-27 18:41:56 +02:00
throw PDFException ( tr ( " Bad format of document info entry in trailer dictionary. String with date time format expected. " ) ) ;
2018-12-02 17:53:19 +01:00
}
}
} ;
readDate ( PDF_DOCUMENT_INFO_ENTRY_CREATION_DATE , m_info . creationDate ) ;
readDate ( PDF_DOCUMENT_INFO_ENTRY_MODIFIED_DATE , m_info . modifiedDate ) ;
if ( infoDictionary - > hasKey ( PDF_DOCUMENT_INFO_ENTRY_TRAPPED ) )
{
const PDFObject & nameObject = getObject ( infoDictionary - > get ( PDF_DOCUMENT_INFO_ENTRY_TRAPPED ) ) ;
if ( nameObject . isName ( ) )
{
const QByteArray & name = nameObject . getString ( ) ;
if ( name = = PDF_DOCUMENT_INFO_ENTRY_TRAPPED_TRUE )
{
m_info . trapped = Info : : Trapped : : True ;
}
else if ( name = = PDF_DOCUMENT_INFO_ENTRY_TRAPPED_FALSE )
{
m_info . trapped = Info : : Trapped : : False ;
}
else if ( name = = PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN )
{
m_info . trapped = Info : : Trapped : : Unknown ;
}
else
{
2019-09-27 18:41:56 +02:00
throw PDFException ( tr ( " Bad format of document info entry in trailer dictionary. Trapping information expected " ) ) ;
2018-12-02 17:53:19 +01:00
}
}
else
{
2019-09-27 18:41:56 +02:00
throw PDFException ( tr ( " Bad format of document info entry in trailer dictionary. Trapping information expected " ) ) ;
2018-12-02 17:53:19 +01:00
}
}
}
else if ( ! info . isNull ( ) ) // Info may be invalid...
{
2019-09-27 18:41:56 +02:00
throw PDFException ( tr ( " Bad format of document info entry in trailer dictionary. " ) ) ;
2018-12-02 17:53:19 +01:00
}
}
}
// Returns the stored object addressed by the reference. When the object number
// lies outside the storage, or the stored generation differs from the requested
// one, a reference to a function-local default-constructed PDFObject is returned.
const PDFObject& PDFObjectStorage::getObject(PDFObjectReference reference) const
{
    const bool isNumberInRange = reference.objectNumber >= 0 &&
                                 reference.objectNumber < static_cast<PDFInteger>(m_objects.size());

    if (isNumberInRange)
    {
        const auto& entry = m_objects[reference.objectNumber];
        if (entry.generation == reference.generation)
        {
            return entry.object;
        }
    }

    static const PDFObject dummy;
    return dummy;
}
2019-03-30 18:45:30 +01:00
// Reads a name. The object is first passed through the document's getObject();
// an empty byte array is returned when the result is not a name.
QByteArray PDFDocumentDataLoaderDecorator::readName(const PDFObject& object)
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isName())
    {
        return QByteArray();
    }

    return resolved.getString();
}
2019-03-31 18:08:36 +02:00
// Reads a string as raw bytes. The object is first passed through the document's
// getObject(); an empty byte array is returned when the result is not a string.
QByteArray PDFDocumentDataLoaderDecorator::readString(const PDFObject& object)
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isString())
    {
        return QByteArray();
    }

    return resolved.getString();
}
2018-12-24 17:09:23 +01:00
// Reads an integer. The object is first passed through the document's
// getObject(); defaultValue is returned when the result is not an integer.
PDFInteger PDFDocumentDataLoaderDecorator::readInteger(const PDFObject& object, PDFInteger defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isInt())
    {
        return defaultValue;
    }

    return resolved.getInteger();
}
2019-02-23 15:44:14 +01:00
// Reads a number. The object is first passed through the document's getObject().
// Real objects are returned as they are, integer objects are converted to
// PDFReal, and every other object type yields defaultValue.
PDFReal PDFDocumentDataLoaderDecorator::readNumber(const PDFObject& object, PDFReal defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);

    if (resolved.isReal())
    {
        return resolved.getReal();
    }

    if (resolved.isInt())
    {
        return resolved.getInteger();
    }

    return defaultValue;
}
2019-03-25 18:44:45 +01:00
bool PDFDocumentDataLoaderDecorator : : readBoolean ( const PDFObject & object , bool defaultValue ) const
{
const PDFObject & dereferencedObject = m_document - > getObject ( object ) ;
if ( dereferencedObject . isBool ( ) )
{
return dereferencedObject . getBool ( ) ;
}
return defaultValue ;
}
2018-12-24 17:09:23 +01:00
// Reads a text string. The object is first passed through the document's
// getObject(); a string result is converted with PDFEncoding::convertTextString,
// anything else yields defaultValue.
QString PDFDocumentDataLoaderDecorator::readTextString(const PDFObject& object, const QString& defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isString())
    {
        return defaultValue;
    }

    return PDFEncoding::convertTextString(resolved.getString());
}
2018-12-26 18:00:17 +01:00
// Reads a rectangle stored as an array of exactly four numbers. Each item is
// passed through the document's getObject() and may be a real or an integer.
// The two corners are normalized, so the returned rectangle has non-negative
// width and height. defaultValue is returned when the object is not an array,
// when the array does not have four items, or when an item is not a number.
QRectF PDFDocumentDataLoaderDecorator::readRectangle(const PDFObject& object, const QRectF& defaultValue) const
{
    const PDFObject& resolved = m_document->getObject(object);
    if (!resolved.isArray())
    {
        return defaultValue;
    }

    const PDFArray* array = resolved.getArray();
    if (array->getCount() != 4)
    {
        return defaultValue;
    }

    std::array<PDFReal, 4> coordinates;
    for (size_t index = 0; index < 4; ++index)
    {
        const PDFObject& item = m_document->getObject(array->getItem(index));
        if (item.isReal())
        {
            coordinates[index] = item.getReal();
        }
        else if (item.isInt())
        {
            coordinates[index] = item.getInteger();
        }
        else
        {
            return defaultValue;
        }
    }

    // Items 0/2 are the x coordinates and items 1/3 the y coordinates of the corners.
    const PDFReal left = qMin(coordinates[0], coordinates[2]);
    const PDFReal right = qMax(coordinates[0], coordinates[2]);
    const PDFReal top = qMin(coordinates[1], coordinates[3]);
    const PDFReal bottom = qMax(coordinates[1], coordinates[3]);

    const PDFReal width = right - left;
    const PDFReal height = bottom - top;
    return QRectF(left, top, width, height);
}
2019-08-25 18:16:37 +02:00
// Reads a matrix stored under the given key as an array of six numbers.
// defaultValue is returned when the dictionary has no such key. When the key is
// present but the number array read from it does not have exactly six items,
// PDFRendererException is thrown.
QMatrix PDFDocumentDataLoaderDecorator::readMatrixFromDictionary(const PDFDictionary* dictionary, const char* key, QMatrix defaultValue)
{
    if (!dictionary->hasKey(key))
    {
        return defaultValue;
    }

    std::vector<PDFReal> elements = readNumberArrayFromDictionary(dictionary, key);
    if (elements.size() != 6)
    {
        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr(" Invalid number of matrix elements. Expected 6, actual %1. ").arg(elements.size()));
    }

    return QMatrix(elements[0], elements[1], elements[2], elements[3], elements[4], elements[5]);
}
2019-09-08 17:17:12 +02:00
std : : vector < PDFReal > PDFDocumentDataLoaderDecorator : : readNumberArrayFromDictionary ( const PDFDictionary * dictionary ,
const char * key ,
std : : vector < PDFReal > defaultValue )
2019-03-07 19:57:03 +01:00
{
if ( dictionary - > hasKey ( key ) )
{
2019-09-08 17:17:12 +02:00
return readNumberArray ( dictionary - > get ( key ) , defaultValue ) ;
2019-03-07 19:57:03 +01:00
}
2019-09-08 17:17:12 +02:00
return defaultValue ;
2019-03-07 19:57:03 +01:00
}
std : : vector < PDFInteger > PDFDocumentDataLoaderDecorator : : readIntegerArrayFromDictionary ( const PDFDictionary * dictionary , const char * key )
{
if ( dictionary - > hasKey ( key ) )
{
return readIntegerArray ( dictionary - > get ( key ) ) ;
}
return std : : vector < PDFInteger > ( ) ;
}
2019-02-16 18:26:16 +01:00
// Reads a number stored under the given key (C string variant). defaultValue is
// returned when the key is missing; otherwise the entry is handed to readNumber
// with the same default.
PDFReal PDFDocumentDataLoaderDecorator::readNumberFromDictionary(const PDFDictionary* dictionary, const char* key, PDFReal defaultValue) const
{
    if (!dictionary->hasKey(key))
    {
        return defaultValue;
    }

    return readNumber(dictionary->get(key), defaultValue);
}
2019-09-01 14:42:32 +02:00
// Reads a number stored under the given key (QByteArray variant). defaultValue
// is returned when the key is missing; otherwise the entry is handed to
// readNumber with the same default.
PDFReal PDFDocumentDataLoaderDecorator::readNumberFromDictionary(const PDFDictionary* dictionary, const QByteArray& key, PDFReal defaultValue) const
{
    if (!dictionary->hasKey(key))
    {
        return defaultValue;
    }

    return readNumber(dictionary->get(key), defaultValue);
}
2019-02-16 18:26:16 +01:00
// Reads an integer stored under the given key. defaultValue is returned when the
// key is missing; otherwise the entry is handed to readInteger with the same
// default.
PDFInteger PDFDocumentDataLoaderDecorator::readIntegerFromDictionary(const PDFDictionary* dictionary, const char* key, PDFInteger defaultValue) const
{
    if (!dictionary->hasKey(key))
    {
        return defaultValue;
    }

    return readInteger(dictionary->get(key), defaultValue);
}
2019-06-23 18:35:32 +02:00
// Reads a text string stored under the given key. defaultValue is returned when
// the key is missing; otherwise the entry is handed to readTextString with the
// same default.
QString PDFDocumentDataLoaderDecorator::readTextStringFromDictionary(const PDFDictionary* dictionary, const char* key, const QString& defaultValue) const
{
    if (!dictionary->hasKey(key))
    {
        return defaultValue;
    }

    return readTextString(dictionary->get(key), defaultValue);
}
std : : vector < PDFObjectReference > PDFDocumentDataLoaderDecorator : : readReferenceArrayFromDictionary ( const PDFDictionary * dictionary , const char * key )
{
if ( dictionary - > hasKey ( key ) )
{
return readReferenceArray ( dictionary - > get ( key ) ) ;
}
return std : : vector < PDFObjectReference > ( ) ;
}
2019-09-08 17:17:12 +02:00
std : : vector < PDFReal > PDFDocumentDataLoaderDecorator : : readNumberArray ( const PDFObject & object , std : : vector < PDFReal > defaultValue ) const
2019-02-23 15:44:14 +01:00
{
const PDFObject & dereferencedObject = m_document - > getObject ( object ) ;
if ( dereferencedObject . isArray ( ) )
{
const PDFArray * array = dereferencedObject . getArray ( ) ;
std : : vector < PDFReal > result ;
const size_t count = array - > getCount ( ) ;
result . reserve ( count ) ;
for ( size_t i = 0 ; i < count ; + + i )
{
const PDFReal number = readNumber ( array - > getItem ( i ) , std : : numeric_limits < PDFReal > : : quiet_NaN ( ) ) ;
if ( std : : isnan ( number ) )
{
2019-09-08 17:17:12 +02:00
return defaultValue ;
2019-02-23 15:44:14 +01:00
}
result . push_back ( number ) ;
}
// We assume, that RVO (return value optimization) will not work for this function
// (multiple return points).
return std : : move ( result ) ;
}
2019-09-08 17:17:12 +02:00
return defaultValue ;
2019-02-23 15:44:14 +01:00
}
2019-03-07 19:57:03 +01:00
std : : vector < PDFInteger > PDFDocumentDataLoaderDecorator : : readIntegerArray ( const PDFObject & object ) const
{
const PDFObject & dereferencedObject = m_document - > getObject ( object ) ;
if ( dereferencedObject . isArray ( ) )
{
const PDFArray * array = dereferencedObject . getArray ( ) ;
std : : vector < PDFInteger > result ;
const size_t count = array - > getCount ( ) ;
result . reserve ( count ) ;
for ( size_t i = 0 ; i < count ; + + i )
{
// This value is not representable in the current PDF parser. So we assume we
// can't get this value.
constexpr const PDFInteger INVALID_VALUE = std : : numeric_limits < PDFInteger > : : max ( ) ;
const PDFReal number = readInteger ( array - > getItem ( i ) , INVALID_VALUE ) ;
if ( number = = INVALID_VALUE )
{
return std : : vector < PDFInteger > ( ) ;
}
result . push_back ( number ) ;
}
// We assume, that RVO (return value optimization) will not work for this function
// (multiple return points).
return std : : move ( result ) ;
}
return std : : vector < PDFInteger > ( ) ;
}
2019-06-23 18:35:32 +02:00
std : : vector < PDFObjectReference > PDFDocumentDataLoaderDecorator : : readReferenceArray ( const PDFObject & object ) const
{
const PDFObject & dereferencedObject = m_document - > getObject ( object ) ;
if ( dereferencedObject . isArray ( ) )
{
const PDFArray * array = dereferencedObject . getArray ( ) ;
std : : vector < PDFObjectReference > result ;
const size_t count = array - > getCount ( ) ;
result . reserve ( count ) ;
for ( size_t i = 0 ; i < count ; + + i )
{
const PDFObject & referenceObject = array - > getItem ( i ) ;
if ( referenceObject . isReference ( ) )
{
result . push_back ( referenceObject . getReference ( ) ) ;
}
else
{
result . clear ( ) ;
break ;
}
}
// We assume, that RVO (return value optimization) will not work for this function
// (multiple return points).
return std : : move ( result ) ;
}
return std : : vector < PDFObjectReference > ( ) ;
}
std : : vector < QByteArray > PDFDocumentDataLoaderDecorator : : readNameArray ( const PDFObject & object ) const
{
const PDFObject & dereferencedObject = m_document - > getObject ( object ) ;
if ( dereferencedObject . isArray ( ) )
{
const PDFArray * array = dereferencedObject . getArray ( ) ;
std : : vector < QByteArray > result ;
const size_t count = array - > getCount ( ) ;
result . reserve ( count ) ;
for ( size_t i = 0 ; i < count ; + + i )
{
const PDFObject & nameObject = array - > getItem ( i ) ;
if ( nameObject . isName ( ) )
{
result . push_back ( nameObject . getString ( ) ) ;
}
else
{
result . clear ( ) ;
break ;
}
}
// We assume, that RVO (return value optimization) will not work for this function
// (multiple return points).
return std : : move ( result ) ;
}
return std : : vector < QByteArray > ( ) ;
}
std : : vector < QByteArray > PDFDocumentDataLoaderDecorator : : readNameArrayFromDictionary ( const PDFDictionary * dictionary , const char * key ) const
{
if ( dictionary - > hasKey ( key ) )
{
return readNameArray ( dictionary - > get ( key ) ) ;
}
return std : : vector < QByteArray > ( ) ;
}
2019-03-25 18:44:45 +01:00
bool PDFDocumentDataLoaderDecorator : : readBooleanFromDictionary ( const PDFDictionary * dictionary , const char * key , bool defaultValue ) const
{
if ( dictionary - > hasKey ( key ) )
{
return readBoolean ( dictionary - > get ( key ) , defaultValue ) ;
}
return defaultValue ;
}
2019-03-30 18:45:30 +01:00
// Reads a name stored under the given key. An empty byte array is returned when
// the key is missing; otherwise the entry is handed to readName.
QByteArray PDFDocumentDataLoaderDecorator::readNameFromDictionary(const PDFDictionary* dictionary, const char* key)
{
    if (!dictionary->hasKey(key))
    {
        return QByteArray();
    }

    return readName(dictionary->get(key));
}
2019-03-31 18:08:36 +02:00
// Reads a string (raw bytes) stored under the given key. An empty byte array is
// returned when the key is missing; otherwise the entry is handed to readString.
QByteArray PDFDocumentDataLoaderDecorator::readStringFromDictionary(const PDFDictionary* dictionary, const char* key)
{
    if (!dictionary->hasKey(key))
    {
        return QByteArray();
    }

    return readString(dictionary->get(key));
}
2018-12-02 17:53:19 +01:00
} // namespace pdf