diff --git a/PdfForQtLib/sources/pdfencoding.cpp b/PdfForQtLib/sources/pdfencoding.cpp
index db5f45d..fd1a3d6 100644
--- a/PdfForQtLib/sources/pdfencoding.cpp
+++ b/PdfForQtLib/sources/pdfencoding.cpp
@@ -2193,6 +2193,10 @@ QString PDFEncoding::convertTextString(const QByteArray& stream)
     {
         return convertFromUnicode(stream);
     }
+    else if (hasUTF8LeadMarkings(stream)) // Checked after the UTF-16 markings, before the PDFDoc fallback
+    {
+        return QString::fromUtf8(stream);
+    }
     else
     {
         return convert(stream, Encoding::PDFDoc);
@@ -2348,4 +2352,20 @@ bool PDFEncoding::hasUnicodeLeadMarkings(const QByteArray& stream)
     return false;
 }
 
+bool PDFEncoding::hasUTF8LeadMarkings(const QByteArray& stream)
+{
+    if (stream.size() >= 3) // The marking is three bytes long, shorter streams cannot carry it
+    {
+        if (static_cast<unsigned char>(stream[0]) == 239 &&
+            static_cast<unsigned char>(stream[1]) == 187 &&
+            static_cast<unsigned char>(stream[2]) == 191)
+        {
+            // UTF-8 byte order mark (0xEF 0xBB 0xBF); bytes are compared as unsigned char
+            return true;
+        }
+    }
+
+    return false;
+}
+
 } // namespace pdf
diff --git a/PdfForQtLib/sources/pdfencoding.h b/PdfForQtLib/sources/pdfencoding.h
index 929c134..b172333 100644
--- a/PdfForQtLib/sources/pdfencoding.h
+++ b/PdfForQtLib/sources/pdfencoding.h
@@ -105,11 +105,18 @@ public:
     static const encoding::EncodingTable* getTableForEncoding(Encoding encoding);
 
 private:
-    /// Returns true, if byte array has UTF-16BE unicode marking bytes at the
+    /// Returns true, if byte array has UTF-16BE/LE unicode marking bytes at the
     /// stream start. If they are present, then byte stream is probably encoded
     /// as unicode.
     /// \param stream Stream to be tested
     static bool hasUnicodeLeadMarkings(const QByteArray& stream);
+
+    /// Returns true, if byte array starts with the UTF-8 byte order mark
+    /// (bytes 0xEF 0xBB 0xBF). If it is present, then byte stream is probably
+    /// encoded as UTF-8 string.
+    /// \note UTF-8 strings were added in PDF 2.0 specification
+    /// \param stream Stream to be tested
+    static bool hasUTF8LeadMarkings(const QByteArray& stream);
 };
 
 } // namespace pdf
diff --git a/PdfForQtLib/sources/pdfpagecontentprocessor.cpp b/PdfForQtLib/sources/pdfpagecontentprocessor.cpp
index d1caa32..7ecc625 100644
--- a/PdfForQtLib/sources/pdfpagecontentprocessor.cpp
+++ b/PdfForQtLib/sources/pdfpagecontentprocessor.cpp
@@ -157,9 +157,9 @@ static constexpr const std::pair
 
 void PDFPageContentProcessor::initDictionaries(const PDFObject& resourcesObject)
 {
-    auto getDictionary = [this, &resourcesObject](const char* resourceName) -> const pdf::PDFDictionary*
+    const PDFObject& resources = m_document->getObject(resourcesObject); // Resolved once, shared by the lambda and the ProcSet lookup
+    auto getDictionary = [this, &resources](const char* resourceName) -> const pdf::PDFDictionary*
     {
-        const PDFObject& resources = m_document->getObject(resourcesObject);
         if (resources.isDictionary() && resources.getDictionary()->hasKey(resourceName))
         {
             const PDFObject& resourceDictionary = m_document->getObject(resources.getDictionary()->get(resourceName));
@@ -179,6 +179,43 @@ void PDFPageContentProcessor::initDictionaries(const PDFObject& resourcesObject)
     m_propertiesDictionary = getDictionary("Properties");
     m_shadingDictionary = getDictionary("Shading");
     m_patternDictionary = getDictionary("Pattern");
+    m_procedureSets = NoProcSet; // Stays in effect unless a recognized name is found below
+
+    if (resources.isDictionary() && resources.getDictionary()->hasKey("ProcSet"))
+    {
+        PDFDocumentDataLoaderDecorator loader(m_document);
+        std::vector<QByteArray> procedureSetNames = loader.readNameArrayFromDictionary(resources.getDictionary(), "ProcSet");
+
+        ProcedureSets newProcSet = EmptyProcSet; // Accumulates one flag per recognized name
+        for (const QByteArray& procedureSetName : procedureSetNames)
+        {
+            if (procedureSetName == "PDF")
+            {
+                newProcSet.setFlag(PDF);
+            }
+            else if (procedureSetName == "Text")
+            {
+                newProcSet.setFlag(Text);
+            }
+            else if (procedureSetName == "ImageB")
+            {
+                newProcSet.setFlag(ImageB);
+            }
+            else if (procedureSetName == "ImageC")
+            {
+                newProcSet.setFlag(ImageC);
+            }
+            else if (procedureSetName == "ImageI")
+            {
+                newProcSet.setFlag(ImageI);
+            }
+        } // Names other than the five above are ignored
+
+        if (newProcSet) // Only replace NoProcSet when at least one name was recognized
+        {
+            m_procedureSets = newProcSet;
+        }
+    }
 }
 
 PDFPageContentProcessor::PDFPageContentProcessor(const PDFPage* page,
@@ -200,6 +237,7 @@ PDFPageContentProcessor::PDFPageContentProcessor(const PDFPage* page,
     m_propertiesDictionary(nullptr),
     m_shadingDictionary(nullptr),
     m_patternDictionary(nullptr),
+    m_procedureSets(NoProcSet),
     m_textBeginEndState(0),
     m_compatibilityBeginEndState(0),
     m_drawingUncoloredTilingPatternState(0),
@@ -3501,7 +3539,8 @@ PDFPageContentProcessor::PDFPageContentProcessorStateGuard::PDFPageContentProces
     m_extendedGraphicStateDictionary(processor->m_extendedGraphicStateDictionary),
     m_propertiesDictionary(processor->m_propertiesDictionary),
     m_shadingDictionary(processor->m_shadingDictionary),
-    m_patternDictionary(processor->m_patternDictionary)
+    m_patternDictionary(processor->m_patternDictionary),
+    m_procedureSets(processor->m_procedureSets)
 {
     m_processor->operatorSaveGraphicState();
 }
@@ -3516,6 +3555,7 @@ PDFPageContentProcessor::PDFPageContentProcessorStateGuard::~PDFPageContentProce
     m_processor->m_propertiesDictionary = m_propertiesDictionary;
     m_processor->m_shadingDictionary = m_shadingDictionary;
     m_processor->m_patternDictionary = m_patternDictionary;
+    m_processor->m_procedureSets = m_procedureSets; // Restore the value captured when the guard was created
 
     m_processor->operatorRestoreGraphicState();
 }
diff --git a/PdfForQtLib/sources/pdfpagecontentprocessor.h b/PdfForQtLib/sources/pdfpagecontentprocessor.h
index fb442ff..c333ae0 100644
--- a/PdfForQtLib/sources/pdfpagecontentprocessor.h
+++ b/PdfForQtLib/sources/pdfpagecontentprocessor.h
@@ -197,6 +197,18 @@ public:
         Invalid ///< Invalid operator, use for error reporting
     };
 
+    enum ProcedureSet
+    {
+        EmptyProcSet    = 0x0000, ///< No flag set, start value when collecting flags
+        NoProcSet       = 0x0001, ///< Resources contain no recognized procedure set name
+        PDF             = 0x0002, ///< Name "PDF" is present in the ProcSet array
+        Text            = 0x0004, ///< Name "Text" is present in the ProcSet array
+        ImageB          = 0x0008, ///< Name "ImageB" is present in the ProcSet array
+        ImageC          = 0x0010, ///< Name "ImageC" is present in the ProcSet array
+        ImageI          = 0x0020  ///< Name "ImageI" is present in the ProcSet array
+    };
+    Q_DECLARE_FLAGS(ProcedureSets, ProcedureSet)
+
     /// Process the contents of the page
     QList processContents();
 
@@ -547,6 +559,10 @@ protected:
     /// Returns page bounding rectangle in device space
     const QRectF& getPageBoundingRectDeviceSpace() const { return m_pageBoundingRectDeviceSpace; }
 
+    /// Returns current procedure sets. Procedure sets are deprecated in PDF 2.0 and are here
+    /// only for compatibility purposes. See chapter 14.2 in PDF 2.0 specification.
+    ProcedureSets getProcedureSets() const { return m_procedureSets; }
+
 private:
     /// Initializes the resources dictionaries
     void initDictionaries(const PDFObject& resourcesObject);
@@ -621,6 +637,7 @@ private:
         const PDFDictionary* m_propertiesDictionary;
         const PDFDictionary* m_shadingDictionary;
         const PDFDictionary* m_patternDictionary;
+        ProcedureSets m_procedureSets; ///< Procedure sets of the processor saved by this guard
     };
 
     class PDFPageContentProcessorGraphicStateSaveRestoreGuard
@@ -878,6 +895,7 @@ private:
     const PDFDictionary* m_propertiesDictionary;
     const PDFDictionary* m_shadingDictionary;
     const PDFDictionary* m_patternDictionary;
+    ProcedureSets m_procedureSets; ///< Procedure sets read from the current resources dictionary
 
     // Default color spaces
     PDFColorSpacePointer m_deviceGrayColorSpace;