mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
UTF-8 support (PDF 2.0 compliance) and ProcedureSets (for compatibility)
This commit is contained in:
@ -2193,6 +2193,10 @@ QString PDFEncoding::convertTextString(const QByteArray& stream)
|
|||||||
{
|
{
|
||||||
return convertFromUnicode(stream);
|
return convertFromUnicode(stream);
|
||||||
}
|
}
|
||||||
|
else if (hasUTF8LeadMarkings(stream))
|
||||||
|
{
|
||||||
|
return QString::fromUtf8(stream);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return convert(stream, Encoding::PDFDoc);
|
return convert(stream, Encoding::PDFDoc);
|
||||||
@ -2348,4 +2352,20 @@ bool PDFEncoding::hasUnicodeLeadMarkings(const QByteArray& stream)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool PDFEncoding::hasUTF8LeadMarkings(const QByteArray& stream)
|
||||||
|
{
|
||||||
|
if (stream.size() >= 3)
|
||||||
|
{
|
||||||
|
if (static_cast<unsigned char>(stream[0]) == 239 &&
|
||||||
|
static_cast<unsigned char>(stream[1]) == 187 &&
|
||||||
|
static_cast<unsigned char>(stream[2]) == 191)
|
||||||
|
{
|
||||||
|
// UTF-8
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
@ -105,11 +105,18 @@ public:
|
|||||||
static const encoding::EncodingTable* getTableForEncoding(Encoding encoding);
|
static const encoding::EncodingTable* getTableForEncoding(Encoding encoding);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Returns true, if byte array has UTF-16BE unicode marking bytes at the
|
/// Returns true, if byte array has UTF-16BE/LE unicode marking bytes at the
|
||||||
/// stream start. If they are present, then byte stream is probably encoded
|
/// stream start. If they are present, then byte stream is probably encoded
|
||||||
/// as unicode.
|
/// as unicode.
|
||||||
/// \param stream Stream to be tested
|
/// \param stream Stream to be tested
|
||||||
static bool hasUnicodeLeadMarkings(const QByteArray& stream);
|
static bool hasUnicodeLeadMarkings(const QByteArray& stream);
|
||||||
|
|
||||||
|
/// Returns true, if the byte array starts with the UTF-8 byte order mark
|
||||||
|
/// (bytes 0xEF 0xBB 0xBF). If the mark is present, then the byte stream is
|
||||||
|
/// probably encoded as a UTF-8 string.
|
||||||
|
/// \note UTF-8 strings were added in the PDF 2.0 specification
|
||||||
|
/// \param stream Stream to be tested
|
||||||
|
static bool hasUTF8LeadMarkings(const QByteArray& stream);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pdf
|
} // namespace pdf
|
||||||
|
@ -156,10 +156,10 @@ static constexpr const std::pair<const char*, PDFPageContentProcessor::Operator>
|
|||||||
};
|
};
|
||||||
|
|
||||||
void PDFPageContentProcessor::initDictionaries(const PDFObject& resourcesObject)
|
void PDFPageContentProcessor::initDictionaries(const PDFObject& resourcesObject)
|
||||||
{
|
|
||||||
auto getDictionary = [this, &resourcesObject](const char* resourceName) -> const pdf::PDFDictionary*
|
|
||||||
{
|
{
|
||||||
const PDFObject& resources = m_document->getObject(resourcesObject);
|
const PDFObject& resources = m_document->getObject(resourcesObject);
|
||||||
|
auto getDictionary = [this, &resources](const char* resourceName) -> const pdf::PDFDictionary*
|
||||||
|
{
|
||||||
if (resources.isDictionary() && resources.getDictionary()->hasKey(resourceName))
|
if (resources.isDictionary() && resources.getDictionary()->hasKey(resourceName))
|
||||||
{
|
{
|
||||||
const PDFObject& resourceDictionary = m_document->getObject(resources.getDictionary()->get(resourceName));
|
const PDFObject& resourceDictionary = m_document->getObject(resources.getDictionary()->get(resourceName));
|
||||||
@ -179,6 +179,43 @@ void PDFPageContentProcessor::initDictionaries(const PDFObject& resourcesObject)
|
|||||||
m_propertiesDictionary = getDictionary("Properties");
|
m_propertiesDictionary = getDictionary("Properties");
|
||||||
m_shadingDictionary = getDictionary("Shading");
|
m_shadingDictionary = getDictionary("Shading");
|
||||||
m_patternDictionary = getDictionary("Pattern");
|
m_patternDictionary = getDictionary("Pattern");
|
||||||
|
m_procedureSets = NoProcSet;
|
||||||
|
|
||||||
|
if (resources.isDictionary() && resources.getDictionary()->hasKey("ProcSet"))
|
||||||
|
{
|
||||||
|
PDFDocumentDataLoaderDecorator loader(m_document);
|
||||||
|
std::vector<QByteArray> procedureSetNames = loader.readNameArrayFromDictionary(resources.getDictionary(), "ProcSet");
|
||||||
|
|
||||||
|
ProcedureSets newProcSet = EmptyProcSet;
|
||||||
|
for (const QByteArray& procedureSetName : procedureSetNames)
|
||||||
|
{
|
||||||
|
if (procedureSetName == "PDF")
|
||||||
|
{
|
||||||
|
newProcSet.setFlag(PDF);
|
||||||
|
}
|
||||||
|
else if (procedureSetName == "Text")
|
||||||
|
{
|
||||||
|
newProcSet.setFlag(Text);
|
||||||
|
}
|
||||||
|
else if (procedureSetName == "ImageB")
|
||||||
|
{
|
||||||
|
newProcSet.setFlag(ImageB);
|
||||||
|
}
|
||||||
|
else if (procedureSetName == "ImageC")
|
||||||
|
{
|
||||||
|
newProcSet.setFlag(ImageC);
|
||||||
|
}
|
||||||
|
else if (procedureSetName == "ImageI")
|
||||||
|
{
|
||||||
|
newProcSet.setFlag(ImageI);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (newProcSet)
|
||||||
|
{
|
||||||
|
m_procedureSets = newProcSet;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PDFPageContentProcessor::PDFPageContentProcessor(const PDFPage* page,
|
PDFPageContentProcessor::PDFPageContentProcessor(const PDFPage* page,
|
||||||
@ -200,6 +237,7 @@ PDFPageContentProcessor::PDFPageContentProcessor(const PDFPage* page,
|
|||||||
m_propertiesDictionary(nullptr),
|
m_propertiesDictionary(nullptr),
|
||||||
m_shadingDictionary(nullptr),
|
m_shadingDictionary(nullptr),
|
||||||
m_patternDictionary(nullptr),
|
m_patternDictionary(nullptr),
|
||||||
|
m_procedureSets(NoProcSet),
|
||||||
m_textBeginEndState(0),
|
m_textBeginEndState(0),
|
||||||
m_compatibilityBeginEndState(0),
|
m_compatibilityBeginEndState(0),
|
||||||
m_drawingUncoloredTilingPatternState(0),
|
m_drawingUncoloredTilingPatternState(0),
|
||||||
@ -3501,7 +3539,8 @@ PDFPageContentProcessor::PDFPageContentProcessorStateGuard::PDFPageContentProces
|
|||||||
m_extendedGraphicStateDictionary(processor->m_extendedGraphicStateDictionary),
|
m_extendedGraphicStateDictionary(processor->m_extendedGraphicStateDictionary),
|
||||||
m_propertiesDictionary(processor->m_propertiesDictionary),
|
m_propertiesDictionary(processor->m_propertiesDictionary),
|
||||||
m_shadingDictionary(processor->m_shadingDictionary),
|
m_shadingDictionary(processor->m_shadingDictionary),
|
||||||
m_patternDictionary(processor->m_patternDictionary)
|
m_patternDictionary(processor->m_patternDictionary),
|
||||||
|
m_procedureSets(processor->m_procedureSets)
|
||||||
{
|
{
|
||||||
m_processor->operatorSaveGraphicState();
|
m_processor->operatorSaveGraphicState();
|
||||||
}
|
}
|
||||||
@ -3516,6 +3555,7 @@ PDFPageContentProcessor::PDFPageContentProcessorStateGuard::~PDFPageContentProce
|
|||||||
m_processor->m_propertiesDictionary = m_propertiesDictionary;
|
m_processor->m_propertiesDictionary = m_propertiesDictionary;
|
||||||
m_processor->m_shadingDictionary = m_shadingDictionary;
|
m_processor->m_shadingDictionary = m_shadingDictionary;
|
||||||
m_processor->m_patternDictionary = m_patternDictionary;
|
m_processor->m_patternDictionary = m_patternDictionary;
|
||||||
|
m_processor->m_procedureSets = m_procedureSets;
|
||||||
|
|
||||||
m_processor->operatorRestoreGraphicState();
|
m_processor->operatorRestoreGraphicState();
|
||||||
}
|
}
|
||||||
|
@ -197,6 +197,18 @@ public:
|
|||||||
Invalid ///< Invalid operator, use for error reporting
|
Invalid ///< Invalid operator, use for error reporting
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Procedure sets, which can be listed in the "ProcSet" entry of the resource
/// dictionary. Every value occupies its own bit, so the values can be combined
/// into the ProcedureSets flags.
enum ProcedureSet
{
    EmptyProcSet = 0,       ///< No bit is set, used as a start value when the "ProcSet" names are collected
    NoProcSet    = 1 << 0,  ///< Default value, used when no known procedure set name was found
    PDF          = 1 << 1,  ///< Name "PDF" is present in the "ProcSet" array
    Text         = 1 << 2,  ///< Name "Text" is present in the "ProcSet" array
    ImageB       = 1 << 3,  ///< Name "ImageB" is present in the "ProcSet" array
    ImageC       = 1 << 4,  ///< Name "ImageC" is present in the "ProcSet" array
    ImageI       = 1 << 5   ///< Name "ImageI" is present in the "ProcSet" array
};
|
||||||
|
Q_DECLARE_FLAGS(ProcedureSets, ProcedureSet)
|
||||||
|
|
||||||
/// Process the contents of the page
|
/// Process the contents of the page
|
||||||
QList<PDFRenderError> processContents();
|
QList<PDFRenderError> processContents();
|
||||||
|
|
||||||
@ -547,6 +559,10 @@ protected:
|
|||||||
/// Returns page bounding rectangle in device space
|
/// Returns page bounding rectangle in device space
|
||||||
const QRectF& getPageBoundingRectDeviceSpace() const { return m_pageBoundingRectDeviceSpace; }
|
const QRectF& getPageBoundingRectDeviceSpace() const { return m_pageBoundingRectDeviceSpace; }
|
||||||
|
|
||||||
|
/// Returns the procedure sets currently stored in the processor. Procedure sets are deprecated
|
||||||
|
/// in PDF 2.0 and are kept here only for compatibility purposes. See chapter 14.2 of the PDF 2.0 specification.
|
||||||
|
ProcedureSets getProcedureSets() const { return m_procedureSets; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Initializes the resources dictionaries
|
/// Initializes the resources dictionaries
|
||||||
void initDictionaries(const PDFObject& resourcesObject);
|
void initDictionaries(const PDFObject& resourcesObject);
|
||||||
@ -621,6 +637,7 @@ private:
|
|||||||
const PDFDictionary* m_propertiesDictionary;
|
const PDFDictionary* m_propertiesDictionary;
|
||||||
const PDFDictionary* m_shadingDictionary;
|
const PDFDictionary* m_shadingDictionary;
|
||||||
const PDFDictionary* m_patternDictionary;
|
const PDFDictionary* m_patternDictionary;
|
||||||
|
ProcedureSets m_procedureSets;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PDFPageContentProcessorGraphicStateSaveRestoreGuard
|
class PDFPageContentProcessorGraphicStateSaveRestoreGuard
|
||||||
@ -878,6 +895,7 @@ private:
|
|||||||
const PDFDictionary* m_propertiesDictionary;
|
const PDFDictionary* m_propertiesDictionary;
|
||||||
const PDFDictionary* m_shadingDictionary;
|
const PDFDictionary* m_shadingDictionary;
|
||||||
const PDFDictionary* m_patternDictionary;
|
const PDFDictionary* m_patternDictionary;
|
||||||
|
ProcedureSets m_procedureSets;
|
||||||
|
|
||||||
// Default color spaces
|
// Default color spaces
|
||||||
PDFColorSpacePointer m_deviceGrayColorSpace;
|
PDFColorSpacePointer m_deviceGrayColorSpace;
|
||||||
|
Reference in New Issue
Block a user