mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
UTF-8 support (PDF 2.0 compliance) and ProcedureSets (for compatibility)
This commit is contained in:
@ -2193,6 +2193,10 @@ QString PDFEncoding::convertTextString(const QByteArray& stream)
|
||||
{
|
||||
return convertFromUnicode(stream);
|
||||
}
|
||||
else if (hasUTF8LeadMarkings(stream))
|
||||
{
|
||||
return QString::fromUtf8(stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
return convert(stream, Encoding::PDFDoc);
|
||||
@ -2348,4 +2352,20 @@ bool PDFEncoding::hasUnicodeLeadMarkings(const QByteArray& stream)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PDFEncoding::hasUTF8LeadMarkings(const QByteArray& stream)
|
||||
{
|
||||
if (stream.size() >= 3)
|
||||
{
|
||||
if (static_cast<unsigned char>(stream[0]) == 239 &&
|
||||
static_cast<unsigned char>(stream[1]) == 187 &&
|
||||
static_cast<unsigned char>(stream[2]) == 191)
|
||||
{
|
||||
// UTF-8
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace pdf
|
||||
|
@ -105,11 +105,18 @@ public:
|
||||
static const encoding::EncodingTable* getTableForEncoding(Encoding encoding);
|
||||
|
||||
private:
|
||||
/// Returns true, if byte array has UTF-16BE unicode marking bytes at the
|
||||
/// Returns true, if byte array has UTF-16BE/LE unicode marking bytes at the
|
||||
/// stream start. If they are present, then byte stream is probably encoded
|
||||
/// as unicode.
|
||||
/// \param stream Stream to be tested
|
||||
static bool hasUnicodeLeadMarkings(const QByteArray& stream);
|
||||
|
||||
/// Returns true, if byte array starts with the UTF-8 byte order mark
|
||||
/// (bytes 0xEF 0xBB 0xBF). If the mark is present, then byte stream
|
||||
/// is probably encoded as UTF-8 string.
|
||||
/// \note UTF-8 strings were added in PDF 2.0 specification
|
||||
/// \param stream Stream to be tested
|
||||
static bool hasUTF8LeadMarkings(const QByteArray& stream);
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
@ -157,9 +157,9 @@ static constexpr const std::pair<const char*, PDFPageContentProcessor::Operator>
|
||||
|
||||
void PDFPageContentProcessor::initDictionaries(const PDFObject& resourcesObject)
|
||||
{
|
||||
auto getDictionary = [this, &resourcesObject](const char* resourceName) -> const pdf::PDFDictionary*
|
||||
const PDFObject& resources = m_document->getObject(resourcesObject);
|
||||
auto getDictionary = [this, &resources](const char* resourceName) -> const pdf::PDFDictionary*
|
||||
{
|
||||
const PDFObject& resources = m_document->getObject(resourcesObject);
|
||||
if (resources.isDictionary() && resources.getDictionary()->hasKey(resourceName))
|
||||
{
|
||||
const PDFObject& resourceDictionary = m_document->getObject(resources.getDictionary()->get(resourceName));
|
||||
@ -179,6 +179,43 @@ void PDFPageContentProcessor::initDictionaries(const PDFObject& resourcesObject)
|
||||
m_propertiesDictionary = getDictionary("Properties");
|
||||
m_shadingDictionary = getDictionary("Shading");
|
||||
m_patternDictionary = getDictionary("Pattern");
|
||||
m_procedureSets = NoProcSet;
|
||||
|
||||
if (resources.isDictionary() && resources.getDictionary()->hasKey("ProcSet"))
|
||||
{
|
||||
PDFDocumentDataLoaderDecorator loader(m_document);
|
||||
std::vector<QByteArray> procedureSetNames = loader.readNameArrayFromDictionary(resources.getDictionary(), "ProcSet");
|
||||
|
||||
ProcedureSets newProcSet = EmptyProcSet;
|
||||
for (const QByteArray& procedureSetName : procedureSetNames)
|
||||
{
|
||||
if (procedureSetName == "PDF")
|
||||
{
|
||||
newProcSet.setFlag(PDF);
|
||||
}
|
||||
else if (procedureSetName == "Text")
|
||||
{
|
||||
newProcSet.setFlag(Text);
|
||||
}
|
||||
else if (procedureSetName == "ImageB")
|
||||
{
|
||||
newProcSet.setFlag(ImageB);
|
||||
}
|
||||
else if (procedureSetName == "ImageC")
|
||||
{
|
||||
newProcSet.setFlag(ImageC);
|
||||
}
|
||||
else if (procedureSetName == "ImageI")
|
||||
{
|
||||
newProcSet.setFlag(ImageI);
|
||||
}
|
||||
}
|
||||
|
||||
if (newProcSet)
|
||||
{
|
||||
m_procedureSets = newProcSet;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PDFPageContentProcessor::PDFPageContentProcessor(const PDFPage* page,
|
||||
@ -200,6 +237,7 @@ PDFPageContentProcessor::PDFPageContentProcessor(const PDFPage* page,
|
||||
m_propertiesDictionary(nullptr),
|
||||
m_shadingDictionary(nullptr),
|
||||
m_patternDictionary(nullptr),
|
||||
m_procedureSets(NoProcSet),
|
||||
m_textBeginEndState(0),
|
||||
m_compatibilityBeginEndState(0),
|
||||
m_drawingUncoloredTilingPatternState(0),
|
||||
@ -3501,7 +3539,8 @@ PDFPageContentProcessor::PDFPageContentProcessorStateGuard::PDFPageContentProces
|
||||
m_extendedGraphicStateDictionary(processor->m_extendedGraphicStateDictionary),
|
||||
m_propertiesDictionary(processor->m_propertiesDictionary),
|
||||
m_shadingDictionary(processor->m_shadingDictionary),
|
||||
m_patternDictionary(processor->m_patternDictionary)
|
||||
m_patternDictionary(processor->m_patternDictionary),
|
||||
m_procedureSets(processor->m_procedureSets)
|
||||
{
|
||||
m_processor->operatorSaveGraphicState();
|
||||
}
|
||||
@ -3516,6 +3555,7 @@ PDFPageContentProcessor::PDFPageContentProcessorStateGuard::~PDFPageContentProce
|
||||
m_processor->m_propertiesDictionary = m_propertiesDictionary;
|
||||
m_processor->m_shadingDictionary = m_shadingDictionary;
|
||||
m_processor->m_patternDictionary = m_patternDictionary;
|
||||
m_processor->m_procedureSets = m_procedureSets;
|
||||
|
||||
m_processor->operatorRestoreGraphicState();
|
||||
}
|
||||
|
@ -197,6 +197,18 @@ public:
|
||||
Invalid ///< Invalid operator, use for error reporting
|
||||
};
|
||||
|
||||
/// Procedure set flags. Each enumerator except EmptyProcSet occupies
/// a single bit, so the values can be combined into a flag set.
enum ProcedureSet
{
    EmptyProcSet = 0,       ///< No flag is set
    NoProcSet    = 1 << 0,  ///< Marker used when no procedure set is specified
    PDF          = 1 << 1,  ///< Set for the "PDF" procedure set name
    Text         = 1 << 2,  ///< Set for the "Text" procedure set name
    ImageB       = 1 << 3,  ///< Set for the "ImageB" procedure set name
    ImageC       = 1 << 4,  ///< Set for the "ImageC" procedure set name
    ImageI       = 1 << 5   ///< Set for the "ImageI" procedure set name
};
|
||||
Q_DECLARE_FLAGS(ProcedureSets, ProcedureSet)
|
||||
|
||||
/// Process the contents of the page
|
||||
QList<PDFRenderError> processContents();
|
||||
|
||||
@ -547,6 +559,10 @@ protected:
|
||||
/// Returns page bounding rectangle in device space
|
||||
const QRectF& getPageBoundingRectDeviceSpace() const { return m_pageBoundingRectDeviceSpace; }
|
||||
|
||||
/// Returns current procedure sets. Procedure sets are deprecated in PDF 2.0 and are here
|
||||
/// only for compatibility purposes. See chapter 14.2 in PDF 2.0 specification.
|
||||
ProcedureSets getProcedureSets() const { return m_procedureSets; }
|
||||
|
||||
private:
|
||||
/// Initializes the resources dictionaries
|
||||
void initDictionaries(const PDFObject& resourcesObject);
|
||||
@ -621,6 +637,7 @@ private:
|
||||
const PDFDictionary* m_propertiesDictionary;
|
||||
const PDFDictionary* m_shadingDictionary;
|
||||
const PDFDictionary* m_patternDictionary;
|
||||
ProcedureSets m_procedureSets;
|
||||
};
|
||||
|
||||
class PDFPageContentProcessorGraphicStateSaveRestoreGuard
|
||||
@ -878,6 +895,7 @@ private:
|
||||
const PDFDictionary* m_propertiesDictionary;
|
||||
const PDFDictionary* m_shadingDictionary;
|
||||
const PDFDictionary* m_patternDictionary;
|
||||
ProcedureSets m_procedureSets;
|
||||
|
||||
// Default color spaces
|
||||
PDFColorSpacePointer m_deviceGrayColorSpace;
|
||||
|
Reference in New Issue
Block a user