CID fonts

This commit is contained in:
Jakub Melka
2019-04-27 14:14:07 +02:00
parent c43bd073fe
commit 56a189d7af
203 changed files with 412265 additions and 85 deletions

View File

@@ -60,6 +60,7 @@ PDFWidget::~PDFWidget()
/// Sets the document to be displayed by this widget.
/// \param document Document passed on to m_proxy
void PDFWidget::setDocument(const PDFDocument* document)
{
// Forward the document to the proxy object
m_proxy->setDocument(document);
// Discard the page rendering errors collected so far
m_pageRenderingErrors.clear();
}
int PDFWidget::getPageRenderingErrorCount() const

View File

@@ -29,6 +29,7 @@
#include <QMutex>
#include <QPainterPath>
#include <QDataStream>
#ifdef Q_OS_WIN
#include "Windows.h"
@@ -314,7 +315,7 @@ void PDFRealizedFontImpl::fillTextSequence(const QByteArray& byteArray, TextSequ
textSequence.items.reserve(textSequence.items.size() + byteArray.size());
for (int i = 0, count = byteArray.size(); i < count; ++i)
{
unsigned int glyphIndex = (*glyphIndices)[static_cast<uint8_t>(byteArray[i])];
GID glyphIndex = (*glyphIndices)[static_cast<uint8_t>(byteArray[i])];
if (!glyphIndex)
{
@@ -336,6 +337,33 @@ void PDFRealizedFontImpl::fillTextSequence(const QByteArray& byteArray, TextSequ
break;
}
case FontType::Type0:
{
Q_ASSERT(dynamic_cast<PDFType0Font*>(m_parentFont.get()));
const PDFType0Font* font = static_cast<PDFType0Font*>(m_parentFont.get());
const PDFFontCMap* cmap = font->getCMap();
const PDFCIDtoGIDMapper* CIDtoGIDmapper = font->getCIDtoGIDMapper();
std::vector<CID> cids = cmap->interpret(byteArray);
textSequence.items.reserve(textSequence.items.size() + cids.size());
for (CID cid : cids)
{
GID glyphIndex = CIDtoGIDmapper->map(cid);
if (!glyphIndex)
{
throw PDFParserException(PDFTranslationContext::tr("Glyph for composite font character not found."));
}
// TODO: Implement the mapping to Unicode
const Glyph& glyph = getGlyph(glyphIndex);
textSequence.items.emplace_back(&glyph.glyph, QChar(), glyph.advance);
}
break;
}
default:
{
// Unhandled font type
@@ -373,7 +401,7 @@ int PDFRealizedFontImpl::outlineCubicTo(const FT_Vector* control1, const FT_Vect
return 0;
}
const PDFRealizedFontImpl::Glyph&PDFRealizedFontImpl::getGlyph(unsigned int glyphIndex)
const PDFRealizedFontImpl::Glyph& PDFRealizedFontImpl::getGlyph(unsigned int glyphIndex)
{
QMutexLocker lock(&m_mutex);
@@ -488,58 +516,10 @@ PDFRealizedFontPointer PDFRealizedFont::createRealizedFont(PDFFontPointer font,
return result;
}
PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* document)
FontDescriptor PDFFont::readFontDescriptor(const PDFObject& fontDescriptorObject, const PDFDocument* document)
{
const PDFObject& dereferencedFontDictionary = document->getObject(object);
if (!dereferencedFontDictionary.isDictionary())
{
throw PDFParserException(PDFTranslationContext::tr("Font object must be a dictionary."));
}
const PDFDictionary* fontDictionary = dereferencedFontDictionary.getDictionary();
PDFDocumentDataLoaderDecorator fontLoader(document);
// TODO: Fonts - implement all types of the font
// First, determine the font subtype
constexpr const std::array<std::pair<const char*, FontType>, 2> fontTypes = {
std::pair<const char*, FontType>{ "Type1", FontType::Type1 },
std::pair<const char*, FontType>{ "TrueType", FontType::TrueType }
};
const FontType fontType = fontLoader.readEnumByName(fontDictionary->get("Subtype"), fontTypes.cbegin(), fontTypes.cend(), FontType::Invalid);
if (fontType == FontType::Invalid)
{
throw PDFParserException(PDFTranslationContext::tr("Invalid font type."));
}
QByteArray name = fontLoader.readNameFromDictionary(fontDictionary, "Name");
QByteArray baseFont = fontLoader.readNameFromDictionary(fontDictionary, "BaseFont");
const PDFInteger firstChar = fontLoader.readIntegerFromDictionary(fontDictionary, "FirstChar", 0);
const PDFInteger lastChar = fontLoader.readIntegerFromDictionary(fontDictionary, "LastChar", 255);
std::vector<PDFInteger> widths = fontLoader.readIntegerArrayFromDictionary(fontDictionary, "Widths");
// Read standard font
constexpr const std::array<std::pair<const char*, StandardFontType>, 14> standardFonts = {
std::pair<const char*, StandardFontType>{ "Times-Roman", StandardFontType::TimesRoman },
std::pair<const char*, StandardFontType>{ "Times-Bold", StandardFontType::TimesRomanBold },
std::pair<const char*, StandardFontType>{ "Times-Italic", StandardFontType::TimesRomanItalics },
std::pair<const char*, StandardFontType>{ "Times-BoldItalic", StandardFontType::TimesRomanBoldItalics },
std::pair<const char*, StandardFontType>{ "Helvetica", StandardFontType::Helvetica },
std::pair<const char*, StandardFontType>{ "Helvetica-Bold", StandardFontType::HelveticaBold },
std::pair<const char*, StandardFontType>{ "Helvetica-Oblique", StandardFontType::HelveticaOblique },
std::pair<const char*, StandardFontType>{ "Helvetica-BoldOblique", StandardFontType::HelveticaBoldOblique },
std::pair<const char*, StandardFontType>{ "Courier", StandardFontType::Courier },
std::pair<const char*, StandardFontType>{ "Courier-Bold", StandardFontType::CourierBold },
std::pair<const char*, StandardFontType>{ "Courier-Oblique", StandardFontType::CourierOblique },
std::pair<const char*, StandardFontType>{ "Courier-BoldOblique", StandardFontType::CourierBoldOblique },
std::pair<const char*, StandardFontType>{ "Symbol", StandardFontType::Symbol },
std::pair<const char*, StandardFontType>{ "ZapfDingbats", StandardFontType::ZapfDingbats }
};
const StandardFontType standardFont = fontLoader.readEnumByName(fontDictionary->get("BaseFont"), standardFonts.cbegin(), standardFonts.cend(), StandardFontType::Invalid);
// Read Font Descriptor
FontDescriptor fontDescriptor;
const PDFObject& fontDescriptorObject = document->getObject(fontDictionary->get("FontDescriptor"));
PDFDocumentDataLoaderDecorator fontLoader(document);
if (fontDescriptorObject.isDictionary())
{
const PDFDictionary* fontDescriptorDictionary = fontDescriptorObject.getDictionary();
@@ -590,6 +570,63 @@ PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* d
loadStream(fontDescriptor.fontFile3, "FontFile3");
}
return fontDescriptor;
}
PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* document)
{
const PDFObject& dereferencedFontDictionary = document->getObject(object);
if (!dereferencedFontDictionary.isDictionary())
{
throw PDFParserException(PDFTranslationContext::tr("Font object must be a dictionary."));
}
const PDFDictionary* fontDictionary = dereferencedFontDictionary.getDictionary();
PDFDocumentDataLoaderDecorator fontLoader(document);
// TODO: Fonts - implement all types of the font
// First, determine the font subtype
constexpr const std::array<std::pair<const char*, FontType>, 3> fontTypes = {
std::pair<const char*, FontType>{ "Type0", FontType::Type0 },
std::pair<const char*, FontType>{ "Type1", FontType::Type1 },
std::pair<const char*, FontType>{ "TrueType", FontType::TrueType }
};
const FontType fontType = fontLoader.readEnumByName(fontDictionary->get("Subtype"), fontTypes.cbegin(), fontTypes.cend(), FontType::Invalid);
if (fontType == FontType::Invalid)
{
throw PDFParserException(PDFTranslationContext::tr("Invalid font type."));
}
QByteArray name = fontLoader.readNameFromDictionary(fontDictionary, "Name");
QByteArray baseFont = fontLoader.readNameFromDictionary(fontDictionary, "BaseFont");
const PDFInteger firstChar = fontLoader.readIntegerFromDictionary(fontDictionary, "FirstChar", 0);
const PDFInteger lastChar = fontLoader.readIntegerFromDictionary(fontDictionary, "LastChar", 255);
std::vector<PDFInteger> widths = fontLoader.readIntegerArrayFromDictionary(fontDictionary, "Widths");
// Read standard font
constexpr const std::array<std::pair<const char*, StandardFontType>, 14> standardFonts = {
std::pair<const char*, StandardFontType>{ "Times-Roman", StandardFontType::TimesRoman },
std::pair<const char*, StandardFontType>{ "Times-Bold", StandardFontType::TimesRomanBold },
std::pair<const char*, StandardFontType>{ "Times-Italic", StandardFontType::TimesRomanItalics },
std::pair<const char*, StandardFontType>{ "Times-BoldItalic", StandardFontType::TimesRomanBoldItalics },
std::pair<const char*, StandardFontType>{ "Helvetica", StandardFontType::Helvetica },
std::pair<const char*, StandardFontType>{ "Helvetica-Bold", StandardFontType::HelveticaBold },
std::pair<const char*, StandardFontType>{ "Helvetica-Oblique", StandardFontType::HelveticaOblique },
std::pair<const char*, StandardFontType>{ "Helvetica-BoldOblique", StandardFontType::HelveticaBoldOblique },
std::pair<const char*, StandardFontType>{ "Courier", StandardFontType::Courier },
std::pair<const char*, StandardFontType>{ "Courier-Bold", StandardFontType::CourierBold },
std::pair<const char*, StandardFontType>{ "Courier-Oblique", StandardFontType::CourierOblique },
std::pair<const char*, StandardFontType>{ "Courier-BoldOblique", StandardFontType::CourierBoldOblique },
std::pair<const char*, StandardFontType>{ "Symbol", StandardFontType::Symbol },
std::pair<const char*, StandardFontType>{ "ZapfDingbats", StandardFontType::ZapfDingbats }
};
const StandardFontType standardFont = fontLoader.readEnumByName(fontDictionary->get("BaseFont"), standardFonts.cbegin(), standardFonts.cend(), StandardFontType::Invalid);
// Read Font Descriptor
const PDFObject& fontDescriptorObject = document->getObject(fontDictionary->get("FontDescriptor"));
FontDescriptor fontDescriptor = readFontDescriptor(fontDescriptorObject, document);
// Read Font Encoding
// The font encoding for the simple font is determined by this algorithm:
// 1) Try to use Encoding dictionary to determine base encoding
@@ -846,6 +883,66 @@ PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* d
break;
}
case FontType::Type0:
{
// This is composite font (CID keyed font)
// Load CMAP
PDFFontCMap cmap;
const PDFObject& cmapObject = document->getObject(fontDictionary->get("Encoding"));
if (cmapObject.isName())
{
cmap = PDFFontCMap::createFromName(cmapObject.getString());
}
else if (cmapObject.isStream())
{
const PDFStream* stream = cmapObject.getStream();
QByteArray decodedStream = document->getDecodedStream(stream);
cmap = PDFFontCMap::createFromData(decodedStream);
}
if (!cmap.isValid())
{
throw PDFParserException(PDFTranslationContext::tr("Invalid CMAP in CID-keyed font."));
}
const PDFObject& descendantFonts = document->getObject(fontDictionary->get("DescendantFonts"));
if (!descendantFonts.isArray())
{
throw PDFParserException(PDFTranslationContext::tr("Invalid descendant font in CID-keyed font."));
}
const PDFArray* descendantFontsArray = descendantFonts.getArray();
if (descendantFontsArray->getCount() != 1)
{
throw PDFParserException(PDFTranslationContext::tr("Invalid number (%1) of descendant fonts in CID-keyed font - exactly one is required.").arg(descendantFontsArray->getCount()));
}
const PDFObject& descendantFont = document->getObject(descendantFontsArray->getItem(0));
if (!descendantFont.isDictionary())
{
throw PDFParserException(PDFTranslationContext::tr("Invalid descendant font in CID-keyed font."));
}
const PDFDictionary* descendantFontDictionary = descendantFont.getDictionary();
const PDFObject& fontDescriptorObjectForCompositeFont = document->getObject(descendantFontDictionary->get("FontDescriptor"));
fontDescriptor = readFontDescriptor(fontDescriptorObjectForCompositeFont, document);
QByteArray cidToGidMapping;
const PDFObject& cidToGidMappingObject = document->getObject(descendantFontDictionary->get("CIDtoGIDMap"));
if (cidToGidMappingObject.isStream())
{
const PDFStream* cidToGidMappingStream = cidToGidMappingObject.getStream();
cidToGidMapping = document->getDecodedStream(cidToGidMappingStream);
}
PDFCIDtoGIDMapper cidToGidMapper(qMove(cidToGidMapping));
baseFont = fontLoader.readNameFromDictionary(descendantFontDictionary, "BaseFont");
return PDFFontPointer(new PDFType0Font(qMove(fontDescriptor), qMove(cmap), qMove(cidToGidMapper)));
}
default:
{
Q_ASSERT(false);
@@ -896,24 +993,6 @@ PDFSimpleFont::PDFSimpleFont(FontDescriptor fontDescriptor,
}
/// Realizes the given font at the requested size by delegating
/// to PDFRealizedFont::createRealizedFont.
/// \param font Font to be realized
/// \param fontSize Size of the font
PDFRealizedFontPointer PDFSimpleFont::getRealizedFont(PDFFontPointer font, PDFReal fontSize) const
{
    PDFRealizedFontPointer realizedFont = PDFRealizedFont::createRealizedFont(font, fontSize);
    return realizedFont;
}
/// Converts the encoded bytes to text, one character per input byte, by looking
/// each byte up in the encoding table of this font.
/// \param byteArray Byte array with encoded string
QString PDFSimpleFont::getTextUsingEncoding(const QByteArray& byteArray) const
{
    const int length = byteArray.size();

    // Start with 'length' null characters, then overwrite each of them
    QString decoded(length, QChar());
    int position = 0;
    while (position < length)
    {
        // The byte is used as an unsigned index into the encoding table
        const uint8_t code = static_cast<uint8_t>(byteArray[position]);
        decoded[position] = m_encoding[code];
        ++position;
    }

    return decoded;
}
PDFType1Font::PDFType1Font(FontDescriptor fontDescriptor,
QByteArray name,
QByteArray baseFont,
@@ -992,7 +1071,7 @@ PDFRealizedFontPointer PDFFontCache::getRealizedFont(const PDFFontPointer& font,
if (it == m_realizedFontCache.cend())
{
// We must create the realized font
PDFRealizedFontPointer realizedFont = font->getRealizedFont(font, size);
PDFRealizedFontPointer realizedFont = PDFRealizedFont::createRealizedFont(font, size);
if (m_realizedFontCache.size() >= m_realizedFontCacheLimit)
{
@@ -1023,4 +1102,325 @@ const QByteArray* FontDescriptor::getEmbeddedFontData() const
return nullptr;
}
/// Creates a CMap from a predefined CMap, which is looked up as the resource
/// ":/cmaps/<name>".
/// \param name Name of the predefined CMap
/// \returns CMap parsed from the content of the resource
/// \throws PDFParserException if the resource can't be opened for reading
PDFFontCMap PDFFontCMap::createFromName(const QByteArray& name)
{
    QFile file(QString(":/cmaps/%1").arg(QString::fromLatin1(name)));

    // A missing resource and an unreadable one are both reported as an error.
    // Parsing empty data instead would silently produce an empty mapping.
    if (!file.open(QFile::ReadOnly))
    {
        throw PDFParserException(PDFTranslationContext::tr("Can't load CID font mapping named '%1'.").arg(QString::fromLatin1(name)));
    }

    // The file is closed by the QFile destructor
    return createFromData(file.readAll());
}
/// Creates a CMap by scanning the CMap program text. The handled constructs are
/// the "WMode" name (writing mode), the "usecmap" command and the
/// "begincidrange"/"begincidchar" sections. All other tokens are skipped.
/// \param data Byte array containing the CMap
/// \throws PDFParserException if a code or a CID can't be read from a section,
///         or if "usecmap" is not preceded by a name
PDFFontCMap PDFFontCMap::createFromData(const QByteArray& data)
{
    using Token = PDFLexicalAnalyzer::Token;
    using TokenType = PDFLexicalAnalyzer::TokenType;
    using CodeAndLength = std::pair<unsigned int, unsigned int>;

    // Reads a string token as a big-endian number; returns the number and the byte count
    const auto readCode = [](const Token& codeToken) -> CodeAndLength
    {
        if (codeToken.type != TokenType::String)
        {
            throw PDFParserException(PDFTranslationContext::tr("Can't fetch code from CMap definition."));
        }

        const QByteArray bytes = codeToken.data.toByteArray();
        unsigned int code = 0;
        for (int i = 0; i < bytes.size(); ++i)
        {
            code = (code << 8) + static_cast<unsigned char>(bytes[i]);
        }
        return std::make_pair(code, bytes.size());
    };

    // Reads an integer token as a CID
    const auto readCID = [](const Token& cidToken) -> CID
    {
        if (cidToken.type != TokenType::Integer)
        {
            throw PDFParserException(PDFTranslationContext::tr("Can't fetch CID from CMap definition."));
        }
        return cidToken.data.value<PDFInteger>();
    };

    // Tests whether the token is the given command (used to detect the end of a section)
    const auto isCommand = [](const Token& candidate, const char* commandName) -> bool
    {
        return candidate.type == TokenType::Command && candidate.data.toByteArray() == commandName;
    };

    Entries entries;
    entries.reserve(1024); // Arbitrary number, avoids repeated reallocation while parsing

    std::vector<PDFFontCMap> additionalMappings;
    PDFLexicalAnalyzer parser(data.constBegin(), data.constEnd());
    bool vertical = false;
    Token previousToken;

    while (!parser.isAtEnd())
    {
        const Token token = parser.fetch();

        if (token.type == TokenType::Name && token.data.toByteArray() == "WMode")
        {
            // The token after the name is the writing mode; the integer 1 means vertical.
            // Note that the previous token is intentionally not updated here.
            const Token modeToken = parser.fetch();
            vertical = modeToken.type == TokenType::Integer && modeToken.data.value<PDFInteger>() == 1;
            continue;
        }

        if (token.type == TokenType::Command)
        {
            const QByteArray command = token.data.toByteArray();

            if (command == "usecmap")
            {
                // The name of the used cmap precedes the command
                if (previousToken.type != TokenType::Name)
                {
                    throw PDFParserException(PDFTranslationContext::tr("Can't use cmap inside cmap file."));
                }
                additionalMappings.emplace_back(createFromName(previousToken.data.toByteArray()));
            }
            else if (command == "begincidrange")
            {
                // Triples: <code from> <code to> <first CID>
                for (Token first = parser.fetch(); !isCommand(first, "endcidrange"); first = parser.fetch())
                {
                    const Token second = parser.fetch();
                    const Token third = parser.fetch();

                    const CodeAndLength from = readCode(first);
                    const CodeAndLength to = readCode(second);
                    const CID cid = readCID(third);
                    entries.emplace_back(from.first, to.first, qMax(from.second, to.second), cid);
                }
            }
            else if (command == "begincidchar")
            {
                // Pairs: <code> <CID>
                for (Token first = parser.fetch(); !isCommand(first, "endcidchar"); first = parser.fetch())
                {
                    const Token second = parser.fetch();

                    const CodeAndLength code = readCode(first);
                    const CID cid = readCID(second);
                    entries.emplace_back(code.first, code.first, code.second, cid);
                }
            }
        }

        previousToken = token;
    }

    std::sort(entries.begin(), entries.end());
    entries = optimize(entries);

    // Entries of the used cmaps are appended after the own (sorted and merged) entries
    for (const PDFFontCMap& map : additionalMappings)
    {
        entries.insert(entries.cend(), map.m_entries.cbegin(), map.m_entries.cend());
    }

    return PDFFontCMap(qMove(entries), vertical);
}
/// Serializes the CMap to a byte array. The layout is: maximal key length,
/// vertical flag, number of entries and then from/to/byteCount/cid of each
/// entry. The result is compressed with qCompress (level 9).
QByteArray PDFFontCMap::serialize() const
{
    QByteArray buffer;

    {
        // The stream lives in its own scope, so it is gone before the buffer is compressed
        QDataStream output(&buffer, QIODevice::WriteOnly);
        output << m_maxKeyLength << m_vertical << m_entries.size();

        for (auto it = m_entries.cbegin(); it != m_entries.cend(); ++it)
        {
            output << it->from << it->to << it->byteCount << it->cid;
        }
    }

    return qCompress(buffer, 9);
}
/// Deserializes the CMap from a byte array in the layout written by serialize().
/// \param byteArray Compressed serialized CMap
/// \returns Deserialized CMap, or an empty (invalid) CMap if the data can't be
///          uncompressed, are truncated or are otherwise unreadable
PDFFontCMap PDFFontCMap::deserialize(const QByteArray& byteArray)
{
    PDFFontCMap result;
    QByteArray decompressed = qUncompress(byteArray);
    QDataStream stream(&decompressed, QIODevice::ReadOnly);

    stream >> result.m_maxKeyLength;
    stream >> result.m_vertical;

    Entries::size_type size = 0;
    stream >> size;

    if (stream.status() != QDataStream::Ok)
    {
        // Header can't be read, do not trust anything we have read so far
        return PDFFontCMap();
    }

    // Do not trust the stored count blindly. Every entry occupies more than one
    // byte, so the number of entries can never exceed the number of bytes.
    result.m_entries.reserve(qMin(size, static_cast<Entries::size_type>(decompressed.size())));

    for (Entries::size_type i = 0; i < size; ++i)
    {
        Entry entry;
        stream >> entry.from;
        stream >> entry.to;
        stream >> entry.byteCount;
        stream >> entry.cid;

        if (stream.status() != QDataStream::Ok)
        {
            // Truncated or corrupted data - reject them instead of filling
            // the mapping with bogus entries
            return PDFFontCMap();
        }

        result.m_entries.push_back(entry);
    }

    return result;
}
/// Converts byte array to array of CIDs. Bytes are accumulated (big-endian) into
/// a code, until an entry with the same byte count contains that code. If no
/// entry matches after the maximal key length is reached, CID 0 is emitted and
/// scanning restarts. Bytes of an unfinished code at the end of the array
/// produce no CID.
/// \param byteArray Encoded string
/// \returns Array of CIDs (empty, if this mapping has no usable entries)
std::vector<CID> PDFFontCMap::interpret(const QByteArray& byteArray) const
{
    std::vector<CID> result;

    if (m_maxKeyLength == 0)
    {
        // No entry can ever match (and the reserve below would divide by zero)
        return result;
    }

    result.reserve(static_cast<unsigned int>(byteArray.size()) / m_maxKeyLength);

    unsigned int value = 0;
    unsigned int scannedBytes = 0;

    for (int i = 0, size = byteArray.size(); i < size; ++i)
    {
        value = (value << 8) + static_cast<unsigned char>(byteArray[i]);
        ++scannedBytes;

        // Find suitable mapping - the first entry with matching byte count, which contains the code
        auto it = std::find_if(m_entries.cbegin(), m_entries.cend(), [value, scannedBytes](const Entry& entry)
        {
            return entry.from <= value && entry.to >= value && entry.byteCount == scannedBytes;
        });

        if (it != m_entries.cend())
        {
            // CIDs in the range are consecutive, starting with the CID of the entry
            const Entry& entry = *it;
            const CID cid = value - entry.from + entry.cid;
            result.push_back(cid);

            value = 0;
            scannedBytes = 0;
        }
        else if (scannedBytes == m_maxKeyLength)
        {
            // This means an error occurred - fill empty CID
            result.push_back(0);

            value = 0;
            scannedBytes = 0;
        }
    }

    return result;
}
/// Constructs the CMap from the entries and computes the maximal key length
/// as the largest byte count among the entries (zero for no entries).
/// \param entries Mapping entries
/// \param vertical Vertical writing mode flag
PDFFontCMap::PDFFontCMap(Entries&& entries, bool vertical) :
    m_entries(qMove(entries)),
    m_maxKeyLength(0),
    m_vertical(vertical)
{
    for (const Entry& entry : m_entries)
    {
        m_maxKeyLength = qMax(m_maxKeyLength, entry.byteCount);
    }
}
/// Merges neighbouring entries which can be merged (see Entry::canMerge).
/// Entries are processed in the given order, so they are expected to be sorted.
/// \param entries Entries to be optimized
/// \returns Entries, where each run of mergeable entries is replaced by a single entry
PDFFontCMap::Entries PDFFontCMap::optimize(const PDFFontCMap::Entries& entries)
{
    Entries merged;
    merged.reserve(entries.size());

    for (const Entry& candidate : entries)
    {
        // The last stored entry is the one, which is still being extended
        if (!merged.empty() && merged.back().canMerge(candidate))
        {
            merged.back() = merged.back().merge(candidate);
        }
        else
        {
            merged.push_back(candidate);
        }
    }

    merged.shrink_to_fit();
    return merged;
}
/// Returns pointer to the repository instance, which is a function-local
/// static object (every call returns the same pointer).
PDFFontCMapRepository* PDFFontCMapRepository::getInstance()
{
    static PDFFontCMapRepository instance;
    PDFFontCMapRepository* const instancePointer = &instance;
    return instancePointer;
}
void PDFFontCMapRepository::saveToFile(const QString& fileName) const
{
QFile file(fileName);
if (file.open(QFile::WriteOnly | QFile::Truncate))
{
size_t size = m_cmaps.size();
{
QDataStream stream(&file);
stream << size;
for (const auto& item : m_cmaps)
{
stream << item.first;
stream << item.second;
}
}
file.close();
}
}
bool PDFFontCMapRepository::loadFromFile(const QString& fileName)
{
QFile file(fileName);
if (file.open(QFile::ReadOnly))
{
{
QDataStream stream(&file);
size_t size = 0;
stream >> size;
for (size_t i = 0; i < size; ++i)
{
QByteArray key;
QByteArray value;
stream >> key;
stream >> value;
m_cmaps[qMove(key)] = qMove(value);
}
}
file.close();
return true;
}
return false;
}
/// Constructs an empty repository (no additional initialization is performed)
PDFFontCMapRepository::PDFFontCMapRepository() = default;
} // namespace pdf

View File

@@ -31,7 +31,10 @@ namespace pdf
{
class PDFDocument;
using GlyphIndices = std::array<unsigned int, 256>;
using CID = unsigned int;
using GID = unsigned int;
using GlyphIndices = std::array<GID, 256>;
enum class TextRenderingMode
{
@@ -52,7 +55,7 @@ struct TextSequenceItem
inline explicit TextSequenceItem(const QPainterPath* glyph, QChar character, PDFReal advance) : glyph(glyph), character(character), advance(advance) { }
inline explicit TextSequenceItem(PDFReal advance) : character(), advance(advance) { }
inline bool isCharacter() const { return !character.isNull(); }
inline bool isCharacter() const { return glyph; }
inline bool isAdvance() const { return advance != 0.0; }
inline bool isNull() const { return !isCharacter() && !isAdvance(); }
@@ -114,6 +117,7 @@ constexpr bool isTextRenderingModeClipped(TextRenderingMode mode)
/// Type of the font, determined from the "Subtype" entry of the font dictionary
enum class FontType
{
// Subtype was not recognized
Invalid,
// Composite font (CID-keyed font), subtype "Type0"
Type0,
// Subtype "Type1"
Type1,
// Subtype "TrueType"
TrueType
};
@@ -239,15 +243,6 @@ public:
/// Returns the font type
virtual FontType getFontType() const = 0;
/// Realizes the font (physical materialization of the font using pixel size,
/// if font can't be realized, then exception is thrown).
/// \param fontSize Size of the font
virtual PDFRealizedFontPointer getRealizedFont(PDFFontPointer font, PDFReal fontSize) const = 0;
/// Returns text using the font encoding
/// \param byteArray Byte array with encoded string
virtual QString getTextUsingEncoding(const QByteArray& byteArray) const = 0;
/// Returns font descriptor
const FontDescriptor* getFontDescriptor() const { return &m_fontDescriptor; }
@@ -258,6 +253,12 @@ public:
protected:
FontDescriptor m_fontDescriptor;
private:
/// Tries to read font descriptor from the object
/// \param fontDescriptorObject Font descriptor dictionary
/// \param document Document
static FontDescriptor readFontDescriptor(const PDFObject& fontDescriptorObject, const PDFDocument* document);
};
/// Simple font, see PDF reference 1.7, chapter 5.5. Simple fonts have encoding table,
@@ -276,9 +277,6 @@ public:
GlyphIndices glyphIndices);
virtual ~PDFSimpleFont() override = default;
virtual PDFRealizedFontPointer getRealizedFont(PDFFontPointer font, PDFReal fontSize) const override;
virtual QString getTextUsingEncoding(const QByteArray& byteArray) const override;
const encoding::EncodingTable* getEncoding() const { return &m_encoding; }
const GlyphIndices* getGlyphIndices() const { return &m_glyphIndices; }
@@ -362,6 +360,153 @@ private:
mutable std::map<std::pair<PDFFontPointer, PDFReal>, PDFRealizedFontPointer> m_realizedFontCache;
};
/// Performs mapping from CID to GID (even identity mapping, if byte array is empty).
/// A non-empty byte array is a table with two bytes per CID (high byte first),
/// which together form the GID.
class PDFCIDtoGIDMapper
{
public:
    /// Constructs the mapper
    /// \param mapping Mapping table (empty byte array means identity mapping)
    explicit inline PDFCIDtoGIDMapper(QByteArray&& mapping) : m_mapping(qMove(mapping)) { }

    /// Maps CID to GID (glyph identifier)
    /// \param cid Character identifier
    /// \returns Glyph identifier, or zero (invalid glyph index), if the table
    ///          has no entry for the given CID
    GID map(CID cid) const
    {
        if (m_mapping.isEmpty())
        {
            // This means identity mapping
            return cid;
        }

        // Compare against the number of complete entries, instead of computing
        // 2 * cid + 1, which can wrap around for very large CIDs
        const CID entryCount = CID(m_mapping.size()) / 2;
        if (cid < entryCount)
        {
            // Bytes must be read as unsigned values. Plain char can be signed, and
            // then bytes >= 0x80 would be sign-extended and corrupt the result.
            const int offset = static_cast<int>(cid) * 2;
            const GID highByte = static_cast<unsigned char>(m_mapping[offset]);
            const GID lowByte = static_cast<unsigned char>(m_mapping[offset + 1]);
            return (highByte << 8) + lowByte;
        }

        // This should occur only in case of bad (damaged) PDF file - because in this case,
        // encoding is missing. Return invalid glyph index.
        return 0;
    }

private:
    QByteArray m_mapping;
};
/// Represents a font CMAP (mapping of CIDs). The mapping is stored as a list
/// of code ranges, where each range maps to consecutive CIDs.
class PDFFORQTLIBSHARED_EXPORT PDFFontCMap
{
public:
    /// Constructs empty (invalid) mapping. The constructor is intentionally not
    /// constexpr - the class has a std::vector member, so declaring the defaulted
    /// constructor constexpr is ill-formed in C++17.
    explicit PDFFontCMap() = default;

    /// Returns true, if mapping is valid (it has at least one entry)
    bool isValid() const { return !m_entries.empty(); }

    /// Creates mapping from name (name must be one of predefined names)
    static PDFFontCMap createFromName(const QByteArray& name);

    /// Creates mapping from data (data must be a byte array containing the CMap)
    static PDFFontCMap createFromData(const QByteArray& data);

    /// Serializes the CMap to the byte array
    QByteArray serialize() const;

    /// Deserializes the CMap from the byte array
    static PDFFontCMap deserialize(const QByteArray& byteArray);

    /// Converts byte array to array of CIDs
    std::vector<CID> interpret(const QByteArray& byteArray) const;

private:
    /// Single range of the mapping: codes from 'from' to 'to' (inclusive), which
    /// consist of 'byteCount' bytes, are mapped to consecutive CIDs starting with 'cid'.
    struct Entry
    {
        constexpr explicit inline Entry() = default;
        constexpr explicit inline Entry(unsigned int from, unsigned int to, unsigned int byteCount, CID cid) : from(from), to(to), byteCount(byteCount), cid(cid) { }

        unsigned int from = 0;
        unsigned int to = 0;
        unsigned int byteCount = 0;
        CID cid = 0;

        /// Can merge from other CID entry? The other entry must have the same byte
        /// count, must start right after this range, and its first CID must be
        /// the CID following the last CID of this range.
        bool canMerge(const Entry& other) const
        {
            const bool sameBytes = byteCount == other.byteCount;
            const bool compatibleRange = (to + 1) == other.from;
            const bool compatibleCID = (cid + to + 1) - from == other.cid;
            return sameBytes && compatibleRange && compatibleCID;
        }

        /// Returns this entry extended to the end of the other entry
        /// (call only if canMerge returns true for the other entry)
        inline constexpr Entry merge(const Entry& other) const
        {
            return Entry(from, other.to, byteCount, cid);
        }

        /// Orders the entries by byte count, then by start of the range
        inline constexpr bool operator<(const Entry& other) const
        {
            return std::tie(byteCount, from) < std::tie(other.byteCount, other.from);
        }
    };

    using Entries = std::vector<Entry>;

    /// Constructs mapping from the entries
    /// \param entries Entries of the mapping
    /// \param vertical Vertical writing mode flag
    explicit PDFFontCMap(Entries&& entries, bool vertical);

    /// Optimizes the entries - merges entries, which can be merged. This function
    /// requires, that entries are sorted.
    static Entries optimize(const Entries& entries);

    Entries m_entries;
    unsigned int m_maxKeyLength = 0;
    bool m_vertical = false;
};
/// Composite font (CID-keyed font)
class PDFType0Font : public PDFFont
{
public:
explicit inline PDFType0Font(FontDescriptor fontDescriptor, PDFFontCMap cmap, PDFCIDtoGIDMapper mapper) :
PDFFont(qMove(fontDescriptor)),
m_cmap(qMove(cmap)),
m_mapper(qMove(mapper))
{
}
virtual ~PDFType0Font() = default;
virtual FontType getFontType() const override { return FontType::Type0; }
const PDFFontCMap* getCMap() const { return &m_cmap; }
const PDFCIDtoGIDMapper* getCIDtoGIDMapper() const { return &m_mapper; }
private:
PDFFontCMap m_cmap;
PDFCIDtoGIDMapper m_mapper;
};
/// Repository with predefined CMaps. Each CMap is stored as a byte array
/// under a byte array key. The instance is obtained by getInstance().
class PDFFORQTLIBSHARED_EXPORT PDFFontCMapRepository
{
public:
    /// Returns instance of CMAP repository
    static PDFFontCMapRepository* getInstance();

    /// Adds CMAP to the repository (existing CMAP with the same key is replaced)
    /// \param key Key, under which is the CMAP stored
    /// \param value Data of the CMAP
    void add(const QByteArray& key, QByteArray value)
    {
        m_cmaps[key] = qMove(value);
    }

    /// Clears the repository
    void clear()
    {
        m_cmaps.clear();
    }

    /// Saves the repository content to the file
    /// \param fileName File name
    void saveToFile(const QString& fileName) const;

    /// Loads the repository content from the file
    /// \param fileName File name
    /// \returns true, if the file has been loaded
    bool loadFromFile(const QString& fileName);

private:
    /// Instances are created only by the class itself
    explicit PDFFontCMapRepository();

    /// Storage for predefined cmaps
    std::map<QByteArray, QByteArray> m_cmaps;
};
} // namespace pdf
#endif // PDFFONT_H