diff --git a/PdfForQtLib/sources/pdffont.cpp b/PdfForQtLib/sources/pdffont.cpp index 5d35c3c..23c9b0f 100644 --- a/PdfForQtLib/sources/pdffont.cpp +++ b/PdfForQtLib/sources/pdffont.cpp @@ -461,6 +461,7 @@ void PDFRealizedFontImpl::fillTextSequence(const QByteArray& byteArray, TextSequ const PDFType0Font* font = static_cast(m_parentFont.get()); const PDFFontCMap* cmap = font->getCMap(); + const PDFFontCMap* toUnicode = font->getToUnicode(); const PDFCIDtoGIDMapper* CIDtoGIDmapper = font->getCIDtoGIDMapper(); std::vector cids = cmap->interpret(byteArray); @@ -472,9 +473,9 @@ void PDFRealizedFontImpl::fillTextSequence(const QByteArray& byteArray, TextSequ if (glyphIndex) { - // TODO: Dodelat mapovani na unicode + QChar character = toUnicode->getToUnicode(cid); const Glyph& glyph = getGlyph(glyphIndex); - textSequence.items.emplace_back(&glyph.glyph, QChar(), glyph.advance); + textSequence.items.emplace_back(&glyph.glyph, character, glyph.advance); } else { @@ -717,7 +718,7 @@ PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* d const PDFDictionary* fontDictionary = dereferencedFontDictionary.getDictionary(); PDFDocumentDataLoaderDecorator fontLoader(document); - // TODO: Fonts - implement all types of the font + // TODO: Fonts - Implement Type 3 font // First, determine the font subtype constexpr const std::array, 3> fontTypes = { std::pair{ "Type0", FontType::Type0 }, @@ -1111,7 +1112,20 @@ PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* d } } - return PDFFontPointer(new PDFType0Font(qMove(fontDescriptor), qMove(cmap), qMove(cidToGidMapper), defaultWidth, qMove(advances))); + PDFFontCMap toUnicodeCMap; + const PDFObject& toUnicode = document->getObject(fontDictionary->get("ToUnicode")); + if (toUnicode.isName()) + { + toUnicodeCMap = PDFFontCMap::createFromName(toUnicode.getString()); + } + else if (toUnicode.isStream()) + { + const PDFStream* stream = toUnicode.getStream(); + QByteArray decodedStream = 
document->getDecodedStream(stream); + toUnicodeCMap = PDFFontCMap::createFromData(decodedStream); + } + + return PDFFontPointer(new PDFType0Font(qMove(fontDescriptor), qMove(cmap), qMove(toUnicodeCMap), qMove(cidToGidMapper), defaultWidth, qMove(advances))); } default: @@ -1136,9 +1150,6 @@ PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* d } } - // Read To Unicode - // TODO: Read To Unicode - return PDFFontPointer(); } @@ -1360,6 +1371,28 @@ PDFFontCMap PDFFontCMap::createFromData(const QByteArray& data) return 0; }; + // Decodes a bfchar/bfrange destination token: a two-byte string is read as a + // big-endian 16-bit value; any other token (or string length) yields 0. + auto fetchUnicode = [](const PDFLexicalAnalyzer::Token& currentToken) -> CID + { + if (currentToken.type == PDFLexicalAnalyzer::TokenType::String) + { + QByteArray byteArray = currentToken.data.toByteArray(); + + if (byteArray.size() == 2) + { + CID unicodeValue = 0; + for (int i = 0; i < byteArray.size(); ++i) + { + unicodeValue = (unicodeValue << 8) + static_cast<unsigned char>(byteArray[i]); + } + return unicodeValue; + } + } + + return 0; + }; + if (token.type == PDFLexicalAnalyzer::TokenType::Command) { QByteArray command = token.data.toByteArray(); @@ -1374,6 +1407,30 @@ PDFFontCMap PDFFontCMap::createFromData(const QByteArray& data) throw PDFParserException(PDFTranslationContext::tr("Can't use cmap inside cmap file.")); } } + else if (command == "beginbfrange") + { + // Each iteration consumes one <from> <to> <destination> triple; the loop ends at "endbfrange". + // NOTE(review): the destination is decoded by fetchUnicode only, so a non-string destination maps to 0 - confirm whether other operand forms must be supported. + while (true) + { + PDFLexicalAnalyzer::Token token1 = parser.fetch(); + + if (token1.type == PDFLexicalAnalyzer::TokenType::Command && + token1.data.toByteArray() == "endbfrange") + { + break; + } + + PDFLexicalAnalyzer::Token token2 = parser.fetch(); + PDFLexicalAnalyzer::Token token3 = parser.fetch(); + + std::pair from = fetchCode(token1); + std::pair to = fetchCode(token2); + CID cid = fetchUnicode(token3); + + entries.emplace_back(from.first, to.first, qMax(from.second, to.second), cid); + } + } else if (command == "begincidrange") { while (true) @@ -1413,6 +1470,26 @@ PDFFontCMap PDFFontCMap::createFromData(const QByteArray& data) std::pair code = fetchCode(token1); CID cid = fetchCID(token2); + 
entries.emplace_back(code.first, code.first, code.second, cid); + } + } + else if (command == "beginbfchar") + { + while (true) + { + PDFLexicalAnalyzer::Token token1 = parser.fetch(); + + if (token1.type == PDFLexicalAnalyzer::TokenType::Command && + token1.data.toByteArray() == "endbfchar") + { + break; + } + + PDFLexicalAnalyzer::Token token2 = parser.fetch(); + + std::pair code = fetchCode(token1); + CID cid = fetchUnicode(token2); + entries.emplace_back(code.first, code.first, code.second, cid); } } @@ -1516,6 +1593,22 @@ std::vector PDFFontCMap::interpret(const QByteArray& byteArray) const return result; } +QChar PDFFontCMap::getToUnicode(CID cid) const +{ + if (isValid()) + { + auto it = std::find_if(m_entries.cbegin(), m_entries.cend(), [cid](const Entry& entry) { return entry.from <= cid && entry.to >= cid; }); + if (it != m_entries.cend()) + { + const Entry& entry = *it; + const CID unicodeCID = cid - entry.from + entry.cid; + return QChar(unicodeCID); + } + } + + return QChar(); +} + PDFFontCMap::PDFFontCMap(Entries&& entries, bool vertical) : m_entries(qMove(entries)), m_maxKeyLength(0), diff --git a/PdfForQtLib/sources/pdffont.h b/PdfForQtLib/sources/pdffont.h index 5dfc6dc..6e6f4da 100644 --- a/PdfForQtLib/sources/pdffont.h +++ b/PdfForQtLib/sources/pdffont.h @@ -419,6 +419,9 @@ public: /// Converts byte array to array of CIDs std::vector interpret(const QByteArray& byteArray) const; + /// Converts CID to QChar, use only on ToUnicode CMaps + QChar getToUnicode(CID cid) const; + private: struct Entry @@ -468,9 +471,10 @@ private: class PDFType0Font : public PDFFont { public: - explicit inline PDFType0Font(FontDescriptor fontDescriptor, PDFFontCMap cmap, PDFCIDtoGIDMapper mapper, PDFReal defaultAdvance, std::unordered_map advances) : + explicit inline PDFType0Font(FontDescriptor fontDescriptor, PDFFontCMap cmap, PDFFontCMap toUnicode, PDFCIDtoGIDMapper mapper, PDFReal defaultAdvance, std::unordered_map advances) : PDFFont(qMove(fontDescriptor)), 
m_cmap(qMove(cmap)), + m_toUnicode(qMove(toUnicode)), m_mapper(qMove(mapper)), m_defaultAdvance(defaultAdvance), m_advances(qMove(advances)) @@ -483,6 +487,7 @@ public: virtual FontType getFontType() const override { return FontType::Type0; } const PDFFontCMap* getCMap() const { return &m_cmap; } + const PDFFontCMap* getToUnicode() const { return &m_toUnicode; } const PDFCIDtoGIDMapper* getCIDtoGIDMapper() const { return &m_mapper; } /// Returns the glyph advance, if it can be obtained, or zero, if it cannot @@ -492,6 +497,7 @@ public: private: PDFFontCMap m_cmap; + PDFFontCMap m_toUnicode; PDFCIDtoGIDMapper m_mapper; PDFReal m_defaultAdvance; std::unordered_map m_advances; diff --git a/PdfForQtLib/sources/pdfpagecontentprocessor.cpp b/PdfForQtLib/sources/pdfpagecontentprocessor.cpp index 8f2f996..959c711 100644 --- a/PdfForQtLib/sources/pdfpagecontentprocessor.cpp +++ b/PdfForQtLib/sources/pdfpagecontentprocessor.cpp @@ -1753,7 +1753,6 @@ void PDFPageContentProcessor::operatorTextSetSpacingAndShowText(PDFReal t_w, PDF void PDFPageContentProcessor::drawText(const TextSequence& textSequence) { - // TODO: Kdyz nejsme v text rezimu, tak nekreslime text if (textSequence.items.empty()) { // Do not display empty text