From 9c260bb75a7d14274183d8c6f1c0b26e2d4167fa Mon Sep 17 00:00:00 2001 From: Jakub Melka Date: Fri, 21 Aug 2020 16:37:57 +0200 Subject: [PATCH] ToUnicode map also in Type3 fonts --- PdfForQtLib/sources/pdffont.cpp | 30 ++++++++++--- PdfForQtLib/sources/pdffont.h | 75 ++++++++++++++++++--------------- 2 files changed, 65 insertions(+), 40 deletions(-) diff --git a/PdfForQtLib/sources/pdffont.cpp b/PdfForQtLib/sources/pdffont.cpp index 979c6e6..8653da9 100644 --- a/PdfForQtLib/sources/pdffont.cpp +++ b/PdfForQtLib/sources/pdffont.cpp @@ -514,7 +514,8 @@ void PDFRealizedFontImpl::fillTextSequence(const QByteArray& byteArray, TextSequ reporter->reportRenderError(RenderErrorType::Warning, PDFTranslationContext::tr("Glyph for simple font character code '%1' not found.").arg(static_cast(byteArray[i]))); if (glyphWidth > 0) { - textSequence.items.emplace_back(nullptr, QChar(), glyphWidth * m_pixelSize * FONT_WIDTH_MULTIPLIER); + const QPainterPath* nullpath = nullptr; + textSequence.items.emplace_back(nullpath, QChar(), glyphWidth * m_pixelSize * FONT_WIDTH_MULTIPLIER); } } } @@ -555,7 +556,8 @@ void PDFRealizedFontImpl::fillTextSequence(const QByteArray& byteArray, TextSequ { // We do not multiply advance with font size and FONT_WIDTH_MULTIPLIER, because in the code, // "advance" is treated as in font space. - textSequence.items.emplace_back(nullptr, QChar(), -glyphWidth); + const QPainterPath* nullpath = nullptr; + textSequence.items.emplace_back(nullpath, QChar(), -glyphWidth); } } } @@ -1406,8 +1408,21 @@ PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* d } } + PDFFontCMap toUnicodeCMap; + const PDFObject& toUnicode = document->getObject(fontDictionary->get("ToUnicode")); + if (toUnicode.isName()) + { + toUnicodeCMap = PDFFontCMap::createFromName(toUnicode.getString()); + } + else if (toUnicode.isStream()) + { + const PDFStream* stream = toUnicode.getStream(); + QByteArray decodedStream = document->getDecodedStream(stream); + toUnicodeCMap = PDFFontCMap::createFromData(decodedStream); + } + std::vector widths = fontLoader.readNumberArrayFromDictionary(fontDictionary, "Widths"); - return PDFFontPointer(new PDFType3Font(qMove(fontDescriptor), firstChar, lastChar, fontMatrix, qMove(characterContentStreams), qMove(widths), document->getObject(fontDictionary->get("Resources")))); + return PDFFontPointer(new PDFType3Font(qMove(fontDescriptor), firstChar, lastChar, fontMatrix, qMove(characterContentStreams), qMove(widths), document->getObject(fontDictionary->get("Resources")), qMove(toUnicodeCMap))); } default: @@ -2146,14 +2161,16 @@ PDFType3Font::PDFType3Font(FontDescriptor fontDescriptor, QMatrix fontMatrix, std::map&& characterContentStreams, std::vector&& widths, - const PDFObject& resources) : + const PDFObject& resources, + PDFFontCMap toUnicode) : PDFFont(qMove(fontDescriptor)), m_firstCharacterIndex(firstCharacterIndex), m_lastCharacterIndex(lastCharacterIndex), m_fontMatrix(fontMatrix), m_characterContentStreams(qMove(characterContentStreams)), m_widths(qMove(widths)), - m_resources(resources) + m_resources(resources), + m_toUnicode(qMove(toUnicode)) { } @@ -2203,11 +2220,12 @@ void PDFRealizedType3FontImpl::fillTextSequence(const QByteArray& byteArray, Tex { int index = static_cast(byteArray[i]); const QByteArray* contentStream = parentFont->getContentStream(index); + QChar character = parentFont->getUnicode(index); const double width = parentFont->getWidth(index); if (contentStream) { - textSequence.items.emplace_back(contentStream, width); + textSequence.items.emplace_back(contentStream, character, width); } else { diff --git a/PdfForQtLib/sources/pdffont.h b/PdfForQtLib/sources/pdffont.h index adf2c92..6f4787d 100644 --- a/PdfForQtLib/sources/pdffont.h +++ b/PdfForQtLib/sources/pdffont.h @@ -61,7 +61,7 @@ struct TextSequenceItem inline explicit TextSequenceItem() = default; inline explicit TextSequenceItem(const QPainterPath* glyph, QChar character, PDFReal advance) : glyph(glyph), character(character), advance(advance) { } inline explicit TextSequenceItem(PDFReal advance) : character(), advance(advance) { } - inline explicit TextSequenceItem(const QByteArray* characterContentStream, double advance) : characterContentStream(characterContentStream), advance(advance) { } + inline explicit TextSequenceItem(const QByteArray* characterContentStream, QChar character, PDFReal advance) : characterContentStream(characterContentStream), character(character), advance(advance) { } inline bool isContentStream() const { return characterContentStream; } inline bool isCharacter() const { return glyph; } @@ -354,39 +354,6 @@ public: virtual FontType getFontType() const override; }; -class PDFType3Font : public PDFFont -{ -public: - explicit PDFType3Font(FontDescriptor fontDescriptor, - int firstCharacterIndex, - int lastCharacterIndex, - QMatrix fontMatrix, - std::map&& characterContentStreams, - std::vector&& widths, - const PDFObject& resources); - - virtual FontType getFontType() const override; - virtual void dumpFontToTreeItem(QTreeWidgetItem*item) const override; - - /// Returns width of the character. If character doesn't exist, then zero is returned. - double getWidth(int characterIndex) const; - - /// Return content stream for the character. If character doesn't exist, then nullptr - /// is returned. - const QByteArray* getContentStream(int characterIndex) const; - - const QMatrix& getFontMatrix() const { return m_fontMatrix; } - const PDFObject& getResources() const { return m_resources; } - -private: - int m_firstCharacterIndex; - int m_lastCharacterIndex; - QMatrix m_fontMatrix; - std::map m_characterContentStreams; - std::vector m_widths; - PDFObject m_resources; -}; - /// Font cache which caches both fonts, and realized fonts. Cache has individual limit /// for fonts, and realized fonts. class PDFFontCache @@ -542,6 +509,46 @@ private: bool m_vertical = false; }; +class PDFType3Font : public PDFFont +{ +public: + explicit PDFType3Font(FontDescriptor fontDescriptor, + int firstCharacterIndex, + int lastCharacterIndex, + QMatrix fontMatrix, + std::map&& characterContentStreams, + std::vector&& widths, + const PDFObject& resources, + PDFFontCMap toUnicode); + + virtual FontType getFontType() const override; + virtual void dumpFontToTreeItem(QTreeWidgetItem*item) const override; + + /// Returns width of the character. If character doesn't exist, then zero is returned. + double getWidth(int characterIndex) const; + + /// Return content stream for the character. If character doesn't exist, then nullptr + /// is returned. + const QByteArray* getContentStream(int characterIndex) const; + + const QMatrix& getFontMatrix() const { return m_fontMatrix; } + const PDFObject& getResources() const { return m_resources; } + const PDFFontCMap& getToUnicode() const { return m_toUnicode; } + + /// Returns unicode character for given character index. If unicode mapping is not + /// present, empty (null) character is returned. + QChar getUnicode(int characterIndex) const { return m_toUnicode.getToUnicode(characterIndex); } + +private: + int m_firstCharacterIndex; + int m_lastCharacterIndex; + QMatrix m_fontMatrix; + std::map m_characterContentStreams; + std::vector m_widths; + PDFObject m_resources; + PDFFontCMap m_toUnicode; +}; + /// Composite font (CID-keyed font) class PDFType0Font : public PDFFont {