diff --git a/Pdf4QtLibCore/sources/pdffont.cpp b/Pdf4QtLibCore/sources/pdffont.cpp
index 959b279..01c2953 100644
--- a/Pdf4QtLibCore/sources/pdffont.cpp
+++ b/Pdf4QtLibCore/sources/pdffont.cpp
@@ -1290,6 +1290,46 @@ QByteArray PDFFont::getFontId() const
     return m_fontId;
 }
 
+PDFEncodedText PDFFont::encodeText(const QString& text) const
+{
+    PDFEncodedText result;
+    result.isValid = true;
+
+    const PDFFontCMap* cmap = getCMap();
+    const PDFFontCMap* toUnicode = getToUnicode();
+
+    if (!cmap || !toUnicode)
+    {
+        // Nothing can be encoded without both maps; report the failure through
+        // isValid as well, otherwise the result looks like a successful empty encoding
+        result.isValid = false;
+        result.errorString = PDFTranslationContext::tr("Invalid font encoding.");
+        return result;
+    }
+
+    for (const QChar& character : text)
+    {
+        // Unicode -> CID by the ToUnicode map, then CID -> bytes by the font's CMap.
+        // CID() and an empty byte array both mean "no mapping found".
+        const CID cid = toUnicode->getFromUnicode(character);
+        const QByteArray encoded = (cid != CID()) ? cmap->encode(cid) : QByteArray();
+
+        if (encoded.isEmpty())
+        {
+            // Keep the unconverted character in the error string
+            result.isValid = false;
+            result.errorString += character;
+            continue;
+        }
+
+        result.encodedText.append(encoded);
+        // Successfully converted characters are shown as a placeholder
+        result.errorString += "_";
+    }
+
+    return result;
+}
+
 PDFFontPointer PDFFont::createFont(const PDFObject& object, QByteArray fontId, const PDFDocument* document)
 {
     const PDFObject& dereferencedFontDictionary = document->getObject(object);
@@ -1929,6 +1969,43 @@ PDFInteger PDFSimpleFont::getGlyphAdvance(size_t index) const
     return 0;
 }
 
+PDFEncodedText PDFSimpleFont::encodeText(const QString& text) const
+{
+    PDFEncodedText result;
+    result.isValid = true;
+
+    const encoding::EncodingTable* encodingTable = getEncoding();
+    const size_t tableSize = encodingTable->size();
+
+    for (const QChar& character : text)
+    {
+        const ushort unicode = character.unicode();
+
+        // The character code is the index of the first table entry equal to
+        // the character; reaching tableSize means the character was not found
+        size_t code = 0;
+        while (code < tableSize && !(unicode == (*encodingTable)[code]))
+        {
+            ++code;
+        }
+
+        if (code == tableSize)
+        {
+            // Keep the unconverted character in the error string
+            result.isValid = false;
+            result.errorString += character;
+            continue;
+        }
+
+        // The code is appended as a single byte
+        result.encodedText.append(static_cast<char>(static_cast<unsigned char>(code)));
+        // Successfully converted characters are shown as a placeholder
+        result.errorString += "_";
+    }
+
+    return result;
+}
+
 void PDFSimpleFont::dumpFontToTreeItem(ITreeFactory* treeFactory) const
 {
     BaseClass::dumpFontToTreeItem(treeFactory);
@@ -2496,6 +2573,41 @@ std::vector<CID> PDFFontCMap::interpret(const QByteArray& byteArray) const
     return result;
 }
 
+QByteArray PDFFontCMap::encode(CID cid) const
+{
+    QByteArray byteArray;
+
+    for (const auto& entry : m_entries)
+    {
+        // The entry maps the codes <from, to> onto consecutive CIDs starting
+        // at entry.cid, so the CIDs it covers are <cid, cid + (to - from)>
+        // (the same range getFromUnicode() tests).
+        const unsigned int minPossibleValue = entry.cid;
+        const unsigned int maxPossibleValue = (entry.to - entry.from) + entry.cid;
+
+        if (cid < minPossibleValue || cid > maxPossibleValue)
+        {
+            continue;
+        }
+
+        // Calculate the original value from cid
+        const unsigned int value = cid - entry.cid + entry.from;
+
+        byteArray.reserve(entry.byteCount);
+
+        // Construct byte array for this value based on the entry's byteCount,
+        // most significant byte first
+        for (int i = entry.byteCount - 1; i >= 0; --i)
+        {
+            byteArray.append(static_cast<char>((value >> (8 * i)) & 0xFF));
+        }
+
+        break;
+    }
+
+    return byteArray;
+}
+
 QChar PDFFontCMap::getToUnicode(CID cid) const
 {
     if (isValid())
@@ -2512,6 +2624,33 @@ QChar PDFFontCMap::getToUnicode(CID cid) const
     return QChar();
 }
 
+CID PDFFontCMap::getFromUnicode(QChar character) const
+{
+    if (character.isNull())
+    {
+        return CID();
+    }
+
+    const CID unicodeCID = character.unicode();
+
+    for (const Entry& entry : m_entries)
+    {
+        // In a ToUnicode CMap entry.cid holds the Unicode value assigned to
+        // the code entry.from; the entry covers (to - from + 1) consecutive values
+        const CID minUnicodeCID = entry.cid;
+        const CID maxUnicodeCID = (entry.to - entry.from) + entry.cid;
+
+        if (unicodeCID >= minUnicodeCID && unicodeCID <= maxUnicodeCID)
+        {
+            // Invert the mapping: shift the Unicode value back into <from, to>
+            return unicodeCID + entry.from - entry.cid;
+        }
+    }
+
+    // No entry covers the character
+    return CID();
+}
+
 PDFFontCMap::PDFFontCMap(Entries&& entries, bool vertical) :
     m_entries(qMove(entries)),
     m_maxKeyLength(0),
diff --git a/Pdf4QtLibCore/sources/pdffont.h b/Pdf4QtLibCore/sources/pdffont.h
index e54a6ce..716ff81 100644
--- a/Pdf4QtLibCore/sources/pdffont.h
+++ b/Pdf4QtLibCore/sources/pdffont.h
@@ -291,6 +291,21 @@ private:
     IRealizedFontImpl* m_impl;
 };
 
+/// Result of encoding a Unicode text into the byte codes of a font
+struct PDFEncodedText
+{
+    /// Encoded bytes of all characters which could be converted
+    QByteArray encodedText;
+
+    /// Conversion report with one item per input character: "_" for
+    /// a converted character, the character itself when it could not be
+    /// converted (or an error message if the font encoding is unusable)
+    QString errorString;
+
+    /// True, if every character was converted
+    bool isValid = false;
+};
+
 /// Base class representing font in the PDF file
 class PDF4QTLIBCORESHARED_EXPORT PDFFont
 {
@@ -335,6 +350,11 @@ public:
     /// Returns font id from the font dictionary
     QByteArray getFontId() const;
 
+    /// Encodes text into font encoding
+    /// \param text Unicode text to be encoded
+    /// \returns Encoded bytes together with the conversion report, see PDFEncodedText
+    virtual PDFEncodedText encodeText(const QString& text) const;
+
 protected:
     CIDSystemInfo m_CIDSystemInfo;
     FontDescriptor m_fontDescriptor;
@@ -368,6 +388,9 @@ public:
     /// Returns the glyph advance (or zero, if glyph advance is invalid)
     PDFInteger getGlyphAdvance(size_t index) const;
 
+    /// Encodes text using the encoding table of the simple font
+    virtual PDFEncodedText encodeText(const QString& text) const override;
+
     virtual void dumpFontToTreeItem(ITreeFactory* treeFactory) const override;
 
 protected:
@@ -556,9 +579,17 @@ public:
     /// Converts byte array to array of CIDs
     std::vector<CID> interpret(const QByteArray& byteArray) const;
 
+    /// Encodes CID to the byte code of the first matching entry
+    /// \returns Encoded bytes, or empty array, if no entry covers the CID
+    QByteArray encode(CID cid) const;
+
     /// Converts CID to QChar, use only on ToUnicode CMaps
     QChar getToUnicode(CID cid) const;
 
+    /// Converts QChar to CID, use only on ToUnicode CMaps
+    /// \returns Found CID, or default constructed CID, if character is not mapped
+    CID getFromUnicode(QChar character) const;
+
 private:
 
     struct Entry
diff --git a/Pdf4QtLibCore/sources/pdfpagecontenteditorprocessor.cpp b/Pdf4QtLibCore/sources/pdfpagecontenteditorprocessor.cpp
index cf67110..4a37b53 100644
--- a/Pdf4QtLibCore/sources/pdfpagecontenteditorprocessor.cpp
+++ b/Pdf4QtLibCore/sources/pdfpagecontenteditorprocessor.cpp
@@ -16,6 +16,7 @@
 //    along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
 
#include "pdfpagecontenteditorprocessor.h" +#include "pdfdocumentbuilder.h" #include #include @@ -890,6 +891,7 @@ void PDFPageContentEditorContentStreamBuilder::writeText(QTextStream& stream, co stream << "q BT" << Qt::endl; QXmlStreamReader reader(text); + m_textFont = m_currentState.getTextFont(); auto isCommand = [&reader](const char* tag) -> bool { @@ -1009,6 +1011,7 @@ void PDFPageContentEditorContentStreamBuilder::writeText(QTextStream& stream, co } else { + v1 = selectFont(v1); stream << "/" << v1 << " " << v2 << " Tf" << Qt::endl; } } @@ -1087,12 +1090,77 @@ void PDFPageContentEditorContentStreamBuilder::writeText(QTextStream& stream, co if (reader.isCharacters()) { QString characters = reader.text().toString(); + + if (m_textFont) + { + PDFEncodedText encodedText = m_textFont->encodeText(characters); + + if (!encodedText.encodedText.isEmpty()) + { + stream << "<" << encodedText.encodedText.toHex() << "> Tj" << Qt::endl; + } + + if (!encodedText.isValid) + { + addError(PDFTranslationContext::tr("Error during converting text to font encoding. 
Some characters were not converted: '%1'.").arg(encodedText.errorString)); + } + } + else + { + addError(PDFTranslationContext::tr("Text font not defined!")); + } } } stream << "ET Q" << Qt::endl; } +QByteArray PDFPageContentEditorContentStreamBuilder::selectFont(const QByteArray& font) +{ + m_textFont = nullptr; + + PDFObject fontObject = m_fontDictionary.get(font); + if (!fontObject.isNull()) + { + try + { + m_textFont = PDFFont::createFont(fontObject, font, m_document); + } + catch (const PDFException&) + { + addError(PDFTranslationContext::tr("Font '%1' is invalid.").arg(QString::fromLatin1(font))); + } + } + + if (!m_textFont) + { + QByteArray defaultFontKey = "PDF4QT_DefFnt"; + if (!m_fontDictionary.hasKey(defaultFontKey)) + { + PDFObjectFactory defaultFontFactory; + + defaultFontFactory.beginDictionary(); + defaultFontFactory.beginDictionaryItem("Type"); + defaultFontFactory << WrapName("Font"); + defaultFontFactory.endDictionaryItem(); + defaultFontFactory.beginDictionaryItem("Subtype"); + defaultFontFactory << WrapName("Type1"); + defaultFontFactory.endDictionaryItem(); + defaultFontFactory.beginDictionaryItem("BaseFont"); + defaultFontFactory << WrapName("Helvetica"); + defaultFontFactory.endDictionaryItem(); + defaultFontFactory.beginDictionaryItem("Encoding"); + defaultFontFactory << WrapName("WinAnsiEncoding"); + defaultFontFactory.endDictionaryItem(); + defaultFontFactory.endDictionary(); + + m_fontDictionary.setEntry(PDFInplaceOrMemoryString(defaultFontKey), defaultFontFactory.takeObject()); + } + + m_textFont = PDFFont::createFont(fontObject, font, m_document); + } +} + void PDFPageContentEditorContentStreamBuilder::addError(const QString& error) { diff --git a/Pdf4QtLibCore/sources/pdfpagecontenteditorprocessor.h b/Pdf4QtLibCore/sources/pdfpagecontenteditorprocessor.h index 8f361eb..f904241 100644 --- a/Pdf4QtLibCore/sources/pdfpagecontenteditorprocessor.h +++ b/Pdf4QtLibCore/sources/pdfpagecontenteditorprocessor.h @@ -225,12 +225,15 @@ private: 
bool isFilling); void writeText(QTextStream& stream, const QString& text); + QByteArray selectFont(const QByteArray& font); void addError(const QString& error); + PDFDocument* m_document = nullptr; PDFDictionary m_fontDictionary; PDFDictionary m_xobjectDictionary; QByteArray m_outputContent; PDFPageContentProcessorState m_currentState; + PDFFontPointer m_textFont; }; class PDF4QTLIBCORESHARED_EXPORT PDFPageContentEditorProcessor : public PDFPageContentProcessor