mirror of https://github.com/JakubMelka/PDF4QT.git
Editor plugin: Encoding content - text encoding
This commit is contained in:
parent
eeadf328b2
commit
70b7c2464f
|
@ -1290,6 +1290,47 @@ QByteArray PDFFont::getFontId() const
|
|||
return m_fontId;
|
||||
}
|
||||
|
||||
/// Encodes text into the byte representation used by this font. Each character
/// is first translated to a CID using the ToUnicode CMap (reverse lookup) and
/// the CID is then converted to bytes using the font's CMap.
/// \param text Text to be encoded
/// \returns Encoded text. If the font has no CMap or no ToUnicode CMap, the result
///          is marked invalid and the error string contains an error message.
///          Otherwise the error string contains one item per input character:
///          "_" for a converted character, the character itself for a character
///          which could not be converted (in which case the result is invalid).
PDFEncodedText PDFFont::encodeText(const QString& text) const
{
    PDFEncodedText result;

    const PDFFontCMap* cmap = getCMap();
    const PDFFontCMap* toUnicode = getToUnicode();

    if (!cmap || !toUnicode)
    {
        // Without both maps nothing can be encoded. The result must be flagged
        // as invalid, otherwise the caller sees an empty, "valid" result and
        // the text is dropped without any error being reported.
        result.isValid = false;
        result.errorString = PDFTranslationContext::tr("Invalid font encoding.");
        return result;
    }

    result.isValid = true;

    for (const QChar& character : text)
    {
        QByteArray encoded;

        // CID() is returned by the reverse lookup when the character is not mapped
        const CID cid = toUnicode->getFromUnicode(character);
        if (cid != CID())
        {
            encoded = cmap->encode(cid);
        }

        if (encoded.isEmpty())
        {
            // Either no CID for this character, or the CID has no byte code
            result.isValid = false;
            result.errorString += character;
            continue;
        }

        result.encodedText.append(encoded);
        result.errorString += "_";
    }

    return result;
}
|
||||
|
||||
PDFFontPointer PDFFont::createFont(const PDFObject& object, QByteArray fontId, const PDFDocument* document)
|
||||
{
|
||||
const PDFObject& dereferencedFontDictionary = document->getObject(object);
|
||||
|
@ -1929,6 +1970,44 @@ PDFInteger PDFSimpleFont::getGlyphAdvance(size_t index) const
|
|||
return 0;
|
||||
}
|
||||
|
||||
/// Encodes text using the simple font's encoding table. For each character,
/// the table is searched from the start for an equal entry and the index of
/// the first match is emitted as a single byte.
/// \param text Text to be encoded
/// \returns Encoded text. The error string receives "_" for every converted
///          character and the character itself for every character which is
///          not present in the table (the result is then marked invalid).
PDFEncodedText PDFSimpleFont::encodeText(const QString& text) const
{
    PDFEncodedText result;
    result.isValid = true;

    const encoding::EncodingTable* encodingTable = getEncoding();
    const size_t tableSize = encodingTable->size();

    for (const QChar& character : text)
    {
        const ushort unicode = character.unicode();

        // Linear search for the first table slot holding this character
        size_t position = 0;
        while (position < tableSize && !(unicode == (*encodingTable)[static_cast<unsigned char>(position)]))
        {
            ++position;
        }

        if (position == tableSize)
        {
            // Character is not representable in this encoding
            result.isValid = false;
            result.errorString += character;
            continue;
        }

        const unsigned char code = static_cast<unsigned char>(position);
        result.encodedText.append(static_cast<char>(code));
        result.errorString += "_";
    }

    return result;
}
|
||||
|
||||
void PDFSimpleFont::dumpFontToTreeItem(ITreeFactory* treeFactory) const
|
||||
{
|
||||
BaseClass::dumpFontToTreeItem(treeFactory);
|
||||
|
@ -2496,6 +2575,35 @@ std::vector<CID> PDFFontCMap::interpret(const QByteArray& byteArray) const
|
|||
return result;
|
||||
}
|
||||
|
||||
/// Encodes a CID to its byte representation using the first entry of this
/// CMap which covers the CID.
/// \param cid CID to be encoded
/// \returns Byte code of the CID (most significant byte first, the length is
///          given by the entry's byte count), or empty byte array, if no entry
///          covers the CID.
QByteArray PDFFontCMap::encode(CID cid) const
{
    for (const auto& entry : m_entries)
    {
        // The code is reconstructed as (cid - entry.cid + entry.from), so it
        // falls into [entry.from, entry.to] exactly when the CID lies in
        // [entry.cid, entry.cid + (entry.to - entry.from)]. This is the same
        // range as used by getFromUnicode().
        const CID minCID = entry.cid;
        const CID maxCID = (entry.to - entry.from) + entry.cid;

        if (cid < minCID || cid > maxCID)
        {
            continue;
        }

        // Calculate the original value from cid
        const unsigned int value = cid - entry.cid + entry.from;

        QByteArray byteArray;
        byteArray.reserve(entry.byteCount);

        // Construct byte array for this value based on the entry's byteCount
        for (int i = entry.byteCount - 1; i >= 0; --i)
        {
            byteArray.append(static_cast<char>((value >> (8 * i)) & 0xFF));
        }

        return byteArray;
    }

    return QByteArray();
}
|
||||
|
||||
QChar PDFFontCMap::getToUnicode(CID cid) const
|
||||
{
|
||||
if (isValid())
|
||||
|
@ -2512,6 +2620,29 @@ QChar PDFFontCMap::getToUnicode(CID cid) const
|
|||
return QChar();
|
||||
}
|
||||
|
||||
/// Reverse lookup in this CMap: searches the entries in order for the first one
/// whose target range [entry.cid, entry.cid + (entry.to - entry.from)] contains
/// the UTF-16 code unit of the character, and returns the corresponding source
/// value.
/// \param character Character to be searched
/// \returns Source value mapped to the character, or CID(), if the character
///          is null or it is not covered by any entry.
CID PDFFontCMap::getFromUnicode(QChar character) const
{
    if (character.isNull())
    {
        return CID();
    }

    const CID searched = character.unicode();

    for (const Entry& mapping : m_entries)
    {
        const CID first = mapping.cid;
        const CID last = (mapping.to - mapping.from) + mapping.cid;

        const bool isOutside = searched < first || searched > last;
        if (isOutside)
        {
            continue;
        }

        // Shift the position within the target range back to the source range
        const CID source = searched + mapping.from - mapping.cid;
        return source;
    }

    return CID();
}
|
||||
|
||||
PDFFontCMap::PDFFontCMap(Entries&& entries, bool vertical) :
|
||||
m_entries(qMove(entries)),
|
||||
m_maxKeyLength(0),
|
||||
|
|
|
@ -291,6 +291,13 @@ private:
|
|||
IRealizedFontImpl* m_impl;
|
||||
};
|
||||
|
||||
struct PDFEncodedText
|
||||
{
|
||||
QByteArray encodedText;
|
||||
QString errorString;
|
||||
bool isValid = false;
|
||||
};
|
||||
|
||||
/// Base class representing font in the PDF file
|
||||
class PDF4QTLIBCORESHARED_EXPORT PDFFont
|
||||
{
|
||||
|
@ -335,6 +342,9 @@ public:
|
|||
/// Returns font id from the font dictionary
|
||||
QByteArray getFontId() const;
|
||||
|
||||
/// Encodes text into font encoding
|
||||
virtual PDFEncodedText encodeText(const QString& text) const;
|
||||
|
||||
protected:
|
||||
CIDSystemInfo m_CIDSystemInfo;
|
||||
FontDescriptor m_fontDescriptor;
|
||||
|
@ -368,6 +378,8 @@ public:
|
|||
/// Returns the glyph advance (or zero, if glyph advance is invalid)
|
||||
PDFInteger getGlyphAdvance(size_t index) const;
|
||||
|
||||
virtual PDFEncodedText encodeText(const QString& text) const override;
|
||||
|
||||
virtual void dumpFontToTreeItem(ITreeFactory* treeFactory) const override;
|
||||
|
||||
protected:
|
||||
|
@ -556,9 +568,15 @@ public:
|
|||
/// Converts byte array to array of CIDs
|
||||
std::vector<CID> interpret(const QByteArray& byteArray) const;
|
||||
|
||||
/// Encodes character to byte array
|
||||
QByteArray encode(CID cid) const;
|
||||
|
||||
/// Converts CID to QChar, use only on ToUnicode CMaps
|
||||
QChar getToUnicode(CID cid) const;
|
||||
|
||||
/// Converts QChar to CID, use only on ToUnicode CMaps
|
||||
CID getFromUnicode(QChar character) const;
|
||||
|
||||
private:
|
||||
|
||||
struct Entry
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdfpagecontenteditorprocessor.h"
|
||||
#include "pdfdocumentbuilder.h"
|
||||
|
||||
#include <QStringBuilder>
|
||||
#include <QXmlStreamReader>
|
||||
|
@ -890,6 +891,7 @@ void PDFPageContentEditorContentStreamBuilder::writeText(QTextStream& stream, co
|
|||
stream << "q BT" << Qt::endl;
|
||||
|
||||
QXmlStreamReader reader(text);
|
||||
m_textFont = m_currentState.getTextFont();
|
||||
|
||||
auto isCommand = [&reader](const char* tag) -> bool
|
||||
{
|
||||
|
@ -1009,6 +1011,7 @@ void PDFPageContentEditorContentStreamBuilder::writeText(QTextStream& stream, co
|
|||
}
|
||||
else
|
||||
{
|
||||
v1 = selectFont(v1);
|
||||
stream << "/" << v1 << " " << v2 << " Tf" << Qt::endl;
|
||||
}
|
||||
}
|
||||
|
@ -1087,12 +1090,77 @@ void PDFPageContentEditorContentStreamBuilder::writeText(QTextStream& stream, co
|
|||
if (reader.isCharacters())
|
||||
{
|
||||
QString characters = reader.text().toString();
|
||||
|
||||
if (m_textFont)
|
||||
{
|
||||
PDFEncodedText encodedText = m_textFont->encodeText(characters);
|
||||
|
||||
if (!encodedText.encodedText.isEmpty())
|
||||
{
|
||||
stream << "<" << encodedText.encodedText.toHex() << "> Tj" << Qt::endl;
|
||||
}
|
||||
|
||||
if (!encodedText.isValid)
|
||||
{
|
||||
addError(PDFTranslationContext::tr("Error during converting text to font encoding. Some characters were not converted: '%1'.").arg(encodedText.errorString));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
addError(PDFTranslationContext::tr("Text font not defined!"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream << "ET Q" << Qt::endl;
|
||||
}
|
||||
|
||||
/// Selects the current text font (m_textFont) according to the font resource
/// name. If the font is missing in the font dictionary, or it cannot be created,
/// a default font (Helvetica, WinAnsiEncoding) is registered in the font
/// dictionary under the key "PDF4QT_DefFnt" (unless already present) and used
/// instead.
/// \param font Resource name of the requested font
/// \returns Resource name of the font which was actually selected, i.e. \p font,
///          or the default font key, if the fallback was used.
QByteArray PDFPageContentEditorContentStreamBuilder::selectFont(const QByteArray& font)
{
    m_textFont = nullptr;

    PDFObject fontObject = m_fontDictionary.get(font);
    if (!fontObject.isNull())
    {
        try
        {
            m_textFont = PDFFont::createFont(fontObject, font, m_document);
        }
        catch (const PDFException&)
        {
            addError(PDFTranslationContext::tr("Font '%1' is invalid.").arg(QString::fromLatin1(font)));
        }
    }

    if (m_textFont)
    {
        return font;
    }

    // Fallback: requested font is missing or invalid, use the default font
    const QByteArray defaultFontKey = "PDF4QT_DefFnt";
    if (!m_fontDictionary.hasKey(defaultFontKey))
    {
        PDFObjectFactory defaultFontFactory;

        defaultFontFactory.beginDictionary();
        defaultFontFactory.beginDictionaryItem("Type");
        defaultFontFactory << WrapName("Font");
        defaultFontFactory.endDictionaryItem();
        defaultFontFactory.beginDictionaryItem("Subtype");
        defaultFontFactory << WrapName("Type1");
        defaultFontFactory.endDictionaryItem();
        defaultFontFactory.beginDictionaryItem("BaseFont");
        defaultFontFactory << WrapName("Helvetica");
        defaultFontFactory.endDictionaryItem();
        defaultFontFactory.beginDictionaryItem("Encoding");
        defaultFontFactory << WrapName("WinAnsiEncoding");
        defaultFontFactory.endDictionaryItem();
        defaultFontFactory.endDictionary();

        m_fontDictionary.setEntry(PDFInplaceOrMemoryString(defaultFontKey), defaultFontFactory.takeObject());
    }

    try
    {
        // The font must be created from the default font's dictionary entry,
        // not from the object of the requested font, which has just failed.
        const PDFObject defaultFontObject = m_fontDictionary.get(defaultFontKey);
        m_textFont = PDFFont::createFont(defaultFontObject, defaultFontKey, m_document);
    }
    catch (const PDFException&)
    {
        // Leave m_textFont unset; the failure is reported instead of letting
        // the exception escape from the fallback path.
        addError(PDFTranslationContext::tr("Font '%1' is invalid.").arg(QString::fromLatin1(defaultFontKey)));
    }

    return defaultFontKey;
}
|
||||
|
||||
void PDFPageContentEditorContentStreamBuilder::addError(const QString& error)
|
||||
{
|
||||
|
||||
|
|
|
@ -225,12 +225,15 @@ private:
|
|||
bool isFilling);
|
||||
void writeText(QTextStream& stream, const QString& text);
|
||||
|
||||
QByteArray selectFont(const QByteArray& font);
|
||||
void addError(const QString& error);
|
||||
|
||||
PDFDocument* m_document = nullptr;
|
||||
PDFDictionary m_fontDictionary;
|
||||
PDFDictionary m_xobjectDictionary;
|
||||
QByteArray m_outputContent;
|
||||
PDFPageContentProcessorState m_currentState;
|
||||
PDFFontPointer m_textFont;
|
||||
};
|
||||
|
||||
class PDF4QTLIBCORESHARED_EXPORT PDFPageContentEditorProcessor : public PDFPageContentProcessor
|
||||
|
|
Loading…
Reference in New Issue