mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-03-15 19:00:24 +01:00
To Unicode mapping
This commit is contained in:
parent
8667cbbf90
commit
3ad7485dbf
@ -461,6 +461,7 @@ void PDFRealizedFontImpl::fillTextSequence(const QByteArray& byteArray, TextSequ
|
|||||||
const PDFType0Font* font = static_cast<PDFType0Font*>(m_parentFont.get());
|
const PDFType0Font* font = static_cast<PDFType0Font*>(m_parentFont.get());
|
||||||
|
|
||||||
const PDFFontCMap* cmap = font->getCMap();
|
const PDFFontCMap* cmap = font->getCMap();
|
||||||
|
const PDFFontCMap* toUnicode = font->getToUnicode();
|
||||||
const PDFCIDtoGIDMapper* CIDtoGIDmapper = font->getCIDtoGIDMapper();
|
const PDFCIDtoGIDMapper* CIDtoGIDmapper = font->getCIDtoGIDMapper();
|
||||||
|
|
||||||
std::vector<CID> cids = cmap->interpret(byteArray);
|
std::vector<CID> cids = cmap->interpret(byteArray);
|
||||||
@ -472,9 +473,9 @@ void PDFRealizedFontImpl::fillTextSequence(const QByteArray& byteArray, TextSequ
|
|||||||
|
|
||||||
if (glyphIndex)
|
if (glyphIndex)
|
||||||
{
|
{
|
||||||
// TODO: Dodelat mapovani na unicode
|
QChar character = toUnicode->getToUnicode(cid);
|
||||||
const Glyph& glyph = getGlyph(glyphIndex);
|
const Glyph& glyph = getGlyph(glyphIndex);
|
||||||
textSequence.items.emplace_back(&glyph.glyph, QChar(), glyph.advance);
|
textSequence.items.emplace_back(&glyph.glyph, character, glyph.advance);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -717,7 +718,7 @@ PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* d
|
|||||||
const PDFDictionary* fontDictionary = dereferencedFontDictionary.getDictionary();
|
const PDFDictionary* fontDictionary = dereferencedFontDictionary.getDictionary();
|
||||||
PDFDocumentDataLoaderDecorator fontLoader(document);
|
PDFDocumentDataLoaderDecorator fontLoader(document);
|
||||||
|
|
||||||
// TODO: Fonts - implement all types of the font
|
// TODO: Fonts - Implement Type 3 font
|
||||||
// First, determine the font subtype
|
// First, determine the font subtype
|
||||||
constexpr const std::array<std::pair<const char*, FontType>, 3> fontTypes = {
|
constexpr const std::array<std::pair<const char*, FontType>, 3> fontTypes = {
|
||||||
std::pair<const char*, FontType>{ "Type0", FontType::Type0 },
|
std::pair<const char*, FontType>{ "Type0", FontType::Type0 },
|
||||||
@ -1111,7 +1112,20 @@ PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* d
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return PDFFontPointer(new PDFType0Font(qMove(fontDescriptor), qMove(cmap), qMove(cidToGidMapper), defaultWidth, qMove(advances)));
|
PDFFontCMap toUnicodeCMap;
|
||||||
|
const PDFObject& toUnicode = document->getObject(fontDictionary->get("ToUnicode"));
|
||||||
|
if (toUnicode.isName())
|
||||||
|
{
|
||||||
|
toUnicodeCMap = PDFFontCMap::createFromName(toUnicode.getString());
|
||||||
|
}
|
||||||
|
else if (toUnicode.isStream())
|
||||||
|
{
|
||||||
|
const PDFStream* stream = toUnicode.getStream();
|
||||||
|
QByteArray decodedStream = document->getDecodedStream(stream);
|
||||||
|
toUnicodeCMap = PDFFontCMap::createFromData(decodedStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
return PDFFontPointer(new PDFType0Font(qMove(fontDescriptor), qMove(cmap), qMove(toUnicodeCMap), qMove(cidToGidMapper), defaultWidth, qMove(advances)));
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -1136,9 +1150,6 @@ PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* d
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read To Unicode
|
|
||||||
// TODO: Read To Unicode
|
|
||||||
|
|
||||||
return PDFFontPointer();
|
return PDFFontPointer();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1360,6 +1371,25 @@ PDFFontCMap PDFFontCMap::createFromData(const QByteArray& data)
|
|||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Decodes the destination operand of a bfchar/bfrange entry. Only string
// tokens that hold exactly two bytes are understood: the bytes are combined
// big-endian (first byte is the high byte) into one 16-bit value. Any other
// token, or a string of a different length, yields 0.
auto fetchUnicode = [&parser](const PDFLexicalAnalyzer::Token& currentToken) -> CID
{
    if (currentToken.type == PDFLexicalAnalyzer::TokenType::String)
    {
        const QByteArray byteArray = currentToken.data.toByteArray();

        if (byteArray.size() == 2)
        {
            CID unicodeValue = 0;
            for (int i = 0; i < byteArray.size(); ++i)
            {
                // Cast through unsigned char so bytes >= 0x80 are not sign-extended.
                unicodeValue = (unicodeValue << 8) + static_cast<unsigned char>(byteArray[i]);
            }

            // Hand the decoded value back; without this return the result
            // would be lost and the fall-through 0 below would always win.
            return unicodeValue;
        }
    }

    return 0;
};
|
||||||
|
|
||||||
if (token.type == PDFLexicalAnalyzer::TokenType::Command)
|
if (token.type == PDFLexicalAnalyzer::TokenType::Command)
|
||||||
{
|
{
|
||||||
QByteArray command = token.data.toByteArray();
|
QByteArray command = token.data.toByteArray();
|
||||||
@ -1374,6 +1404,25 @@ PDFFontCMap PDFFontCMap::createFromData(const QByteArray& data)
|
|||||||
throw PDFParserException(PDFTranslationContext::tr("Can't use cmap inside cmap file."));
|
throw PDFParserException(PDFTranslationContext::tr("Can't use cmap inside cmap file."));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (command == "beginbfrange")
|
||||||
|
{
|
||||||
|
PDFLexicalAnalyzer::Token token1 = parser.fetch();
|
||||||
|
|
||||||
|
if (token1.type == PDFLexicalAnalyzer::TokenType::Command &&
|
||||||
|
token1.data.toByteArray() == "endbfrange")
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
PDFLexicalAnalyzer::Token token2 = parser.fetch();
|
||||||
|
PDFLexicalAnalyzer::Token token3 = parser.fetch();
|
||||||
|
|
||||||
|
std::pair<unsigned int, unsigned int> from = fetchCode(token1);
|
||||||
|
std::pair<unsigned int, unsigned int> to = fetchCode(token2);
|
||||||
|
CID cid = fetchUnicode(token3);
|
||||||
|
|
||||||
|
entries.emplace_back(from.first, to.first, qMax(from.second, to.second), cid);
|
||||||
|
}
|
||||||
else if (command == "begincidrange")
|
else if (command == "begincidrange")
|
||||||
{
|
{
|
||||||
while (true)
|
while (true)
|
||||||
@ -1413,6 +1462,26 @@ PDFFontCMap PDFFontCMap::createFromData(const QByteArray& data)
|
|||||||
std::pair<unsigned int, unsigned int> code = fetchCode(token1);
|
std::pair<unsigned int, unsigned int> code = fetchCode(token1);
|
||||||
CID cid = fetchCID(token2);
|
CID cid = fetchCID(token2);
|
||||||
|
|
||||||
|
entries.emplace_back(code.first, code.first, code.second, cid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (command == "beginbfchar")
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
PDFLexicalAnalyzer::Token token1 = parser.fetch();
|
||||||
|
|
||||||
|
if (token1.type == PDFLexicalAnalyzer::TokenType::Command &&
|
||||||
|
token1.data.toByteArray() == "endbfchar")
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
PDFLexicalAnalyzer::Token token2 = parser.fetch();
|
||||||
|
|
||||||
|
std::pair<unsigned int, unsigned int> code = fetchCode(token1);
|
||||||
|
CID cid = fetchUnicode(token2);
|
||||||
|
|
||||||
entries.emplace_back(code.first, code.first, code.second, cid);
|
entries.emplace_back(code.first, code.first, code.second, cid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1516,6 +1585,22 @@ std::vector<CID> PDFFontCMap::interpret(const QByteArray& byteArray) const
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Looks up the character for \p cid in this map. The first entry whose
/// [from, to] interval contains \p cid is used; the result is that entry's
/// base value shifted by the offset of \p cid inside the interval. A null
/// QChar is returned when the map is not valid or when no entry matches.
QChar PDFFontCMap::getToUnicode(CID cid) const
{
    if (!isValid())
    {
        return QChar();
    }

    for (const Entry& candidate : m_entries)
    {
        const bool inRange = candidate.from <= cid && cid <= candidate.to;
        if (inRange)
        {
            const CID mapped = cid - candidate.from + candidate.cid;
            return QChar(mapped);
        }
    }

    return QChar();
}
|
||||||
|
|
||||||
PDFFontCMap::PDFFontCMap(Entries&& entries, bool vertical) :
|
PDFFontCMap::PDFFontCMap(Entries&& entries, bool vertical) :
|
||||||
m_entries(qMove(entries)),
|
m_entries(qMove(entries)),
|
||||||
m_maxKeyLength(0),
|
m_maxKeyLength(0),
|
||||||
|
@ -419,6 +419,9 @@ public:
|
|||||||
/// Converts byte array to array of CIDs
|
/// Converts byte array to array of CIDs
|
||||||
std::vector<CID> interpret(const QByteArray& byteArray) const;
|
std::vector<CID> interpret(const QByteArray& byteArray) const;
|
||||||
|
|
||||||
|
/// Converts CID to QChar, use only on ToUnicode CMaps
|
||||||
|
QChar getToUnicode(CID cid) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
struct Entry
|
struct Entry
|
||||||
@ -468,9 +471,10 @@ private:
|
|||||||
class PDFType0Font : public PDFFont
|
class PDFType0Font : public PDFFont
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit inline PDFType0Font(FontDescriptor fontDescriptor, PDFFontCMap cmap, PDFCIDtoGIDMapper mapper, PDFReal defaultAdvance, std::unordered_map<CID, PDFReal> advances) :
|
explicit inline PDFType0Font(FontDescriptor fontDescriptor, PDFFontCMap cmap, PDFFontCMap toUnicode, PDFCIDtoGIDMapper mapper, PDFReal defaultAdvance, std::unordered_map<CID, PDFReal> advances) :
|
||||||
PDFFont(qMove(fontDescriptor)),
|
PDFFont(qMove(fontDescriptor)),
|
||||||
m_cmap(qMove(cmap)),
|
m_cmap(qMove(cmap)),
|
||||||
|
m_toUnicode(qMove(toUnicode)),
|
||||||
m_mapper(qMove(mapper)),
|
m_mapper(qMove(mapper)),
|
||||||
m_defaultAdvance(defaultAdvance),
|
m_defaultAdvance(defaultAdvance),
|
||||||
m_advances(qMove(advances))
|
m_advances(qMove(advances))
|
||||||
@ -483,6 +487,7 @@ public:
|
|||||||
virtual FontType getFontType() const override { return FontType::Type0; }
|
virtual FontType getFontType() const override { return FontType::Type0; }
|
||||||
|
|
||||||
const PDFFontCMap* getCMap() const { return &m_cmap; }
|
const PDFFontCMap* getCMap() const { return &m_cmap; }
|
||||||
|
const PDFFontCMap* getToUnicode() const { return &m_toUnicode; }
|
||||||
const PDFCIDtoGIDMapper* getCIDtoGIDMapper() const { return &m_mapper; }
|
const PDFCIDtoGIDMapper* getCIDtoGIDMapper() const { return &m_mapper; }
|
||||||
|
|
||||||
/// Returns the glyph advance, if it can be obtained, or zero, if it cannot
|
/// Returns the glyph advance, if it can be obtained, or zero, if it cannot
|
||||||
@ -492,6 +497,7 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
PDFFontCMap m_cmap;
|
PDFFontCMap m_cmap;
|
||||||
|
PDFFontCMap m_toUnicode;
|
||||||
PDFCIDtoGIDMapper m_mapper;
|
PDFCIDtoGIDMapper m_mapper;
|
||||||
PDFReal m_defaultAdvance;
|
PDFReal m_defaultAdvance;
|
||||||
std::unordered_map<CID, PDFReal> m_advances;
|
std::unordered_map<CID, PDFReal> m_advances;
|
||||||
|
@ -1753,7 +1753,6 @@ void PDFPageContentProcessor::operatorTextSetSpacingAndShowText(PDFReal t_w, PDF
|
|||||||
|
|
||||||
void PDFPageContentProcessor::drawText(const TextSequence& textSequence)
|
void PDFPageContentProcessor::drawText(const TextSequence& textSequence)
|
||||||
{
|
{
|
||||||
// TODO: Kdyz nejsme v text rezimu, tak nekreslime text
|
|
||||||
if (textSequence.items.empty())
|
if (textSequence.items.empty())
|
||||||
{
|
{
|
||||||
// Do not display empty text
|
// Do not display empty text
|
||||||
|
Loading…
x
Reference in New Issue
Block a user