mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-04-01 04:00:19 +02:00
Font info tool - added character maps
This commit is contained in:
parent
acee5f2186
commit
f43459b88e
@ -363,6 +363,9 @@ public:
|
||||
|
||||
/// Returns postscript name of the font
|
||||
virtual QString getPostScriptName() const { return QString(); }
|
||||
|
||||
/// Returns character info
|
||||
virtual CharacterInfos getCharacterInfos() const = 0;
|
||||
};
|
||||
|
||||
/// Implementation of the PDFRealizedFont class using PIMPL pattern for Type 3 fonts
|
||||
@ -374,6 +377,7 @@ public:
|
||||
|
||||
virtual void fillTextSequence(const QByteArray& byteArray, TextSequence& textSequence, PDFRenderErrorReporter* reporter) override;
|
||||
virtual bool isHorizontalWritingSystem() const override;
|
||||
virtual CharacterInfos getCharacterInfos() const override;
|
||||
|
||||
private:
|
||||
/// Pixel size of the font
|
||||
@ -394,6 +398,7 @@ public:
|
||||
virtual bool isHorizontalWritingSystem() const override { return !m_isVertical; }
|
||||
virtual void dumpFontToTreeItem(QTreeWidgetItem* item) const override;
|
||||
virtual QString getPostScriptName() const override { return m_postScriptName; }
|
||||
virtual CharacterInfos getCharacterInfos() const override;
|
||||
|
||||
static constexpr const PDFReal PIXEL_SIZE_MULTIPLIER = 100.0;
|
||||
|
||||
@ -581,6 +586,107 @@ void PDFRealizedFontImpl::fillTextSequence(const QByteArray& byteArray, TextSequ
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the character map of this realized font, i.e. a list of glyph indices
/// paired with the character each glyph stands for. Simple fonts (Type1, TrueType,
/// MMType1) are enumerated by walking the encoding table of the parent font.
/// Type0 fonts are enumerated by walking the character map of the FreeType face;
/// if that yields nothing, all CIDs below QChar::LastValidCodePoint are probed.
/// \returns Collected character infos (empty for unhandled font types)
CharacterInfos PDFRealizedFontImpl::getCharacterInfos() const
{
    CharacterInfos infos;

    // Stores a single glyph index / character pair into the output container
    auto appendInfo = [&infos](GID glyph, QChar unicodeCharacter)
    {
        CharacterInfo item;
        item.gid = glyph;
        item.character = unicodeCharacter;
        infos.emplace_back(qMove(item));
    };

    switch (m_parentFont->getFontType())
    {
        case FontType::Type1:
        case FontType::TrueType:
        case FontType::MMType1:
        {
            // Simple fonts - the encoding table tells us the character for each code
            Q_ASSERT(dynamic_cast<PDFSimpleFont*>(m_parentFont.get()));
            const PDFSimpleFont* simpleFont = static_cast<PDFSimpleFont*>(m_parentFont.get());
            const encoding::EncodingTable* encodingTable = simpleFont->getEncoding();
            const GlyphIndices* glyphTable = simpleFont->getGlyphIndices();

            for (size_t code = 0; code < encodingTable->size(); ++code)
            {
                QChar unicodeCharacter = (*encodingTable)[code];
                GID glyph = (*glyphTable)[static_cast<uint8_t>(code)];

                // The font does not define the glyph index for this code - ask
                // the unicode character map of the face (if it is the active one)
                if (!glyph && m_face->charmap && m_face->charmap->encoding == FT_ENCODING_UNICODE)
                {
                    glyph = FT_Get_Char_Index(m_face, unicodeCharacter.unicode());
                }

                if (!glyph)
                {
                    // Still no glyph, nothing to report for this code
                    continue;
                }

                appendInfo(glyph, unicodeCharacter);
            }

            break;
        }

        case FontType::Type0:
        {
            Q_ASSERT(dynamic_cast<PDFType0Font*>(m_parentFont.get()));
            const PDFType0Font* compositeFont = static_cast<PDFType0Font*>(m_parentFont.get());
            const PDFFontCMap* toUnicodeMap = compositeFont->getToUnicode();
            const PDFCIDtoGIDMapper* cidToGidMapper = compositeFont->getCIDtoGIDMapper();

            // Walk through all entries of the face's character map. Iteration
            // ends, when FreeType reports zero glyph index.
            FT_UInt faceGlyph = 0;
            for (FT_ULong charCode = FT_Get_First_Char(m_face, &faceGlyph);
                 faceGlyph != 0;
                 charCode = FT_Get_Next_Char(m_face, charCode, &faceGlyph))
            {
                const GID glyph = faceGlyph;
                const CID cid = cidToGidMapper->unmap(glyph);
                appendInfo(glyph, toUnicodeMap->getToUnicode(cid));
            }

            if (infos.empty())
            {
                // Character map gave us nothing - probe all reasonable CIDs and
                // keep those, whose glyph can be loaded by FreeType
                for (CID cid = 0; cid < QChar::LastValidCodePoint; ++cid)
                {
                    const GID glyph = cidToGidMapper->map(cid);
                    const bool isLoadable = glyph && !FT_Load_Glyph(m_face, glyph, FT_LOAD_NO_BITMAP | FT_LOAD_NO_HINTING);

                    if (isLoadable)
                    {
                        appendInfo(glyph, toUnicodeMap->getToUnicode(cid));
                    }
                }
            }

            break;
        }

        default:
        {
            // Unhandled font type
            Q_ASSERT(false);
            break;
        }
    }

    return infos;
}
|
||||
|
||||
void PDFRealizedFontImpl::dumpFontToTreeItem(QTreeWidgetItem* item) const
|
||||
{
|
||||
QTreeWidgetItem* root = new QTreeWidgetItem(item, { PDFTranslationContext::tr("Details") });
|
||||
@ -786,6 +892,11 @@ QString PDFRealizedFont::getPostScriptName() const
|
||||
return m_impl->getPostScriptName();
|
||||
}
|
||||
|
||||
/// Returns character infos of the font. The call is forwarded
/// to the private implementation object.
CharacterInfos PDFRealizedFont::getCharacterInfos() const
{
    CharacterInfos infos = m_impl->getCharacterInfos();
    return infos;
}
|
||||
|
||||
PDFRealizedFontPointer PDFRealizedFont::createRealizedFont(PDFFontPointer font, PDFReal pixelSize, PDFRenderErrorReporter* reporter)
|
||||
{
|
||||
PDFRealizedFontPointer result;
|
||||
@ -2261,4 +2372,22 @@ bool PDFRealizedType3FontImpl::isHorizontalWritingSystem() const
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Builds the character map of a Type 3 font. One entry is produced for each
/// content stream of the parent font; key of the content stream is used as
/// the glyph index and the parent font is asked for the matching unicode character.
CharacterInfos PDFRealizedType3FontImpl::getCharacterInfos() const
{
    Q_ASSERT(dynamic_cast<const PDFType3Font*>(m_parentFont.get()));
    const PDFType3Font* type3Font = static_cast<const PDFType3Font*>(m_parentFont.get());
    const auto& streams = type3Font->getContentStreams();

    CharacterInfos infos;
    for (auto it = streams.cbegin(); it != streams.cend(); ++it)
    {
        CharacterInfo item;
        item.gid = it->first;
        item.character = type3Font->getUnicode(it->first);
        infos.emplace_back(qMove(item));
    }

    return infos;
}
|
||||
|
||||
} // namespace pdf
|
||||
|
@ -219,6 +219,13 @@ class IRealizedFontImpl;
|
||||
|
||||
using PDFRealizedFontPointer = QSharedPointer<PDFRealizedFont>;
|
||||
|
||||
struct CharacterInfo
|
||||
{
|
||||
GID gid = 0;
|
||||
QChar character;
|
||||
};
|
||||
using CharacterInfos = std::vector<CharacterInfo>;
|
||||
|
||||
/// Font, which has fixed pixel size. It is programmed as PIMPL, because we need
|
||||
/// to remove FreeType types from the interface (so we do not include FreeType in the interface).
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFRealizedFont
|
||||
@ -242,6 +249,9 @@ public:
|
||||
/// Returns postscript name of the font
|
||||
QString getPostScriptName() const;
|
||||
|
||||
/// Returns character info
|
||||
CharacterInfos getCharacterInfos() const;
|
||||
|
||||
/// Creates new realized font from the standard font. If font can't be created,
|
||||
/// then exception is thrown.
|
||||
static PDFRealizedFontPointer createRealizedFont(PDFFontPointer font, PDFReal pixelSize, PDFRenderErrorReporter* reporter);
|
||||
@ -441,6 +451,31 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Maps GID to CID (inverse mapping)
|
||||
CID unmap(GID gid) const
|
||||
{
|
||||
if (m_mapping.isEmpty())
|
||||
{
|
||||
// This means identity mapping
|
||||
return gid;
|
||||
}
|
||||
else
|
||||
{
|
||||
CID lastCid = CID(m_mapping.size() / 2);
|
||||
for (CID i = 0; i < lastCid; ++i)
|
||||
{
|
||||
if (map(i) == gid)
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This should occur only in case of bad (damaged) PDF file - because in this case,
|
||||
// encoding is missing. Return invalid character index.
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
QByteArray m_mapping;
|
||||
};
|
||||
@ -542,6 +577,7 @@ public:
|
||||
|
||||
const QMatrix& getFontMatrix() const { return m_fontMatrix; }
|
||||
const PDFObject& getResources() const { return m_resources; }
|
||||
/// Returns content streams of the font's characters, stored in a map
/// keyed by an integer (presumably character index - TODO confirm).
const std::map<int, QByteArray>& getContentStreams() const { return m_characterContentStreams; }
|
||||
|
||||
/// Returns unicode character for given character index. If unicode mapping is not
|
||||
/// present, empty (null) character is returned.
|
||||
|
@ -236,6 +236,11 @@ void PDFToolAbstractApplication::initializeCommandLineParser(QCommandLineParser*
|
||||
parser->addOption(QCommandLineOption("say-struct-exp-form", "Say expanded form extracted from structure tree (only for tagged pdf)."));
|
||||
parser->addOption(QCommandLineOption("say-struct-act-text", "Say actual text extracted from structure tree (only for tagged pdf)."));
|
||||
}
|
||||
|
||||
if (optionFlags.testFlag(CharacterMaps))
|
||||
{
|
||||
parser->addOption(QCommandLineOption("character-maps", "Show character maps for embedded fonts."));
|
||||
}
|
||||
}
|
||||
|
||||
PDFToolOptions PDFToolAbstractApplication::getOptions(QCommandLineParser* parser) const
|
||||
@ -404,6 +409,11 @@ PDFToolOptions PDFToolAbstractApplication::getOptions(QCommandLineParser* parser
|
||||
options.textSpeechSayStructActualText = parser->isSet("say-struct-act-text");
|
||||
}
|
||||
|
||||
if (optionFlags.testFlag(CharacterMaps))
|
||||
{
|
||||
options.showCharacterMapsForEmbeddedFonts = parser->isSet("character-maps");
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
|
@ -108,6 +108,9 @@ struct PDFToolOptions
|
||||
bool textSpeechSayStructActualText = false;
|
||||
QString textSpeechAudioFormat = "mp3";
|
||||
|
||||
// For option 'CharacterMaps'
|
||||
bool showCharacterMapsForEmbeddedFonts = false;
|
||||
|
||||
/// Returns page range. If page range is invalid, then \p errorMessage is empty.
|
||||
/// \param pageCount Page count
|
||||
/// \param[out] errorMessage Error message
|
||||
@ -157,6 +160,7 @@ public:
|
||||
TextShow = 0x0200, ///< Text extract and show options
|
||||
VoiceSelector = 0x0400, ///< Select voice from SAPI
|
||||
TextSpeech = 0x0800, ///< Text speech options
|
||||
CharacterMaps = 0x1000, ///< Character maps for embedded fonts
|
||||
};
|
||||
Q_DECLARE_FLAGS(Options, Option)
|
||||
|
||||
|
@ -50,6 +50,7 @@ QString PDFToolInfoFonts::getStandardString(StandardString standardString) const
|
||||
struct FontInfo
|
||||
{
|
||||
pdf::PDFClosedIntervalSet pages;
|
||||
QString fontFullName;
|
||||
QString fontName;
|
||||
QString fontTypeName;
|
||||
QString encoding;
|
||||
@ -58,6 +59,7 @@ struct FontInfo
|
||||
bool isToUnicodePresent = false;
|
||||
pdf::PDFObjectReference reference;
|
||||
QString substitutedFont;
|
||||
pdf::CharacterInfos characterInfos;
|
||||
};
|
||||
|
||||
int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
@ -127,6 +129,7 @@ int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
const pdf::FontType fontType = font->getFontType();
|
||||
const pdf::FontDescriptor* fontDescriptor = font->getFontDescriptor();
|
||||
QString fontName = fontDescriptor->fontName;
|
||||
QString fontFullName = fontName;
|
||||
int plusPos = fontName.lastIndexOf('+');
|
||||
|
||||
// Jakub Melka: Detect, if font is subset. Font subsets have special form,
|
||||
@ -190,6 +193,7 @@ int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
|
||||
FontInfo info;
|
||||
info.fontName = fontName;
|
||||
info.fontFullName = fontFullName;
|
||||
info.pages.addValue(pageIndex + 1);
|
||||
info.fontTypeName = fontTypeName;
|
||||
info.isEmbedded = fontDescriptor->isEmbedded() || fontType == pdf::FontType::Type3;
|
||||
@ -198,6 +202,11 @@ int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
info.reference = fontReference;
|
||||
info.substitutedFont = realizedFont->getPostScriptName();
|
||||
|
||||
if (options.showCharacterMapsForEmbeddedFonts && info.isEmbedded)
|
||||
{
|
||||
info.characterInfos = realizedFont->getCharacterInfos();
|
||||
}
|
||||
|
||||
const pdf::PDFSimpleFont* simpleFont = dynamic_cast<const pdf::PDFSimpleFont*>(font.data());
|
||||
if (simpleFont)
|
||||
{
|
||||
@ -292,6 +301,7 @@ int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
QString noText = PDFToolTranslationContext::tr("No");
|
||||
QString noRef = PDFToolTranslationContext::tr("--");
|
||||
|
||||
bool hasEmbedded = false;
|
||||
bool hasSubstitutions = false;
|
||||
int ref = 1;
|
||||
for (const FontInfo& info : directFonts)
|
||||
@ -319,6 +329,7 @@ int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
}
|
||||
|
||||
hasSubstitutions = hasSubstitutions || !info.isEmbedded;
|
||||
hasEmbedded = hasEmbedded || info.isEmbedded;
|
||||
|
||||
formatter.endTableRow();
|
||||
++ref;
|
||||
@ -326,10 +337,9 @@ int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
|
||||
formatter.endTable();
|
||||
|
||||
formatter.endl();
|
||||
|
||||
if (hasSubstitutions)
|
||||
{
|
||||
formatter.endl();
|
||||
formatter.beginTable("fonts-substitutions", PDFToolTranslationContext::tr("Substitutions"));
|
||||
|
||||
formatter.beginTableHeaderRow("header");
|
||||
@ -374,6 +384,53 @@ int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
formatter.endTable();
|
||||
}
|
||||
|
||||
// Character maps of embedded fonts. The section is written only when requested
// by the options and at least one embedded font was found. Each embedded font
// gets its own table with the glyph index, the character and its unicode value.
if (options.showCharacterMapsForEmbeddedFonts && hasEmbedded)
{
    formatter.endl();
    formatter.beginHeader("font-character-maps", PDFToolTranslationContext::tr("Font Character Maps"));

    for (const FontInfo& info : directFonts)
    {
        if (!info.isEmbedded)
        {
            // Character infos are collected for embedded fonts only
            continue;
        }

        formatter.beginTable("font-character-map", PDFToolTranslationContext::tr("Character Map for Font '%1'").arg(info.fontFullName));

        formatter.beginTableHeaderRow("header");
        formatter.writeTableHeaderColumn("no", PDFToolTranslationContext::tr("No."), Qt::AlignLeft);
        formatter.writeTableHeaderColumn("glyph-index", PDFToolTranslationContext::tr("Glyph Index"), Qt::AlignLeft);
        formatter.writeTableHeaderColumn("character", PDFToolTranslationContext::tr("Character"), Qt::AlignLeft);
        formatter.writeTableHeaderColumn("unicode", PDFToolTranslationContext::tr("Unicode"), Qt::AlignLeft);
        formatter.endTableHeaderRow();

        int characterIndex = 1;
        for (const pdf::CharacterInfo& characterInfo : info.characterInfos)
        {
            formatter.beginTableRow("character", characterInfo.gid);

            // Null character has no printable form, so a placeholder is shown instead
            QString character = characterInfo.character.isNull() ? "??" : QString(1, characterInfo.character);

            // Unicode value as upper-case hexadecimal number, zero-padded to four digits
            QString unicode = QString("0x%1").arg(QString::number(characterInfo.character.unicode(), 16).toUpper().rightJustified(4, QChar('0')));

            formatter.writeTableColumn("no", locale.toString(characterIndex++), Qt::AlignRight);
            formatter.writeTableColumn("glyph-index", locale.toString(characterInfo.gid), Qt::AlignRight);
            formatter.writeTableColumn("character", character);
            formatter.writeTableColumn("unicode", unicode);

            formatter.endTableRow();
        }

        formatter.endTable();
        formatter.endl();
    }

    // The section was opened by beginHeader(), so it must be closed by endHeader()
    formatter.endHeader();
}
|
||||
|
||||
formatter.endDocument();
|
||||
PDFConsole::writeText(formatter.getString(), options.outputCodec);
|
||||
|
||||
@ -382,7 +439,7 @@ int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
|
||||
/// Returns flags of the options supported by this tool - console formatting,
/// document opening, page selection and character maps for embedded fonts.
PDFToolAbstractApplication::Options PDFToolInfoFonts::getOptionsFlags() const
{
    // CharacterMaps must be part of the flags, otherwise the 'character-maps'
    // command line option is neither registered nor read
    return ConsoleFormat | OpenDocument | PageSelector | CharacterMaps;
}
|
||||
|
||||
} // namespace pdftool
|
||||
|
Loading…
x
Reference in New Issue
Block a user