mirror of https://github.com/JakubMelka/PDF4QT.git
Fontinfo tool (first steps)
This commit is contained in:
parent
b6d216aea2
commit
6142b263a0
|
@ -25,18 +25,19 @@
|
|||
#include <QFont>
|
||||
#include <QMatrix>
|
||||
#include <QSharedPointer>
|
||||
#include <QTreeWidgetItem>
|
||||
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
|
||||
class QPainterPath;
|
||||
class QTreeWidgetItem;
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
class PDFDocument;
|
||||
class PDFModifiedDocument;
|
||||
class PDFRenderErrorReporter;
|
||||
class PDFFontCMap;
|
||||
|
||||
using CID = unsigned int;
|
||||
using GID = unsigned int;
|
||||
|
@ -259,6 +260,9 @@ public:
|
|||
/// Returns the font type
|
||||
virtual FontType getFontType() const = 0;
|
||||
|
||||
/// Returns ToUnicode mapping (or nullptr, if font has no mapping to unicode)
|
||||
virtual const PDFFontCMap* getToUnicode() const { return nullptr; }
|
||||
|
||||
/// Returns font descriptor
|
||||
const FontDescriptor* getFontDescriptor() const { return &m_fontDescriptor; }
|
||||
|
||||
|
@ -298,6 +302,7 @@ public:
|
|||
GlyphIndices glyphIndices);
|
||||
virtual ~PDFSimpleFont() override = default;
|
||||
|
||||
/// Returns the encoding type stored for this simple font. The value is
/// returned by value, so no top-level const qualifier is used on the
/// return type (it would be ignored by the compiler anyway).
PDFEncoding::Encoding getEncodingType() const { return m_encodingType; }
|
||||
const encoding::EncodingTable* getEncoding() const { return &m_encoding; }
|
||||
const GlyphIndices* getGlyphIndices() const { return &m_glyphIndices; }
|
||||
|
||||
|
@ -523,6 +528,7 @@ public:
|
|||
|
||||
virtual FontType getFontType() const override;
|
||||
virtual void dumpFontToTreeItem(QTreeWidgetItem*item) const override;
|
||||
virtual const PDFFontCMap* getToUnicode() const override { return &m_toUnicode; }
|
||||
|
||||
/// Returns width of the character. If character doesn't exist, then zero is returned.
|
||||
double getWidth(int characterIndex) const;
|
||||
|
@ -533,7 +539,6 @@ public:
|
|||
|
||||
const QMatrix& getFontMatrix() const { return m_fontMatrix; }
|
||||
const PDFObject& getResources() const { return m_resources; }
|
||||
const PDFFontCMap& getToUnicode() const { return m_toUnicode; }
|
||||
|
||||
/// Returns unicode character for given character index. If unicode mapping is not
|
||||
/// present, empty (null) character is returned.
|
||||
|
@ -567,9 +572,9 @@ public:
|
|||
virtual ~PDFType0Font() = default;
|
||||
|
||||
virtual FontType getFontType() const override { return FontType::Type0; }
|
||||
virtual const PDFFontCMap* getToUnicode() const override { return &m_toUnicode; }
|
||||
|
||||
const PDFFontCMap* getCMap() const { return &m_cmap; }
|
||||
const PDFFontCMap* getToUnicode() const { return &m_toUnicode; }
|
||||
const PDFCIDtoGIDMapper* getCIDtoGIDMapper() const { return &m_mapper; }
|
||||
|
||||
/// Returns the glyph advance, if it can be obtained, or zero, if it cannot
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include <QApplication>
|
||||
#include <QByteArray>
|
||||
#include <QClipboard>
|
||||
#include <QStyleOption>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
|
|
@ -302,14 +302,31 @@ PDFInteger PDFClosedIntervalSet::getTotalLength() const
|
|||
return std::accumulate(m_intervals.cbegin(), m_intervals.cend(), 0, [](PDFInteger count, const auto& b) { return count + b.second - b.first + 1; });
|
||||
}
|
||||
|
||||
QString PDFClosedIntervalSet::toText() const
|
||||
/// Transforms the interval set to readable text. Intervals are separated
/// by ", ".
/// \param withoutBrackets If true, each interval is written as "a-b" (or
///        just "a", when the interval consists of a single value). If false,
///        each interval is always written as "[a - b]".
QString PDFClosedIntervalSet::toText(bool withoutBrackets) const
{
    QStringList parts;

    for (const ClosedInterval& range : m_intervals)
    {
        if (!withoutBrackets)
        {
            // Bracketed form is used for every interval, even a single value
            parts << QString("[%1 - %2]").arg(range.first).arg(range.second);
        }
        else if (range.first != range.second)
        {
            parts << QString("%1-%2").arg(range.first).arg(range.second);
        }
        else
        {
            // Degenerate interval, print the single value only
            parts << QString::number(range.first);
        }
    }

    return parts.join(", ");
}
|
||||
|
|
|
@ -615,7 +615,7 @@ public:
|
|||
PDFInteger getTotalLength() const;
|
||||
|
||||
/// Transforms interval set to readable text
|
||||
QString toText() const;
|
||||
QString toText(bool withoutBrackets) const;
|
||||
|
||||
/// Returns all integers from the range
|
||||
std::vector<PDFInteger> unfold() const;
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <QCursor>
|
||||
|
||||
class QCheckBox;
|
||||
class QLineEdit;
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
|
|
@ -46,6 +46,7 @@ SOURCES += \
|
|||
pdftoolaudiobook.cpp \
|
||||
pdftoolfetchtext.cpp \
|
||||
pdftoolinfo.cpp \
|
||||
pdftoolinfofonts.cpp \
|
||||
pdftoolinfojavascript.cpp \
|
||||
pdftoolinfometadata.cpp \
|
||||
pdftoolinfonameddestinations.cpp \
|
||||
|
@ -70,6 +71,7 @@ HEADERS += \
|
|||
pdftoolaudiobook.h \
|
||||
pdftoolfetchtext.h \
|
||||
pdftoolinfo.h \
|
||||
pdftoolinfofonts.h \
|
||||
pdftoolinfojavascript.h \
|
||||
pdftoolinfometadata.h \
|
||||
pdftoolinfonameddestinations.h \
|
||||
|
|
|
@ -0,0 +1,335 @@
|
|||
// Copyright (C) 2020 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdftoolinfofonts.h"
|
||||
#include "pdfexecutionpolicy.h"
|
||||
#include "pdfdocument.h"
|
||||
#include "pdffont.h"
|
||||
#include "pdfutils.h"
|
||||
|
||||
namespace pdftool
|
||||
{
|
||||
|
||||
static PDFToolInfoFonts s_infoFontsApplication;
|
||||
|
||||
/// Returns the command name, the translated tool name or the translated
/// tool description, depending on \p standardString. Any other value
/// triggers a debug assertion and an empty string is returned.
QString PDFToolInfoFonts::getStandardString(StandardString standardString) const
{
    QString text;

    switch (standardString)
    {
        case Command:
            text = "info-fonts";
            break;

        case Name:
            text = PDFToolTranslationContext::tr("Info about used fonts");
            break;

        case Description:
            text = PDFToolTranslationContext::tr("Retrieve informations about font usage in a document.");
            break;

        default:
            Q_ASSERT(false);
            break;
    }

    return text;
}
|
||||
|
||||
struct FontInfo
|
||||
{
|
||||
pdf::PDFClosedIntervalSet pages;
|
||||
QString fontName;
|
||||
QString fontTypeName;
|
||||
QString encoding;
|
||||
bool isEmbedded = false;
|
||||
bool isSubset = false;
|
||||
bool isToUnicodePresent = false;
|
||||
pdf::PDFObjectReference reference;
|
||||
};
|
||||
|
||||
int PDFToolInfoFonts::execute(const PDFToolOptions& options)
|
||||
{
|
||||
pdf::PDFDocument document;
|
||||
QByteArray sourceData;
|
||||
if (!readDocument(options, document, &sourceData))
|
||||
{
|
||||
return ErrorDocumentReading;
|
||||
}
|
||||
|
||||
QString parseError;
|
||||
std::vector<pdf::PDFInteger> pages = options.getPageRange(document.getCatalog()->getPageCount(), parseError, true);
|
||||
|
||||
if (!parseError.isEmpty())
|
||||
{
|
||||
PDFConsole::writeError(parseError, options.outputCodec);
|
||||
return ErrorInvalidArguments;
|
||||
}
|
||||
|
||||
QMutex mutex;
|
||||
std::map<pdf::PDFObjectReference, FontInfo> fontInfoMap;
|
||||
std::vector<FontInfo> directFonts;
|
||||
std::set<pdf::PDFObjectReference> usedFontReferences;
|
||||
|
||||
auto processPage = [&](pdf::PDFInteger pageIndex)
|
||||
{
|
||||
try
|
||||
{
|
||||
const pdf::PDFPage* page = document.getCatalog()->getPage(pageIndex);
|
||||
if (const pdf::PDFDictionary* resourcesDictionary = document.getDictionaryFromObject(page->getResources()))
|
||||
{
|
||||
if (const pdf::PDFDictionary* fontsDictionary = document.getDictionaryFromObject(resourcesDictionary->get("Font")))
|
||||
{
|
||||
// Iterate trough each font
|
||||
const size_t fontsCount = fontsDictionary->getCount();
|
||||
for (size_t i = 0; i < fontsCount; ++i)
|
||||
{
|
||||
pdf::PDFObjectReference fontReference;
|
||||
pdf::PDFObject object = fontsDictionary->getValue(i);
|
||||
if (object.isReference())
|
||||
{
|
||||
// Check, if we have not processed the object. If we have it processed,
|
||||
// then do nothing, otherwise insert it into the processed objects.
|
||||
// We must also use mutex, because we use multithreading.
|
||||
QMutexLocker lock(&mutex);
|
||||
if (usedFontReferences.count(object.getReference()))
|
||||
{
|
||||
fontInfoMap[object.getReference()].pages.addValue(pageIndex + 1);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
fontReference = object.getReference();
|
||||
usedFontReferences.insert(fontReference);
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (pdf::PDFFontPointer font = pdf::PDFFont::createFont(object, &document))
|
||||
{
|
||||
pdf::PDFRenderErrorReporterDummy dummyReporter;
|
||||
pdf::PDFRealizedFontPointer realizedFont = pdf::PDFRealizedFont::createRealizedFont(font, 8.0, &dummyReporter);
|
||||
if (realizedFont)
|
||||
{
|
||||
const pdf::FontType fontType = font->getFontType();
|
||||
const pdf::FontDescriptor* fontDescriptor = font->getFontDescriptor();
|
||||
QString fontName = fontDescriptor->fontName;
|
||||
int plusPos = fontName.lastIndexOf('+');
|
||||
|
||||
// Jakub Melka: Detect, if font is subset. Font subsets have special form,
|
||||
// according to chapter 9.9.2 of PDF 2.0 specification. The first 6 letters
|
||||
// of font name are uppercase alphabet letters, and 7'th character is '+' sign.
|
||||
bool isSubset = false;
|
||||
if (plusPos == 6)
|
||||
{
|
||||
isSubset = true;
|
||||
for (int i = 0; i < 6; ++i)
|
||||
{
|
||||
QChar character = fontName[i];
|
||||
if (!character.isLetter() || !character.isUpper())
|
||||
{
|
||||
isSubset = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to remove characters from +, if we have font name 'SDFDSF+ValidFontName'
|
||||
if (plusPos != -1 && plusPos < fontName.size() - 1)
|
||||
{
|
||||
fontName = fontName.mid(plusPos + 1);
|
||||
}
|
||||
|
||||
if (fontName.isEmpty())
|
||||
{
|
||||
fontName = QString::fromLatin1(fontsDictionary->getKey(i).getString());
|
||||
}
|
||||
|
||||
QString fontTypeName;
|
||||
switch (fontType)
|
||||
{
|
||||
case pdf::FontType::Type0:
|
||||
fontTypeName = PDFToolTranslationContext::tr("Type 0 (CID)");
|
||||
break;
|
||||
|
||||
case pdf::FontType::Type1:
|
||||
fontTypeName = PDFToolTranslationContext::tr("Type 1 (8 bit)");
|
||||
break;
|
||||
|
||||
case pdf::FontType::MMType1:
|
||||
fontTypeName = PDFToolTranslationContext::tr("MM Type 1 (8 bit)");
|
||||
break;
|
||||
|
||||
case pdf::FontType::TrueType:
|
||||
fontTypeName = PDFToolTranslationContext::tr("TrueType (8 bit)");
|
||||
break;
|
||||
|
||||
case pdf::FontType::Type3:
|
||||
fontTypeName = PDFToolTranslationContext::tr("Type 3");
|
||||
break;
|
||||
|
||||
default:
|
||||
Q_ASSERT(false);
|
||||
break;
|
||||
}
|
||||
|
||||
const pdf::PDFFontCMap* toUnicode = font->getToUnicode();
|
||||
|
||||
FontInfo info;
|
||||
info.fontName = fontName;
|
||||
info.pages.addValue(pageIndex + 1);
|
||||
info.fontTypeName = fontTypeName;
|
||||
info.isEmbedded = fontDescriptor->isEmbedded();
|
||||
info.isSubset = isSubset;
|
||||
info.isToUnicodePresent = toUnicode && toUnicode->isValid();
|
||||
info.reference = fontReference;
|
||||
|
||||
const pdf::PDFSimpleFont* simpleFont = dynamic_cast<const pdf::PDFSimpleFont*>(font.data());
|
||||
if (simpleFont)
|
||||
{
|
||||
const pdf::PDFEncoding::Encoding encoding = simpleFont->getEncodingType();
|
||||
switch (encoding)
|
||||
{
|
||||
case pdf::PDFEncoding::Encoding::Standard:
|
||||
info.encoding = PDFToolTranslationContext::tr("Standard");
|
||||
break;
|
||||
case pdf::PDFEncoding::Encoding::MacRoman:
|
||||
info.encoding = PDFToolTranslationContext::tr("MacRoman");
|
||||
break;
|
||||
case pdf::PDFEncoding::Encoding::WinAnsi:
|
||||
info.encoding = PDFToolTranslationContext::tr("WinAnsi");
|
||||
break;
|
||||
case pdf::PDFEncoding::Encoding::PDFDoc:
|
||||
info.encoding = PDFToolTranslationContext::tr("PDFDoc");
|
||||
break;
|
||||
case pdf::PDFEncoding::Encoding::MacExpert:
|
||||
info.encoding = PDFToolTranslationContext::tr("MacExpert");
|
||||
break;
|
||||
case pdf::PDFEncoding::Encoding::Symbol:
|
||||
info.encoding = PDFToolTranslationContext::tr("Symbol");
|
||||
break;
|
||||
case pdf::PDFEncoding::Encoding::ZapfDingbats:
|
||||
info.encoding = PDFToolTranslationContext::tr("ZapfDingbats");
|
||||
break;
|
||||
|
||||
default:
|
||||
info.encoding = PDFToolTranslationContext::tr("Custom");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
QMutexLocker lock(&mutex);
|
||||
if (fontReference.isValid())
|
||||
{
|
||||
info.pages.merge(fontInfoMap[fontReference].pages);
|
||||
fontInfoMap[fontReference] = qMove(info);
|
||||
}
|
||||
else
|
||||
{
|
||||
directFonts.emplace_back(qMove(info));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (pdf::PDFException)
|
||||
{
|
||||
// Do nothing, some error occured, continue with next font
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (pdf::PDFException)
|
||||
{
|
||||
// Do nothing, some error occured
|
||||
}
|
||||
};
|
||||
|
||||
pdf::PDFExecutionPolicy::execute(pdf::PDFExecutionPolicy::Scope::Page, pages.begin(), pages.end(), processPage);
|
||||
|
||||
for (auto& item : fontInfoMap)
|
||||
{
|
||||
directFonts.emplace_back(qMove(item.second));
|
||||
}
|
||||
|
||||
PDFOutputFormatter formatter(options.outputStyle, options.outputCodec);
|
||||
formatter.beginDocument("info-fonts", PDFToolTranslationContext::tr("Fonts used in document %1").arg(options.document));
|
||||
formatter.endl();
|
||||
|
||||
formatter.beginTable("fonts-overview", PDFToolTranslationContext::tr("Overview"));
|
||||
|
||||
formatter.beginTableHeaderRow("header");
|
||||
formatter.writeTableHeaderColumn("no", PDFToolTranslationContext::tr("No."), Qt::AlignLeft);
|
||||
formatter.writeTableHeaderColumn("font-name", PDFToolTranslationContext::tr("Font Name"), Qt::AlignLeft);
|
||||
formatter.writeTableHeaderColumn("font-type", PDFToolTranslationContext::tr("Font Type"), Qt::AlignLeft);
|
||||
formatter.writeTableHeaderColumn("encoding", PDFToolTranslationContext::tr("Encoding"), Qt::AlignLeft);
|
||||
formatter.writeTableHeaderColumn("pages", PDFToolTranslationContext::tr("Pages"), Qt::AlignLeft);
|
||||
formatter.writeTableHeaderColumn("is-embedded", PDFToolTranslationContext::tr("Embedded"), Qt::AlignLeft);
|
||||
formatter.writeTableHeaderColumn("is-subset", PDFToolTranslationContext::tr("Subset"), Qt::AlignLeft);
|
||||
formatter.writeTableHeaderColumn("is-unicode", PDFToolTranslationContext::tr("Unicode"), Qt::AlignLeft);
|
||||
formatter.writeTableHeaderColumn("object-no", PDFToolTranslationContext::tr("Object"), Qt::AlignLeft);
|
||||
formatter.writeTableHeaderColumn("generation-no", PDFToolTranslationContext::tr("Gen."), Qt::AlignLeft);
|
||||
formatter.endTableHeaderRow();
|
||||
|
||||
QLocale locale;
|
||||
|
||||
QString yesText = PDFToolTranslationContext::tr("Yes");
|
||||
QString noText = PDFToolTranslationContext::tr("No");
|
||||
QString noRef = PDFToolTranslationContext::tr("--");
|
||||
|
||||
int ref = 1;
|
||||
for (const FontInfo& info : directFonts)
|
||||
{
|
||||
formatter.beginTableRow("font", ref);
|
||||
|
||||
formatter.writeTableColumn("no", locale.toString(ref), Qt::AlignRight);
|
||||
formatter.writeTableColumn("font-name", info.fontName);
|
||||
formatter.writeTableColumn("font-type", info.fontTypeName);
|
||||
formatter.writeTableColumn("encoding", info.encoding);
|
||||
formatter.writeTableColumn("pages", info.pages.toText(true));
|
||||
formatter.writeTableColumn("is-embedded", info.isEmbedded ? yesText : noText);
|
||||
formatter.writeTableColumn("is-subset", info.isSubset ? yesText : noText);
|
||||
formatter.writeTableColumn("is-unicode", info.isToUnicodePresent ? yesText : noText);
|
||||
|
||||
if (info.reference.isValid())
|
||||
{
|
||||
formatter.writeTableColumn("object-no", locale.toString(info.reference.objectNumber), Qt::AlignRight);
|
||||
formatter.writeTableColumn("generation-no", locale.toString(info.reference.generation), Qt::AlignRight);
|
||||
}
|
||||
else
|
||||
{
|
||||
formatter.writeTableColumn("object-no", noRef, Qt::AlignRight);
|
||||
formatter.writeTableColumn("generation-no", noRef, Qt::AlignRight);
|
||||
}
|
||||
|
||||
formatter.endTableRow();
|
||||
++ref;
|
||||
}
|
||||
|
||||
formatter.endTable();
|
||||
|
||||
formatter.endDocument();
|
||||
PDFConsole::writeText(formatter.getString(), options.outputCodec);
|
||||
|
||||
return ExitSuccess;
|
||||
}
|
||||
|
||||
/// Returns the combination of the ConsoleFormat, OpenDocument and
/// PageSelector option flags used by this tool.
PDFToolAbstractApplication::Options PDFToolInfoFonts::getOptionsFlags() const
{
    const Options flags = ConsoleFormat | OpenDocument | PageSelector;
    return flags;
}
|
||||
|
||||
} // namespace pdftool
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright (C) 2020 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef PDFTOOLINFOFONTS_H
|
||||
#define PDFTOOLINFOFONTS_H
|
||||
|
||||
#include "pdftoolabstractapplication.h"
|
||||
|
||||
namespace pdftool
|
||||
{
|
||||
|
||||
class PDFToolInfoFonts : public PDFToolAbstractApplication
|
||||
{
|
||||
public:
|
||||
virtual QString getStandardString(StandardString standardString) const override;
|
||||
virtual int execute(const PDFToolOptions& options) override;
|
||||
virtual Options getOptionsFlags() const override;
|
||||
};
|
||||
|
||||
} // namespace pdftool
|
||||
|
||||
#endif // PDFTOOLINFOFONTS_H
|
|
@ -130,7 +130,7 @@ int PDFToolInfoPageBoxesApplication::execute(const PDFToolOptions& options)
|
|||
for (const PDFPageBoxInfo& info : infos)
|
||||
{
|
||||
formatter.endl();
|
||||
formatter.beginTable("page-range", PDFToolTranslationContext::tr("Pages %1").arg(info.pages.toText()));
|
||||
formatter.beginTable("page-range", PDFToolTranslationContext::tr("Pages %1").arg(info.pages.toText(true)));
|
||||
|
||||
formatter.beginTableHeaderRow("header");
|
||||
formatter.writeTableHeaderColumn("box", PDFToolTranslationContext::tr("Box"), Qt::AlignLeft);
|
||||
|
|
|
@ -202,7 +202,7 @@ int PDFToolVerifySignaturesApplication::execute(const PDFToolOptions& options)
|
|||
|
||||
// Signature range
|
||||
const pdf::PDFClosedIntervalSet& bytesCoveredBySignature = signature.getBytesCoveredBySignature();
|
||||
formatter.writeText("byte-range", PDFToolTranslationContext::tr("Byte range covered by signature: %1").arg(bytesCoveredBySignature.toText()));
|
||||
formatter.writeText("byte-range", PDFToolTranslationContext::tr("Byte range covered by signature: %1").arg(bytesCoveredBySignature.toText(false)));
|
||||
|
||||
if (signature.hasError())
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue