diff --git a/PdfForQtLib/sources/pdffont.h b/PdfForQtLib/sources/pdffont.h index 6f4787d..1cd9cc1 100644 --- a/PdfForQtLib/sources/pdffont.h +++ b/PdfForQtLib/sources/pdffont.h @@ -25,18 +25,19 @@ #include #include #include -#include #include #include class QPainterPath; +class QTreeWidgetItem; namespace pdf { class PDFDocument; class PDFModifiedDocument; class PDFRenderErrorReporter; +class PDFFontCMap; using CID = unsigned int; using GID = unsigned int; @@ -259,6 +260,9 @@ public: /// Returns the font type virtual FontType getFontType() const = 0; + /// Returns ToUnicode mapping (or nullptr, if font has no mapping to unicode) + virtual const PDFFontCMap* getToUnicode() const { return nullptr; } + /// Returns font descriptor const FontDescriptor* getFontDescriptor() const { return &m_fontDescriptor; } @@ -298,6 +302,7 @@ public: GlyphIndices glyphIndices); virtual ~PDFSimpleFont() override = default; + const PDFEncoding::Encoding getEncodingType() const { return m_encodingType; } const encoding::EncodingTable* getEncoding() const { return &m_encoding; } const GlyphIndices* getGlyphIndices() const { return &m_glyphIndices; } @@ -523,6 +528,7 @@ public: virtual FontType getFontType() const override; virtual void dumpFontToTreeItem(QTreeWidgetItem*item) const override; + virtual const PDFFontCMap* getToUnicode() const override { return &m_toUnicode; } /// Returns width of the character. If character doesn't exist, then zero is returned. double getWidth(int characterIndex) const; @@ -533,7 +539,6 @@ public: const QMatrix& getFontMatrix() const { return m_fontMatrix; } const PDFObject& getResources() const { return m_resources; } - const PDFFontCMap& getToUnicode() const { return m_toUnicode; } /// Returns unicode character for given character index. If unicode mapping is not /// present, empty (null) character is returned. 
@@ -567,9 +572,9 @@ public:
     virtual ~PDFType0Font() = default;
 
     virtual FontType getFontType() const override { return FontType::Type0; }
+    virtual const PDFFontCMap* getToUnicode() const override { return &m_toUnicode; }
 
     const PDFFontCMap* getCMap() const { return &m_cmap; }
-    const PDFFontCMap* getToUnicode() const { return &m_toUnicode; }
     const PDFCIDtoGIDMapper* getCIDtoGIDMapper() const { return &m_mapper; }
 
     /// Returns the glyph advance, if it can be obtained, or zero, if it cannot
diff --git a/PdfForQtLib/sources/pdfform.cpp b/PdfForQtLib/sources/pdfform.cpp
index 7daaa04..60546e4 100644
--- a/PdfForQtLib/sources/pdfform.cpp
+++ b/PdfForQtLib/sources/pdfform.cpp
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 
 namespace pdf
 {
diff --git a/PdfForQtLib/sources/pdfutils.cpp b/PdfForQtLib/sources/pdfutils.cpp
index 4e32120..a787b21 100644
--- a/PdfForQtLib/sources/pdfutils.cpp
+++ b/PdfForQtLib/sources/pdfutils.cpp
@@ -302,13 +302,27 @@ PDFInteger PDFClosedIntervalSet::getTotalLength() const
     return std::accumulate(m_intervals.cbegin(), m_intervals.cend(), 0, [](PDFInteger count, const auto& b) { return count + b.second - b.first + 1; });
 }
 
-QString PDFClosedIntervalSet::toText() const
+/// Transforms the interval set to text, intervals are separated by ", ".
+/// If \p withoutBrackets is true, an interval is written as "a-b" (or as a single
+/// number, if both bounds are equal), otherwise it is written as "[a - b]".
+QString PDFClosedIntervalSet::toText(bool withoutBrackets) const
 {
     QStringList intervals;
 
     for (const ClosedInterval& interval : m_intervals)
     {
-        intervals << QString("[%1 - %2]").arg(interval.first).arg(interval.second);
+        if (!withoutBrackets)
+        {
+            intervals << QString("[%1 - %2]").arg(interval.first).arg(interval.second);
+        }
+        else if (interval.first != interval.second)
+        {
+            intervals << QString("%1-%2").arg(interval.first).arg(interval.second);
+        }
+        else
+        {
+            intervals << QString::number(interval.first);
+        }
     }
 
     return intervals.join(", ");
diff --git a/PdfForQtLib/sources/pdfutils.h b/PdfForQtLib/sources/pdfutils.h
index bb65382..2e2c65f 100644
--- a/PdfForQtLib/sources/pdfutils.h
+++ b/PdfForQtLib/sources/pdfutils.h
@@ -615,7 +615,10 @@ public:
     PDFInteger getTotalLength() const;
 
     /// Transforms interval set to readable text
-    QString toText() const;
+    /// \param withoutBrackets If true, intervals are written in compact form ("3", "5-7").
+    ///        If false (default), intervals are written in the form "[5 - 7]", which is
+    ///        the output of this function before the parameter was introduced.
+    QString toText(bool withoutBrackets = false) const;
 
     /// Returns all integers from the range
     std::vector unfold() const;
diff --git a/PdfForQtLib/sources/pdfwidgettool.h b/PdfForQtLib/sources/pdfwidgettool.h
index 35f8081..4b4ec6f 100644
--- a/PdfForQtLib/sources/pdfwidgettool.h
+++ b/PdfForQtLib/sources/pdfwidgettool.h
@@ -26,6 +26,7 @@
 #include
 
 class QCheckBox;
+class QLineEdit;
 
 namespace pdf
 {
diff --git a/PdfTool/PdfTool.pro b/PdfTool/PdfTool.pro
index a78a7da..e362630 100644
--- a/PdfTool/PdfTool.pro
+++ b/PdfTool/PdfTool.pro
@@ -46,6 +46,7 @@ SOURCES += \
     pdftoolaudiobook.cpp \
     pdftoolfetchtext.cpp \
     pdftoolinfo.cpp \
+    pdftoolinfofonts.cpp \
     pdftoolinfojavascript.cpp \
     pdftoolinfometadata.cpp \
     pdftoolinfonameddestinations.cpp \
@@ -70,6 +71,7 @@ HEADERS += \
     pdftoolaudiobook.h \
     pdftoolfetchtext.h \
     pdftoolinfo.h \
+    pdftoolinfofonts.h \
     pdftoolinfojavascript.h \
     pdftoolinfometadata.h \
     pdftoolinfonameddestinations.h \
diff --git a/PdfTool/pdftoolinfofonts.cpp b/PdfTool/pdftoolinfofonts.cpp
new file mode 100644
index 0000000..eaa26ef
--- /dev/null
+++ b/PdfTool/pdftoolinfofonts.cpp
@@ -0,0 +1,335 @@
+// Copyright (C) 2020 Jakub Melka
+//
+// This file is part of PdfForQt.
+//
+// PdfForQt is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// PdfForQt is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with PDFForQt. If not, see .
+
+#include "pdftoolinfofonts.h"
+#include "pdfexecutionpolicy.h"
+#include "pdfdocument.h"
+#include "pdffont.h"
+#include "pdfutils.h"
+
+namespace pdftool
+{
+
+// NOTE(review): this instance is not referenced anywhere in this file; presumably
+// the base class constructor registers the tool - confirm in PDFToolAbstractApplication.
+static PDFToolInfoFonts s_infoFontsApplication;
+
+/// Returns command name, title or description of this tool
+QString PDFToolInfoFonts::getStandardString(StandardString standardString) const
+{
+    switch (standardString)
+    {
+        case Command:
+            return "info-fonts";
+
+        case Name:
+            return PDFToolTranslationContext::tr("Info about used fonts");
+
+        case Description:
+            return PDFToolTranslationContext::tr("Retrieve informations about font usage in a document.");
+
+        default:
+            Q_ASSERT(false);
+            break;
+    }
+
+    return QString();
+}
+
+/// Single row of the font overview table
+struct FontInfo
+{
+    pdf::PDFClosedIntervalSet pages;    ///< One-based numbers of pages, on which the font is used
+    QString fontName;                   ///< Font name with stripped subset prefix
+    QString fontTypeName;               ///< Translated name of the font type
+    QString encoding;                   ///< Translated encoding name, filled for simple fonts only
+    bool isEmbedded = false;
+    bool isSubset = false;
+    bool isToUnicodePresent = false;
+    pdf::PDFObjectReference reference;  ///< Stays invalid, if font is not referenced indirectly
+};
+
+/// Detects, if font is a subset. Font subsets have special form, according to
+/// chapter 9.9.2 of PDF 2.0 specification. The first 6 letters of font name
+/// are uppercase alphabet letters, and 7'th character is '+' sign.
+/// \param fontName Font name as stored in the font descriptor
+/// \param plusPos Position of the last '+' sign in the font name (-1 if not present)
+static bool isSubsetFontName(const QString& fontName, int plusPos)
+{
+    if (plusPos != 6)
+    {
+        return false;
+    }
+
+    for (int charIndex = 0; charIndex < 6; ++charIndex)
+    {
+        const QChar character = fontName[charIndex];
+        if (!character.isLetter() || !character.isUpper())
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/// Returns translated name of the font type. Asserts and returns
+/// empty string for unknown font type.
+static QString getFontTypeName(pdf::FontType fontType)
+{
+    switch (fontType)
+    {
+        case pdf::FontType::Type0:
+            return PDFToolTranslationContext::tr("Type 0 (CID)");
+
+        case pdf::FontType::Type1:
+            return PDFToolTranslationContext::tr("Type 1 (8 bit)");
+
+        case pdf::FontType::MMType1:
+            return PDFToolTranslationContext::tr("MM Type 1 (8 bit)");
+
+        case pdf::FontType::TrueType:
+            return PDFToolTranslationContext::tr("TrueType (8 bit)");
+
+        case pdf::FontType::Type3:
+            return PDFToolTranslationContext::tr("Type 3");
+
+        default:
+            Q_ASSERT(false);
+            break;
+    }
+
+    return QString();
+}
+
+/// Returns translated name of the encoding of a simple font. Every encoding,
+/// which is not explicitly listed here, is reported as "Custom".
+static QString getEncodingName(pdf::PDFEncoding::Encoding encoding)
+{
+    switch (encoding)
+    {
+        case pdf::PDFEncoding::Encoding::Standard:
+            return PDFToolTranslationContext::tr("Standard");
+        case pdf::PDFEncoding::Encoding::MacRoman:
+            return PDFToolTranslationContext::tr("MacRoman");
+        case pdf::PDFEncoding::Encoding::WinAnsi:
+            return PDFToolTranslationContext::tr("WinAnsi");
+        case pdf::PDFEncoding::Encoding::PDFDoc:
+            return PDFToolTranslationContext::tr("PDFDoc");
+        case pdf::PDFEncoding::Encoding::MacExpert:
+            return PDFToolTranslationContext::tr("MacExpert");
+        case pdf::PDFEncoding::Encoding::Symbol:
+            return PDFToolTranslationContext::tr("Symbol");
+        case pdf::PDFEncoding::Encoding::ZapfDingbats:
+            return PDFToolTranslationContext::tr("ZapfDingbats");
+
+        default:
+            break;
+    }
+
+    return PDFToolTranslationContext::tr("Custom");
+}
+
+/// Collects fonts from the "Font" resource dictionaries of the selected pages
+/// and writes an overview table of them to the console.
+/// \param options Tool options (document, page range, output style and codec)
+/// \returns ExitSuccess, ErrorDocumentReading, if the document can't be read,
+///          or ErrorInvalidArguments, if the page range can't be parsed
+int PDFToolInfoFonts::execute(const PDFToolOptions& options)
+{
+    pdf::PDFDocument document;
+    QByteArray sourceData;
+    if (!readDocument(options, document, &sourceData))
+    {
+        return ErrorDocumentReading;
+    }
+
+    QString parseError;
+    std::vector<pdf::PDFInteger> pages = options.getPageRange(document.getCatalog()->getPageCount(), parseError, true);
+
+    if (!parseError.isEmpty())
+    {
+        PDFConsole::writeError(parseError, options.outputCodec);
+        return ErrorInvalidArguments;
+    }
+
+    // Shared state of the page workers, always accessed with locked mutex
+    QMutex mutex;
+    std::map<pdf::PDFObjectReference, FontInfo> fontInfoMap;
+    std::vector<FontInfo> directFonts;
+    std::set<pdf::PDFObjectReference> usedFontReferences;
+
+    auto processPage = [&](pdf::PDFInteger pageIndex)
+    {
+        try
+        {
+            const pdf::PDFPage* page = document.getCatalog()->getPage(pageIndex);
+            const pdf::PDFDictionary* resourcesDictionary = document.getDictionaryFromObject(page->getResources());
+            if (!resourcesDictionary)
+            {
+                return;
+            }
+
+            const pdf::PDFDictionary* fontsDictionary = document.getDictionaryFromObject(resourcesDictionary->get("Font"));
+            if (!fontsDictionary)
+            {
+                return;
+            }
+
+            // Iterate through each font
+            const size_t fontsCount = fontsDictionary->getCount();
+            for (size_t i = 0; i < fontsCount; ++i)
+            {
+                pdf::PDFObjectReference fontReference;
+                pdf::PDFObject object = fontsDictionary->getValue(i);
+                if (object.isReference())
+                {
+                    // Check, if we have not processed the object. If we have it processed,
+                    // then only remember the page, otherwise insert it into the processed
+                    // objects. We must also use mutex, because we use multithreading.
+                    QMutexLocker lock(&mutex);
+                    if (usedFontReferences.count(object.getReference()))
+                    {
+                        fontInfoMap[object.getReference()].pages.addValue(pageIndex + 1);
+                        continue;
+                    }
+
+                    fontReference = object.getReference();
+                    usedFontReferences.insert(fontReference);
+                }
+
+                try
+                {
+                    pdf::PDFFontPointer font = pdf::PDFFont::createFont(object, &document);
+                    if (!font)
+                    {
+                        continue;
+                    }
+
+                    // Only fonts, which can be realized, are reported
+                    pdf::PDFRenderErrorReporterDummy dummyReporter;
+                    pdf::PDFRealizedFontPointer realizedFont = pdf::PDFRealizedFont::createRealizedFont(font, 8.0, &dummyReporter);
+                    if (!realizedFont)
+                    {
+                        continue;
+                    }
+
+                    const pdf::FontDescriptor* fontDescriptor = font->getFontDescriptor();
+                    QString fontName = fontDescriptor->fontName;
+                    const int plusPos = fontName.lastIndexOf('+');
+                    const bool isSubset = isSubsetFontName(fontName, plusPos);
+
+                    // Try to remove characters from +, if we have font name 'SDFDSF+ValidFontName'
+                    if (plusPos != -1 && plusPos < fontName.size() - 1)
+                    {
+                        fontName = fontName.mid(plusPos + 1);
+                    }
+
+                    // Font descriptor has no name, use the key from the font dictionary
+                    if (fontName.isEmpty())
+                    {
+                        fontName = QString::fromLatin1(fontsDictionary->getKey(i).getString());
+                    }
+
+                    const pdf::PDFFontCMap* toUnicode = font->getToUnicode();
+
+                    FontInfo info;
+                    info.fontName = fontName;
+                    info.pages.addValue(pageIndex + 1);
+                    info.fontTypeName = getFontTypeName(font->getFontType());
+                    info.isEmbedded = fontDescriptor->isEmbedded();
+                    info.isSubset = isSubset;
+                    info.isToUnicodePresent = toUnicode && toUnicode->isValid();
+                    info.reference = fontReference;
+
+                    const pdf::PDFSimpleFont* simpleFont = dynamic_cast<const pdf::PDFSimpleFont*>(font.data());
+                    if (simpleFont)
+                    {
+                        info.encoding = getEncodingName(simpleFont->getEncodingType());
+                    }
+
+                    QMutexLocker lock(&mutex);
+                    if (fontReference.isValid())
+                    {
+                        // Other pages could have already registered themselves
+                        // for this font, so keep their page numbers.
+                        info.pages.merge(fontInfoMap[fontReference].pages);
+                        fontInfoMap[fontReference] = qMove(info);
+                    }
+                    else
+                    {
+                        directFonts.emplace_back(qMove(info));
+                    }
+                }
+                catch (const pdf::PDFException&)
+                {
+                    // Do nothing, some error occurred, continue with next font
+                    continue;
+                }
+            }
+        }
+        catch (const pdf::PDFException&)
+        {
+            // Do nothing, some error occurred
+        }
+    };
+
+    pdf::PDFExecutionPolicy::execute(pdf::PDFExecutionPolicy::Scope::Page, pages.begin(), pages.end(), processPage);
+
+    for (auto& item : fontInfoMap)
+    {
+        // If the font could not be loaded, but it is used on more pages, then the map
+        // contains only a default constructed entry created when the pages were being
+        // registered. Such entry has invalid reference and would be displayed as an
+        // empty row, so it is skipped (font, which can't be loaded, is not reported).
+        if (item.second.reference.isValid())
+        {
+            directFonts.emplace_back(qMove(item.second));
+        }
+    }
+
+    PDFOutputFormatter formatter(options.outputStyle, options.outputCodec);
+    formatter.beginDocument("info-fonts", PDFToolTranslationContext::tr("Fonts used in document %1").arg(options.document));
+    formatter.endl();
+
+    formatter.beginTable("fonts-overview", PDFToolTranslationContext::tr("Overview"));
+
+    formatter.beginTableHeaderRow("header");
+    formatter.writeTableHeaderColumn("no", PDFToolTranslationContext::tr("No."), Qt::AlignLeft);
+    formatter.writeTableHeaderColumn("font-name", PDFToolTranslationContext::tr("Font Name"), Qt::AlignLeft);
+    formatter.writeTableHeaderColumn("font-type", PDFToolTranslationContext::tr("Font Type"), Qt::AlignLeft);
+    formatter.writeTableHeaderColumn("encoding", PDFToolTranslationContext::tr("Encoding"), Qt::AlignLeft);
+    formatter.writeTableHeaderColumn("pages", PDFToolTranslationContext::tr("Pages"), Qt::AlignLeft);
+    formatter.writeTableHeaderColumn("is-embedded", PDFToolTranslationContext::tr("Embedded"), Qt::AlignLeft);
+    formatter.writeTableHeaderColumn("is-subset", PDFToolTranslationContext::tr("Subset"), Qt::AlignLeft);
+    formatter.writeTableHeaderColumn("is-unicode", PDFToolTranslationContext::tr("Unicode"), Qt::AlignLeft);
+    formatter.writeTableHeaderColumn("object-no", PDFToolTranslationContext::tr("Object"), Qt::AlignLeft);
+    formatter.writeTableHeaderColumn("generation-no", PDFToolTranslationContext::tr("Gen."), Qt::AlignLeft);
+    formatter.endTableHeaderRow();
+
+    QLocale locale;
+
+    QString yesText = PDFToolTranslationContext::tr("Yes");
+    QString noText = PDFToolTranslationContext::tr("No");
+    QString noRef = PDFToolTranslationContext::tr("--");
+
+    // Rows are numbered from one
+    int ref = 1;
+    for (const FontInfo& info : directFonts)
+    {
+        formatter.beginTableRow("font", ref);
+
+        formatter.writeTableColumn("no", locale.toString(ref), Qt::AlignRight);
+        formatter.writeTableColumn("font-name", info.fontName);
+        formatter.writeTableColumn("font-type", info.fontTypeName);
+        formatter.writeTableColumn("encoding", info.encoding);
+        formatter.writeTableColumn("pages", info.pages.toText(true));
+        formatter.writeTableColumn("is-embedded", info.isEmbedded ? yesText : noText);
+        formatter.writeTableColumn("is-subset", info.isSubset ? yesText : noText);
+        formatter.writeTableColumn("is-unicode", info.isToUnicodePresent ? yesText : noText);
+
+        if (info.reference.isValid())
+        {
+            formatter.writeTableColumn("object-no", locale.toString(info.reference.objectNumber), Qt::AlignRight);
+            formatter.writeTableColumn("generation-no", locale.toString(info.reference.generation), Qt::AlignRight);
+        }
+        else
+        {
+            // Font is not an indirect object, so it has no object/generation number
+            formatter.writeTableColumn("object-no", noRef, Qt::AlignRight);
+            formatter.writeTableColumn("generation-no", noRef, Qt::AlignRight);
+        }
+
+        formatter.endTableRow();
+        ++ref;
+    }
+
+    formatter.endTable();
+
+    formatter.endDocument();
+    PDFConsole::writeText(formatter.getString(), options.outputCodec);
+
+    return ExitSuccess;
+}
+
+/// Returns option groups used by this tool (console format, document
+/// opening and page selection)
+PDFToolAbstractApplication::Options PDFToolInfoFonts::getOptionsFlags() const
+{
+    return ConsoleFormat | OpenDocument | PageSelector;
+}
+
+}   // namespace pdftool
diff --git a/PdfTool/pdftoolinfofonts.h b/PdfTool/pdftoolinfofonts.h
new file mode 100644
index 0000000..00281b3
--- /dev/null
+++ b/PdfTool/pdftoolinfofonts.h
@@ -0,0 +1,36 @@
+// Copyright (C) 2020 Jakub Melka
+//
+// This file is part of PdfForQt.
+//
+// PdfForQt is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// PdfForQt is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with PDFForQt. If not, see .
+
+#ifndef PDFTOOLINFOFONTS_H
+#define PDFTOOLINFOFONTS_H
+
+#include "pdftoolabstractapplication.h"
+
+namespace pdftool
+{
+/// Console tool "info-fonts", which writes overview of fonts used in a document
+class PDFToolInfoFonts : public PDFToolAbstractApplication
+{
+public:
+    QString getStandardString(StandardString standardString) const override; ///< Command name, title or description
+    int execute(const PDFToolOptions& options) override; ///< Runs the tool, returns exit code
+    Options getOptionsFlags() const override; ///< Option groups used by the tool
+};
+
+}   // namespace pdftool
+
+#endif // PDFTOOLINFOFONTS_H
diff --git a/PdfTool/pdftoolinfopageboxes.cpp b/PdfTool/pdftoolinfopageboxes.cpp
index e6c7d7b..302dd08 100644
--- a/PdfTool/pdftoolinfopageboxes.cpp
+++ b/PdfTool/pdftoolinfopageboxes.cpp
@@ -130,7 +130,7 @@ int PDFToolInfoPageBoxesApplication::execute(const PDFToolOptions& options)
     for (const PDFPageBoxInfo& info : infos)
     {
         formatter.endl();
-        formatter.beginTable("page-range", PDFToolTranslationContext::tr("Pages %1").arg(info.pages.toText()));
+        formatter.beginTable("page-range", PDFToolTranslationContext::tr("Pages %1").arg(info.pages.toText(true)));
 
         formatter.beginTableHeaderRow("header");
         formatter.writeTableHeaderColumn("box", PDFToolTranslationContext::tr("Box"), Qt::AlignLeft);
diff --git a/PdfTool/pdftoolverifysignatures.cpp b/PdfTool/pdftoolverifysignatures.cpp
index ecac64c..3e1f29d 100644
--- a/PdfTool/pdftoolverifysignatures.cpp
+++ b/PdfTool/pdftoolverifysignatures.cpp
@@ -202,7 +202,7 @@ int PDFToolVerifySignaturesApplication::execute(const PDFToolOptions& options)
 
         // Signature range
         const pdf::PDFClosedIntervalSet& bytesCoveredBySignature = signature.getBytesCoveredBySignature();
-        formatter.writeText("byte-range", PDFToolTranslationContext::tr("Byte range covered by signature: %1").arg(bytesCoveredBySignature.toText()));
+        formatter.writeText("byte-range", PDFToolTranslationContext::tr("Byte range covered by signature: %1").arg(bytesCoveredBySignature.toText(false)));
 
         if (signature.hasError())
         {