// PDF4QT/PdfTool/pdftoolinfofonts.cpp
//
// 336 lines
// 16 KiB
// C++
// Raw Normal View History
//
// 2020-10-24 14:39:09 +02:00
// Copyright (C) 2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdftoolinfofonts.h"
#include "pdfexecutionpolicy.h"
#include "pdfdocument.h"
#include "pdffont.h"
#include "pdfutils.h"
namespace pdftool
{
// File-scope instance of the "info-fonts" tool, created during static initialization.
// NOTE(review): presumably constructing it registers the tool with the list of
// available applications - confirm against PDFToolAbstractApplication.
static PDFToolInfoFonts s_infoFontsApplication;
// Returns one of the standard texts of this tool, selected by standardString:
// the command name (not translated), or the translated tool name / description.
// Any other value triggers a debug assertion and yields an empty string.
QString PDFToolInfoFonts::getStandardString(StandardString standardString) const
{
    QString text;

    switch (standardString)
    {
        case Command:
            text = "info-fonts";
            break;

        case Name:
            text = PDFToolTranslationContext::tr("Info about used fonts");
            break;

        case Description:
            text = PDFToolTranslationContext::tr("Retrieve informations about font usage in a document.");
            break;

        default:
            // Unknown kind of standard string requested
            Q_ASSERT(false);
            break;
    }

    return text;
}
// Description of a single font found in the page resources. Filled in by
// PDFToolInfoFonts::execute and written out as one row of the overview table.
struct FontInfo
{
    // Pages on which the font is used (stored as page index + 1)
    pdf::PDFClosedIntervalSet pages;

    // Font name without the 'XXXXXX+' prefix; the resource dictionary key is
    // used instead when the font descriptor carries no name
    QString fontName;

    // Translated, human readable name of the font type
    QString fontTypeName;

    // Translated encoding name; set only for simple fonts, empty otherwise
    QString encoding;

    // Value reported by the font descriptor's isEmbedded()
    bool isEmbedded{false};

    // True, if the font name starts with six uppercase letters followed by '+'
    bool isSubset{false};

    // True, if the font has a valid ToUnicode character map
    bool isToUnicodePresent{false};

    // Reference of the font object; stays invalid for fonts stored as direct objects
    pdf::PDFObjectReference reference;
};
int PDFToolInfoFonts::execute(const PDFToolOptions& options)
{
pdf::PDFDocument document;
QByteArray sourceData;
if (!readDocument(options, document, &sourceData))
{
return ErrorDocumentReading;
}
QString parseError;
std::vector<pdf::PDFInteger> pages = options.getPageRange(document.getCatalog()->getPageCount(), parseError, true);
if (!parseError.isEmpty())
{
PDFConsole::writeError(parseError, options.outputCodec);
return ErrorInvalidArguments;
}
QMutex mutex;
std::map<pdf::PDFObjectReference, FontInfo> fontInfoMap;
std::vector<FontInfo> directFonts;
std::set<pdf::PDFObjectReference> usedFontReferences;
auto processPage = [&](pdf::PDFInteger pageIndex)
{
try
{
const pdf::PDFPage* page = document.getCatalog()->getPage(pageIndex);
if (const pdf::PDFDictionary* resourcesDictionary = document.getDictionaryFromObject(page->getResources()))
{
if (const pdf::PDFDictionary* fontsDictionary = document.getDictionaryFromObject(resourcesDictionary->get("Font")))
{
// Iterate trough each font
const size_t fontsCount = fontsDictionary->getCount();
for (size_t i = 0; i < fontsCount; ++i)
{
pdf::PDFObjectReference fontReference;
pdf::PDFObject object = fontsDictionary->getValue(i);
if (object.isReference())
{
// Check, if we have not processed the object. If we have it processed,
// then do nothing, otherwise insert it into the processed objects.
// We must also use mutex, because we use multithreading.
QMutexLocker lock(&mutex);
if (usedFontReferences.count(object.getReference()))
{
fontInfoMap[object.getReference()].pages.addValue(pageIndex + 1);
continue;
}
else
{
fontReference = object.getReference();
usedFontReferences.insert(fontReference);
}
}
try
{
if (pdf::PDFFontPointer font = pdf::PDFFont::createFont(object, &document))
{
pdf::PDFRenderErrorReporterDummy dummyReporter;
pdf::PDFRealizedFontPointer realizedFont = pdf::PDFRealizedFont::createRealizedFont(font, 8.0, &dummyReporter);
if (realizedFont)
{
const pdf::FontType fontType = font->getFontType();
const pdf::FontDescriptor* fontDescriptor = font->getFontDescriptor();
QString fontName = fontDescriptor->fontName;
int plusPos = fontName.lastIndexOf('+');
// Jakub Melka: Detect, if font is subset. Font subsets have special form,
// according to chapter 9.9.2 of PDF 2.0 specification. The first 6 letters
// of font name are uppercase alphabet letters, and 7'th character is '+' sign.
bool isSubset = false;
if (plusPos == 6)
{
isSubset = true;
for (int i = 0; i < 6; ++i)
{
QChar character = fontName[i];
if (!character.isLetter() || !character.isUpper())
{
isSubset = false;
break;
}
}
}
// Try to remove characters from +, if we have font name 'SDFDSF+ValidFontName'
if (plusPos != -1 && plusPos < fontName.size() - 1)
{
fontName = fontName.mid(plusPos + 1);
}
if (fontName.isEmpty())
{
fontName = QString::fromLatin1(fontsDictionary->getKey(i).getString());
}
QString fontTypeName;
switch (fontType)
{
case pdf::FontType::Type0:
fontTypeName = PDFToolTranslationContext::tr("Type 0 (CID)");
break;
case pdf::FontType::Type1:
fontTypeName = PDFToolTranslationContext::tr("Type 1 (8 bit)");
break;
case pdf::FontType::MMType1:
fontTypeName = PDFToolTranslationContext::tr("MM Type 1 (8 bit)");
break;
case pdf::FontType::TrueType:
fontTypeName = PDFToolTranslationContext::tr("TrueType (8 bit)");
break;
case pdf::FontType::Type3:
fontTypeName = PDFToolTranslationContext::tr("Type 3");
break;
default:
Q_ASSERT(false);
break;
}
const pdf::PDFFontCMap* toUnicode = font->getToUnicode();
FontInfo info;
info.fontName = fontName;
info.pages.addValue(pageIndex + 1);
info.fontTypeName = fontTypeName;
info.isEmbedded = fontDescriptor->isEmbedded();
info.isSubset = isSubset;
info.isToUnicodePresent = toUnicode && toUnicode->isValid();
info.reference = fontReference;
const pdf::PDFSimpleFont* simpleFont = dynamic_cast<const pdf::PDFSimpleFont*>(font.data());
if (simpleFont)
{
const pdf::PDFEncoding::Encoding encoding = simpleFont->getEncodingType();
switch (encoding)
{
case pdf::PDFEncoding::Encoding::Standard:
info.encoding = PDFToolTranslationContext::tr("Standard");
break;
case pdf::PDFEncoding::Encoding::MacRoman:
info.encoding = PDFToolTranslationContext::tr("MacRoman");
break;
case pdf::PDFEncoding::Encoding::WinAnsi:
info.encoding = PDFToolTranslationContext::tr("WinAnsi");
break;
case pdf::PDFEncoding::Encoding::PDFDoc:
info.encoding = PDFToolTranslationContext::tr("PDFDoc");
break;
case pdf::PDFEncoding::Encoding::MacExpert:
info.encoding = PDFToolTranslationContext::tr("MacExpert");
break;
case pdf::PDFEncoding::Encoding::Symbol:
info.encoding = PDFToolTranslationContext::tr("Symbol");
break;
case pdf::PDFEncoding::Encoding::ZapfDingbats:
info.encoding = PDFToolTranslationContext::tr("ZapfDingbats");
break;
default:
info.encoding = PDFToolTranslationContext::tr("Custom");
break;
}
}
QMutexLocker lock(&mutex);
if (fontReference.isValid())
{
info.pages.merge(fontInfoMap[fontReference].pages);
fontInfoMap[fontReference] = qMove(info);
}
else
{
directFonts.emplace_back(qMove(info));
}
}
}
}
catch (pdf::PDFException)
{
// Do nothing, some error occured, continue with next font
continue;
}
}
}
}
}
catch (pdf::PDFException)
{
// Do nothing, some error occured
}
};
pdf::PDFExecutionPolicy::execute(pdf::PDFExecutionPolicy::Scope::Page, pages.begin(), pages.end(), processPage);
for (auto& item : fontInfoMap)
{
directFonts.emplace_back(qMove(item.second));
}
PDFOutputFormatter formatter(options.outputStyle, options.outputCodec);
formatter.beginDocument("info-fonts", PDFToolTranslationContext::tr("Fonts used in document %1").arg(options.document));
formatter.endl();
formatter.beginTable("fonts-overview", PDFToolTranslationContext::tr("Overview"));
formatter.beginTableHeaderRow("header");
formatter.writeTableHeaderColumn("no", PDFToolTranslationContext::tr("No."), Qt::AlignLeft);
formatter.writeTableHeaderColumn("font-name", PDFToolTranslationContext::tr("Font Name"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("font-type", PDFToolTranslationContext::tr("Font Type"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("encoding", PDFToolTranslationContext::tr("Encoding"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("pages", PDFToolTranslationContext::tr("Pages"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("is-embedded", PDFToolTranslationContext::tr("Embedded"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("is-subset", PDFToolTranslationContext::tr("Subset"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("is-unicode", PDFToolTranslationContext::tr("Unicode"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("object-no", PDFToolTranslationContext::tr("Object"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("generation-no", PDFToolTranslationContext::tr("Gen."), Qt::AlignLeft);
formatter.endTableHeaderRow();
QLocale locale;
QString yesText = PDFToolTranslationContext::tr("Yes");
QString noText = PDFToolTranslationContext::tr("No");
QString noRef = PDFToolTranslationContext::tr("--");
int ref = 1;
for (const FontInfo& info : directFonts)
{
formatter.beginTableRow("font", ref);
formatter.writeTableColumn("no", locale.toString(ref), Qt::AlignRight);
formatter.writeTableColumn("font-name", info.fontName);
formatter.writeTableColumn("font-type", info.fontTypeName);
formatter.writeTableColumn("encoding", info.encoding);
formatter.writeTableColumn("pages", info.pages.toText(true));
formatter.writeTableColumn("is-embedded", info.isEmbedded ? yesText : noText);
formatter.writeTableColumn("is-subset", info.isSubset ? yesText : noText);
formatter.writeTableColumn("is-unicode", info.isToUnicodePresent ? yesText : noText);
if (info.reference.isValid())
{
formatter.writeTableColumn("object-no", locale.toString(info.reference.objectNumber), Qt::AlignRight);
formatter.writeTableColumn("generation-no", locale.toString(info.reference.generation), Qt::AlignRight);
}
else
{
formatter.writeTableColumn("object-no", noRef, Qt::AlignRight);
formatter.writeTableColumn("generation-no", noRef, Qt::AlignRight);
}
formatter.endTableRow();
++ref;
}
formatter.endTable();
formatter.endDocument();
PDFConsole::writeText(formatter.getString(), options.outputCodec);
return ExitSuccess;
}
// Reports the option groups used by this tool: ConsoleFormat, OpenDocument and
// PageSelector (execute() formats console output, reads a document and
// evaluates a page range).
PDFToolAbstractApplication::Options PDFToolInfoFonts::getOptionsFlags() const
{
    Options flags = ConsoleFormat;
    flags |= OpenDocument;
    flags |= PageSelector;
    return flags;
}
} // namespace pdftool