Tool for computing object statistics

This commit is contained in:
Jakub Melka 2021-06-25 19:22:08 +02:00
parent f06db28b99
commit e89a26790e
3 changed files with 212 additions and 0 deletions

View File

@ -60,6 +60,7 @@ SOURCES += \
pdftooloptimize.cpp \
pdftoolrender.cpp \
pdftoolseparate.cpp \
pdftoolstatistics.cpp \
pdftoolunite.cpp \
pdftoolverifysignatures.cpp \
pdftoolxml.cpp
@ -94,6 +95,7 @@ HEADERS += \
pdftooloptimize.h \
pdftoolrender.h \
pdftoolseparate.h \
pdftoolstatistics.h \
pdftoolunite.h \
pdftoolverifysignatures.h \
pdftoolxml.h

View File

@ -0,0 +1,174 @@
// Copyright (C) 2021 Jakub Melka
//
// This file is part of Pdf4Qt.
//
// Pdf4Qt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// Pdf4Qt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Pdf4Qt. If not, see <https://www.gnu.org/licenses/>.
#include "pdftoolstatistics.h"
#include "pdfobjectutils.h"
namespace pdftool
{
// File-scope instance of the statistics tool; it is not referenced anywhere else in this file.
// NOTE(review): presumably constructing it registers the tool with the application
// framework via the PDFToolAbstractApplication base class - confirm against that class.
static PDFToolStatisticsApplication s_statisticsApplication;
/// Returns the text associated with the requested standard string of this tool:
/// the untranslated command keyword, the translated tool name, or the translated
/// tool description. Any other value triggers a debug assertion and yields an
/// empty string.
QString PDFToolStatisticsApplication::getStandardString(StandardString standardString) const
{
    if (standardString == Command)
    {
        // The command keyword is intentionally not passed through tr().
        return "statistics";
    }

    if (standardString == Name)
    {
        return PDFToolTranslationContext::tr("Statistics");
    }

    if (standardString == Description)
    {
        return PDFToolTranslationContext::tr("Compute statistics of internal objects used in a document.");
    }

    // Unhandled enumerator: assert in debug builds, fall back to an empty string otherwise.
    Q_ASSERT(false);
    return QString();
}
int PDFToolStatisticsApplication::execute(const PDFToolOptions& options)
{
pdf::PDFDocument document;
QByteArray sourceData;
if (!readDocument(options, document, &sourceData, false))
{
return ErrorDocumentReading;
}
pdf::PDFObjectClassifier classifier;
classifier.classify(&document);
pdf::PDFObjectClassifier::Statistics statistics = classifier.calculateStatistics(&document);
QLocale locale;
PDFOutputFormatter formatter(options.outputStyle, options.outputCodec);
formatter.beginDocument("info", PDFToolTranslationContext::tr("Information about document %1").arg(options.document));
formatter.endl();
{
formatter.beginTable("statistics-objects-by-class", PDFToolTranslationContext::tr("Statistics by Object Class"));
formatter.beginTableHeaderRow("header");
formatter.writeTableHeaderColumn("class", PDFToolTranslationContext::tr("Class"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("percentage", PDFToolTranslationContext::tr("Percentage [%]"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("count", PDFToolTranslationContext::tr("Count [#]"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("space-usage", PDFToolTranslationContext::tr("Space Usage [bytes]"), Qt::AlignLeft);
formatter.endTableHeaderRow();
qint64 totalBytesCount = 0;
for (const auto& item : statistics.statistics)
{
totalBytesCount += item.second.bytes;
}
auto addRow = [&](pdf::PDFObjectClassifier::Type type, QString classText)
{
auto it = statistics.statistics.find(type);
if (it == statistics.statistics.cend())
{
// Jakub Melka: no type found
return;
}
const pdf::PDFObjectClassifier::StatisticsItem& statisticsItem = it->second;
qreal percentage = qreal(100.0) * qreal(statisticsItem.bytes) / qreal(totalBytesCount);
formatter.beginTableRow("item", int(type));
formatter.writeTableColumn("class", classText);
formatter.writeTableColumn("percentage", locale.toString(percentage, 'f', 2), Qt::AlignRight);
formatter.writeTableColumn("count", locale.toString(statisticsItem.count), Qt::AlignRight);
formatter.writeTableColumn("byte-usage", locale.toString(statisticsItem.bytes), Qt::AlignRight);
formatter.endTableRow();
};
addRow(pdf::PDFObjectClassifier::Page, PDFToolTranslationContext::tr("Page"));
addRow(pdf::PDFObjectClassifier::ContentStream, PDFToolTranslationContext::tr("Content Stream"));
addRow(pdf::PDFObjectClassifier::GraphicState, PDFToolTranslationContext::tr("Graphic State"));
addRow(pdf::PDFObjectClassifier::ColorSpace, PDFToolTranslationContext::tr("Color Space"));
addRow(pdf::PDFObjectClassifier::Pattern, PDFToolTranslationContext::tr("Pattern"));
addRow(pdf::PDFObjectClassifier::Shading, PDFToolTranslationContext::tr("Shading"));
addRow(pdf::PDFObjectClassifier::Image, PDFToolTranslationContext::tr("Image"));
addRow(pdf::PDFObjectClassifier::Form, PDFToolTranslationContext::tr("Form"));
addRow(pdf::PDFObjectClassifier::Font, PDFToolTranslationContext::tr("Font"));
addRow(pdf::PDFObjectClassifier::Action, PDFToolTranslationContext::tr("Action"));
addRow(pdf::PDFObjectClassifier::Annotation, PDFToolTranslationContext::tr("Annotation"));
addRow(pdf::PDFObjectClassifier::None, PDFToolTranslationContext::tr("Other"));
formatter.endTable();
}
formatter.endl();
{
formatter.beginTable("statistics-objects-by-type", PDFToolTranslationContext::tr("Statistics by Object Type"));
formatter.beginTableHeaderRow("header");
formatter.writeTableHeaderColumn("class", PDFToolTranslationContext::tr("Type") , Qt::AlignLeft);
formatter.writeTableHeaderColumn("percentage", PDFToolTranslationContext::tr("Percentage [%]"), Qt::AlignLeft);
formatter.writeTableHeaderColumn("count", PDFToolTranslationContext::tr("Count [#]"), Qt::AlignLeft);
formatter.endTableHeaderRow();
qint64 totalObjectCount = 0;
for (pdf::PDFObject::Type type : pdf::PDFObject::getTypes())
{
const qint64 currentObjectCount = statistics.objectCountByType[size_t(type)];
totalObjectCount += currentObjectCount;
}
if (totalObjectCount > 0)
{
for (pdf::PDFObject::Type type : pdf::PDFObject::getTypes())
{
const qint64 currentObjectCount = statistics.objectCountByType[size_t(type)];
if (currentObjectCount == 0)
{
continue;
}
qreal percentage = qreal(100.0) * qreal(currentObjectCount) / qreal(totalObjectCount);
formatter.beginTableRow("item", int(type));
formatter.writeTableColumn("type", pdf::PDFObjectUtils::getObjectTypeName(type));
formatter.writeTableColumn("percentage", locale.toString(percentage, 'f', 2), Qt::AlignRight);
formatter.writeTableColumn("count", locale.toString(currentObjectCount), Qt::AlignRight);
formatter.endTableRow();
}
}
formatter.endTable();
}
formatter.endDocument();
PDFConsole::writeText(formatter.getString(), options.outputCodec);
return ExitSuccess;
}
/// Returns the option flags used by this tool: the ConsoleFormat and
/// OpenDocument flags combined.
PDFToolAbstractApplication::Options PDFToolStatisticsApplication::getOptionsFlags() const
{
    const Options requiredOptions = ConsoleFormat | OpenDocument;
    return requiredOptions;
}
} // namespace pdftool

View File

@ -0,0 +1,36 @@
// Copyright (C) 2021 Jakub Melka
//
// This file is part of Pdf4Qt.
//
// Pdf4Qt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// Pdf4Qt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Pdf4Qt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFTOOLSTATISTICS_H
#define PDFTOOLSTATISTICS_H
#include "pdftoolabstractapplication.h"
namespace pdftool
{
class PDFToolStatisticsApplication : public PDFToolAbstractApplication
{
public:
virtual QString getStandardString(StandardString standardString) const override;
virtual int execute(const PDFToolOptions& options) override;
virtual Options getOptionsFlags() const override;
};
} // namespace pdftool
#endif // PDFTOOLSTATISTICS_H