2021-04-30 20:12:10 +02:00
|
|
|
// Copyright (C) 2020-2021 Jakub Melka
|
2020-10-29 19:17:24 +01:00
|
|
|
//
|
2021-08-10 19:22:56 +02:00
|
|
|
// This file is part of PDF4QT.
|
2020-10-29 19:17:24 +01:00
|
|
|
//
|
2021-08-10 19:22:56 +02:00
|
|
|
// PDF4QT is free software: you can redistribute it and/or modify
|
2020-10-29 19:17:24 +01:00
|
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
2021-04-30 20:12:10 +02:00
|
|
|
// with the written consent of the copyright owner, any later version.
|
2020-10-29 19:17:24 +01:00
|
|
|
//
|
2021-08-10 19:22:56 +02:00
|
|
|
// PDF4QT is distributed in the hope that it will be useful,
|
2020-10-29 19:17:24 +01:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Lesser General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
2021-08-10 19:22:56 +02:00
|
|
|
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
2020-10-29 19:17:24 +01:00
|
|
|
|
|
|
|
#include "pdftoolfetchimages.h"
|
|
|
|
#include "pdfpagecontentprocessor.h"
|
|
|
|
#include "pdfconstants.h"
|
|
|
|
#include "pdfexecutionpolicy.h"
|
|
|
|
|
2020-10-30 18:10:01 +01:00
|
|
|
#include <QCryptographicHash>
|
|
|
|
|
2020-10-29 19:17:24 +01:00
|
|
|
namespace pdftool
|
|
|
|
{
|
|
|
|
|
|
|
|
// File-local instance of the "fetch-images" tool.
// NOTE(review): presumably the base class constructor registers this instance
// among the available tool applications - confirm in PDFToolAbstractApplication.
static PDFToolFetchImages s_fetchImagesApplication;
|
|
|
|
|
|
|
|
class PDFImageContentExtractorProcessor : public pdf::PDFPageContentProcessor
|
|
|
|
{
|
|
|
|
using BaseClass = PDFPageContentProcessor;
|
|
|
|
|
|
|
|
public:
|
|
|
|
explicit PDFImageContentExtractorProcessor(const pdf::PDFPage* page,
|
|
|
|
const pdf::PDFDocument* document,
|
|
|
|
const pdf::PDFFontCache* fontCache,
|
|
|
|
const pdf::PDFCMS* cms,
|
|
|
|
const pdf::PDFOptionalContentActivity* optionalContentActivity,
|
|
|
|
QMatrix pagePointToDevicePointMatrix,
|
|
|
|
const pdf::PDFMeshQualitySettings& meshQualitySettings,
|
|
|
|
pdf::PDFInteger pageIndex,
|
|
|
|
PDFToolFetchImages* tool) :
|
|
|
|
BaseClass(page, document, fontCache, cms, optionalContentActivity, pagePointToDevicePointMatrix, meshQualitySettings),
|
|
|
|
m_pageIndex(pageIndex),
|
2020-10-30 18:10:01 +01:00
|
|
|
m_order(0),
|
2020-10-29 19:17:24 +01:00
|
|
|
m_tool(tool)
|
|
|
|
{
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
virtual bool isContentSuppressedByOC(pdf::PDFObjectReference ocgOrOcmd) override;
|
|
|
|
virtual bool isContentKindSuppressed(ContentKind kind) const override;
|
|
|
|
virtual void performImagePainting(const QImage& image) override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
pdf::PDFInteger m_pageIndex;
|
2020-10-30 18:10:01 +01:00
|
|
|
pdf::PDFInteger m_order;
|
2020-10-29 19:17:24 +01:00
|
|
|
PDFToolFetchImages* m_tool;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Optional content never suppresses anything in this processor - the function
/// answers false for every optional content group / membership dictionary.
/// \param ocgOrOcmd Reference to the optional content object (not inspected)
/// \returns Always false
bool PDFImageContentExtractorProcessor::isContentSuppressedByOC(pdf::PDFObjectReference ocgOrOcmd)
{
    // The reference is intentionally not examined
    Q_UNUSED(ocgOrOcmd);

    return false;
}
|
|
|
|
|
|
|
|
/// Decides, which kinds of page content are skipped by this processor.
/// Shapes, text and shading are suppressed; images and tiling patterns
/// are processed (tiling patterns can contain images).
/// \param kind Kind of the content
/// \returns True, if the content of the given kind should be suppressed.
///          An unknown kind triggers an assertion and is not suppressed.
bool PDFImageContentExtractorProcessor::isContentKindSuppressed(ContentKind kind) const
{
    // Tiling can have images, so it must be processed as well as plain images
    const bool canCarryImages = (kind == ContentKind::Tiling) || (kind == ContentKind::Images);
    if (canCarryImages)
    {
        return false;
    }

    const bool isSuppressedKind = (kind == ContentKind::Shapes) ||
                                  (kind == ContentKind::Text) ||
                                  (kind == ContentKind::Shading);

    // Any other value means an unhandled content kind
    Q_ASSERT(isSuppressedKind);
    return isSuppressedKind;
}
|
|
|
|
|
|
|
|
|
|
|
|
void PDFImageContentExtractorProcessor::performImagePainting(const QImage& image)
|
|
|
|
{
|
2020-10-30 18:10:01 +01:00
|
|
|
m_tool->onImageExtracted(m_pageIndex, m_order++, image);
|
2020-10-29 19:17:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns one of the standard strings describing this tool.
/// \param standardString Requested string (command, name or description)
/// \returns Requested string. For an unhandled value, assertion is triggered
///          and an empty string is returned.
QString PDFToolFetchImages::getStandardString(PDFToolAbstractApplication::StandardString standardString) const
{
    QString text;

    switch (standardString)
    {
        case Command:
            text = "fetch-images";
            break;

        case Name:
            text = PDFToolTranslationContext::tr("Fetch images");
            break;

        case Description:
            text = PDFToolTranslationContext::tr("Fetch image content from document.");
            break;

        default:
            Q_ASSERT(false);
            break;
    }

    return text;
}
|
|
|
|
|
|
|
|
int PDFToolFetchImages::execute(const PDFToolOptions& options)
|
|
|
|
{
|
|
|
|
pdf::PDFDocument document;
|
|
|
|
QByteArray sourceData;
|
2021-05-11 18:46:33 +02:00
|
|
|
if (!readDocument(options, document, &sourceData, false))
|
2020-10-29 19:17:24 +01:00
|
|
|
{
|
|
|
|
return ErrorDocumentReading;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!document.getStorage().getSecurityHandler()->isAllowed(pdf::PDFSecurityHandler::Permission::CopyContent))
|
|
|
|
{
|
|
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("Document doesn't allow to copy content."), options.outputCodec);
|
|
|
|
return ErrorPermissions;
|
|
|
|
}
|
|
|
|
|
|
|
|
QString parseError;
|
|
|
|
std::vector<pdf::PDFInteger> pageIndices = options.getPageRange(document.getCatalog()->getPageCount(), parseError, true);
|
|
|
|
|
|
|
|
if (!parseError.isEmpty())
|
|
|
|
{
|
|
|
|
PDFConsole::writeError(parseError, options.outputCodec);
|
|
|
|
return ErrorInvalidArguments;
|
|
|
|
}
|
|
|
|
|
|
|
|
QString errorMessage;
|
|
|
|
Options optionFlags = getOptionsFlags();
|
|
|
|
if (!options.imageExportSettings.validate(&errorMessage, false, optionFlags.testFlag(ImageExportSettingsFiles), optionFlags.testFlag(ImageExportSettingsResolution)))
|
|
|
|
{
|
|
|
|
PDFConsole::writeError(errorMessage, options.outputCodec);
|
|
|
|
return ErrorInvalidArguments;
|
|
|
|
}
|
|
|
|
|
|
|
|
// We are ready to render the document
|
|
|
|
pdf::PDFOptionalContentActivity optionalContentActivity(&document, pdf::OCUsage::Export, nullptr);
|
|
|
|
pdf::PDFCMSManager cmsManager(nullptr);
|
2021-03-04 19:39:14 +01:00
|
|
|
cmsManager.setDocument(&document);
|
2020-10-29 19:17:24 +01:00
|
|
|
cmsManager.setSettings(options.cmsSettings);
|
|
|
|
pdf::PDFCMSPointer cms = cmsManager.getCurrentCMS();
|
|
|
|
pdf::PDFMeshQualitySettings meshQualitySettings;
|
|
|
|
pdf::PDFFontCache fontCache(pdf::DEFAULT_FONT_CACHE_LIMIT, pdf::DEFAULT_REALIZED_FONT_CACHE_LIMIT);
|
|
|
|
pdf::PDFModifiedDocument md(&document, &optionalContentActivity);
|
|
|
|
fontCache.setDocument(md);
|
|
|
|
fontCache.setCacheShrinkEnabled(nullptr, false);
|
|
|
|
|
|
|
|
auto processPageContents = [&, this](pdf::PDFInteger pageIndex)
|
|
|
|
{
|
|
|
|
const pdf::PDFCatalog* catalog = document.getCatalog();
|
|
|
|
if (!catalog->getPage(pageIndex))
|
|
|
|
{
|
|
|
|
// Invalid page index
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
const pdf::PDFPage* page = catalog->getPage(pageIndex);
|
|
|
|
Q_ASSERT(page);
|
|
|
|
|
|
|
|
PDFImageContentExtractorProcessor processor(page, &document, &fontCache, cms.data(), &optionalContentActivity,
|
|
|
|
QMatrix(), meshQualitySettings, pageIndex, this);
|
|
|
|
processor.processContents();
|
|
|
|
};
|
|
|
|
|
|
|
|
pdf::PDFExecutionPolicy::execute(pdf::PDFExecutionPolicy::Scope::Page, pageIndices.begin(), pageIndices.end(), processPageContents);
|
|
|
|
fontCache.setCacheShrinkEnabled(nullptr, true);
|
|
|
|
|
2020-10-30 18:10:01 +01:00
|
|
|
auto comparator = [](const Image& left, const Image& right) -> bool
|
|
|
|
{
|
|
|
|
return std::make_pair(left.pageIndex, left.order) < std::make_pair(right.pageIndex, right.order);
|
|
|
|
};
|
|
|
|
std::sort(m_images.begin(), m_images.end(), comparator);
|
|
|
|
|
|
|
|
// Write information about images
|
|
|
|
PDFOutputFormatter formatter(options.outputStyle, options.outputCodec);
|
|
|
|
formatter.beginDocument("images", PDFToolTranslationContext::tr("Images fetched from document %1").arg(options.document));
|
|
|
|
formatter.endl();
|
|
|
|
|
|
|
|
formatter.beginTable("overview", PDFToolTranslationContext::tr("Overview"));
|
|
|
|
|
|
|
|
formatter.beginTableHeaderRow("header");
|
|
|
|
formatter.writeTableHeaderColumn("item-no", PDFToolTranslationContext::tr("Image No."), Qt::AlignLeft);
|
|
|
|
formatter.writeTableHeaderColumn("page-no", PDFToolTranslationContext::tr("Page No."), Qt::AlignLeft);
|
|
|
|
formatter.writeTableHeaderColumn("width", PDFToolTranslationContext::tr("Width [pixels]"), Qt::AlignLeft);
|
|
|
|
formatter.writeTableHeaderColumn("height", PDFToolTranslationContext::tr("Height [pixels]"), Qt::AlignLeft);
|
|
|
|
formatter.writeTableHeaderColumn("size", PDFToolTranslationContext::tr("Size [bytes]"), Qt::AlignLeft);
|
|
|
|
formatter.writeTableHeaderColumn("stored-to", PDFToolTranslationContext::tr("Stored to"), Qt::AlignLeft);
|
|
|
|
formatter.endTableHeaderRow();
|
|
|
|
|
|
|
|
QLocale locale;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < m_images.size(); ++i)
|
|
|
|
{
|
|
|
|
Image& image = m_images[i];
|
|
|
|
image.fileName = options.imageExportSettings.getOutputFileName(pdf::PDFInteger(i), options.imageWriterSettings.getCurrentFormat());
|
|
|
|
|
|
|
|
formatter.beginTableRow("image", int(i));
|
|
|
|
|
|
|
|
formatter.writeTableColumn("item-no", locale.toString(i + 1), Qt::AlignRight);
|
|
|
|
formatter.writeTableColumn("page-no", locale.toString(image.pageIndex + 1), Qt::AlignRight);
|
|
|
|
formatter.writeTableColumn("width", locale.toString(image.image.width()), Qt::AlignRight);
|
|
|
|
formatter.writeTableColumn("height", locale.toString(image.image.height()), Qt::AlignRight);
|
|
|
|
formatter.writeTableColumn("size", locale.toString(image.image.byteCount()), Qt::AlignRight);
|
|
|
|
formatter.writeTableColumn("stored-to", image.fileName);
|
|
|
|
|
|
|
|
formatter.endTableRow();
|
|
|
|
}
|
|
|
|
|
|
|
|
formatter.endTable();
|
|
|
|
|
|
|
|
formatter.endDocument();
|
|
|
|
PDFConsole::writeText(formatter.getString(), options.outputCodec);
|
|
|
|
|
|
|
|
// Store images to the disk file
|
|
|
|
auto saveImage = [this, &options](size_t index)
|
|
|
|
{
|
|
|
|
Image& image = m_images[index];
|
|
|
|
|
|
|
|
QImageWriter imageWriter(image.fileName, options.imageWriterSettings.getCurrentFormat());
|
|
|
|
imageWriter.setSubType(options.imageWriterSettings.getCurrentSubtype());
|
|
|
|
imageWriter.setCompression(options.imageWriterSettings.getCompression());
|
|
|
|
imageWriter.setQuality(options.imageWriterSettings.getQuality());
|
|
|
|
imageWriter.setGamma(options.imageWriterSettings.getGamma());
|
|
|
|
imageWriter.setOptimizedWrite(options.imageWriterSettings.hasOptimizedWrite());
|
|
|
|
imageWriter.setProgressiveScanWrite(options.imageWriterSettings.hasProgressiveScanWrite());
|
|
|
|
|
|
|
|
if (!imageWriter.write(image.image))
|
|
|
|
{
|
|
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("Cannot write page image to file '%1', because: %2.").arg(image.fileName).arg(imageWriter.errorString()), options.outputCodec);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
auto imageRange = pdf::PDFIntegerRange<size_t>(0, m_images.size());
|
|
|
|
pdf::PDFExecutionPolicy::execute(pdf::PDFExecutionPolicy::Scope::Page, imageRange.begin(), imageRange.end(), saveImage);
|
|
|
|
|
2020-10-29 19:17:24 +01:00
|
|
|
return ExitSuccess;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the set of option groups used by this tool.
/// \returns Combination of option flags
PDFToolAbstractApplication::Options PDFToolFetchImages::getOptionsFlags() const
{
    return ConsoleFormat |
           OpenDocument |
           PageSelector |
           ImageWriterSettings |
           ImageExportSettingsFiles |
           ColorManagementSystem;
}
|
|
|
|
|
2020-10-30 18:10:01 +01:00
|
|
|
/// Registers an image extracted from a page. Images are de-duplicated by
/// a SHA-512 hash; if the same image is reported again from a page with
/// lower index, the stored page index and order are replaced with the new ones,
/// so each image is attributed to the lowest page index it was seen on.
/// Access to the image list is guarded by the mutex of this object.
/// \param pageIndex Index of the page, from which the image was extracted
/// \param order Order number of the image within the page
/// \param image Extracted image
void PDFToolFetchImages::onImageExtracted(pdf::PDFInteger pageIndex, pdf::PDFInteger order, const QImage& image)
{
    // The hash is computed before the mutex is locked
    QCryptographicHash hasher(QCryptographicHash::Sha512);

    // Hash also the geometry and pixel format, not only the raw pixel data. Otherwise
    // two different images with identical byte buffers (for example single-color images
    // of size 100 x 200 and 200 x 100) would be treated as one image.
    const int imageHeader[] = { image.width(), image.height(), static_cast<int>(image.format()) };
    hasher.addData(reinterpret_cast<const char*>(imageHeader), static_cast<int>(sizeof(imageHeader)));
    hasher.addData(reinterpret_cast<const char*>(image.bits()), image.byteCount());
    const QByteArray hash = hasher.result();

    QMutexLocker lock(&m_mutex);
    auto it = std::find_if(m_images.begin(), m_images.end(), [&hash](const Image& storedImage) { return storedImage.hash == hash; });
    if (it == m_images.end())
    {
        // Image seen for the first time
        Image imageStructure;
        imageStructure.hash = hash;
        imageStructure.pageIndex = pageIndex;
        imageStructure.order = order;
        imageStructure.image = image;
        m_images.emplace_back(qMove(imageStructure));
    }
    else
    {
        // Known image - keep the occurrence with the lowest page index
        Image& imageStructure = *it;
        if (imageStructure.pageIndex > pageIndex)
        {
            imageStructure.pageIndex = pageIndex;
            imageStructure.order = order;
        }
    }
}
|
|
|
|
|
|
|
|
} // namespace pdftool
|