mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-04-10 08:31:09 +02:00
Image extraction tool (basics)
This commit is contained in:
parent
b6d29f0b03
commit
b5b96d3585
@ -71,7 +71,7 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
/// Process the contents of the page.
|
/// Process the contents of the page.
|
||||||
class PDFPageContentProcessor : public PDFRenderErrorReporter
|
class PDFFORQTLIBSHARED_EXPORT PDFPageContentProcessor : public PDFRenderErrorReporter
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit PDFPageContentProcessor(const PDFPage* page,
|
explicit PDFPageContentProcessor(const PDFPage* page,
|
||||||
|
@ -45,6 +45,7 @@ SOURCES += \
|
|||||||
pdftoolattachments.cpp \
|
pdftoolattachments.cpp \
|
||||||
pdftoolaudiobook.cpp \
|
pdftoolaudiobook.cpp \
|
||||||
pdftoolcolorprofiles.cpp \
|
pdftoolcolorprofiles.cpp \
|
||||||
|
pdftoolfetchimages.cpp \
|
||||||
pdftoolfetchtext.cpp \
|
pdftoolfetchtext.cpp \
|
||||||
pdftoolinfo.cpp \
|
pdftoolinfo.cpp \
|
||||||
pdftoolinfofonts.cpp \
|
pdftoolinfofonts.cpp \
|
||||||
@ -72,6 +73,7 @@ HEADERS += \
|
|||||||
pdftoolattachments.h \
|
pdftoolattachments.h \
|
||||||
pdftoolaudiobook.h \
|
pdftoolaudiobook.h \
|
||||||
pdftoolcolorprofiles.h \
|
pdftoolcolorprofiles.h \
|
||||||
|
pdftoolfetchimages.h \
|
||||||
pdftoolfetchtext.h \
|
pdftoolfetchtext.h \
|
||||||
pdftoolinfo.h \
|
pdftoolinfo.h \
|
||||||
pdftoolinfofonts.h \
|
pdftoolinfofonts.h \
|
||||||
|
191
PdfTool/pdftoolfetchimages.cpp
Normal file
191
PdfTool/pdftoolfetchimages.cpp
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
// Copyright (C) 2020 Jakub Melka
|
||||||
|
//
|
||||||
|
// This file is part of PdfForQt.
|
||||||
|
//
|
||||||
|
// PdfForQt is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// PdfForQt is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Lesser General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public License
|
||||||
|
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#include "pdftoolfetchimages.h"
|
||||||
|
#include "pdfpagecontentprocessor.h"
|
||||||
|
#include "pdfconstants.h"
|
||||||
|
#include "pdfexecutionpolicy.h"
|
||||||
|
|
||||||
|
namespace pdftool
|
||||||
|
{
|
||||||
|
|
||||||
|
// File-local instance of the fetch-images tool.
// NOTE(review): presumably the PDFToolAbstractApplication constructor registers
// this instance so the command can be dispatched - confirm against the base class.
static PDFToolFetchImages s_fetchImagesApplication;
|
||||||
|
|
||||||
|
class PDFImageContentExtractorProcessor : public pdf::PDFPageContentProcessor
|
||||||
|
{
|
||||||
|
using BaseClass = PDFPageContentProcessor;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit PDFImageContentExtractorProcessor(const pdf::PDFPage* page,
|
||||||
|
const pdf::PDFDocument* document,
|
||||||
|
const pdf::PDFFontCache* fontCache,
|
||||||
|
const pdf::PDFCMS* cms,
|
||||||
|
const pdf::PDFOptionalContentActivity* optionalContentActivity,
|
||||||
|
QMatrix pagePointToDevicePointMatrix,
|
||||||
|
const pdf::PDFMeshQualitySettings& meshQualitySettings,
|
||||||
|
pdf::PDFInteger pageIndex,
|
||||||
|
PDFToolFetchImages* tool) :
|
||||||
|
BaseClass(page, document, fontCache, cms, optionalContentActivity, pagePointToDevicePointMatrix, meshQualitySettings),
|
||||||
|
m_pageIndex(pageIndex),
|
||||||
|
m_tool(tool)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual bool isContentSuppressedByOC(pdf::PDFObjectReference ocgOrOcmd) override;
|
||||||
|
virtual bool isContentKindSuppressed(ContentKind kind) const override;
|
||||||
|
virtual void performImagePainting(const QImage& image) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
pdf::PDFInteger m_pageIndex;
|
||||||
|
PDFToolFetchImages* m_tool;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Reports whether content guarded by the given optional content group or
/// membership dictionary is suppressed. This processor never suppresses
/// content on that basis, so the reference is ignored.
bool PDFImageContentExtractorProcessor::isContentSuppressedByOC(pdf::PDFObjectReference ocgOrOcmd)
{
    Q_UNUSED(ocgOrOcmd);

    const bool isSuppressed = false;
    return isSuppressed;
}
|
||||||
|
|
||||||
|
/// Decides which kinds of page content are skipped during processing.
/// Shapes, text and shading are suppressed; images and tiling patterns
/// are processed. An unknown kind triggers an assertion and is treated
/// as not suppressed.
bool PDFImageContentExtractorProcessor::isContentKindSuppressed(ContentKind kind) const
{
    switch (kind)
    {
        case ContentKind::Images:
        case ContentKind::Tiling:
            // Tiling can have images
            return false;

        case ContentKind::Shapes:
        case ContentKind::Text:
        case ContentKind::Shading:
            return true;

        default:
            break;
    }

    // Only reached for a content kind this processor does not know about.
    Q_ASSERT(false);
    return false;
}
|
||||||
|
|
||||||
|
|
||||||
|
void PDFImageContentExtractorProcessor::performImagePainting(const QImage& image)
|
||||||
|
{
|
||||||
|
m_tool->onImageExtracted(m_pageIndex, image);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the text describing this tool for the requested purpose: the
/// command line command, the translated name or the translated description.
/// An unknown request triggers an assertion and yields an empty string.
QString PDFToolFetchImages::getStandardString(PDFToolAbstractApplication::StandardString standardString) const
{
    if (standardString == Command)
    {
        return "fetch-images";
    }

    if (standardString == Name)
    {
        return PDFToolTranslationContext::tr("Fetch images");
    }

    if (standardString == Description)
    {
        return PDFToolTranslationContext::tr("Fetch image content from document.");
    }

    // No other standard string is supported by this tool.
    Q_ASSERT(false);
    return QString();
}
|
||||||
|
|
||||||
|
int PDFToolFetchImages::execute(const PDFToolOptions& options)
|
||||||
|
{
|
||||||
|
pdf::PDFDocument document;
|
||||||
|
QByteArray sourceData;
|
||||||
|
if (!readDocument(options, document, &sourceData))
|
||||||
|
{
|
||||||
|
return ErrorDocumentReading;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!document.getStorage().getSecurityHandler()->isAllowed(pdf::PDFSecurityHandler::Permission::CopyContent))
|
||||||
|
{
|
||||||
|
PDFConsole::writeError(PDFToolTranslationContext::tr("Document doesn't allow to copy content."), options.outputCodec);
|
||||||
|
return ErrorPermissions;
|
||||||
|
}
|
||||||
|
|
||||||
|
QString parseError;
|
||||||
|
std::vector<pdf::PDFInteger> pageIndices = options.getPageRange(document.getCatalog()->getPageCount(), parseError, true);
|
||||||
|
|
||||||
|
if (!parseError.isEmpty())
|
||||||
|
{
|
||||||
|
PDFConsole::writeError(parseError, options.outputCodec);
|
||||||
|
return ErrorInvalidArguments;
|
||||||
|
}
|
||||||
|
|
||||||
|
QString errorMessage;
|
||||||
|
Options optionFlags = getOptionsFlags();
|
||||||
|
if (!options.imageExportSettings.validate(&errorMessage, false, optionFlags.testFlag(ImageExportSettingsFiles), optionFlags.testFlag(ImageExportSettingsResolution)))
|
||||||
|
{
|
||||||
|
PDFConsole::writeError(errorMessage, options.outputCodec);
|
||||||
|
return ErrorInvalidArguments;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We are ready to render the document
|
||||||
|
pdf::PDFOptionalContentActivity optionalContentActivity(&document, pdf::OCUsage::Export, nullptr);
|
||||||
|
pdf::PDFCMSManager cmsManager(nullptr);
|
||||||
|
cmsManager.setSettings(options.cmsSettings);
|
||||||
|
pdf::PDFCMSPointer cms = cmsManager.getCurrentCMS();
|
||||||
|
pdf::PDFMeshQualitySettings meshQualitySettings;
|
||||||
|
pdf::PDFFontCache fontCache(pdf::DEFAULT_FONT_CACHE_LIMIT, pdf::DEFAULT_REALIZED_FONT_CACHE_LIMIT);
|
||||||
|
pdf::PDFModifiedDocument md(&document, &optionalContentActivity);
|
||||||
|
fontCache.setDocument(md);
|
||||||
|
fontCache.setCacheShrinkEnabled(nullptr, false);
|
||||||
|
|
||||||
|
auto processPageContents = [&, this](pdf::PDFInteger pageIndex)
|
||||||
|
{
|
||||||
|
const pdf::PDFCatalog* catalog = document.getCatalog();
|
||||||
|
if (!catalog->getPage(pageIndex))
|
||||||
|
{
|
||||||
|
// Invalid page index
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const pdf::PDFPage* page = catalog->getPage(pageIndex);
|
||||||
|
Q_ASSERT(page);
|
||||||
|
|
||||||
|
PDFImageContentExtractorProcessor processor(page, &document, &fontCache, cms.data(), &optionalContentActivity,
|
||||||
|
QMatrix(), meshQualitySettings, pageIndex, this);
|
||||||
|
processor.processContents();
|
||||||
|
};
|
||||||
|
|
||||||
|
pdf::PDFExecutionPolicy::execute(pdf::PDFExecutionPolicy::Scope::Page, pageIndices.begin(), pageIndices.end(), processPageContents);
|
||||||
|
fontCache.setCacheShrinkEnabled(nullptr, true);
|
||||||
|
|
||||||
|
return ExitSuccess;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the set of command line option groups this tool uses.
PDFToolAbstractApplication::Options PDFToolFetchImages::getOptionsFlags() const
{
    return ConsoleFormat
         | OpenDocument
         | PageSelector
         | ImageWriterSettings
         | ImageExportSettingsFiles
         | ColorManagementSystem;
}
|
||||||
|
|
||||||
|
/// Callback invoked by the image extracting processor for every image found
/// while a page is processed.
/// \param pageIndex Index of the page on which the image was found
/// \param image Extracted image
void PDFToolFetchImages::onImageExtracted(pdf::PDFInteger pageIndex, const QImage& image)
{
    // Placeholder: the image is not stored or written anywhere yet. The
    // parameters are marked as unused so the stub compiles without warnings.
    Q_UNUSED(pageIndex);
    Q_UNUSED(image);
}
|
||||||
|
|
||||||
|
} // namespace pdftool
|
38
PdfTool/pdftoolfetchimages.h
Normal file
38
PdfTool/pdftoolfetchimages.h
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
// Copyright (C) 2020 Jakub Melka
|
||||||
|
//
|
||||||
|
// This file is part of PdfForQt.
|
||||||
|
//
|
||||||
|
// PdfForQt is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// PdfForQt is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Lesser General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public License
|
||||||
|
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#ifndef PDFTOOLFETCHIMAGES_H
|
||||||
|
#define PDFTOOLFETCHIMAGES_H
|
||||||
|
|
||||||
|
#include "pdftoolabstractapplication.h"
|
||||||
|
|
||||||
|
namespace pdftool
|
||||||
|
{
|
||||||
|
|
||||||
|
class PDFToolFetchImages : public PDFToolAbstractApplication
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual QString getStandardString(StandardString standardString) const override;
|
||||||
|
virtual int execute(const PDFToolOptions& options) override;
|
||||||
|
virtual Options getOptionsFlags() const override;
|
||||||
|
|
||||||
|
void onImageExtracted(pdf::PDFInteger pageIndex, const QImage& image);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace pdftool
|
||||||
|
|
||||||
|
#endif // PDFTOOLFETCHIMAGES_H
|
Loading…
x
Reference in New Issue
Block a user