mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2024-12-27 08:42:48 +01:00
Image extraction tool (basics)
This commit is contained in:
parent
b6d29f0b03
commit
b5b96d3585
@ -71,7 +71,7 @@ private:
|
||||
};
|
||||
|
||||
/// Process the contents of the page.
|
||||
class PDFPageContentProcessor : public PDFRenderErrorReporter
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFPageContentProcessor : public PDFRenderErrorReporter
|
||||
{
|
||||
public:
|
||||
explicit PDFPageContentProcessor(const PDFPage* page,
|
||||
|
@ -45,6 +45,7 @@ SOURCES += \
|
||||
pdftoolattachments.cpp \
|
||||
pdftoolaudiobook.cpp \
|
||||
pdftoolcolorprofiles.cpp \
|
||||
pdftoolfetchimages.cpp \
|
||||
pdftoolfetchtext.cpp \
|
||||
pdftoolinfo.cpp \
|
||||
pdftoolinfofonts.cpp \
|
||||
@ -72,6 +73,7 @@ HEADERS += \
|
||||
pdftoolattachments.h \
|
||||
pdftoolaudiobook.h \
|
||||
pdftoolcolorprofiles.h \
|
||||
pdftoolfetchimages.h \
|
||||
pdftoolfetchtext.h \
|
||||
pdftoolinfo.h \
|
||||
pdftoolinfofonts.h \
|
||||
|
191
PdfTool/pdftoolfetchimages.cpp
Normal file
191
PdfTool/pdftoolfetchimages.cpp
Normal file
@ -0,0 +1,191 @@
|
||||
// Copyright (C) 2020 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdftoolfetchimages.h"
|
||||
#include "pdfpagecontentprocessor.h"
|
||||
#include "pdfconstants.h"
|
||||
#include "pdfexecutionpolicy.h"
|
||||
|
||||
namespace pdftool
|
||||
{
|
||||
|
||||
// Single file-scope instance of the fetch-images tool.
// NOTE(review): presumably the PDFToolAbstractApplication base constructor
// registers the instance with the tool dispatcher - confirm against
// pdftoolabstractapplication.h.
static PDFToolFetchImages s_fetchImagesApplication;
|
||||
|
||||
class PDFImageContentExtractorProcessor : public pdf::PDFPageContentProcessor
|
||||
{
|
||||
using BaseClass = PDFPageContentProcessor;
|
||||
|
||||
public:
|
||||
explicit PDFImageContentExtractorProcessor(const pdf::PDFPage* page,
|
||||
const pdf::PDFDocument* document,
|
||||
const pdf::PDFFontCache* fontCache,
|
||||
const pdf::PDFCMS* cms,
|
||||
const pdf::PDFOptionalContentActivity* optionalContentActivity,
|
||||
QMatrix pagePointToDevicePointMatrix,
|
||||
const pdf::PDFMeshQualitySettings& meshQualitySettings,
|
||||
pdf::PDFInteger pageIndex,
|
||||
PDFToolFetchImages* tool) :
|
||||
BaseClass(page, document, fontCache, cms, optionalContentActivity, pagePointToDevicePointMatrix, meshQualitySettings),
|
||||
m_pageIndex(pageIndex),
|
||||
m_tool(tool)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual bool isContentSuppressedByOC(pdf::PDFObjectReference ocgOrOcmd) override;
|
||||
virtual bool isContentKindSuppressed(ContentKind kind) const override;
|
||||
virtual void performImagePainting(const QImage& image) override;
|
||||
|
||||
private:
|
||||
pdf::PDFInteger m_pageIndex;
|
||||
PDFToolFetchImages* m_tool;
|
||||
};
|
||||
|
||||
/// Reports whether content guarded by the given optional content group or
/// membership dictionary is suppressed. This processor ignores the reference
/// and never suppresses content on that basis.
bool PDFImageContentExtractorProcessor::isContentSuppressedByOC(pdf::PDFObjectReference ocgOrOcmd)
{
    // The reference is intentionally not inspected.
    Q_UNUSED(ocgOrOcmd);

    const bool suppressed = false;
    return suppressed;
}
|
||||
|
||||
/// Decides which kinds of page content are skipped during processing.
/// \param kind Content kind being queried
/// \returns true for shapes, text and shading; false for images and tiling
///          (and, after asserting in debug builds, for any other value)
bool PDFImageContentExtractorProcessor::isContentKindSuppressed(ContentKind kind) const
{
    // Content kinds which are not processed by the image extractor.
    const bool isSkippedKind = kind == ContentKind::Shapes ||
                               kind == ContentKind::Text ||
                               kind == ContentKind::Shading;
    if (isSkippedKind)
    {
        return true;
    }

    // Images are what we are after; tiling is kept as well, because
    // tiling can have images.
    const bool isKeptKind = kind == ContentKind::Tiling ||
                            kind == ContentKind::Images;
    if (!isKeptKind)
    {
        // Unhandled content kind
        Q_ASSERT(false);
    }

    return false;
}
|
||||
|
||||
|
||||
void PDFImageContentExtractorProcessor::performImagePainting(const QImage& image)
|
||||
{
|
||||
m_tool->onImageExtracted(m_pageIndex, image);
|
||||
}
|
||||
|
||||
/// Returns the texts identifying this tool.
/// \param standardString Which text is requested (Command, Name or Description)
/// \returns The requested text; an empty string (after asserting in debug
///          builds) for any other value
QString PDFToolFetchImages::getStandardString(PDFToolAbstractApplication::StandardString standardString) const
{
    if (standardString == Command)
    {
        // Command identifier, not translated
        return "fetch-images";
    }

    if (standardString == Name)
    {
        return PDFToolTranslationContext::tr("Fetch images");
    }

    if (standardString == Description)
    {
        return PDFToolTranslationContext::tr("Fetch image content from document.");
    }

    // Unknown standard string requested
    Q_ASSERT(false);
    return QString();
}
|
||||
|
||||
int PDFToolFetchImages::execute(const PDFToolOptions& options)
|
||||
{
|
||||
pdf::PDFDocument document;
|
||||
QByteArray sourceData;
|
||||
if (!readDocument(options, document, &sourceData))
|
||||
{
|
||||
return ErrorDocumentReading;
|
||||
}
|
||||
|
||||
if (!document.getStorage().getSecurityHandler()->isAllowed(pdf::PDFSecurityHandler::Permission::CopyContent))
|
||||
{
|
||||
PDFConsole::writeError(PDFToolTranslationContext::tr("Document doesn't allow to copy content."), options.outputCodec);
|
||||
return ErrorPermissions;
|
||||
}
|
||||
|
||||
QString parseError;
|
||||
std::vector<pdf::PDFInteger> pageIndices = options.getPageRange(document.getCatalog()->getPageCount(), parseError, true);
|
||||
|
||||
if (!parseError.isEmpty())
|
||||
{
|
||||
PDFConsole::writeError(parseError, options.outputCodec);
|
||||
return ErrorInvalidArguments;
|
||||
}
|
||||
|
||||
QString errorMessage;
|
||||
Options optionFlags = getOptionsFlags();
|
||||
if (!options.imageExportSettings.validate(&errorMessage, false, optionFlags.testFlag(ImageExportSettingsFiles), optionFlags.testFlag(ImageExportSettingsResolution)))
|
||||
{
|
||||
PDFConsole::writeError(errorMessage, options.outputCodec);
|
||||
return ErrorInvalidArguments;
|
||||
}
|
||||
|
||||
// We are ready to render the document
|
||||
pdf::PDFOptionalContentActivity optionalContentActivity(&document, pdf::OCUsage::Export, nullptr);
|
||||
pdf::PDFCMSManager cmsManager(nullptr);
|
||||
cmsManager.setSettings(options.cmsSettings);
|
||||
pdf::PDFCMSPointer cms = cmsManager.getCurrentCMS();
|
||||
pdf::PDFMeshQualitySettings meshQualitySettings;
|
||||
pdf::PDFFontCache fontCache(pdf::DEFAULT_FONT_CACHE_LIMIT, pdf::DEFAULT_REALIZED_FONT_CACHE_LIMIT);
|
||||
pdf::PDFModifiedDocument md(&document, &optionalContentActivity);
|
||||
fontCache.setDocument(md);
|
||||
fontCache.setCacheShrinkEnabled(nullptr, false);
|
||||
|
||||
auto processPageContents = [&, this](pdf::PDFInteger pageIndex)
|
||||
{
|
||||
const pdf::PDFCatalog* catalog = document.getCatalog();
|
||||
if (!catalog->getPage(pageIndex))
|
||||
{
|
||||
// Invalid page index
|
||||
return;
|
||||
}
|
||||
|
||||
const pdf::PDFPage* page = catalog->getPage(pageIndex);
|
||||
Q_ASSERT(page);
|
||||
|
||||
PDFImageContentExtractorProcessor processor(page, &document, &fontCache, cms.data(), &optionalContentActivity,
|
||||
QMatrix(), meshQualitySettings, pageIndex, this);
|
||||
processor.processContents();
|
||||
};
|
||||
|
||||
pdf::PDFExecutionPolicy::execute(pdf::PDFExecutionPolicy::Scope::Page, pageIndices.begin(), pageIndices.end(), processPageContents);
|
||||
fontCache.setCacheShrinkEnabled(nullptr, true);
|
||||
|
||||
return ExitSuccess;
|
||||
}
|
||||
|
||||
/// Returns the set of option groups this tool declares. Note that
/// ImageExportSettingsResolution is not part of the set.
PDFToolAbstractApplication::Options PDFToolFetchImages::getOptionsFlags() const
{
    Options flags = ConsoleFormat |
                    OpenDocument |
                    PageSelector |
                    ImageWriterSettings |
                    ImageExportSettingsFiles |
                    ColorManagementSystem;
    return flags;
}
|
||||
|
||||
/// Callback invoked by the image extracting content processor for each image
/// it encounters on a page. The function is currently a stub and does
/// nothing with the image.
/// \param pageIndex Index of the page on which the image was found
/// \param image Extracted image
void PDFToolFetchImages::onImageExtracted(pdf::PDFInteger pageIndex, const QImage& image)
{
    // Not implemented yet. The parameters are explicitly marked as used so
    // that the stub does not produce unused-parameter warnings.
    static_cast<void>(pageIndex);
    static_cast<void>(image);
}
|
||||
|
||||
} // namespace pdftool
|
38
PdfTool/pdftoolfetchimages.h
Normal file
38
PdfTool/pdftoolfetchimages.h
Normal file
@ -0,0 +1,38 @@
|
||||
// Copyright (C) 2020 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PdfForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef PDFTOOLFETCHIMAGES_H
|
||||
#define PDFTOOLFETCHIMAGES_H
|
||||
|
||||
#include "pdftoolabstractapplication.h"
|
||||
|
||||
namespace pdftool
|
||||
{
|
||||
|
||||
class PDFToolFetchImages : public PDFToolAbstractApplication
|
||||
{
|
||||
public:
|
||||
virtual QString getStandardString(StandardString standardString) const override;
|
||||
virtual int execute(const PDFToolOptions& options) override;
|
||||
virtual Options getOptionsFlags() const override;
|
||||
|
||||
void onImageExtracted(pdf::PDFInteger pageIndex, const QImage& image);
|
||||
};
|
||||
|
||||
} // namespace pdftool
|
||||
|
||||
#endif // PDFTOOLFETCHIMAGES_H
|
Loading…
Reference in New Issue
Block a user