Optimization (basic structures)

This commit is contained in:
Jakub Melka 2020-05-31 18:31:59 +02:00
parent 23a1275b9f
commit 680252b634
16 changed files with 960 additions and 183 deletions

View File

@ -51,6 +51,8 @@ SOURCES += \
sources/pdfjbig2decoder.cpp \
sources/pdfmultimedia.cpp \
sources/pdfobject.cpp \
sources/pdfobjectutils.cpp \
sources/pdfoptimizer.cpp \
sources/pdfoptionalcontent.cpp \
sources/pdfoutline.cpp \
sources/pdfpagetransition.cpp \
@ -102,6 +104,8 @@ HEADERS += \
sources/pdfmultimedia.h \
sources/pdfnametreeloader.h \
sources/pdfobject.h \
sources/pdfobjectutils.h \
sources/pdfoptimizer.h \
sources/pdfoptionalcontent.h \
sources/pdfoutline.h \
sources/pdfpagetransition.h \

View File

@ -461,6 +461,7 @@ public:
private:
friend class PDFDocumentReader;
friend class PDFDocumentBuilder;
friend class PDFOptimizer;
explicit PDFDocument(PDFObjectStorage&& storage, PDFVersion version) :
m_pdfObjectStorage(std::move(storage))

View File

@ -19,7 +19,7 @@
#include "pdfencoding.h"
#include "pdfconstants.h"
#include "pdfdocumentreader.h"
#include "pdfvisitor.h"
#include "pdfobjectutils.h"
#include <QBuffer>
#include <QPainter>
@ -28,155 +28,6 @@
namespace pdf
{
class PDFCollectReferencesVisitor : public PDFAbstractVisitor
{
public:
explicit PDFCollectReferencesVisitor(std::set<PDFObjectReference>& references) :
m_references(references)
{
}
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
virtual void visitReference(const PDFObjectReference reference) override;
private:
std::set<PDFObjectReference>& m_references;
};
void PDFCollectReferencesVisitor::visitArray(const PDFArray* array)
{
acceptArray(array);
}
void PDFCollectReferencesVisitor::visitDictionary(const PDFDictionary* dictionary)
{
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
dictionary->getValue(i).accept(this);
}
}
void PDFCollectReferencesVisitor::visitStream(const PDFStream* stream)
{
visitDictionary(stream->getDictionary());
}
void PDFCollectReferencesVisitor::visitReference(const PDFObjectReference reference)
{
m_references.insert(reference);
}
class PDFReplaceReferencesVisitor : public PDFAbstractVisitor
{
public:
explicit PDFReplaceReferencesVisitor(const std::map<PDFObjectReference, PDFObjectReference>& replacements) :
m_replacements(replacements)
{
m_objectStack.reserve(32);
}
virtual void visitNull() override;
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
virtual void visitReference(const PDFObjectReference reference) override;
PDFObject getObject();
private:
const std::map<PDFObjectReference, PDFObjectReference>& m_replacements;
std::vector<PDFObject> m_objectStack;
};
void PDFReplaceReferencesVisitor::visitNull()
{
m_objectStack.push_back(PDFObject::createNull());
}
void PDFReplaceReferencesVisitor::visitBool(bool value)
{
m_objectStack.push_back(PDFObject::createBool(value));
}
void PDFReplaceReferencesVisitor::visitInt(PDFInteger value)
{
m_objectStack.push_back(PDFObject::createInteger(value));
}
void PDFReplaceReferencesVisitor::visitReal(PDFReal value)
{
m_objectStack.push_back(PDFObject::createReal(value));
}
void PDFReplaceReferencesVisitor::visitString(PDFStringRef string)
{
m_objectStack.push_back(PDFObject::createString(string));
}
void PDFReplaceReferencesVisitor::visitName(PDFStringRef name)
{
m_objectStack.push_back(PDFObject::createName(name));
}
void PDFReplaceReferencesVisitor::visitArray(const PDFArray* array)
{
acceptArray(array);
// We have all objects on the stack
Q_ASSERT(array->getCount() <= m_objectStack.size());
auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount());
std::vector<PDFObject> objects(it, m_objectStack.cend());
PDFObject object = PDFObject::createArray(std::make_shared<PDFArray>(qMove(objects)));
m_objectStack.erase(it, m_objectStack.cend());
m_objectStack.push_back(object);
}
void PDFReplaceReferencesVisitor::visitDictionary(const PDFDictionary* dictionary)
{
Q_ASSERT(dictionary);
std::vector<PDFDictionary::DictionaryEntry> entries;
entries.reserve(dictionary->getCount());
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
dictionary->getValue(i).accept(this);
entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
m_objectStack.pop_back();
}
m_objectStack.push_back(PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(entries))));
}
void PDFReplaceReferencesVisitor::visitStream(const PDFStream* stream)
{
// Replace references in the dictionary
visitDictionary(stream->getDictionary());
PDFObject dictionaryObject = m_objectStack.back();
m_objectStack.pop_back();
m_objectStack.push_back(PDFObject::createStream(std::make_shared<PDFStream>(PDFDictionary(*dictionaryObject.getDictionary()), QByteArray(*stream->getContent()))));
}
void PDFReplaceReferencesVisitor::visitReference(const PDFObjectReference reference)
{
m_objectStack.push_back(PDFObject::createReference(m_replacements.at(reference)));
}
PDFObject PDFReplaceReferencesVisitor::getObject()
{
Q_ASSERT(m_objectStack.size() == 1);
return qMove(m_objectStack.back());
}
void PDFObjectFactory::beginArray()
{
m_items.emplace_back(ItemType::Array, PDFArray());
@ -1015,32 +866,7 @@ std::vector<PDFObject> PDFDocumentBuilder::copyFrom(const std::vector<PDFObject>
{
// 1) Collect all references, which we must copy. If object is referenced, then
// we must also collect references of referenced object.
std::set<PDFObjectReference> references;
{
PDFCollectReferencesVisitor collectReferencesVisitor(references);
for (const PDFObject& object : objects)
{
object.accept(&collectReferencesVisitor);
}
}
// Iterative algorihm, which adds additional references from referenced objects.
// If new reference is added, then we must also check, that all referenced objects
// from this object are added.
std::set<PDFObjectReference> workSet = references;
while (!workSet.empty())
{
std::set<PDFObjectReference> addedReferences;
PDFCollectReferencesVisitor collectReferencesVisitor(addedReferences);
for (const PDFObjectReference& objectReference : workSet)
{
storage.getObject(objectReference).accept(&collectReferencesVisitor);
}
workSet.clear();
std::set_difference(addedReferences.cbegin(), addedReferences.cend(), references.cbegin(), references.cend(), std::inserter(workSet, workSet.cend()));
references.merge(addedReferences);
}
std::set<PDFObjectReference> references = PDFObjectUtils::getReferences(objects, storage);
// 2) Make room for new objects, together with mapping
std::map<PDFObjectReference, PDFObjectReference> referenceMapping;
@ -1053,9 +879,7 @@ std::vector<PDFObject> PDFDocumentBuilder::copyFrom(const std::vector<PDFObject>
for (const PDFObjectReference& sourceReference : references)
{
const PDFObjectReference targetReference = referenceMapping.at(sourceReference);
PDFReplaceReferencesVisitor replaceReferencesVisitor(referenceMapping);
storage.getObject(sourceReference).accept(&replaceReferencesVisitor);
m_storage.setObject(targetReference, replaceReferencesVisitor.getObject());
m_storage.setObject(targetReference, PDFObjectUtils::replaceReferences(storage.getObject(sourceReference), referenceMapping));
}
std::vector<PDFObject> result;
@ -1069,16 +893,15 @@ std::vector<PDFObject> PDFDocumentBuilder::copyFrom(const std::vector<PDFObject>
}
else
{
PDFReplaceReferencesVisitor replaceReferencesVisitor(referenceMapping);
object.accept(&replaceReferencesVisitor);
PDFObject replacedObject = PDFObjectUtils::replaceReferences(object, referenceMapping);
if (createReferences)
{
result.push_back(PDFObject::createReference(addObject(replaceReferencesVisitor.getObject())));
result.push_back(PDFObject::createReference(addObject(qMove(replacedObject))));
}
else
{
result.push_back(replaceReferencesVisitor.getObject());
result.emplace_back(qMove(replacedObject));
}
}
}

View File

@ -297,4 +297,157 @@ void PDFDocumentWriter::writeObjectFooter(QIODevice* device)
writeCRLF(device);
}
class PDFSizeCounterIODevice : public QIODevice
{
public:
explicit PDFSizeCounterIODevice(QObject* parent) :
QIODevice(parent)
{
}
virtual bool isSequential() const override;
virtual bool open(OpenMode mode) override;
virtual void close() override;
virtual qint64 pos() const override;
virtual qint64 size() const override;
virtual bool seek(qint64 pos) override;
virtual bool atEnd() const override;
virtual bool reset() override;
virtual qint64 bytesAvailable() const override;
virtual qint64 bytesToWrite() const override;
virtual bool canReadLine() const override;
virtual bool waitForReadyRead(int msecs) override;
virtual bool waitForBytesWritten(int msecs) override;
protected:
virtual qint64 readData(char* data, qint64 maxlen) override;
virtual qint64 readLineData(char* data, qint64 maxlen) override;
virtual qint64 writeData(const char* data, qint64 len) override;
private:
OpenMode m_openMode = NotOpen;
qint64 m_fileSize = 0;
};
/// Reports the device as sequential; always returns true.
bool PDFSizeCounterIODevice::isSequential() const
{
return true;
}
/// Opens the device in the given mode.
/// \param mode Requested open mode
/// \returns true, if the device was closed and has been opened now,
///          false, if the device is already open (its mode is left unchanged)
bool PDFSizeCounterIODevice::open(OpenMode mode)
{
    // Ask the base class for the real state. The m_openMode member is never
    // assigned, so testing it would let every call succeed, even on a device
    // which is already open.
    if (isOpen())
    {
        return false;
    }

    setOpenMode(mode);
    return true;
}
/// Closes the device by switching its open mode back to NotOpen.
/// The accumulated byte count is not modified here.
void PDFSizeCounterIODevice::close()
{
setOpenMode(NotOpen);
}
/// Returns current position, which is the number of bytes written so far.
qint64 PDFSizeCounterIODevice::pos() const
{
return m_fileSize;
}
/// Returns size of the device, which is the number of bytes written so far.
qint64 PDFSizeCounterIODevice::size() const
{
return m_fileSize;
}
/// Seeking is not supported; always returns false.
/// \param pos Requested position (ignored)
bool PDFSizeCounterIODevice::seek(qint64 pos)
{
Q_UNUSED(pos);
return false;
}
/// Always returns true.
bool PDFSizeCounterIODevice::atEnd() const
{
return true;
}
/// Resetting is not supported; always returns false.
bool PDFSizeCounterIODevice::reset()
{
return false;
}
/// Always returns 0 (no bytes are reported as available for reading).
qint64 PDFSizeCounterIODevice::bytesAvailable() const
{
return 0;
}
/// Always returns 0 (no bytes are reported as waiting to be written).
qint64 PDFSizeCounterIODevice::bytesToWrite() const
{
return 0;
}
/// Always returns false.
bool PDFSizeCounterIODevice::canReadLine() const
{
return false;
}
/// Does not wait; always returns false.
/// \param msecs Timeout in milliseconds (ignored)
bool PDFSizeCounterIODevice::waitForReadyRead(int msecs)
{
Q_UNUSED(msecs);
return false;
}
/// Does not wait; always returns false.
/// \param msecs Timeout in milliseconds (ignored)
bool PDFSizeCounterIODevice::waitForBytesWritten(int msecs)
{
Q_UNUSED(msecs);
return false;
}
/// Reads nothing; always returns 0.
/// \param data Target buffer (ignored)
/// \param maxlen Maximal number of bytes to read (ignored)
qint64 PDFSizeCounterIODevice::readData(char* data, qint64 maxlen)
{
Q_UNUSED(data);
Q_UNUSED(maxlen);
return 0;
}
/// Reads nothing; always returns 0.
/// \param data Target buffer (ignored)
/// \param maxlen Maximal number of bytes to read (ignored)
qint64 PDFSizeCounterIODevice::readLineData(char* data, qint64 maxlen)
{
Q_UNUSED(data);
Q_UNUSED(maxlen);
return 0;
}
/// Counts the written bytes. The data themselves are discarded, only
/// their length is added to the running total.
/// \param data Data to be written (ignored)
/// \param len Number of bytes to be written
/// \returns \p len, so the whole write is reported as accepted
qint64 PDFSizeCounterIODevice::writeData(const char* data, qint64 len)
{
Q_UNUSED(data);
m_fileSize += len;
return len;
}
/// Computes the size of the document as it would be written by the writer.
/// The document is written into a byte counting device, so nothing is stored.
/// \param document Document
/// \returns Number of bytes written, or -1, if the write fails
qint64 PDFDocumentWriter::getDocumentFileSize(const PDFDocument* document)
{
    PDFSizeCounterIODevice countingDevice(nullptr);
    PDFDocumentWriter documentWriter(nullptr);

    countingDevice.open(QIODevice::WriteOnly);
    const bool isWritten = static_cast<bool>(documentWriter.write(&countingDevice, document));
    countingDevice.close();

    // Counted size stays readable after the device is closed
    return isWritten ? countingDevice.pos() : -1;
}
} // namespace pdf

View File

@ -43,6 +43,13 @@ public:
PDFOperationResult write(const QString& fileName, const PDFDocument* document);
PDFOperationResult write(QIODevice* device, const PDFDocument* document);
/// Calculates document file size, as if it is written to the disk.
/// No file is accessed by this function; document is written
/// to fake stream, which counts operations. If error occurs, and
/// size can't be determined, then -1 is returned.
/// \param document Document
static qint64 getDocumentFileSize(const PDFDocument* document);
private:
void writeCRLF(QIODevice* device);
void writeObjectHeader(QIODevice* device, PDFObjectReference reference);

View File

@ -0,0 +1,210 @@
// Copyright (C) 2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdfobjectutils.h"
#include "pdfvisitor.h"
namespace pdf
{
class PDFCollectReferencesVisitor : public PDFAbstractVisitor
{
public:
explicit PDFCollectReferencesVisitor(std::set<PDFObjectReference>& references) :
m_references(references)
{
}
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
virtual void visitReference(const PDFObjectReference reference) override;
private:
std::set<PDFObjectReference>& m_references;
};
/// Delegates to acceptArray (presumably visits each item of the array, so
/// references stored in it are collected - confirm in PDFAbstractVisitor).
/// \param array Visited array
void PDFCollectReferencesVisitor::visitArray(const PDFArray* array)
{
acceptArray(array);
}
/// Delegates to acceptDictionary (presumably visits each value of the
/// dictionary - confirm in PDFAbstractVisitor).
/// \param dictionary Visited dictionary
void PDFCollectReferencesVisitor::visitDictionary(const PDFDictionary* dictionary)
{
acceptDictionary(dictionary);
}
/// Delegates to acceptStream (presumably visits the stream's dictionary -
/// confirm in PDFAbstractVisitor).
/// \param stream Visited stream
void PDFCollectReferencesVisitor::visitStream(const PDFStream* stream)
{
acceptStream(stream);
}
/// Inserts the visited reference into the result set.
/// \param reference Visited reference
void PDFCollectReferencesVisitor::visitReference(const PDFObjectReference reference)
{
m_references.insert(reference);
}
class PDFReplaceReferencesVisitor : public PDFAbstractVisitor
{
public:
explicit PDFReplaceReferencesVisitor(const std::map<PDFObjectReference, PDFObjectReference>& replacements) :
m_replacements(replacements)
{
m_objectStack.reserve(32);
}
virtual void visitNull() override;
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
virtual void visitReference(const PDFObjectReference reference) override;
PDFObject getObject();
private:
const std::map<PDFObjectReference, PDFObjectReference>& m_replacements;
std::vector<PDFObject> m_objectStack;
};
/// Pushes a null object onto the object stack.
void PDFReplaceReferencesVisitor::visitNull()
{
m_objectStack.push_back(PDFObject::createNull());
}
/// Pushes a boolean object with the given value onto the object stack.
/// \param value Visited value
void PDFReplaceReferencesVisitor::visitBool(bool value)
{
m_objectStack.push_back(PDFObject::createBool(value));
}
/// Pushes an integer object with the given value onto the object stack.
/// \param value Visited value
void PDFReplaceReferencesVisitor::visitInt(PDFInteger value)
{
m_objectStack.push_back(PDFObject::createInteger(value));
}
/// Pushes a real number object with the given value onto the object stack.
/// \param value Visited value
void PDFReplaceReferencesVisitor::visitReal(PDFReal value)
{
m_objectStack.push_back(PDFObject::createReal(value));
}
/// Pushes a string object created from the given string onto the object stack.
/// \param string Visited string
void PDFReplaceReferencesVisitor::visitString(PDFStringRef string)
{
m_objectStack.push_back(PDFObject::createString(string));
}
/// Pushes a name object created from the given name onto the object stack.
/// \param name Visited name
void PDFReplaceReferencesVisitor::visitName(PDFStringRef name)
{
m_objectStack.push_back(PDFObject::createName(name));
}
void PDFReplaceReferencesVisitor::visitArray(const PDFArray* array)
{
acceptArray(array);
// We have all objects on the stack
Q_ASSERT(array->getCount() <= m_objectStack.size());
auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount());
std::vector<PDFObject> objects(it, m_objectStack.cend());
PDFObject object = PDFObject::createArray(std::make_shared<PDFArray>(qMove(objects)));
m_objectStack.erase(it, m_objectStack.cend());
m_objectStack.push_back(object);
}
/// Rebuilds the dictionary. Each value is visited, its rebuilt version is
/// taken from the top of the object stack and paired with the original key.
/// The resulting dictionary object is pushed onto the object stack.
/// \param dictionary Visited dictionary (must not be null)
void PDFReplaceReferencesVisitor::visitDictionary(const PDFDictionary* dictionary)
{
    Q_ASSERT(dictionary);

    const size_t entryCount = dictionary->getCount();

    std::vector<PDFDictionary::DictionaryEntry> entries;
    entries.reserve(entryCount);

    for (size_t i = 0; i < entryCount; ++i)
    {
        dictionary->getValue(i).accept(this);
        Q_ASSERT(!m_objectStack.empty());

        // The rebuilt value is popped right away, so move it instead of copying
        entries.emplace_back(dictionary->getKey(i), qMove(m_objectStack.back()));
        m_objectStack.pop_back();
    }

    m_objectStack.push_back(PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(entries))));
}
void PDFReplaceReferencesVisitor::visitStream(const PDFStream* stream)
{
// Replace references in the dictionary
visitDictionary(stream->getDictionary());
PDFObject dictionaryObject = m_objectStack.back();
m_objectStack.pop_back();
m_objectStack.push_back(PDFObject::createStream(std::make_shared<PDFStream>(PDFDictionary(*dictionaryObject.getDictionary()), QByteArray(*stream->getContent()))));
}
/// Pushes a reference object onto the object stack. If the replacement map
/// contains the visited reference, then the mapped reference is used. If it
/// does not, then the original reference is preserved (instead of throwing
/// std::out_of_range from the map lookup), so the visitor can be used also
/// with a mapping covering only some of the references.
/// \param reference Visited reference
void PDFReplaceReferencesVisitor::visitReference(const PDFObjectReference reference)
{
    auto it = m_replacements.find(reference);
    if (it != m_replacements.cend())
    {
        // Replace the reference
        m_objectStack.push_back(PDFObject::createReference(it->second));
    }
    else
    {
        // No replacement defined, keep the old reference
        m_objectStack.push_back(PDFObject::createReference(reference));
    }
}
/// Returns the rebuilt object. Exactly one object is expected on the object
/// stack (checked by assertion). The object is moved out of the stack, but
/// the moved-from element is not popped.
PDFObject PDFReplaceReferencesVisitor::getObject()
{
Q_ASSERT(m_objectStack.size() == 1);
return qMove(m_objectStack.back());
}
std::set<PDFObjectReference> PDFObjectUtils::getReferences(const std::vector<PDFObject>& objects, const PDFObjectStorage& storage)
{
std::set<PDFObjectReference> references;
{
PDFCollectReferencesVisitor collectReferencesVisitor(references);
for (const PDFObject& object : objects)
{
object.accept(&collectReferencesVisitor);
}
}
// Iterative algorihm, which adds additional references from referenced objects.
// If new reference is added, then we must also check, that all referenced objects
// from this object are added.
std::set<PDFObjectReference> workSet = references;
while (!workSet.empty())
{
std::set<PDFObjectReference> addedReferences;
PDFCollectReferencesVisitor collectReferencesVisitor(addedReferences);
for (const PDFObjectReference& objectReference : workSet)
{
storage.getObject(objectReference).accept(&collectReferencesVisitor);
}
workSet.clear();
std::set_difference(addedReferences.cbegin(), addedReferences.cend(), references.cbegin(), references.cend(), std::inserter(workSet, workSet.cend()));
references.merge(addedReferences);
}
return references;
}
/// Creates a copy of \p object, in which object references are translated
/// using \p referenceMapping.
/// \param object Object to be rebuilt
/// \param referenceMapping Map from old reference to new reference
PDFObject PDFObjectUtils::replaceReferences(const PDFObject& object, const std::map<PDFObjectReference, PDFObjectReference>& referenceMapping)
{
    PDFReplaceReferencesVisitor visitor(referenceMapping);
    object.accept(&visitor);

    // The visitor leaves the rebuilt root object as its result
    return visitor.getObject();
}
} // namespace pdf

View File

@ -0,0 +1,48 @@
// Copyright (C) 2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFOBJECTUTILS_H
#define PDFOBJECTUTILS_H
#include "pdfobject.h"
#include <set>
namespace pdf
{
class PDFObjectStorage;
/// Utilities for manipulating objects. The class has only static functions
/// and cannot be instantiated (its constructor is deleted).
class PDFObjectUtils
{
public:
/// Returns the set of references referenced by \p objects. All references present
/// in the objects appear in the result set, including references found in objects,
/// which are referenced by referenced objects (so, the transitive closure over
/// the reference graph is returned).
/// \param objects Objects
/// \param storage Storage
static std::set<PDFObjectReference> getReferences(const std::vector<PDFObject>& objects, const PDFObjectStorage& storage);
/// Returns a copy of \p object, in which object references are replaced
/// according to \p referenceMapping.
/// \param object Object
/// \param referenceMapping Mapping from old references to new references
static PDFObject replaceReferences(const PDFObject& object, const std::map<PDFObjectReference, PDFObjectReference>& referenceMapping);
private:
PDFObjectUtils() = delete;
};
} // namespace pdf
#endif // PDFOBJECTUTILS_H

View File

@ -0,0 +1,125 @@
// Copyright (C) 2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdfoptimizer.h"
namespace pdf
{
/// Constructs the optimizer.
/// \param flags Optimization steps, which should be performed
/// \param parent Parent object
PDFOptimizer::PDFOptimizer(OptimizationFlags flags, QObject* parent) :
QObject(parent),
m_flags(flags)
{
}
void PDFOptimizer::optimize()
{
// Jakub Melka: We divide optimization into stages, each
// stage can consist from multiple passes.
constexpr auto stages = { OptimizationFlags(DereferenceSimpleObjects),
OptimizationFlags(RemoveNullObjects),
OptimizationFlags(RemoveUnusedObjects | MergeIdenticalObjects),
OptimizationFlags(ShrinkObjectStorage),
OptimizationFlags(RecompressFlateStreams) };
int stage = 1;
emit optimizationStarted();
for (OptimizationFlags flags : stages)
{
emit optimizationProgress(tr("Stage %1").arg(stage++));
OptimizationFlags currentSteps = flags & m_flags;
int passIndex = 1;
bool pass = true;
while (pass)
{
emit optimizationProgress(tr("Pass %1").arg(passIndex++));
pass = false;
if (currentSteps.testFlag(DereferenceSimpleObjects))
{
pass = performDereferenceSimpleObjects() || pass;
}
if (currentSteps.testFlag(RemoveNullObjects))
{
pass = performRemoveNullObjects() || pass;
}
if (currentSteps.testFlag(RemoveUnusedObjects))
{
pass = performRemoveUnusedObjects() || pass;
}
if (currentSteps.testFlag(MergeIdenticalObjects))
{
pass = performMergeIdenticalObjects() || pass;
}
if (currentSteps.testFlag(ShrinkObjectStorage))
{
pass = performShrinkObjectStorage() || pass;
}
if (currentSteps.testFlag(RecompressFlateStreams))
{
pass = performRecompressFlateStreams() || pass;
}
}
}
emit optimizationFinished();
}
/// Returns currently set optimization flags.
PDFOptimizer::OptimizationFlags PDFOptimizer::getFlags() const
{
return m_flags;
}
/// Sets optimization flags, replacing the previous ones.
/// \param flags New optimization flags
void PDFOptimizer::setFlags(OptimizationFlags flags)
{
m_flags = flags;
}
/// Step for flag DereferenceSimpleObjects. Currently a stub, which does
/// nothing and returns false; optimize() repeats the pass, if a step returns true.
bool PDFOptimizer::performDereferenceSimpleObjects()
{
return false;
}
/// Step for flag RemoveNullObjects. Currently a stub, which does
/// nothing and returns false; optimize() repeats the pass, if a step returns true.
bool PDFOptimizer::performRemoveNullObjects()
{
return false;
}
/// Step for flag RemoveUnusedObjects. Currently a stub, which does
/// nothing and returns false; optimize() repeats the pass, if a step returns true.
bool PDFOptimizer::performRemoveUnusedObjects()
{
return false;
}
/// Step for flag MergeIdenticalObjects. Currently a stub, which does
/// nothing and returns false; optimize() repeats the pass, if a step returns true.
bool PDFOptimizer::performMergeIdenticalObjects()
{
return false;
}
/// Step for flag ShrinkObjectStorage. Currently a stub, which does
/// nothing and returns false; optimize() repeats the pass, if a step returns true.
bool PDFOptimizer::performShrinkObjectStorage()
{
return false;
}
/// Step for flag RecompressFlateStreams. Currently a stub, which does
/// nothing and returns false; optimize() repeats the pass, if a step returns true.
bool PDFOptimizer::performRecompressFlateStreams()
{
return false;
}
} // namespace pdf

View File

@ -0,0 +1,97 @@
// Copyright (C) 2020 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFOPTIMIZER_H
#define PDFOPTIMIZER_H
#include "pdfdocument.h"
#include <QObject>
namespace pdf
{
/// Class for optimizing documents. It can examine the object structure and its dependencies,
/// remove unused objects, merge identical objects, or even recompress some streams
/// to achieve a better optimization ratio. Optimization is configurable; the user can specify
/// which optimization steps should be performed and which not.
class PDFFORQTLIBSHARED_EXPORT PDFOptimizer : public QObject
{
Q_OBJECT
public:
enum OptimizationFlag
{
None = 0x0000, ///< No optimization is performed
DereferenceSimpleObjects = 0x0001, ///< If simple objects are referenced (such as integers, bools, etc.), then they are dereferenced
RemoveNullObjects = 0x0002, ///< Remove null objects from dictionary entries
RemoveUnusedObjects = 0x0004, ///< Remove not referenced objects
MergeIdenticalObjects = 0x0008, ///< Merge identical objects
ShrinkObjectStorage = 0x0010, ///< Shrink object storage, so unused objects are filled with used (and generation number increased)
RecompressFlateStreams = 0x0020, ///< Flate streams are recompressed with maximal compression
All = 0xFFFF, ///< All optimizations turned on
};
Q_DECLARE_FLAGS(OptimizationFlags, OptimizationFlag)
/// Constructs the optimizer.
/// \param flags Optimization steps, which should be performed
/// \param parent Parent object
explicit PDFOptimizer(OptimizationFlags flags, QObject* parent);
/// Sets the document, which should be optimized. Storage of the document
/// is copied into the optimizer.
/// \param document Document to optimize (must not be null, it is dereferenced)
void setDocument(const PDFDocument* document) { setStorage(document->getStorage()); }
/// Sets storage directly (storage must be valid and filled with objects).
/// The storage is copied.
/// \param storage Storage
void setStorage(const PDFObjectStorage& storage) { m_storage = storage; }
/// Performs document optimization. During the optimization process, signals
/// are emitted to report progress.
void optimize();
/// Returns object storage used for optimization
const PDFObjectStorage& getStorage() const { return m_storage; }
/// Returns object storage by move semantics; storage kept in the optimizer is moved from
PDFObjectStorage takeStorage() { return qMove(m_storage); }
/// Returns optimized document, constructed from the storage with version
/// PDFVersion(2, 0). Storage kept in the optimizer is moved from by
/// this function call.
PDFDocument takeOptimizedDocument() { return PDFDocument(qMove(m_storage), PDFVersion(2, 0)); }
/// Returns currently set optimization flags
OptimizationFlags getFlags() const;
/// Sets optimization flags
/// \param flags New optimization flags
void setFlags(OptimizationFlags flags);
signals:
/// Emitted by optimize() before the first stage
void optimizationStarted();
/// Emitted by optimize() at the start of each stage and each pass
/// \param progressText Text describing the progress
void optimizationProgress(QString progressText);
/// Emitted by optimize() after the last stage
void optimizationFinished();
private:
// Optimization steps, one for each flag. optimize() repeats the pass
// of the current stage, if any executed step returns true.
bool performDereferenceSimpleObjects();
bool performRemoveNullObjects();
bool performRemoveUnusedObjects();
bool performMergeIdenticalObjects();
bool performShrinkObjectStorage();
bool performRecompressFlateStreams();
OptimizationFlags m_flags;
PDFObjectStorage m_storage;
};
} // namespace pdf
#endif // PDFOPTIMIZER_H

View File

@ -37,6 +37,7 @@ SOURCES += \
pdfaboutdialog.cpp \
pdfadvancedfindwidget.cpp \
pdfdocumentpropertiesdialog.cpp \
pdfoptimizedocumentdialog.cpp \
pdfrecentfilemanager.cpp \
pdfrendertoimagesdialog.cpp \
pdfsendmail.cpp \
@ -51,6 +52,7 @@ HEADERS += \
pdfaboutdialog.h \
pdfadvancedfindwidget.h \
pdfdocumentpropertiesdialog.h \
pdfoptimizedocumentdialog.h \
pdfrecentfilemanager.h \
pdfrendertoimagesdialog.h \
pdfsendmail.h \
@ -65,6 +67,7 @@ FORMS += \
pdfaboutdialog.ui \
pdfadvancedfindwidget.ui \
pdfdocumentpropertiesdialog.ui \
pdfoptimizedocumentdialog.ui \
pdfrendertoimagesdialog.ui \
pdfsidebarwidget.ui \
pdfviewermainwindow.ui \

View File

@ -0,0 +1,144 @@
#include "pdfoptimizedocumentdialog.h"
#include "ui_pdfoptimizedocumentdialog.h"
#include "pdfwidgetutils.h"
#include "pdfdocumentwriter.h"
#include <QCheckBox>
#include <QPushButton>
#include <QElapsedTimer>
#include <QtConcurrent/QtConcurrent>
namespace pdfviewer
{
/// Constructs the dialog. A check box is created for each optimization step,
/// button "Optimize" is added to the button box and signals of the optimizer
/// are connected to slots of the dialog.
/// \param document Document to be optimized (only the pointer is stored)
/// \param parent Parent widget
PDFOptimizeDocumentDialog::PDFOptimizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent) :
QDialog(parent),
ui(new Ui::PDFOptimizeDocumentDialog),
m_document(document),
m_optimizer(pdf::PDFOptimizer::All, nullptr),
m_optimizeButton(nullptr),
m_optimizationInProgress(false),
m_wasOptimized(false)
{
ui->setupUi(this);
// Creates a check box bound to a single optimization flag: initial state
// is taken from the optimizer, clicking writes the new state back to it.
auto addCheckBox = [this](QString text, pdf::PDFOptimizer::OptimizationFlag flag)
{
QCheckBox* checkBox = new QCheckBox(text, this);
checkBox->setChecked(m_optimizer.getFlags().testFlag(flag));
connect(checkBox, &QCheckBox::clicked, this, [this, flag](bool checked) { m_optimizer.setFlags(m_optimizer.getFlags().setFlag(flag, checked)); });
ui->groupBoxLayout->addWidget(checkBox);
};
addCheckBox(tr("Embed (dereference) simple objects, such as int, bool, real"), pdf::PDFOptimizer::DereferenceSimpleObjects);
addCheckBox(tr("Remove null objects from dictionary entries"), pdf::PDFOptimizer::RemoveNullObjects);
addCheckBox(tr("Remove unused objects (objects unreachable from document root object)"), pdf::PDFOptimizer::RemoveUnusedObjects);
addCheckBox(tr("Merge identical objects"), pdf::PDFOptimizer::MergeIdenticalObjects);
addCheckBox(tr("Shrink object storage (squeeze free entries)"), pdf::PDFOptimizer::ShrinkObjectStorage);
addCheckBox(tr("Recompress flate streams by maximal compression"), pdf::PDFOptimizer::RecompressFlateStreams);
m_optimizeButton = ui->buttonBox->addButton(tr("Optimize"), QDialogButtonBox::ActionRole);
connect(m_optimizeButton, &QPushButton::clicked, this, &PDFOptimizeDocumentDialog::onOptimizeButtonClicked);
// Optimizer signals are emitted from optimize(), which is executed by
// QtConcurrent::run (see onOptimizeButtonClicked)
connect(&m_optimizer, &pdf::PDFOptimizer::optimizationStarted, this, &PDFOptimizeDocumentDialog::onOptimizationStarted);
connect(&m_optimizer, &pdf::PDFOptimizer::optimizationProgress, this, &PDFOptimizeDocumentDialog::onOptimizationProgress);
connect(&m_optimizer, &pdf::PDFOptimizer::optimizationFinished, this, &PDFOptimizeDocumentDialog::onOptimizationFinished);
connect(this, &PDFOptimizeDocumentDialog::displayOptimizationInfo, this, &PDFOptimizeDocumentDialog::onDisplayOptimizationInfo);
pdf::PDFWidgetUtils::scaleWidget(this, QSize(640, 380));
updateUi();
}
/// Destroys the dialog. The dialog is not expected to be destroyed while
/// optimization is running; this is checked by assertions.
PDFOptimizeDocumentDialog::~PDFOptimizeDocumentDialog()
{
    Q_ASSERT(!m_optimizationInProgress);
    Q_ASSERT(!m_future.isRunning());

    // Assertions are compiled out in release builds. The task started in
    // onOptimizeButtonClicked captures 'this', so wait for it rather than
    // let it work with a destroyed dialog. This returns immediately, if
    // the task has finished or was never started.
    m_future.waitForFinished();

    delete ui;
}
void PDFOptimizeDocumentDialog::optimize()
{
QElapsedTimer timer;
timer.start();
m_optimizer.setDocument(m_document);
m_optimizer.optimize();
m_optimizedDocument = m_optimizer.takeOptimizedDocument();
qreal msecsElapsed = timer.nsecsElapsed() / 1000000.0;
timer.invalidate();
m_optimizationInfo.msecsElapsed = msecsElapsed;
m_optimizationInfo.bytesBeforeOptimization = pdf::PDFDocumentWriter::getDocumentFileSize(m_document);
m_optimizationInfo.bytesAfterOptimization = pdf::PDFDocumentWriter::getDocumentFileSize(&m_optimizedDocument);
emit displayOptimizationInfo();
}
/// Starts the optimization. Function optimize() is executed via
/// QtConcurrent::run and the resulting future is stored in m_future.
void PDFOptimizeDocumentDialog::onOptimizeButtonClicked()
{
Q_ASSERT(!m_optimizationInProgress);
Q_ASSERT(!m_future.isRunning());
// The flag is set before the task is started
m_optimizationInProgress = true;
m_future = QtConcurrent::run([this]() { optimize(); });
// Disables the controls while optimization is in progress
updateUi();
}
/// Handles start of the optimization: previous content of the log
/// is replaced by the starting message.
void PDFOptimizeDocumentDialog::onOptimizationStarted()
{
Q_ASSERT(m_optimizationInProgress);
ui->logTextEdit->setPlainText(tr("Optimization started!"));
}
/// Appends progress text as a new line to the log.
/// \param progressText Text describing the progress
void PDFOptimizeDocumentDialog::onOptimizationProgress(QString progressText)
{
    Q_ASSERT(m_optimizationInProgress);

    // Both arguments are substituted by a single arg() call. With chained
    // calls, a place marker contained in the existing log text (such as %1)
    // would be replaced by the progress text.
    ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), progressText));
}
/// Handles end of the optimization: finishing message is appended to the log,
/// the running task is waited for, state flags are updated and the controls
/// are refreshed.
void PDFOptimizeDocumentDialog::onOptimizationFinished()
{
    // Both arguments are substituted by a single arg() call. With chained
    // calls, a place marker contained in the existing log text (such as %1)
    // would be replaced by the finishing message.
    ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), tr("Optimization finished!")));

    // optimize() continues after the optimizer reports the finish (it computes
    // the statistics), so wait, until the whole task is done.
    m_future.waitForFinished();
    m_optimizationInProgress = false;
    m_wasOptimized = true;
    updateUi();
}
void PDFOptimizeDocumentDialog::onDisplayOptimizationInfo()
{
QStringList texts;
texts << tr("Optimized in %1 msecs").arg(m_optimizationInfo.msecsElapsed);
if (m_optimizationInfo.bytesBeforeOptimization != -1 &&
m_optimizationInfo.bytesAfterOptimization != -1)
{
texts << tr("Bytes before optimization: %1").arg(m_optimizationInfo.bytesBeforeOptimization);
texts << tr("Bytes after optimization: %1").arg(m_optimizationInfo.bytesAfterOptimization);
texts << tr("Bytes saved by optimization: %1").arg(m_optimizationInfo.bytesBeforeOptimization - m_optimizationInfo.bytesAfterOptimization);
qreal ratio = 100.0;
if (m_optimizationInfo.bytesBeforeOptimization > 0)
{
ratio = 100.0 * qreal(m_optimizationInfo.bytesAfterOptimization) / qreal(m_optimizationInfo.bytesBeforeOptimization);
}
texts << tr("Compression ratio: %1 %").arg(ratio);
}
ui->logTextEdit->setPlainText(QString("%1\n%2").arg(ui->logTextEdit->toPlainText(), texts.join("\n")));
}
void PDFOptimizeDocumentDialog::updateUi()
{
for (QCheckBox* checkBox : findChildren<QCheckBox*>(QString(), Qt::FindChildrenRecursively))
{
checkBox->setEnabled(!m_optimizationInProgress);
}
ui->buttonBox->button(QDialogButtonBox::Ok)->setEnabled(m_wasOptimized && !m_optimizationInProgress);
ui->buttonBox->button(QDialogButtonBox::Cancel)->setEnabled(!m_optimizationInProgress);
m_optimizeButton->setEnabled(!m_optimizationInProgress);
}
} // namespace pdfviewer

View File

@ -0,0 +1,58 @@
#ifndef PDFOPTIMIZEDOCUMENTDIALOG_H
#define PDFOPTIMIZEDOCUMENTDIALOG_H
#include "pdfoptimizer.h"
#include <QDialog>
#include <QFuture>
// Forward declaration of the form class which the dialog accesses through
// its 'ui' pointer (presumably the class generated by uic from the dialog's
// .ui form - verify against the build setup).
namespace Ui
{
class PDFOptimizeDocumentDialog;
}
namespace pdfviewer
{
/// Dialog for optimization of a PDF document. The optimization itself is
/// performed by the optimize() function, which is launched via
/// QtConcurrent::run when the optimize button is clicked. Messages about
/// the optimization are displayed in the log view of the dialog.
class PDFOptimizeDocumentDialog : public QDialog
{
Q_OBJECT
public:
/// Constructs the dialog.
/// \param document Document to be optimized (stored as a non-owning pointer)
/// \param parent Parent widget
explicit PDFOptimizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent);
virtual ~PDFOptimizeDocumentDialog() override;
signals:
/// Emitted by optimize() after the optimization info has been filled
/// (presumably connected to onDisplayOptimizationInfo() - verify in the constructor).
void displayOptimizationInfo();
private:
/// Performs the optimization; executed via QtConcurrent::run.
void optimize();
/// Starts the asynchronous optimization.
void onOptimizeButtonClicked();
/// Resets the log view to the initial message.
void onOptimizationStarted();
/// Appends a progress message to the log view.
void onOptimizationProgress(QString progressText);
/// Appends the final message, waits for the job and updates the state flags.
void onOptimizationFinished();
/// Appends elapsed time and size statistics to the log view.
void onDisplayOptimizationInfo();
/// Updates enabled state of the controls according to the optimization state.
void updateUi();
/// Statistics about the optimization displayed in the log view
struct OptimizationInfo
{
qreal msecsElapsed = 0.0; ///< Displayed as duration of the optimization in milliseconds
qint64 bytesBeforeOptimization = -1; ///< File size of the original document, -1 if not set
qint64 bytesAfterOptimization = -1; ///< File size of the optimized document, -1 if not set
};
Ui::PDFOptimizeDocumentDialog* ui;
const pdf::PDFDocument* m_document; ///< Original document (source of the "before" file size)
pdf::PDFOptimizer m_optimizer;
QPushButton* m_optimizeButton; ///< Button whose click starts the optimization
bool m_optimizationInProgress; ///< True while the optimization is running
bool m_wasOptimized; ///< Set when an optimization has finished, enables the Ok button
QFuture<void> m_future; ///< Future of the asynchronous optimization job
pdf::PDFDocument m_optimizedDocument; ///< Optimized document (source of the "after" file size)
OptimizationInfo m_optimizationInfo;
};
} // namespace pdfviewer
#endif // PDFOPTIMIZEDOCUMENTDIALOG_H

View File

@ -0,0 +1,82 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Form of the document optimization dialog: a settings group box, a read-only log view and an Ok/Cancel button box. -->
<ui version="4.0">
<class>PDFOptimizeDocumentDialog</class>
<widget class="QDialog" name="PDFOptimizeDocumentDialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>741</width>
<height>530</height>
</rect>
</property>
<property name="windowTitle">
<string>Document optimization</string>
</property>
<layout class="QVBoxLayout" name="dialogLayout">
<item>
<!-- The group box layout is empty in the form; its content is presumably created in code (verify in the dialog constructor). -->
<widget class="QGroupBox" name="optimizationSettingsGroupBox">
<property name="title">
<string>Optimization Settings</string>
</property>
<layout class="QVBoxLayout" name="groupBoxLayout"/>
</widget>
</item>
<item>
<!-- Read-only log view without undo/redo; its text is set by the dialog. -->
<widget class="QPlainTextEdit" name="logTextEdit">
<property name="undoRedoEnabled">
<bool>false</bool>
</property>
<property name="readOnly">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons">
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>
<!-- The button box accepts or rejects the dialog. -->
<connections>
<connection>
<sender>buttonBox</sender>
<signal>accepted()</signal>
<receiver>PDFOptimizeDocumentDialog</receiver>
<slot>accept()</slot>
<hints>
<hint type="sourcelabel">
<x>248</x>
<y>254</y>
</hint>
<hint type="destinationlabel">
<x>157</x>
<y>274</y>
</hint>
</hints>
</connection>
<connection>
<sender>buttonBox</sender>
<signal>rejected()</signal>
<receiver>PDFOptimizeDocumentDialog</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
</hint>
<hint type="destinationlabel">
<x>286</x>
<y>274</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -24,6 +24,7 @@
#include "pdfviewersettingsdialog.h"
#include "pdfdocumentpropertiesdialog.h"
#include "pdfrendertoimagesdialog.h"
#include "pdfoptimizedocumentdialog.h"
#include "pdfdocumentreader.h"
#include "pdfvisitor.h"
@ -886,6 +887,7 @@ void PDFViewerMainWindow::updateActionsAvailability()
ui->actionFind->setEnabled(hasValidDocument);
ui->actionPrint->setEnabled(hasValidDocument && canPrint);
ui->actionRender_to_Images->setEnabled(hasValidDocument && canPrint);
ui->actionOptimize->setEnabled(hasValidDocument);
setEnabled(!isBusy);
updateUndoRedoActions();
}
@ -1409,4 +1411,15 @@ void PDFViewerMainWindow::on_actionRender_to_Images_triggered()
dialog.exec();
}
void PDFViewerMainWindow::on_actionOptimize_triggered()
{
PDFOptimizeDocumentDialog dialog(m_pdfDocument.data(), this);
if (dialog.exec() == QDialog::Accepted)
{
}
}
} // namespace pdfviewer

View File

@ -99,6 +99,8 @@ private slots:
void on_actionPrint_triggered();
void on_actionRender_to_Images_triggered();
void on_actionOptimize_triggered();
private:
void onActionOpenTriggered();
void onActionCloseTriggered();

View File

@ -124,6 +124,8 @@
<addaction name="actionSelectTextAll"/>
<addaction name="actionDeselectText"/>
<addaction name="separator"/>
<addaction name="actionOptimize"/>
<addaction name="separator"/>
</widget>
<addaction name="menuFile"/>
<addaction name="menuEdit"/>
@ -529,6 +531,11 @@
<string>Redo</string>
</property>
</action>
<action name="actionOptimize">
<property name="text">
<string>Optimize</string>
</property>
</action>
</widget>
<layoutdefault spacing="6" margin="11"/>
<resources>