Finishing of optimization

This commit is contained in:
Jakub Melka
2020-06-06 16:30:06 +02:00
parent 8a97f20ce9
commit 5cb9bdee26
5 changed files with 151 additions and 17 deletions

View File

@ -22,6 +22,7 @@
#include "pdfutils.h" #include "pdfutils.h"
#include "pdfconstants.h" #include "pdfconstants.h"
#include "pdfdocumentbuilder.h" #include "pdfdocumentbuilder.h"
#include "pdfstreamfilters.h"
namespace pdf namespace pdf
{ {
@ -502,6 +503,51 @@ bool PDFOptimizer::performShrinkObjectStorage()
bool PDFOptimizer::performRecompressFlateStreams() bool PDFOptimizer::performRecompressFlateStreams()
{ {
std::atomic<PDFInteger> bytesSaved = 0;
PDFObjectStorage::PDFObjects objects = m_storage.getObjects();
auto processEntry = [this, &bytesSaved](PDFObjectStorage::Entry& entry)
{
if (entry.object.isStream())
{
const PDFStream* stream = entry.object.getStream();
const PDFDictionary* dictionary = stream->getDictionary();
if (dictionary->hasKey("F"))
{
// External file stream, we do not recompress it
return;
}
PDFStreamFilterStorage::StreamFilters streamFilters = PDFStreamFilterStorage::getStreamFilters(stream, std::bind(QOverload<const PDFObject&>::of(&PDFObjectStorage::getObject), &m_storage, std::placeholders::_1));
if (streamFilters.filterObjects.empty())
{
// No filters
return;
}
const PDFStreamFilter* streamFilter = streamFilters.filterObjects.front();
if (dynamic_cast<const PDFFlateDecodeFilter*>(streamFilter))
{
// Try to recompress. If we end with less data, then we use recompressed stream
QByteArray recompressedData = PDFFlateDecodeFilter::recompress(*stream->getContent());
const PDFInteger currentBytesSaved = stream->getContent()->size() - recompressedData.size();
if (currentBytesSaved > 0)
{
bytesSaved += currentBytesSaved;
PDFDictionary updatedDictionary = *dictionary;
updatedDictionary.setEntry("Length", PDFObject::createInteger(recompressedData.size()));
entry.object = PDFObject::createStream(std::make_shared<PDFStream>(qMove(updatedDictionary), qMove(recompressedData)));
}
}
}
};
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Unknown, objects.begin(), objects.end(), processEntry);
m_storage.setObjects(qMove(objects));
emit optimizationProgress(tr("Bytes saved by recompressing stream: %1").arg(bytesSaved));
return false; return false;
} }

View File

@ -402,6 +402,61 @@ QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data,
return predictor.apply(uncompress(data)); return predictor.apply(uncompress(data));
} }
/// Recompresses flate data. Input data are decompressed by function \p uncompress
/// and then deflated again using zlib with compression level Z_BEST_COMPRESSION.
/// \param data Flate compressed data to be recompressed
/// \returns Newly compressed data
/// \throws PDFException if zlib compression stream can't be initialized, or if
///         deflate finishes with result other than Z_STREAM_END
QByteArray PDFFlateDecodeFilter::recompress(const QByteArray& data)
{
    QByteArray result;

    QByteArray decompressedData = uncompress(data);

    z_stream stream = { };
    stream.next_in = const_cast<Bytef*>(convertByteArrayToUcharPtr(decompressedData));
    stream.avail_in = static_cast<uInt>(decompressedData.size());

    std::array<Bytef, 1024> outputBuffer = { };

    int error = deflateInit(&stream, Z_BEST_COMPRESSION);
    if (error != Z_OK)
    {
        throw PDFException(PDFTranslationContext::tr("Failed to initialize flate compression stream."));
    }

    // Releases internal zlib state on every exit path, including the case,
    // when appending to the result throws an exception.
    struct DeflateStreamGuard
    {
        z_stream* stream;
        ~DeflateStreamGuard() { deflateEnd(stream); }
    };
    const DeflateStreamGuard streamGuard{ &stream };

    do
    {
        // Output buffer is reused in each iteration, so reset the output window
        stream.next_out = outputBuffer.data();
        stream.avail_out = static_cast<uInt>(outputBuffer.size());

        error = deflate(&stream, Z_FINISH);

        const int bytesWritten = static_cast<int>(outputBuffer.size() - stream.avail_out);
        result.append(reinterpret_cast<const char*>(outputBuffer.data()), bytesWritten);
    } while (error == Z_OK);

    if (error != Z_STREAM_END)
    {
        // Prefer message from zlib, if it is present, otherwise use error code
        QString errorMessage;
        if (stream.msg)
        {
            errorMessage = QString::fromLatin1(stream.msg);
        }

        if (errorMessage.isEmpty())
        {
            errorMessage = PDFTranslationContext::tr("zlib code: %1").arg(error);
        }

        throw PDFException(PDFTranslationContext::tr("Error compressing by flate method: %1").arg(errorMessage));
    }

    return result;
}
QByteArray PDFFlateDecodeFilter::uncompress(const QByteArray& data) QByteArray PDFFlateDecodeFilter::uncompress(const QByteArray& data)
{ {
QByteArray result; QByteArray result;
@ -514,8 +569,9 @@ const PDFStreamFilter* PDFStreamFilterStorage::getFilter(const QByteArray& filte
return nullptr; return nullptr;
} }
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher, const PDFSecurityHandler* securityHandler) PDFStreamFilterStorage::StreamFilters PDFStreamFilterStorage::getStreamFilters(const PDFStream* stream, const PDFObjectFetcher& objectFetcher)
{ {
StreamFilters result;
const PDFDictionary* dictionary = stream->getDictionary(); const PDFDictionary* dictionary = stream->getDictionary();
// Retrieve filters // Retrieve filters
@ -540,12 +596,9 @@ QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, con
filterParameters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS)); filterParameters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS));
} }
std::vector<const PDFStreamFilter*> filterObjects;
std::vector<PDFObject> filterParameterObjects;
if (filters.isName()) if (filters.isName())
{ {
filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString())); result.filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
} }
else if (filters.isArray()) else if (filters.isArray())
{ {
@ -556,17 +609,19 @@ QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, con
const PDFObject& object = objectFetcher(filterArray->getItem(i)); const PDFObject& object = objectFetcher(filterArray->getItem(i));
if (object.isName()) if (object.isName())
{ {
filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString())); result.filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString()));
} }
else else
{ {
return QByteArray(); result.valid = false;
return result;
} }
} }
} }
else if (!filters.isNull()) else if (!filters.isNull())
{ {
return QByteArray(); result.valid = false;
return result;
} }
if (filterParameters.isArray()) if (filterParameters.isArray())
@ -576,24 +631,36 @@ QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, con
for (size_t i = 0; i < filterParameterCount; ++i) for (size_t i = 0; i < filterParameterCount; ++i)
{ {
const PDFObject& object = objectFetcher(filterParameterArray->getItem(i)); const PDFObject& object = objectFetcher(filterParameterArray->getItem(i));
filterParameterObjects.push_back(object); result.filterParameterObjects.push_back(object);
} }
} }
else else
{ {
filterParameterObjects.push_back(filterParameters); result.filterParameterObjects.push_back(filterParameters);
} }
filterParameterObjects.resize(filterObjects.size()); result.filterParameterObjects.resize(result.filterObjects.size());
std::reverse(filterObjects.begin(), filterObjects.end()); std::reverse(result.filterObjects.begin(), result.filterObjects.end());
std::reverse(filterParameterObjects.begin(), filterParameterObjects.end()); std::reverse(result.filterParameterObjects.begin(), result.filterParameterObjects.end());
return result;
}
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher, const PDFSecurityHandler* securityHandler)
{
StreamFilters streamFilters = getStreamFilters(stream, objectFetcher);
QByteArray result = *stream->getContent(); QByteArray result = *stream->getContent();
for (size_t i = 0, count = filterObjects.size(); i < count; ++i) if (!streamFilters.valid)
{ {
const PDFStreamFilter* streamFilter = filterObjects[i]; // Stream filters are invalid
const PDFObject& streamFilterParameters = filterParameterObjects[i]; return QByteArray();
}
for (size_t i = 0, count = streamFilters.filterObjects.size(); i < count; ++i)
{
const PDFStreamFilter* streamFilter = streamFilters.filterObjects[i];
const PDFObject& streamFilterParameters = streamFilters.filterParameterObjects[i];
if (streamFilter) if (streamFilter)
{ {

View File

@ -54,6 +54,18 @@ public:
/// \param securityHandler Security handler for Crypt filters /// \param securityHandler Security handler for Crypt filters
static QByteArray getDecodedStream(const PDFStream* stream, const PDFSecurityHandler* securityHandler); static QByteArray getDecodedStream(const PDFStream* stream, const PDFSecurityHandler* securityHandler);
/// Filters of a stream together with their parameters, as returned
/// by function getStreamFilters.
struct StreamFilters
{
    /// False, if the filter specification of the stream couldn't be interpreted
    bool valid{ true };

    /// Filter objects; an entry can be nullptr (consumers check it before use)
    std::vector<const PDFStreamFilter*> filterObjects{ };

    /// Filter parameters, one per filter object for valid results
    std::vector<PDFObject> filterParameterObjects{ };
};
/// Returns stream filters along with their parameters, for this stream
/// \param stream Stream containing data
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
static StreamFilters getStreamFilters(const PDFStream* stream, const PDFObjectFetcher& objectFetcher);
private: private:
explicit PDFStreamFilterStorage(); explicit PDFStreamFilterStorage();
@ -185,6 +197,11 @@ public:
const PDFObject& parameters, const PDFObject& parameters,
const PDFSecurityHandler* securityHandler) const override; const PDFSecurityHandler* securityHandler) const override;
/// Recompresses data. The data are first decompressed and then
/// compressed again with the maximal possible compression ratio.
/// \param data Compressed data to be recompressed
static QByteArray recompress(const QByteArray& data);
private: private:
static QByteArray uncompress(const QByteArray& data); static QByteArray uncompress(const QByteArray& data);
}; };

View File

@ -22,6 +22,8 @@ public:
explicit PDFOptimizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent); explicit PDFOptimizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent);
virtual ~PDFOptimizeDocumentDialog() override; virtual ~PDFOptimizeDocumentDialog() override;
/// Returns the optimized document by moving it out of this dialog
pdf::PDFDocument takeOptimizedDocument()
{
    return qMove(m_optimizedDocument);
}
signals: signals:
void displayOptimizationInfo(); void displayOptimizationInfo();

View File

@ -1417,7 +1417,9 @@ void PDFViewerMainWindow::on_actionOptimize_triggered()
if (dialog.exec() == QDialog::Accepted) if (dialog.exec() == QDialog::Accepted)
{ {
pdf::PDFDocumentPointer pointer(new pdf::PDFDocument(dialog.takeOptimizedDocument()));
pdf::PDFModifiedDocument document(qMove(pointer), m_optionalContentActivity, pdf::PDFModifiedDocument::Reset);
onDocumentModified(qMove(document));
} }
} }