Finishing of optimization

This commit is contained in:
Jakub Melka 2020-06-06 16:30:06 +02:00
parent 8a97f20ce9
commit 5cb9bdee26
5 changed files with 151 additions and 17 deletions

View File

@ -22,6 +22,7 @@
#include "pdfutils.h"
#include "pdfconstants.h"
#include "pdfdocumentbuilder.h"
#include "pdfstreamfilters.h"
namespace pdf
{
@ -502,6 +503,51 @@ bool PDFOptimizer::performShrinkObjectStorage()
bool PDFOptimizer::performRecompressFlateStreams()
{
std::atomic<PDFInteger> bytesSaved = 0;
PDFObjectStorage::PDFObjects objects = m_storage.getObjects();
auto processEntry = [this, &bytesSaved](PDFObjectStorage::Entry& entry)
{
if (entry.object.isStream())
{
const PDFStream* stream = entry.object.getStream();
const PDFDictionary* dictionary = stream->getDictionary();
if (dictionary->hasKey("F"))
{
// External file stream, we do not recompress it
return;
}
PDFStreamFilterStorage::StreamFilters streamFilters = PDFStreamFilterStorage::getStreamFilters(stream, std::bind(QOverload<const PDFObject&>::of(&PDFObjectStorage::getObject), &m_storage, std::placeholders::_1));
if (streamFilters.filterObjects.empty())
{
// No filters
return;
}
const PDFStreamFilter* streamFilter = streamFilters.filterObjects.front();
if (dynamic_cast<const PDFFlateDecodeFilter*>(streamFilter))
{
// Try to recompress. If we end with less data, then we use recompressed stream
QByteArray recompressedData = PDFFlateDecodeFilter::recompress(*stream->getContent());
const PDFInteger currentBytesSaved = stream->getContent()->size() - recompressedData.size();
if (currentBytesSaved > 0)
{
bytesSaved += currentBytesSaved;
PDFDictionary updatedDictionary = *dictionary;
updatedDictionary.setEntry("Length", PDFObject::createInteger(recompressedData.size()));
entry.object = PDFObject::createStream(std::make_shared<PDFStream>(qMove(updatedDictionary), qMove(recompressedData)));
}
}
}
};
PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Unknown, objects.begin(), objects.end(), processEntry);
m_storage.setObjects(qMove(objects));
emit optimizationProgress(tr("Bytes saved by recompressing stream: %1").arg(bytesSaved));
return false;
}

View File

@ -402,6 +402,61 @@ QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data,
return predictor.apply(uncompress(data));
}
/// Recompresses flate-encoded data. The input is first decompressed by
/// uncompress() and the raw bytes are then deflated again using the
/// Z_BEST_COMPRESSION level.
/// \param data Flate-compressed data to be recompressed
/// \returns Newly compressed data
/// \throws PDFException if the zlib deflate stream cannot be initialized,
///         or if the compression does not finish with Z_STREAM_END
QByteArray PDFFlateDecodeFilter::recompress(const QByteArray& data)
{
    QByteArray result;
    QByteArray decompressedData = uncompress(data);

    z_stream stream = { };
    stream.next_in = const_cast<Bytef*>(convertByteArrayToUcharPtr(decompressedData));
    stream.avail_in = static_cast<uInt>(decompressedData.size());

    std::array<Bytef, 1024> outputBuffer = { };

    int error = deflateInit(&stream, Z_BEST_COMPRESSION);
    if (error != Z_OK)
    {
        throw PDFException(PDFTranslationContext::tr("Failed to initialize flate compression stream."));
    }

    // All input is available at once, so we call deflate with Z_FINISH and
    // keep draining the output buffer; Z_OK means more output is pending.
    do
    {
        stream.next_out = outputBuffer.data();
        stream.avail_out = static_cast<uInt>(outputBuffer.size());

        error = deflate(&stream, Z_FINISH);

        const int bytesWritten = static_cast<int>(outputBuffer.size() - stream.avail_out);
        result.append(reinterpret_cast<const char*>(outputBuffer.data()), bytesWritten);
    } while (error == Z_OK);

    // The message must be copied before deflateEnd releases the stream state
    QString errorMessage;
    if (stream.msg)
    {
        errorMessage = QString::fromLatin1(stream.msg);
    }

    deflateEnd(&stream);

    switch (error)
    {
        case Z_STREAM_END:
            break; // No error, normal behaviour

        default:
        {
            if (errorMessage.isEmpty())
            {
                errorMessage = PDFTranslationContext::tr("zlib code: %1").arg(error);
            }

            // This is the compression stage, so report it as such
            throw PDFException(PDFTranslationContext::tr("Error compressing by flate method: %1").arg(errorMessage));
        }
    }

    return result;
}
QByteArray PDFFlateDecodeFilter::uncompress(const QByteArray& data)
{
QByteArray result;
@ -514,8 +569,9 @@ const PDFStreamFilter* PDFStreamFilterStorage::getFilter(const QByteArray& filte
return nullptr;
}
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher, const PDFSecurityHandler* securityHandler)
PDFStreamFilterStorage::StreamFilters PDFStreamFilterStorage::getStreamFilters(const PDFStream* stream, const PDFObjectFetcher& objectFetcher)
{
StreamFilters result;
const PDFDictionary* dictionary = stream->getDictionary();
// Retrieve filters
@ -540,12 +596,9 @@ QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, con
filterParameters = objectFetcher(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS));
}
std::vector<const PDFStreamFilter*> filterObjects;
std::vector<PDFObject> filterParameterObjects;
if (filters.isName())
{
filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
result.filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
}
else if (filters.isArray())
{
@ -556,17 +609,19 @@ QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, con
const PDFObject& object = objectFetcher(filterArray->getItem(i));
if (object.isName())
{
filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString()));
result.filterObjects.push_back(PDFStreamFilterStorage::getFilter(object.getString()));
}
else
{
return QByteArray();
result.valid = false;
return result;
}
}
}
else if (!filters.isNull())
{
return QByteArray();
result.valid = false;
return result;
}
if (filterParameters.isArray())
@ -576,24 +631,36 @@ QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, con
for (size_t i = 0; i < filterParameterCount; ++i)
{
const PDFObject& object = objectFetcher(filterParameterArray->getItem(i));
filterParameterObjects.push_back(object);
result.filterParameterObjects.push_back(object);
}
}
else
{
filterParameterObjects.push_back(filterParameters);
result.filterParameterObjects.push_back(filterParameters);
}
filterParameterObjects.resize(filterObjects.size());
std::reverse(filterObjects.begin(), filterObjects.end());
std::reverse(filterParameterObjects.begin(), filterParameterObjects.end());
result.filterParameterObjects.resize(result.filterObjects.size());
std::reverse(result.filterObjects.begin(), result.filterObjects.end());
std::reverse(result.filterParameterObjects.begin(), result.filterParameterObjects.end());
return result;
}
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher, const PDFSecurityHandler* securityHandler)
{
StreamFilters streamFilters = getStreamFilters(stream, objectFetcher);
QByteArray result = *stream->getContent();
for (size_t i = 0, count = filterObjects.size(); i < count; ++i)
if (!streamFilters.valid)
{
const PDFStreamFilter* streamFilter = filterObjects[i];
const PDFObject& streamFilterParameters = filterParameterObjects[i];
// Stream filters are invalid
return QByteArray();
}
for (size_t i = 0, count = streamFilters.filterObjects.size(); i < count; ++i)
{
const PDFStreamFilter* streamFilter = streamFilters.filterObjects[i];
const PDFObject& streamFilterParameters = streamFilters.filterParameterObjects[i];
if (streamFilter)
{

View File

@ -54,6 +54,18 @@ public:
/// \param securityHandler Security handler for Crypt filters
static QByteArray getDecodedStream(const PDFStream* stream, const PDFSecurityHandler* securityHandler);
/// Filter chain of a stream, as produced by getStreamFilters().
struct StreamFilters
{
/// Set to false by getStreamFilters() when the filter entry of the stream
/// is neither a name, an array of names, nor null.
bool valid = true;
/// Filter objects, stored in reverse of the order in which they were read
/// from the stream dictionary. Entries may be null (getDecodedStream()
/// checks each pointer before using it).
std::vector<const PDFStreamFilter*> filterObjects;
/// Parameters of the filters. getStreamFilters() resizes this vector to the
/// size of filterObjects and reverses it the same way, so that index i here
/// corresponds to filterObjects[i].
std::vector<PDFObject> filterParameterObjects;
};
/// Returns the filters of this stream, along with their parameters.
/// \param stream Stream containing data
/// \param objectFetcher Function which retrieves objects (for example, reads objects from reference)
/// \returns Filters with their parameters; the valid flag of the result is false
///          when the filter specification of the stream cannot be interpreted
static StreamFilters getStreamFilters(const PDFStream* stream, const PDFObjectFetcher& objectFetcher);
private:
explicit PDFStreamFilterStorage();
@ -185,6 +197,11 @@ public:
const PDFObject& parameters,
const PDFSecurityHandler* securityHandler) const override;
/// Recompresses data. The data are first decompressed and then
/// compressed again using the best compression level of zlib.
/// \param data Compressed data to be recompressed
/// \returns Recompressed data
/// \throws PDFException if the compression stream cannot be initialized or the compression fails
static QByteArray recompress(const QByteArray& data);
private:
static QByteArray uncompress(const QByteArray& data);
};

View File

@ -22,6 +22,8 @@ public:
explicit PDFOptimizeDocumentDialog(const pdf::PDFDocument* document, QWidget* parent);
virtual ~PDFOptimizeDocumentDialog() override;
/// Moves the optimized document out of the dialog and returns it. The dialog's
/// own m_optimizedDocument is left in a moved-from state afterwards.
pdf::PDFDocument takeOptimizedDocument() { return qMove(m_optimizedDocument); }
signals:
void displayOptimizationInfo();

View File

@ -1417,7 +1417,9 @@ void PDFViewerMainWindow::on_actionOptimize_triggered()
if (dialog.exec() == QDialog::Accepted)
{
pdf::PDFDocumentPointer pointer(new pdf::PDFDocument(dialog.takeOptimizedDocument()));
pdf::PDFModifiedDocument document(qMove(pointer), m_optionalContentActivity, pdf::PDFModifiedDocument::Reset);
onDocumentModified(qMove(document));
}
}