diff --git a/PdfForQtLib/sources/pdfconstants.h b/PdfForQtLib/sources/pdfconstants.h
index 40da2dd..be696ce 100644
--- a/PdfForQtLib/sources/pdfconstants.h
+++ b/PdfForQtLib/sources/pdfconstants.h
@@ -58,6 +58,9 @@ static constexpr const char* PDF_XREF_OCCUPIED = "n";
 static constexpr const char* PDF_OBJECT_START_MARK = "obj";
 static constexpr const char* PDF_OBJECT_END_MARK = "endobj";
 
+// Maximum generation number of an object; a free entry which reached it is not reused
+static constexpr const int PDF_MAX_OBJECT_GENERATION = 65535;
+
 // Colors
 static constexpr const int PDF_MAX_COLOR_COMPONENTS = 32;
 
diff --git a/PdfForQtLib/sources/pdfobject.h b/PdfForQtLib/sources/pdfobject.h
index a2c66ce..b6aaf06 100644
--- a/PdfForQtLib/sources/pdfobject.h
+++ b/PdfForQtLib/sources/pdfobject.h
@@ -411,6 +411,7 @@ public:
 
     enum MergeFlag
     {
+        NoFlag = 0x0000, ///< No special handling, plain merge
         RemoveNullObjects = 0x0001, ///< Remove null object from dictionaries
         ConcatenateArrays = 0x0002, ///< Concatenate arrays instead of replace
     };
diff --git a/PdfForQtLib/sources/pdfoptimizer.cpp b/PdfForQtLib/sources/pdfoptimizer.cpp
index 75704e3..a54ee93 100644
--- a/PdfForQtLib/sources/pdfoptimizer.cpp
+++ b/PdfForQtLib/sources/pdfoptimizer.cpp
@@ -20,6 +20,8 @@
 #include "pdfexecutionpolicy.h"
 #include "pdfobjectutils.h"
 #include "pdfutils.h"
+#include "pdfconstants.h"
+#include "pdfdocumentbuilder.h"
 
 namespace pdf
 {
@@ -404,6 +406,115 @@ bool PDFOptimizer::performMergeIdenticalObjects()
 
 bool PDFOptimizer::performShrinkObjectStorage()
 {
+    // Compacts the object storage: occupied entries from the end of the
+    // object list are moved into free slots with lower indices, trailing
+    // unused entries are trimmed, and every reference (in the objects and
+    // in the trailer dictionary) is rewritten to the new location.
+    // Entry 0 is never used as a source or as a target.
+
+    std::map<PDFObjectReference, PDFObjectReference> replacementMap;
+    PDFObjectStorage::PDFObjects objects = m_storage.getObjects();
+    const size_t objectCount = objects.size();
+
+    // A slot can be reused only if it is empty and its generation can still be incremented
+    auto isFree = [](const PDFObjectStorage::Entry& entry)
+    {
+        return entry.object.isNull() && entry.generation < PDF_MAX_OBJECT_GENERATION;
+    };
+    auto isOccupied = [](const PDFObjectStorage::Entry& entry)
+    {
+        return !entry.object.isNull();
+    };
+
+    // Make list of free usable indices (reversed, so the lowest index is at the back)
+    std::vector<size_t> freeIndices;
+    freeIndices.reserve(objectCount / 8);
+    for (size_t sourceIndex = 1; sourceIndex < objectCount; ++sourceIndex)
+    {
+        if (isFree(objects[sourceIndex]))
+        {
+            freeIndices.push_back(sourceIndex);
+        }
+    }
+    std::reverse(freeIndices.begin(), freeIndices.end());
+
+    // Move objects to free entries. The loop counter is one past the visited
+    // index, so an empty object list cannot underflow the unsigned index.
+    for (size_t i = objectCount; i > 1; --i)
+    {
+        if (freeIndices.empty())
+        {
+            // Jakub Melka: We have run out of free indices
+            break;
+        }
+
+        const size_t sourceIndex = i - 1;
+        PDFObjectStorage::Entry& sourceEntry = objects[sourceIndex];
+        if (!isOccupied(sourceEntry))
+        {
+            continue;
+        }
+
+        const size_t targetIndex = freeIndices.back();
+        freeIndices.pop_back();
+
+        if (targetIndex >= sourceIndex)
+        {
+            // Remaining free slots are all behind the source, nothing more to gain
+            break;
+        }
+
+        PDFObjectStorage::Entry& targetEntry = objects[targetIndex];
+        Q_ASSERT(isFree(targetEntry));
+
+        // The reused slot gets the next generation number
+        ++targetEntry.generation;
+        targetEntry.object = qMove(sourceEntry.object);
+        sourceEntry.object = PDFObject();
+
+        replacementMap[PDFObjectReference(PDFInteger(sourceIndex), sourceEntry.generation)] = PDFObjectReference(PDFInteger(targetIndex), targetEntry.generation);
+    }
+
+    // Shrink objects array behind the last occupied entry
+    for (size_t i = objectCount; i > 1; --i)
+    {
+        if (isOccupied(objects[i - 1]))
+        {
+            objects.resize(i);
+            break;
+        }
+    }
+    const size_t newObjectCount = objects.size();
+
+    // Update references and the trailer. The "Size" entry must be refreshed
+    // also when the list was only trimmed and no object has been moved.
+    const bool hasReplacements = !replacementMap.empty();
+    if (hasReplacements || newObjectCount != objectCount)
+    {
+        PDFObject trailerDictionary = m_storage.getTrailerDictionary();
+        if (hasReplacements)
+        {
+            for (size_t i = 0; i < objects.size(); ++i)
+            {
+                objects[i].object = PDFObjectUtils::replaceReferences(objects[i].object, replacementMap);
+            }
+            trailerDictionary = PDFObjectUtils::replaceReferences(trailerDictionary, replacementMap);
+        }
+
+        PDFObjectFactory factory;
+        factory.beginDictionary();
+        factory.beginDictionaryItem("Size");
+        factory << PDFInteger(newObjectCount);
+        factory.endDictionaryItem();
+        factory.endDictionary();
+
+        trailerDictionary = PDFObjectManipulator::merge(trailerDictionary, factory.takeObject(), PDFObjectManipulator::NoFlag);
+        m_storage.setTrailerDictionary(trailerDictionary);
+    }
+
+    m_storage.setObjects(qMove(objects));
+    emit optimizationProgress(tr("Object list shrinked by: %1").arg(objectCount - newObjectCount));
+
     return false;
 }
 