Optimization: merging identical objects

This commit is contained in:
Jakub Melka 2020-06-03 19:41:42 +02:00
parent 93ecdd0bfc
commit 6a986178ff
4 changed files with 83 additions and 4 deletions

View File

@ -131,6 +131,10 @@ public:
/// \param stream Stream to be decoded
QByteArray getDecodedStream(const PDFStream* stream) const;
/// Replaces the stored trailer dictionary (m_trailerDictionary) with a copy
/// of the given object.
/// \param object Object defining the new trailer dictionary
void setTrailerDictionary(const PDFObject& object)
{
    m_trailerDictionary = object;
}
private:
PDFObjects m_objects;
PDFObject m_trailerDictionary;

View File

@ -68,10 +68,30 @@ const PDFArray* PDFObject::getArray() const
return static_cast<const PDFArray*>(objectContent.get());
}
bool PDFObject::operator==(const PDFObject &other) const
bool PDFObject::operator==(const PDFObject& other) const
{
if (m_type == other.m_type)
{
if (m_type == Type::String || m_type == Type::Name)
{
PDFStringRef leftString = getStringObject();
PDFStringRef rightString = other.getStringObject();
if (leftString.inplaceString && rightString.inplaceString)
{
return *leftString.inplaceString == *rightString.inplaceString;
}
else if (leftString.memoryString && rightString.memoryString)
{
return leftString.memoryString->equals(rightString.memoryString);
}
// One object holds an inplace string and the other one a memory string.
// They cannot be equal, because a memory string is always longer
// than an inplace string.
return false;
}
Q_ASSERT(std::holds_alternative<PDFObjectContentPointer>(m_data) == std::holds_alternative<PDFObjectContentPointer>(other.m_data));
// If we have content object defined, then use its equal operator,

View File

@ -159,7 +159,17 @@ void PDFReplaceReferencesVisitor::visitStream(const PDFStream* stream)
/// Visits an indirect reference and pushes exactly one reference object onto
/// the object stack: the replacement, if the reference is present in
/// m_replacements, otherwise the original reference unchanged.
/// \param reference Reference encountered during the traversal
void PDFReplaceReferencesVisitor::visitReference(const PDFObjectReference reference)
{
    // Use find() rather than at(): at() throws std::out_of_range for references
    // which have no replacement entry, and an unconditional push in addition to
    // the branches below would leave two objects on the stack for one reference.
    auto it = m_replacements.find(reference);
    if (it != m_replacements.cend())
    {
        // Replace the reference
        m_objectStack.push_back(PDFObject::createReference(it->second));
    }
    else
    {
        // Preserve old reference
        m_objectStack.push_back(PDFObject::createReference(reference));
    }
}
PDFObject PDFReplaceReferencesVisitor::getObject()

View File

@ -349,12 +349,57 @@ bool PDFOptimizer::performRemoveUnusedObjects()
m_storage.setObjects(qMove(objects));
emit optimizationProgress(tr("Unused objects removed: %1").arg(counter));
return false;
return counter > 0;
}
/// Merges objects with identical content. For every non-null object, the
/// preceding objects are searched for the first one which compares equal;
/// if such an object exists, a replacement "reference to this object ->
/// reference to the earlier object" is recorded. Afterwards, all references
/// in the objects and in the trailer dictionary are rewritten according to
/// the recorded replacements. The duplicate objects themselves are left in
/// the storage by this function.
/// \return true, if at least one object was merged
bool PDFOptimizer::performMergeIdenticalObjects()
{
    std::atomic<PDFInteger> counter = 0;
    std::map<PDFObjectReference, PDFObjectReference> replacementMap;
    PDFObjectStorage::PDFObjects objects = m_storage.getObjects();

    // Find same objects. The entries are dispatched via PDFExecutionPolicy,
    // so writes to the shared replacement map are guarded by a mutex
    // (presumably the entries can be processed concurrently).
    QMutex mutex;
    PDFIntegerRange<size_t> range(0, objects.size());
    auto processEntry = [&counter, &objects, &mutex, &replacementMap](size_t index)
    {
        if (objects[index].object.isNull())
        {
            // Null objects are never merged (candidates which are null are
            // skipped below), so there is nothing to search for.
            return;
        }

        const PDFObjectStorage::Entry& entry = objects[index];
        for (size_t i = 0; i < index; ++i)
        {
            if (objects[i].object.isNull())
            {
                // We do not merge null objects, they are just removed
                continue;
            }

            if (objects[i].object == entry.object)
            {
                // The first equal predecessor wins, so every duplicate
                // is redirected to the lowest-indexed identical object.
                QMutexLocker lock(&mutex);
                PDFObjectReference oldReference = PDFObjectReference(PDFInteger(index), objects[index].generation);
                PDFObjectReference newReference = PDFObjectReference(PDFInteger(i), objects[i].generation);
                replacementMap[oldReference] = newReference;
                ++counter;
                break;
            }
        }
    };
    PDFExecutionPolicy::execute(PDFExecutionPolicy::Scope::Unknown, range.begin(), range.end(), processEntry);

    // Replace objects
    if (!replacementMap.empty())
    {
        for (size_t i = 0; i < objects.size(); ++i)
        {
            objects[i].object = PDFObjectUtils::replaceReferences(objects[i].object, replacementMap);
        }

        // The trailer dictionary can reference merged objects too
        PDFObject trailerDictionary = PDFObjectUtils::replaceReferences(m_storage.getTrailerDictionary(), replacementMap);
        m_storage.setTrailerDictionary(trailerDictionary);
    }

    m_storage.setObjects(qMove(objects));
    emit optimizationProgress(tr("Identical objects merged: %1").arg(counter));
    return counter > 0;
}
bool PDFOptimizer::performShrinkObjectStorage()