Dictionary optimization

This commit is contained in:
Jakub Melka 2020-06-08 19:42:00 +02:00
parent 4e4bf111da
commit 958737f359
14 changed files with 188 additions and 29 deletions

View File

@ -123,7 +123,7 @@ PDFAppeareanceStreams PDFAppeareanceStreams::parse(const PDFObjectStorage* stora
const PDFDictionary* subdictionary = storage->getDictionaryFromObject(subdictionaryObject);
for (size_t i = 0; i < subdictionary->getCount(); ++i)
{
result.m_appearanceStreams[std::make_pair(appearance, subdictionary->getKey(i))] = subdictionary->getValue(i);
result.m_appearanceStreams[std::make_pair(appearance, subdictionary->getKey(i).getString())] = subdictionary->getValue(i);
}
}
else if (!subdictionaryObject.isNull())

View File

@ -158,7 +158,7 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
const size_t count = destsDictionary->getCount();
for (size_t i = 0; i < count; ++i)
{
catalogObject.m_destinations[destsDictionary->getKey(i)] = PDFDestination::parse(&document->getStorage(), destsDictionary->getValue(i));
catalogObject.m_destinations[destsDictionary->getKey(i).getString()] = PDFDestination::parse(&document->getStorage(), destsDictionary->getValue(i));
}
}

View File

@ -199,7 +199,7 @@ void PDFDocument::initInfo()
PDF_DOCUMENT_INFO_ENTRY_CREATION_DATE, PDF_DOCUMENT_INFO_ENTRY_MODIFIED_DATE, PDF_DOCUMENT_INFO_ENTRY_TRAPPED };
for (size_t i = 0; i < infoDictionary->getCount(); ++i)
{
const QByteArray& key = infoDictionary->getKey(i);
QByteArray key = infoDictionary->getKey(i).getString();
if (std::none_of(std::begin(PREDEFINED_ITEMS), std::end(PREDEFINED_ITEMS), [&key](const char* item) { return item == key; }))
{
const PDFObject& value = getObject(infoDictionary->getValue(i));

View File

@ -67,7 +67,7 @@ void PDFObjectFactory::endDictionaryItem()
Item& dictionaryItem = m_items.back();
Q_ASSERT(dictionaryItem.type == ItemType::Dictionary);
std::get<PDFDictionary>(dictionaryItem.object).addEntry(qMove(topItem.itemName), qMove(std::get<PDFObject>(topItem.object)));
std::get<PDFDictionary>(dictionaryItem.object).addEntry(PDFInplaceOrMemoryString(qMove(topItem.itemName)), qMove(std::get<PDFObject>(topItem.object)));
}
PDFObjectFactory& PDFObjectFactory::operator<<(FileAttachmentIcon icon)

View File

@ -145,7 +145,7 @@ void PDFWriteObjectVisitor::visitDictionary(const PDFDictionary* dictionary)
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
writeName(dictionary->getKey(i));
writeName(dictionary->getKey(i).getString());
dictionary->getValue(i).accept(this);
}

View File

@ -135,7 +135,7 @@ PDFFileSpecification PDFFileSpecification::parse(const PDFObjectStorage* storage
const PDFDictionary* embeddedFilesDictionary = embeddedFiles.getDictionary();
for (size_t i = 0; i < embeddedFilesDictionary->getCount(); ++i)
{
result.m_embeddedFiles[embeddedFilesDictionary->getKey(i)] = PDFEmbeddedFile::parse(storage, embeddedFilesDictionary->getValue(i));
result.m_embeddedFiles[embeddedFilesDictionary->getKey(i).getString()] = PDFEmbeddedFile::parse(storage, embeddedFilesDictionary->getValue(i));
}
}
}

View File

@ -277,7 +277,21 @@ const PDFObject& PDFDictionary::get(const char* key) const
}
}
void PDFDictionary::setEntry(const QByteArray& key, PDFObject&& value)
/// Looks up the value stored under \p key. When the key is not present,
/// a reference to a function-local static default-constructed object
/// is returned instead, so the returned reference is always valid.
/// \param key Key
const PDFObject& PDFDictionary::get(const PDFInplaceOrMemoryString& key) const
{
    const auto position = find(key);

    if (position == m_dictionary.cend())
    {
        // Key is missing - hand out the shared placeholder object
        static PDFObject dummy;
        return dummy;
    }

    return position->second;
}
void PDFDictionary::setEntry(const PDFInplaceOrMemoryString& key, PDFObject&& value)
{
auto it = find(key);
if (it != m_dictionary.end())
@ -286,7 +300,7 @@ void PDFDictionary::setEntry(const QByteArray& key, PDFObject&& value)
}
else
{
addEntry(QByteArray(key), qMove(value));
addEntry(key, qMove(value));
}
}
@ -299,11 +313,6 @@ void PDFDictionary::removeNullObjects()
void PDFDictionary::optimize()
{
m_dictionary.shrink_to_fit();
for (DictionaryEntry& entry : m_dictionary)
{
entry.first.shrink_to_fit();
}
}
std::vector<PDFDictionary::DictionaryEntry>::const_iterator PDFDictionary::find(const QByteArray& key) const
@ -321,6 +330,16 @@ std::vector<PDFDictionary::DictionaryEntry>::const_iterator PDFDictionary::find(
return std::find_if(m_dictionary.cbegin(), m_dictionary.cend(), [&key](const DictionaryEntry& entry) { return entry.first == key; });
}
/// Linear search for the entry whose key compares equal to \p key.
/// Returns the end (const) iterator if no such entry exists.
/// \param key Key to be found
std::vector<PDFDictionary::DictionaryEntry>::const_iterator PDFDictionary::find(const PDFInplaceOrMemoryString& key) const
{
    const auto hasRequestedKey = [&key](const DictionaryEntry& entry)
    {
        return entry.first == key;
    };

    return std::find_if(m_dictionary.cbegin(), m_dictionary.cend(), hasRequestedKey);
}
/// Linear search for the entry whose key compares equal to \p key.
/// Returns the end (mutable) iterator if no such entry exists.
/// \param key Key to be found
std::vector<PDFDictionary::DictionaryEntry>::iterator PDFDictionary::find(const PDFInplaceOrMemoryString& key)
{
    const auto hasRequestedKey = [&key](const DictionaryEntry& entry)
    {
        return entry.first == key;
    };

    return std::find_if(m_dictionary.begin(), m_dictionary.end(), hasRequestedKey);
}
bool PDFStream::equals(const PDFObjectContent* other) const
{
Q_ASSERT(dynamic_cast<const PDFStream*>(other));
@ -348,7 +367,7 @@ PDFObject PDFObjectManipulator::merge(PDFObject left, PDFObject right, MergeFlag
for (size_t i = 0, count = sourceDictionary.getCount(); i < count; ++i)
{
const QByteArray& key = sourceDictionary.getKey(i);
const auto& key = sourceDictionary.getKey(i);
PDFObject value = merge(targetDictionary.get(key), sourceDictionary.getValue(i), flags);
targetDictionary.setEntry(key, qMove(value));
}
@ -369,7 +388,7 @@ PDFObject PDFObjectManipulator::merge(PDFObject left, PDFObject right, MergeFlag
for (size_t i = 0, count = sourceDictionary.getCount(); i < count; ++i)
{
const QByteArray& key = sourceDictionary.getKey(i);
const auto& key = sourceDictionary.getKey(i);
PDFObject value = merge(targetDictionary.get(key), sourceDictionary.getValue(i), flags);
targetDictionary.setEntry(key, qMove(value));
}
@ -410,7 +429,78 @@ PDFObject PDFObjectManipulator::removeNullObjects(PDFObject object)
QByteArray PDFStringRef::getString() const
{
return inplaceString ? inplaceString->getString() : memoryString->getString();
if (inplaceString)
{
return inplaceString->getString();
}
if (memoryString)
{
return memoryString->getString();
}
return QByteArray();
}
/// Constructs the string from a null-terminated C string. The length is
/// measured with std::strlen and clamped to the maximal value of int.
/// Strings of at most PDFInplaceString::MAX_STRING_SIZE characters are stored
/// in place, longer ones are stored as a byte array.
/// \param string Null-terminated string
PDFInplaceOrMemoryString::PDFInplaceOrMemoryString(const char* string)
{
    const size_t measuredLength = std::strlen(string);
    const size_t lengthLimit = size_t(std::numeric_limits<int>::max());
    const int size = static_cast<int>((measuredLength < lengthLimit) ? measuredLength : lengthLimit);

    if (size <= PDFInplaceString::MAX_STRING_SIZE)
    {
        // Short enough - fits into the embedded buffer
        m_value = PDFInplaceString(string, size);
        return;
    }

    m_value = QByteArray(string, size);
}
/// Constructs the string from a byte array. Arrays of at most
/// PDFInplaceString::MAX_STRING_SIZE bytes are converted to the in-place
/// representation, longer arrays are moved into the object as they are.
/// \param string Source byte array (taken by value)
PDFInplaceOrMemoryString::PDFInplaceOrMemoryString(QByteArray string)
{
    const int byteCount = string.size();

    if (byteCount <= PDFInplaceString::MAX_STRING_SIZE)
    {
        // Short enough - fits into the embedded buffer
        m_value = PDFInplaceString(qMove(string));
        return;
    }

    m_value = qMove(string);
}
bool PDFInplaceOrMemoryString::equals(const char* value, size_t length) const
{
if (std::holds_alternative<PDFInplaceString>(m_value))
{
const PDFInplaceString& string = std::get<PDFInplaceString>(m_value);
return std::equal(string.string.data(), string.string.data() + string.size, value, value + length);
}
if (std::holds_alternative<QByteArray>(m_value))
{
const QByteArray& string = std::get<QByteArray>(m_value);
return std::equal(string.constData(), string.constData() + string.size(), value, value + length);
}
return length == 0;
}
bool PDFInplaceOrMemoryString::isInplace() const
{
return std::holds_alternative<PDFInplaceString>(m_value);
}
/// Returns the stored string as a byte array. For the in-place representation
/// the result comes from PDFInplaceString::getString(), for the byte array
/// representation a copy of the stored array is returned. If no string
/// is held, a default-constructed QByteArray is returned.
QByteArray PDFInplaceOrMemoryString::getString() const
{
    if (const PDFInplaceString* inplace = std::get_if<PDFInplaceString>(&m_value))
    {
        return inplace->getString();
    }

    if (const QByteArray* memory = std::get_if<QByteArray>(&m_value))
    {
        return *memory;
    }

    // Neither alternative is held
    return QByteArray();
}
} // namespace pdf

View File

@ -63,6 +63,14 @@ struct PDFInplaceString
static constexpr const int MAX_STRING_SIZE = sizeof(PDFObjectReference) - 1;
constexpr PDFInplaceString() = default;
/// Constructs the in-place string from the first \p size characters of \p data.
/// \param data Pointer to the source characters
/// \param size Number of characters to copy, asserted not to exceed MAX_STRING_SIZE
inline PDFInplaceString(const char* data, int size)
{
    Q_ASSERT(size <= MAX_STRING_SIZE);

    // Remember the length first, then copy the characters into the buffer
    this->size = static_cast<uint8_t>(size);

    const char* const sourceEnd = data + size;
    std::copy(data, sourceEnd, string.data());
}
inline PDFInplaceString(const QByteArray& data)
{
Q_ASSERT(data.size() <= MAX_STRING_SIZE);
@ -111,6 +119,44 @@ struct PDFStringRef
QByteArray getString() const;
};
/// This class represents string, which can be inplace string (no memory allocation),
/// or classic byte array string, if not enough space for embedded string.
class PDFFORQTLIBSHARED_EXPORT PDFInplaceOrMemoryString
{
public:
constexpr PDFInplaceOrMemoryString() = default;
explicit PDFInplaceOrMemoryString(const char* string);
explicit PDFInplaceOrMemoryString(QByteArray string);
// Default destructor should be OK
inline ~PDFInplaceOrMemoryString() = default;
// Enforce default copy constructor and default move constructor
constexpr inline PDFInplaceOrMemoryString(const PDFInplaceOrMemoryString&) = default;
constexpr inline PDFInplaceOrMemoryString(PDFInplaceOrMemoryString&&) = default;
// Enforce default copy assignment operator and move assignment operator
constexpr inline PDFInplaceOrMemoryString& operator=(const PDFInplaceOrMemoryString&) = default;
constexpr inline PDFInplaceOrMemoryString& operator=(PDFInplaceOrMemoryString&&) = default;
bool equals(const char* value, size_t length) const;
inline bool operator==(const PDFInplaceOrMemoryString&) const = default;
inline bool operator!=(const PDFInplaceOrMemoryString&) const = default;
inline bool operator==(const QByteArray& value) const { return equals(value.constData(), value.size()); }
inline bool operator==(const char* value) const { return equals(value, std::strlen(value)); }
/// Returns true, if string is inplace (i.e. doesn't allocate memory)
bool isInplace() const;
/// Returns string. If string is inplace, byte array is constructed.
QByteArray getString() const;
private:
std::variant<typename std::monostate, PDFInplaceString, QByteArray> m_value;
};
class PDFFORQTLIBSHARED_EXPORT PDFObject
{
public:
@ -297,7 +343,7 @@ private:
class PDFFORQTLIBSHARED_EXPORT PDFDictionary : public PDFObjectContent
{
public:
using DictionaryEntry = std::pair<QByteArray, PDFObject>;
using DictionaryEntry = std::pair<PDFInplaceOrMemoryString, PDFObject>;
inline constexpr PDFDictionary() = default;
inline PDFDictionary(std::vector<DictionaryEntry>&& dictionary) : m_dictionary(qMove(dictionary)) { }
@ -315,6 +361,11 @@ public:
/// \param key Key
const PDFObject& get(const char* key) const;
/// Returns object for the key. If key is not found in the dictionary,
/// then valid reference to the null object is returned.
/// \param key Key
const PDFObject& get(const PDFInplaceOrMemoryString& key) const;
/// Returns true, if dictionary contains a particular key
/// \param key Key to be found in the dictionary
bool hasKey(const QByteArray& key) const { return find(key) != m_dictionary.cend(); }
@ -326,13 +377,18 @@ public:
/// Adds a new entry to the dictionary.
/// \param key Key
/// \param value Value
void addEntry(QByteArray&& key, PDFObject&& value) { m_dictionary.emplace_back(std::move(key), std::move(value)); }
void addEntry(PDFInplaceOrMemoryString&& key, PDFObject&& value) { m_dictionary.emplace_back(std::move(key), std::move(value)); }
/// Adds a new entry to the dictionary.
/// \param key Key
/// \param value Value
void addEntry(const PDFInplaceOrMemoryString& key, PDFObject&& value) { m_dictionary.emplace_back(key, std::move(value)); }
/// Sets entry value. If entry with given key doesn't exist,
/// then it is created.
/// \param key Key
/// \param value Value
void setEntry(const QByteArray& key, PDFObject&& value);
void setEntry(const PDFInplaceOrMemoryString& key, PDFObject&& value);
/// Returns count of items in the dictionary
size_t getCount() const { return m_dictionary.size(); }
@ -342,7 +398,7 @@ public:
/// Returns n-th key of the dictionary
/// \param index Zero-based index of key in the dictionary
const QByteArray& getKey(size_t index) const { return m_dictionary[index].first; }
const PDFInplaceOrMemoryString& getKey(size_t index) const { return m_dictionary[index].first; }
/// Returns n-th value of the dictionary
/// \param index Zero-based index of value in the dictionary
@ -370,6 +426,16 @@ private:
/// \param key Key to be found
std::vector<DictionaryEntry>::const_iterator find(const char* key) const;
/// Finds an item in the dictionary array, if the item is not in the dictionary,
/// then end iterator is returned.
/// \param key Key to be found
std::vector<DictionaryEntry>::const_iterator find(const PDFInplaceOrMemoryString& key) const;
/// Finds an item in the dictionary array, if the item is not in the dictionary,
/// then end iterator is returned.
/// \param key Key to be found
std::vector<DictionaryEntry>::iterator find(const PDFInplaceOrMemoryString& key);
std::vector<DictionaryEntry> m_dictionary;
};

View File

@ -537,7 +537,7 @@ bool PDFOptimizer::performRecompressFlateStreams()
{
bytesSaved += currentBytesSaved;
PDFDictionary updatedDictionary = *dictionary;
updatedDictionary.setEntry("Length", PDFObject::createInteger(recompressedData.size()));
updatedDictionary.setEntry(PDFInplaceOrMemoryString("Length"), PDFObject::createInteger(recompressedData.size()));
entry.object = PDFObject::createStream(std::make_shared<PDFStream>(qMove(updatedDictionary), qMove(recompressedData)));
}
}

View File

@ -489,7 +489,7 @@ void PDFPageContentProcessor::processContent(const QByteArray& content)
}
}
dictionary->addEntry(qMove(name), qMove(valueObject));
dictionary->addEntry(PDFInplaceOrMemoryString(qMove(name)), qMove(valueObject));
}
PDFDocumentDataLoaderDecorator loader(m_document);

View File

@ -802,7 +802,7 @@ PDFObject PDFParser::getObject()
// Second value should be a value
PDFObject object = getObject();
dictionary->addEntry(std::move(key), std::move(object));
dictionary->addEntry(PDFInplaceOrMemoryString(std::move(key)), std::move(object));
}
// Now, we should reach dictionary end. If it is not the case, then end of stream occured.

View File

@ -172,7 +172,7 @@ void PDFDecryptObjectVisitor::visitStream(const PDFStream* stream)
else
{
decryptedData = *stream->getContent();
decryptedDictionary.addEntry(PDFSecurityHandler::OBJECT_REFERENCE_DICTIONARY_NAME, PDFObject::createReference(m_reference));
decryptedDictionary.addEntry(PDFInplaceOrMemoryString(PDFSecurityHandler::OBJECT_REFERENCE_DICTIONARY_NAME), PDFObject::createReference(m_reference));
}
m_objectStack.push_back(PDFObject::createStream(std::make_shared<PDFStream>(qMove(decryptedDictionary), qMove(decryptedData))));
@ -358,7 +358,7 @@ PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObj
const PDFDictionary* cryptFilters = cryptFilterObjects.getDictionary();
for (size_t i = 0, cryptFilterCount = cryptFilters->getCount(); i < cryptFilterCount; ++i)
{
handler.m_cryptFilters[cryptFilters->getKey(i)] = parseCryptFilter(cryptFilters->getValue(i));
handler.m_cryptFilters[cryptFilters->getKey(i).getString()] = parseCryptFilter(cryptFilters->getValue(i));
}
}

View File

@ -162,10 +162,13 @@ void PDFStatisticsCollector::collectStatisticsOfDictionary(Statistics& statistic
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
const QByteArray& key = dictionary->getKey(i);
if (!dictionary->getKey(i).isInplace())
{
QByteArray key = dictionary->getKey(i).getString();
consumptionEstimate += key.size() * sizeof(char);
overheadEstimate += (key.capacity() - key.size()) * sizeof(char);
consumptionEstimate += key.size() * sizeof(char);
overheadEstimate += (key.capacity() - key.size()) * sizeof(char);
}
}
statistics.memoryConsumptionEstimate += consumptionEstimate;

View File

@ -307,7 +307,7 @@ void PDFDocumentPropertiesDialog::initializeFonts(const pdf::PDFDocument* docume
if (fontName.isEmpty())
{
fontName = QString::fromLatin1(fontsDictionary->getKey(i));
fontName = QString::fromLatin1(fontsDictionary->getKey(i).getString());
}
std::unique_ptr<QTreeWidgetItem> fontRootItemPtr = std::make_unique<QTreeWidgetItem>(QStringList({ fontName }));