Use a flat map instead of std::set (optimization)

This commit is contained in:
Jakub Melka
2018-11-25 17:57:39 +01:00
parent 89d4ee606b
commit 670a260265
8 changed files with 219 additions and 46 deletions

View File

@ -49,7 +49,8 @@ HEADERS += \
sources/pdfconstants.h \
sources/pdfdocument.h \
sources/pdfdocumentreader.h \
sources/pdfxreftable.h
sources/pdfxreftable.h \
sources/pdfflatmap.h
unix {
target.path = /usr/lib

View File

@ -171,15 +171,13 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
PDFXRefTable xrefTable;
xrefTable.readXRefTable(nullptr, buffer, firstXrefTableOffset);
PDFParsingContext context;
// This lambda fetches the object located at the specified offset in the buffer.
// It can throw an exception; on success it returns the parsed object.
auto getObject = [&buffer, &context](PDFInteger offset, PDFObjectReference reference) -> PDFObject
auto getObject = [&buffer](PDFParsingContext* context, PDFInteger offset, PDFObjectReference reference) -> PDFObject
{
PDFParsingContext::PDFParsingContextGuard guard(&context, reference);
PDFParsingContext::PDFParsingContextGuard guard(context, reference);
PDFParser parser(buffer, &context, PDFParser::AllowStreams);
PDFParser parser(buffer, context, PDFParser::AllowStreams);
parser.seek(offset);
PDFObject objectNumber = parser.getObject();
@ -211,7 +209,7 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
return object;
};
auto objectFetcher = [&getObject, &xrefTable](PDFObjectReference reference) -> PDFObject
auto objectFetcher = [&getObject, &xrefTable](PDFParsingContext* context, PDFObjectReference reference) -> PDFObject
{
const PDFXRefTable::Entry& entry = xrefTable.getEntry(reference);
switch (entry.type)
@ -222,24 +220,25 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
case PDFXRefTable::EntryType::Occupied:
{
Q_ASSERT(entry.reference == reference);
return getObject(entry.offset, reference);
return getObject(context, entry.offset, reference);
}
default:
{
Q_ASSERT(false);
break;
}
}
return PDFObject();
};
context.setObjectFetcher(objectFetcher);
PDFObjectStorage::PDFObjects objects;
objects.resize(xrefTable.getSize());
std::vector<PDFXRefTable::Entry> occupiedEntries = xrefTable.getOccupiedEntries();
auto processEntry = [this, &getObject, &objects](const PDFXRefTable::Entry& entry)
auto processEntry = [this, &getObject, &objectFetcher, &objects](const PDFXRefTable::Entry& entry)
{
Q_ASSERT(entry.type == PDFXRefTable::EntryType::Occupied);
@ -247,7 +246,8 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
{
try
{
objects[entry.reference.objectNumber] = PDFObjectStorage::Entry(entry.reference.generation, getObject(entry.offset, entry.reference));
PDFParsingContext context(objectFetcher);
objects[entry.reference.objectNumber] = PDFObjectStorage::Entry(entry.reference.generation, getObject(&context, entry.offset, entry.reference));
}
catch (PDFParserException exception)
{
@ -259,7 +259,6 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
};
// Now, we are ready to scan all objects
//std::for_each<std::execution::parallel_policy, std::vector<PDFXRefTable::Entry>::const_iterator, decltype(processEntry)>(occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry);
std::for_each(std::execution::parallel_policy(), occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry);
}
catch (PDFParserException parserException)

View File

@ -0,0 +1,130 @@
// Copyright (C) 2018 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFFLATMAP_H
#define PDFFLATMAP_H
#include <set>
#include <array>
#include <algorithm>
namespace pdf
{
/// Set-like container with a small inline ("flat") part. The first \p FlatSize keys
/// are stored directly inside the object in a fixed-size array, so no memory
/// allocation is needed while the container holds at most \p FlatSize keys. Further
/// keys are stored in an overflow std::set. The container supports inserting,
/// erasing and searching for the presence of a key; it does not preserve any order.
///
/// Invariant maintained by insert() and erase(): the overflow set is non-empty only
/// if the flat part is completely full, and a key is never stored in both parts.
///
/// The flat part is tracked by an element count (not by an iterator into the array),
/// so the compiler-generated copy and move operations produce independent, valid objects.
///
/// \tparam Key Key type; must be default constructible, copy assignable, and comparable
///         with operator== (flat part) and operator< (overflow std::set).
/// \tparam FlatSize Number of keys stored inline, must not be negative
template<typename Key, int FlatSize>
class PDFFlatMap
{
    static_assert(FlatSize >= 0, "PDFFlatMap: FlatSize must not be negative");

public:
    /// Constructs an empty container. Not declared constexpr, because the overflow
    /// std::set member can't be constructed in a constant expression in C++17.
    PDFFlatMap() = default;

    /// Inserts a key in the container. Checks, if key is already present
    /// in the container, in this case no insertion occurs.
    /// \param key Key to be inserted
    void insert(const Key& key);

    /// Erases a key in the container, if it is in the set. Does nothing,
    /// if the key is not present.
    /// \param key Key to be erased
    void erase(const Key& key);

    /// Searches for a given key. If it is found, true is returned, false otherwise.
    /// \param key Key to be searched
    bool search(const Key& key) const;

    /// Returns size of the container (number of keys in flat and overflow part)
    std::size_t size() const;

    /// Returns true, if container is empty
    bool empty() const;

private:
    using FlatStorage = std::array<Key, FlatSize>;

    /// Returns iterator pointing one past the last used slot of the flat part
    typename FlatStorage::iterator flatEnd()
    {
        return m_flat.begin() + static_cast<typename FlatStorage::difference_type>(m_flatCount);
    }

    /// Returns const iterator pointing one past the last used slot of the flat part
    typename FlatStorage::const_iterator flatEnd() const
    {
        return m_flat.begin() + static_cast<typename FlatStorage::difference_type>(m_flatCount);
    }

    /// Flat part of the set, only the first m_flatCount slots hold valid keys
    FlatStorage m_flat{};

    /// Number of used slots in the flat part (index of the first empty slot)
    std::size_t m_flatCount = 0;

    /// Keys which did not fit into the flat part
    std::set<Key> m_overflowContainer;
};

template<typename Key, int FlatSize>
void PDFFlatMap<Key, FlatSize>::insert(const Key& key)
{
    if (search(key))
    {
        // Key is already present, container behaves like a set
        return;
    }

    // Try to insert key in the flat part, if possible (there is a free slot)
    if (m_flatCount < m_flat.size())
    {
        m_flat[m_flatCount++] = key;
    }
    else
    {
        m_overflowContainer.insert(key);
    }
}

template<typename Key, int FlatSize>
void PDFFlatMap<Key, FlatSize>::erase(const Key& key)
{
    // First we must check, if the key is present in the flat part. If yes, then remove key
    // from the flat part. A key is never stored in both parts, so the overflow part
    // is checked only if the flat part did not contain the key.
    const auto oldEnd = flatEnd();
    const auto newEnd = std::remove(m_flat.begin(), oldEnd, key);
    if (newEnd != oldEnd)
    {
        m_flatCount = static_cast<std::size_t>(newEnd - m_flat.begin());
    }
    else
    {
        m_overflowContainer.erase(key);
    }

    // Keep the flat part as full as possible: move one key from the overflow part
    // to the freed slot of the flat part, if there is any.
    if (!m_overflowContainer.empty() && m_flatCount < m_flat.size())
    {
        const auto first = m_overflowContainer.begin();
        m_flat[m_flatCount++] = *first;
        m_overflowContainer.erase(first);
    }
}

template<typename Key, int FlatSize>
bool PDFFlatMap<Key, FlatSize>::search(const Key& key) const
{
    // Linear scan of the (small) flat part first, then lookup in the overflow part
    const auto last = flatEnd();
    if (std::find(m_flat.begin(), last, key) != last)
    {
        return true;
    }

    return m_overflowContainer.count(key) != 0;
}

template<typename Key, int FlatSize>
std::size_t PDFFlatMap<Key, FlatSize>::size() const
{
    return m_flatCount + m_overflowContainer.size();
}

template<typename Key, int FlatSize>
bool PDFFlatMap<Key, FlatSize>::empty() const
{
    return m_flatCount == 0 && m_overflowContainer.empty();
}
} // namespace pdf
#endif // PDFFLATMAP_H

View File

@ -592,12 +592,12 @@ void PDFLexicalAnalyzer::error(const QString& message) const
throw PDFParserException(tr("Error near position %1. %2").arg(distance).arg(message));
}
PDFObject PDFParsingContext::getObject(const PDFObject& object) const
PDFObject PDFParsingContext::getObject(const PDFObject& object)
{
if (object.isReference())
{
Q_ASSERT(m_objectFetcher);
return m_objectFetcher(object.getReference());
return m_objectFetcher(this, object.getReference());
}
return object;
@ -605,26 +605,20 @@ PDFObject PDFParsingContext::getObject(const PDFObject& object) const
void PDFParsingContext::beginParsingObject(PDFObjectReference reference)
{
QMutexLocker lock(&m_mutex);
Key key(QThread::currentThreadId(), reference);
if (m_activeParsedObjectSet.count(key))
if (m_activeParsedObjectSet.search(reference))
{
throw PDFParserException(tr("Cyclical reference found while parsing object %1 %2.").arg(reference.objectNumber).arg(reference.generation));
}
else
{
m_activeParsedObjectSet.insert(key);
m_activeParsedObjectSet.insert(reference);
}
}
void PDFParsingContext::endParsingObject(PDFObjectReference reference)
{
QMutexLocker lock(&m_mutex);
Key key(QThread::currentThreadId(), reference);
Q_ASSERT(m_activeParsedObjectSet.count(key));
m_activeParsedObjectSet.erase(key);
Q_ASSERT(m_activeParsedObjectSet.search(reference));
m_activeParsedObjectSet.erase(reference);
}
PDFParser::PDFParser(const QByteArray& data, PDFParsingContext* context, Features features) :

View File

@ -21,9 +21,9 @@
#include "pdfglobal.h"
#include "pdfobject.h"
#include "pdfflatmap.h"
#include <QtCore>
#include <QMutex>
#include <QVariant>
#include <QByteArray>
@ -208,7 +208,11 @@ class PDFParsingContext
Q_DECLARE_TR_FUNCTIONS(pdf::PDFParsingContext)
public:
explicit PDFParsingContext() = default;
explicit PDFParsingContext(std::function<PDFObject(PDFParsingContext*, PDFObjectReference)> objectFetcher) :
m_objectFetcher(std::move(objectFetcher))
{
}
/// Guard guarding the cyclical references.
class PDFParsingContextGuard
@ -233,34 +237,19 @@ public:
/// Returns dereferenced object, if object is a reference. If it is not a reference,
/// then same object is returned.
PDFObject getObject(const PDFObject& object) const;
/// Sets function which provides object fetching
void setObjectFetcher(std::function<PDFObject(PDFObjectReference)> objectFetcher) { m_objectFetcher = std::move(objectFetcher); }
PDFObject getObject(const PDFObject& object);
private:
void beginParsingObject(PDFObjectReference reference);
void endParsingObject(PDFObjectReference reference);
struct Key
{
constexpr inline Key() = default;
constexpr inline Key(Qt::HANDLE threadContext, PDFObjectReference reference) : threadContext(threadContext), reference(reference) { }
Qt::HANDLE threadContext = nullptr;
PDFObjectReference reference;
inline bool operator<(const Key& other) const { return std::tie(threadContext, reference) < std::tie(other.threadContext, other.reference); }
};
using KeySet = PDFFlatMap<PDFObjectReference, 2>;
/// This function fetches object, if it is needed
std::function<PDFObject(PDFObjectReference)> m_objectFetcher;
std::function<PDFObject(PDFParsingContext*, PDFObjectReference)> m_objectFetcher;
/// Set containing objects currently being parsed.
std::set<Key> m_activeParsedObjectSet;
/// Mutex protecting object for multiple thread access
QMutex m_mutex;
KeySet m_activeParsedObjectSet;
};
/// Class for parsing objects. Checks cyclical references. If

View File

@ -162,7 +162,7 @@ std::vector<PDFXRefTable::Entry> PDFXRefTable::getOccupiedEntries() const
const PDFXRefTable::Entry& PDFXRefTable::getEntry(PDFObjectReference reference) const
{
// We must also check generation number here. For this reason, we compare references of the entry at given position.
if (reference.objectNumber >= 0 && reference.objectNumber < m_entries.size() && m_entries[reference.objectNumber].reference == reference)
if (reference.objectNumber >= 0 && reference.objectNumber < static_cast<PDFInteger>(m_entries.size()) && m_entries[reference.objectNumber].reference == reference)
{
return m_entries[reference.objectNumber];
}

View File

@ -42,6 +42,8 @@ HEADERS += \
FORMS += \
pdfviewermainwindow.ui
CONFIG += force_debug_info
# Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin

View File

@ -21,6 +21,7 @@
#include "pdfparser.h"
#include "pdfconstants.h"
#include "pdfflatmap.h"
#include <regex>
@ -42,6 +43,7 @@ private slots:
void test_command();
void test_invalid_input();
void test_header_regexp();
void test_flat_map();
private:
void scanWholeStream(const char* stream);
@ -236,6 +238,62 @@ void LexicalAnalyzerTest::test_header_regexp()
}
}
/// Exhaustive test of pdf::PDFFlatMap<int, 2> against std::set<int> used as reference.
/// For 1 to 4 distinct numbers, every permutation of the operations "insert number" and
/// "erase number" is executed on both containers (an erase may come before the matching
/// insert, which tests erasing of a missing key). With a flat capacity of 2, the runs
/// with 3 and 4 numbers also exercise the overflow part. After each operation the size,
/// emptiness and membership of every candidate number must agree with the reference set.
void LexicalAnalyzerTest::test_flat_map()
{
    using Map = pdf::PDFFlatMap<int, 2>;

    struct Item
    {
        int order;
        int number;
        bool erase;

        // Ordering by unique 'order' makes std::next_permutation enumerate all permutations
        bool operator<(const Item& other) const { return order < other.order; }
    };

    for (int count = 1; count < 5; ++count)
    {
        // Items are created in ascending 'order', which is the first permutation
        std::vector<Item> items;
        items.reserve(2 * count);

        int order = 0;
        for (int i = 0; i < count; ++i)
        {
            items.emplace_back(Item{order++, i, false});
            items.emplace_back(Item{order++, i, true});
        }

        do
        {
            std::set<int> testSet;
            Map testFlatMap;

            for (const Item& item : items)
            {
                if (!item.erase)
                {
                    testSet.insert(item.number);
                    testFlatMap.insert(item.number);
                }
                else
                {
                    testSet.erase(item.number);
                    testFlatMap.erase(item.number);
                }

                QCOMPARE(testSet.size(), testFlatMap.size());
                QCOMPARE(testSet.empty(), testFlatMap.empty());

                // Check both directions: present keys must be found and
                // erased / never inserted keys must not be found.
                for (int number = 0; number < count; ++number)
                {
                    QCOMPARE(testFlatMap.search(number), testSet.count(number) != 0);
                }
            }

        } while (std::next_permutation(items.begin(), items.end()));
    }
}
void LexicalAnalyzerTest::scanWholeStream(const char* stream)
{
pdf::PDFLexicalAnalyzer analyzer(stream, stream + strlen(stream));