mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
Using flat map instead of std::set, optimization
This commit is contained in:
@ -49,7 +49,8 @@ HEADERS += \
|
||||
sources/pdfconstants.h \
|
||||
sources/pdfdocument.h \
|
||||
sources/pdfdocumentreader.h \
|
||||
sources/pdfxreftable.h
|
||||
sources/pdfxreftable.h \
|
||||
sources/pdfflatmap.h
|
||||
|
||||
unix {
|
||||
target.path = /usr/lib
|
||||
|
@ -171,15 +171,13 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
|
||||
PDFXRefTable xrefTable;
|
||||
xrefTable.readXRefTable(nullptr, buffer, firstXrefTableOffset);
|
||||
|
||||
PDFParsingContext context;
|
||||
|
||||
// This lambda function fetches object from the buffer from the specified offset.
|
||||
// Can throw exception, returns the parsed object content.
|
||||
auto getObject = [&buffer, &context](PDFInteger offset, PDFObjectReference reference) -> PDFObject
|
||||
auto getObject = [&buffer](PDFParsingContext* context, PDFInteger offset, PDFObjectReference reference) -> PDFObject
|
||||
{
|
||||
PDFParsingContext::PDFParsingContextGuard guard(&context, reference);
|
||||
PDFParsingContext::PDFParsingContextGuard guard(context, reference);
|
||||
|
||||
PDFParser parser(buffer, &context, PDFParser::AllowStreams);
|
||||
PDFParser parser(buffer, context, PDFParser::AllowStreams);
|
||||
parser.seek(offset);
|
||||
|
||||
PDFObject objectNumber = parser.getObject();
|
||||
@ -211,7 +209,7 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
|
||||
return object;
|
||||
};
|
||||
|
||||
auto objectFetcher = [&getObject, &xrefTable](PDFObjectReference reference) -> PDFObject
|
||||
auto objectFetcher = [&getObject, &xrefTable](PDFParsingContext* context, PDFObjectReference reference) -> PDFObject
|
||||
{
|
||||
const PDFXRefTable::Entry& entry = xrefTable.getEntry(reference);
|
||||
switch (entry.type)
|
||||
@ -222,24 +220,25 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
|
||||
case PDFXRefTable::EntryType::Occupied:
|
||||
{
|
||||
Q_ASSERT(entry.reference == reference);
|
||||
return getObject(entry.offset, reference);
|
||||
return getObject(context, entry.offset, reference);
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
Q_ASSERT(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return PDFObject();
|
||||
};
|
||||
context.setObjectFetcher(objectFetcher);
|
||||
|
||||
PDFObjectStorage::PDFObjects objects;
|
||||
objects.resize(xrefTable.getSize());
|
||||
|
||||
std::vector<PDFXRefTable::Entry> occupiedEntries = xrefTable.getOccupiedEntries();
|
||||
|
||||
auto processEntry = [this, &getObject, &objects](const PDFXRefTable::Entry& entry)
|
||||
auto processEntry = [this, &getObject, &objectFetcher, &objects](const PDFXRefTable::Entry& entry)
|
||||
{
|
||||
Q_ASSERT(entry.type == PDFXRefTable::EntryType::Occupied);
|
||||
|
||||
@ -247,7 +246,8 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
|
||||
{
|
||||
try
|
||||
{
|
||||
objects[entry.reference.objectNumber] = PDFObjectStorage::Entry(entry.reference.generation, getObject(entry.offset, entry.reference));
|
||||
PDFParsingContext context(objectFetcher);
|
||||
objects[entry.reference.objectNumber] = PDFObjectStorage::Entry(entry.reference.generation, getObject(&context, entry.offset, entry.reference));
|
||||
}
|
||||
catch (PDFParserException exception)
|
||||
{
|
||||
@ -259,7 +259,6 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
|
||||
};
|
||||
|
||||
// Now, we are ready to scan all objects
|
||||
//std::for_each<std::execution::parallel_policy, std::vector<PDFXRefTable::Entry>::const_iterator, decltype(processEntry)>(occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry);
|
||||
std::for_each(std::execution::parallel_policy(), occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry);
|
||||
}
|
||||
catch (PDFParserException parserException)
|
||||
|
130
PdfForQtLib/sources/pdfflatmap.h
Normal file
130
PdfForQtLib/sources/pdfflatmap.h
Normal file
@ -0,0 +1,130 @@
|
||||
// Copyright (C) 2018 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
#ifndef PDFFLATMAP_H
|
||||
#define PDFFLATMAP_H
|
||||
|
||||
#include <set>
|
||||
#include <array>
|
||||
#include <algorithm>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
/// This class behaves like std::set, but has a "flat" part: while the set holds
/// at most \p FlatSize keys, all of them are stored in an in-object array and no
/// memory allocation is needed. Keys which do not fit into the flat part are kept
/// in an overflow std::set. This container supports inserting, erasing and
/// searching for the key presence.
///
/// The number of used flat slots is stored as a count (not as an iterator into
/// the array), so the implicitly generated copy and move operations produce an
/// independent container which never refers to the storage of another object.
///
/// \tparam Key Key type. It must be default constructible, copy assignable,
///         comparable with operator== (flat part) and operator< (overflow part).
/// \tparam FlatSize Number of keys which can be stored without memory allocation
template<typename Key, int FlatSize>
class PDFFlatMap
{
public:
    /// Constructs an empty container. The flat part is value-initialized.
    PDFFlatMap() = default;

    /// Inserts a key in the container. Checks, if key is already present
    /// in the container, in this case no insertion occurs.
    /// \param key Key to be inserted
    void insert(const Key& key);

    /// Erases a key in the container, if it is in the set. If a flat slot
    /// becomes free and the overflow part is not empty, one key is moved
    /// from the overflow part to the flat part.
    /// \param key Key to be erased
    void erase(const Key& key);

    /// Searches for a given key. If it is found, true is returned, false otherwise.
    /// \param key Key to be searched
    bool search(const Key& key) const;

    /// Returns size of the container (flat part and overflow part together)
    std::size_t size() const;

    /// Returns true, if container is empty
    bool empty() const;

private:
    using FlatStorage = std::array<Key, FlatSize>;

    /// Flat part of the set, only first \p m_flatCount items are valid
    FlatStorage m_flat{};

    /// Number of occupied slots in the flat part. It is also the index
    /// of the first empty slot, or size of the array, if flat part is full.
    std::size_t m_flatCount = 0;

    /// Keys, which did not fit into the flat part
    std::set<Key> m_overflowContainer;
};

template<typename Key, int FlatSize>
void PDFFlatMap<Key, FlatSize>::insert(const Key& key)
{
    if (search(key))
    {
        // Key is already present, container holds each key only once
        return;
    }

    // Try to insert key in the flat part, if possible (flat part is not full)
    if (m_flatCount < m_flat.size())
    {
        m_flat[m_flatCount++] = key;
    }
    else
    {
        m_overflowContainer.insert(key);
    }
}

template<typename Key, int FlatSize>
void PDFFlatMap<Key, FlatSize>::erase(const Key& key)
{
    // First we remove the key from the flat part (if it is present there),
    // then from the overflow part. The key can be only in one of them.
    const auto flatBegin = m_flat.begin();
    const auto flatEnd = flatBegin + static_cast<typename FlatStorage::difference_type>(m_flatCount);
    m_flatCount = static_cast<std::size_t>(std::remove(flatBegin, flatEnd, key) - flatBegin);
    m_overflowContainer.erase(key);

    // If there is a free slot in the flat part, fill it from the overflow part,
    // so the overflow part is used only if the flat part is full.
    if (!m_overflowContainer.empty() && m_flatCount < m_flat.size())
    {
        const auto firstOverflow = m_overflowContainer.begin();
        m_flat[m_flatCount++] = *firstOverflow;
        m_overflowContainer.erase(firstOverflow);
    }
}

template<typename Key, int FlatSize>
bool PDFFlatMap<Key, FlatSize>::search(const Key& key) const
{
    // Linear search in the occupied flat slots, then lookup in the overflow part
    const auto flatBegin = m_flat.begin();
    const auto flatEnd = flatBegin + static_cast<typename FlatStorage::difference_type>(m_flatCount);
    return std::find(flatBegin, flatEnd, key) != flatEnd || m_overflowContainer.count(key) != 0;
}

template<typename Key, int FlatSize>
std::size_t PDFFlatMap<Key, FlatSize>::size() const
{
    return m_flatCount + m_overflowContainer.size();
}

template<typename Key, int FlatSize>
bool PDFFlatMap<Key, FlatSize>::empty() const
{
    return size() == 0;
}
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFFLATMAP_H
|
@ -592,12 +592,12 @@ void PDFLexicalAnalyzer::error(const QString& message) const
|
||||
throw PDFParserException(tr("Error near position %1. %2").arg(distance).arg(message));
|
||||
}
|
||||
|
||||
PDFObject PDFParsingContext::getObject(const PDFObject& object) const
|
||||
PDFObject PDFParsingContext::getObject(const PDFObject& object)
|
||||
{
|
||||
if (object.isReference())
|
||||
{
|
||||
Q_ASSERT(m_objectFetcher);
|
||||
return m_objectFetcher(object.getReference());
|
||||
return m_objectFetcher(this, object.getReference());
|
||||
}
|
||||
|
||||
return object;
|
||||
@ -605,26 +605,20 @@ PDFObject PDFParsingContext::getObject(const PDFObject& object) const
|
||||
|
||||
void PDFParsingContext::beginParsingObject(PDFObjectReference reference)
|
||||
{
|
||||
QMutexLocker lock(&m_mutex);
|
||||
|
||||
Key key(QThread::currentThreadId(), reference);
|
||||
if (m_activeParsedObjectSet.count(key))
|
||||
if (m_activeParsedObjectSet.search(reference))
|
||||
{
|
||||
throw PDFParserException(tr("Cyclical reference found while parsing object %1 %2.").arg(reference.objectNumber).arg(reference.generation));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_activeParsedObjectSet.insert(key);
|
||||
m_activeParsedObjectSet.insert(reference);
|
||||
}
|
||||
}
|
||||
|
||||
void PDFParsingContext::endParsingObject(PDFObjectReference reference)
|
||||
{
|
||||
QMutexLocker lock(&m_mutex);
|
||||
|
||||
Key key(QThread::currentThreadId(), reference);
|
||||
Q_ASSERT(m_activeParsedObjectSet.count(key));
|
||||
m_activeParsedObjectSet.erase(key);
|
||||
Q_ASSERT(m_activeParsedObjectSet.search(reference));
|
||||
m_activeParsedObjectSet.erase(reference);
|
||||
}
|
||||
|
||||
PDFParser::PDFParser(const QByteArray& data, PDFParsingContext* context, Features features) :
|
||||
|
@ -21,9 +21,9 @@
|
||||
|
||||
#include "pdfglobal.h"
|
||||
#include "pdfobject.h"
|
||||
#include "pdfflatmap.h"
|
||||
|
||||
#include <QtCore>
|
||||
#include <QMutex>
|
||||
#include <QVariant>
|
||||
#include <QByteArray>
|
||||
|
||||
@ -208,7 +208,11 @@ class PDFParsingContext
|
||||
Q_DECLARE_TR_FUNCTIONS(pdf::PDFParsingContext)
|
||||
|
||||
public:
|
||||
explicit PDFParsingContext() = default;
|
||||
explicit PDFParsingContext(std::function<PDFObject(PDFParsingContext*, PDFObjectReference)> objectFetcher) :
|
||||
m_objectFetcher(std::move(objectFetcher))
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/// Guard guarding the cyclical references.
|
||||
class PDFParsingContextGuard
|
||||
@ -233,34 +237,19 @@ public:
|
||||
|
||||
/// Returns dereferenced object, if object is a reference. If it is not a reference,
|
||||
/// then same object is returned.
|
||||
PDFObject getObject(const PDFObject& object) const;
|
||||
|
||||
/// Sets function which provides object fetching
|
||||
void setObjectFetcher(std::function<PDFObject(PDFObjectReference)> objectFetcher) { m_objectFetcher = std::move(objectFetcher); }
|
||||
PDFObject getObject(const PDFObject& object);
|
||||
|
||||
private:
|
||||
void beginParsingObject(PDFObjectReference reference);
|
||||
void endParsingObject(PDFObjectReference reference);
|
||||
|
||||
struct Key
|
||||
{
|
||||
constexpr inline Key() = default;
|
||||
constexpr inline Key(Qt::HANDLE threadContext, PDFObjectReference reference) : threadContext(threadContext), reference(reference) { }
|
||||
|
||||
Qt::HANDLE threadContext = nullptr;
|
||||
PDFObjectReference reference;
|
||||
|
||||
inline bool operator<(const Key& other) const { return std::tie(threadContext, reference) < std::tie(other.threadContext, other.reference); }
|
||||
};
|
||||
using KeySet = PDFFlatMap<PDFObjectReference, 2>;
|
||||
|
||||
/// This function fetches object, if it is needed
|
||||
std::function<PDFObject(PDFObjectReference)> m_objectFetcher;
|
||||
std::function<PDFObject(PDFParsingContext*, PDFObjectReference)> m_objectFetcher;
|
||||
|
||||
/// Set containing objects currently being parsed.
|
||||
std::set<Key> m_activeParsedObjectSet;
|
||||
|
||||
/// Mutex protecting object for multiple thread access
|
||||
QMutex m_mutex;
|
||||
KeySet m_activeParsedObjectSet;
|
||||
};
|
||||
|
||||
/// Class for parsing objects. Checks cyclical references. If
|
||||
|
@ -162,7 +162,7 @@ std::vector<PDFXRefTable::Entry> PDFXRefTable::getOccupiedEntries() const
|
||||
const PDFXRefTable::Entry& PDFXRefTable::getEntry(PDFObjectReference reference) const
|
||||
{
|
||||
// We must also check generation number here. For this reason, we compare references of the entry at given position.
|
||||
if (reference.objectNumber >= 0 && reference.objectNumber < m_entries.size() && m_entries[reference.objectNumber].reference == reference)
|
||||
if (reference.objectNumber >= 0 && reference.objectNumber < static_cast<PDFInteger>(m_entries.size()) && m_entries[reference.objectNumber].reference == reference)
|
||||
{
|
||||
return m_entries[reference.objectNumber];
|
||||
}
|
||||
|
@ -42,6 +42,8 @@ HEADERS += \
|
||||
FORMS += \
|
||||
pdfviewermainwindow.ui
|
||||
|
||||
CONFIG += force_debug_info
|
||||
|
||||
# Default rules for deployment.
|
||||
qnx: target.path = /tmp/$${TARGET}/bin
|
||||
else: unix:!android: target.path = /opt/$${TARGET}/bin
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include "pdfparser.h"
|
||||
#include "pdfconstants.h"
|
||||
#include "pdfflatmap.h"
|
||||
|
||||
#include <regex>
|
||||
|
||||
@ -42,6 +43,7 @@ private slots:
|
||||
void test_command();
|
||||
void test_invalid_input();
|
||||
void test_header_regexp();
|
||||
void test_flat_map();
|
||||
|
||||
private:
|
||||
void scanWholeStream(const char* stream);
|
||||
@ -236,6 +238,62 @@ void LexicalAnalyzerTest::test_header_regexp()
|
||||
}
|
||||
}
|
||||
|
||||
void LexicalAnalyzerTest::test_flat_map()
|
||||
{
|
||||
using Map = pdf::PDFFlatMap<int, 2>;
|
||||
|
||||
struct Item
|
||||
{
|
||||
int order;
|
||||
int number;
|
||||
bool erase;
|
||||
|
||||
bool operator<(const Item& other) const { return order < other.order; }
|
||||
};
|
||||
|
||||
for (int count = 1; count < 5; ++count)
|
||||
{
|
||||
std::vector<Item> items;
|
||||
items.reserve(2 * count);
|
||||
|
||||
int order = 0;
|
||||
for (int i = 0; i < count; ++i)
|
||||
{
|
||||
items.emplace_back(Item{order++, i, false});
|
||||
items.emplace_back(Item{order++, i, true});
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
std::set<int> testSet;
|
||||
Map testFlatMap;
|
||||
|
||||
for (const Item& item : items)
|
||||
{
|
||||
if (!item.erase)
|
||||
{
|
||||
testSet.insert(item.number);
|
||||
testFlatMap.insert(item.number);
|
||||
}
|
||||
else
|
||||
{
|
||||
testSet.erase(item.number);
|
||||
testFlatMap.erase(item.number);
|
||||
}
|
||||
|
||||
QCOMPARE(testSet.size(), testFlatMap.size());
|
||||
QCOMPARE(testSet.empty(), testFlatMap.empty());
|
||||
|
||||
for (const int testInteger : testSet)
|
||||
{
|
||||
QVERIFY(testFlatMap.search(testInteger));
|
||||
}
|
||||
}
|
||||
|
||||
} while (std::next_permutation(items.begin(), items.end()));
|
||||
}
|
||||
}
|
||||
|
||||
void LexicalAnalyzerTest::scanWholeStream(const char* stream)
|
||||
{
|
||||
pdf::PDFLexicalAnalyzer analyzer(stream, stream + strlen(stream));
|
||||
|
Reference in New Issue
Block a user