diff --git a/PdfForQtLib/PdfForQtLib.pro b/PdfForQtLib/PdfForQtLib.pro index ef976b4..8e39ebc 100644 --- a/PdfForQtLib/PdfForQtLib.pro +++ b/PdfForQtLib/PdfForQtLib.pro @@ -49,7 +49,8 @@ HEADERS += \ sources/pdfconstants.h \ sources/pdfdocument.h \ sources/pdfdocumentreader.h \ - sources/pdfxreftable.h + sources/pdfxreftable.h \ + sources/pdfflatmap.h unix { target.path = /usr/lib diff --git a/PdfForQtLib/sources/pdfdocumentreader.cpp b/PdfForQtLib/sources/pdfdocumentreader.cpp index a4cbe5b..6701d77 100644 --- a/PdfForQtLib/sources/pdfdocumentreader.cpp +++ b/PdfForQtLib/sources/pdfdocumentreader.cpp @@ -171,15 +171,13 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer) PDFXRefTable xrefTable; xrefTable.readXRefTable(nullptr, buffer, firstXrefTableOffset); - PDFParsingContext context; - // This lambda function fetches object from the buffer from the specified offset. // Can throw exception, returns a pair of scanned reference and object content. - auto getObject = [&buffer, &context](PDFInteger offset, PDFObjectReference reference) -> PDFObject + auto getObject = [&buffer](PDFParsingContext* context, PDFInteger offset, PDFObjectReference reference) -> PDFObject { - PDFParsingContext::PDFParsingContextGuard guard(&context, reference); + PDFParsingContext::PDFParsingContextGuard guard(context, reference); - PDFParser parser(buffer, &context, PDFParser::AllowStreams); + PDFParser parser(buffer, context, PDFParser::AllowStreams); parser.seek(offset); PDFObject objectNumber = parser.getObject(); @@ -211,7 +209,7 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer) return object; }; - auto objectFetcher = [&getObject, &xrefTable](PDFObjectReference reference) -> PDFObject + auto objectFetcher = [&getObject, &xrefTable](PDFParsingContext* context, PDFObjectReference reference) -> PDFObject { const PDFXRefTable::Entry& entry = xrefTable.getEntry(reference); switch (entry.type) @@ -222,24 +220,25 @@ PDFDocument 
PDFDocumentReader::readFromBuffer(const QByteArray& buffer) case PDFXRefTable::EntryType::Occupied: { Q_ASSERT(entry.reference == reference); - return getObject(entry.offset, reference); + return getObject(context, entry.offset, reference); } default: + { Q_ASSERT(false); break; + } } return PDFObject(); }; - context.setObjectFetcher(objectFetcher); PDFObjectStorage::PDFObjects objects; objects.resize(xrefTable.getSize()); std::vector occupiedEntries = xrefTable.getOccupiedEntries(); - auto processEntry = [this, &getObject, &objects](const PDFXRefTable::Entry& entry) + auto processEntry = [this, &getObject, &objectFetcher, &objects](const PDFXRefTable::Entry& entry) { Q_ASSERT(entry.type == PDFXRefTable::EntryType::Occupied); @@ -247,7 +246,8 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer) { try { - objects[entry.reference.objectNumber] = PDFObjectStorage::Entry(entry.reference.generation, getObject(entry.offset, entry.reference)); + PDFParsingContext context(objectFetcher); + objects[entry.reference.objectNumber] = PDFObjectStorage::Entry(entry.reference.generation, getObject(&context, entry.offset, entry.reference)); } catch (PDFParserException exception) { @@ -259,7 +259,6 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer) }; // Now, we are ready to scan all objects - //std::for_each::const_iterator, decltype(processEntry)>(occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry); std::for_each(std::execution::parallel_policy(), occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry); } catch (PDFParserException parserException) diff --git a/PdfForQtLib/sources/pdfflatmap.h b/PdfForQtLib/sources/pdfflatmap.h new file mode 100644 index 0000000..14da266 --- /dev/null +++ b/PdfForQtLib/sources/pdfflatmap.h @@ -0,0 +1,130 @@ +// Copyright (C) 2018 Jakub Melka +// +// This file is part of PdfForQt. 
+// +// PdfForQt is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// PdfForQt is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>. + + +#ifndef PDFFLATMAP_H +#define PDFFLATMAP_H + +#include +#include +#include + +namespace pdf +{ + +/// This class behaves like std::set, but has a "flat" part, and if size of the set +/// is small (smaller than \p FlatSize), then no memory allocation is needed. This +/// container supports inserting, deleting and searching for the object presence. +template +class PDFFlatMap +{ +public: + constexpr inline PDFFlatMap(); + + /// Inserts a key into the container. If the key is already present + /// in the container, no insertion occurs. + /// \param key Key to be inserted + void insert(const Key& key); + + /// Erases a key in the container, if it is in the set + /// \param key Key to be erased + void erase(const Key& key); + + /// Searches for a given key. If it is found, true is returned, false otherwise. + /// \param key Key to be searched + bool search(const Key& key) const; + + /// Returns size of the container + std::size_t size() const; + + /// Returns true, if container is empty + bool empty() const; + +private: + /// Flat part of the set + std::array m_flat; + + /// This iterator points to first empty position, or it is + /// the last iterator (pointing to the end of the array).
+ typename std::array::iterator m_flatEmptyPosition; + + std::set m_overflowContainer; +}; + +template +constexpr PDFFlatMap::PDFFlatMap() : + m_flat(), + m_flatEmptyPosition(m_flat.begin()), + m_overflowContainer() +{ + +} + +template +void PDFFlatMap::insert(const Key& key) +{ + if (!search(key)) + { + // Try to insert key in the flat part, if possible (we are not at end of the array) + if (m_flatEmptyPosition != m_flat.end()) + { + *m_flatEmptyPosition++ = key; + } + else + { + m_overflowContainer.insert(key); + } + } +} + +template +void PDFFlatMap::erase(const Key& key) +{ + // First we must check, if the key is present in the flat part. If yes, then remove key + // from the flat part and try to move one item from the overflow part to the flat part, if possible. + // Otherwise check overflow part. + m_flatEmptyPosition = std::remove_if(m_flat.begin(), m_flatEmptyPosition, [&key](const Key& otherKey) { return key == otherKey; }); + m_overflowContainer.erase(key); + + if (!m_overflowContainer.empty() && m_flatEmptyPosition != m_flat.end()) + { + *m_flatEmptyPosition++ = *m_overflowContainer.begin(); + m_overflowContainer.erase(m_overflowContainer.begin()); + } +} + +template +bool PDFFlatMap::search(const Key& key) const +{ + return std::find::const_iterator, Key>(m_flat.begin(), m_flatEmptyPosition, key) != m_flatEmptyPosition || static_cast(m_overflowContainer.count(key)); +} + +template +std::size_t PDFFlatMap::size() const +{ + return std::distance::const_iterator>(m_flat.begin(), m_flatEmptyPosition) + m_overflowContainer.size(); +} + +template +bool PDFFlatMap::empty() const +{ + return size() == 0; +} + +} // namespace pdf + +#endif // PDFFLATMAP_H diff --git a/PdfForQtLib/sources/pdfparser.cpp b/PdfForQtLib/sources/pdfparser.cpp index 324eb37..e00b5c5 100644 --- a/PdfForQtLib/sources/pdfparser.cpp +++ b/PdfForQtLib/sources/pdfparser.cpp @@ -592,12 +592,12 @@ void PDFLexicalAnalyzer::error(const QString& message) const throw PDFParserException(tr("Error 
near position %1. %2").arg(distance).arg(message)); } -PDFObject PDFParsingContext::getObject(const PDFObject& object) const +PDFObject PDFParsingContext::getObject(const PDFObject& object) { if (object.isReference()) { Q_ASSERT(m_objectFetcher); - return m_objectFetcher(object.getReference()); + return m_objectFetcher(this, object.getReference()); } return object; @@ -605,26 +605,20 @@ PDFObject PDFParsingContext::getObject(const PDFObject& object) const void PDFParsingContext::beginParsingObject(PDFObjectReference reference) { - QMutexLocker lock(&m_mutex); - - Key key(QThread::currentThreadId(), reference); - if (m_activeParsedObjectSet.count(key)) + if (m_activeParsedObjectSet.search(reference)) { throw PDFParserException(tr("Cyclical reference found while parsing object %1 %2.").arg(reference.objectNumber).arg(reference.generation)); } else { - m_activeParsedObjectSet.insert(key); + m_activeParsedObjectSet.insert(reference); } } void PDFParsingContext::endParsingObject(PDFObjectReference reference) { - QMutexLocker lock(&m_mutex); - - Key key(QThread::currentThreadId(), reference); - Q_ASSERT(m_activeParsedObjectSet.count(key)); - m_activeParsedObjectSet.erase(key); + Q_ASSERT(m_activeParsedObjectSet.search(reference)); + m_activeParsedObjectSet.erase(reference); } PDFParser::PDFParser(const QByteArray& data, PDFParsingContext* context, Features features) : diff --git a/PdfForQtLib/sources/pdfparser.h b/PdfForQtLib/sources/pdfparser.h index 5d79afb..c7a0ebd 100644 --- a/PdfForQtLib/sources/pdfparser.h +++ b/PdfForQtLib/sources/pdfparser.h @@ -21,9 +21,9 @@ #include "pdfglobal.h" #include "pdfobject.h" +#include "pdfflatmap.h" #include -#include #include #include @@ -208,7 +208,11 @@ class PDFParsingContext Q_DECLARE_TR_FUNCTIONS(pdf::PDFParsingContext) public: - explicit PDFParsingContext() = default; + explicit PDFParsingContext(std::function objectFetcher) : + m_objectFetcher(std::move(objectFetcher)) + { + + } /// Guard guarding the cyclical references. 
class PDFParsingContextGuard @@ -233,34 +237,19 @@ public: /// Returns dereferenced object, if object is a reference. If it is not a reference, /// then same object is returned. - PDFObject getObject(const PDFObject& object) const; - - /// Sets function which provides object fetching - void setObjectFetcher(std::function objectFetcher) { m_objectFetcher = std::move(objectFetcher); } + PDFObject getObject(const PDFObject& object); private: void beginParsingObject(PDFObjectReference reference); void endParsingObject(PDFObjectReference reference); - struct Key - { - constexpr inline Key() = default; - constexpr inline Key(Qt::HANDLE threadContext, PDFObjectReference reference) : threadContext(threadContext), reference(reference) { } - - Qt::HANDLE threadContext = nullptr; - PDFObjectReference reference; - - inline bool operator<(const Key& other) const { return std::tie(threadContext, reference) < std::tie(other.threadContext, other.reference); } - }; + using KeySet = PDFFlatMap; /// This function fetches object, if it is needed - std::function m_objectFetcher; + std::function m_objectFetcher; /// Set containing objects currently being parsed. - std::set m_activeParsedObjectSet; - - /// Mutex protecting object for multiple thread access - QMutex m_mutex; + KeySet m_activeParsedObjectSet; }; /// Class for parsing objects. Checks cyclical references. If diff --git a/PdfForQtLib/sources/pdfxreftable.cpp b/PdfForQtLib/sources/pdfxreftable.cpp index 194ea92..f2bd14b 100644 --- a/PdfForQtLib/sources/pdfxreftable.cpp +++ b/PdfForQtLib/sources/pdfxreftable.cpp @@ -162,7 +162,7 @@ std::vector PDFXRefTable::getOccupiedEntries() const const PDFXRefTable::Entry& PDFXRefTable::getEntry(PDFObjectReference reference) const { // We must also check generation number here. For this reason, we compare references of the entry at given position. 
- if (reference.objectNumber >= 0 && reference.objectNumber < m_entries.size() && m_entries[reference.objectNumber].reference == reference) + if (reference.objectNumber >= 0 && reference.objectNumber < static_cast(m_entries.size()) && m_entries[reference.objectNumber].reference == reference) { return m_entries[reference.objectNumber]; } diff --git a/PdfForQtViewer/PdfForQtViewer.pro b/PdfForQtViewer/PdfForQtViewer.pro index bf74b48..400db9e 100644 --- a/PdfForQtViewer/PdfForQtViewer.pro +++ b/PdfForQtViewer/PdfForQtViewer.pro @@ -42,6 +42,8 @@ HEADERS += \ FORMS += \ pdfviewermainwindow.ui +CONFIG += force_debug_info + # Default rules for deployment. qnx: target.path = /tmp/$${TARGET}/bin else: unix:!android: target.path = /opt/$${TARGET}/bin diff --git a/UnitTests/tst_lexicalanalyzertest.cpp b/UnitTests/tst_lexicalanalyzertest.cpp index 71df4e3..da24916 100644 --- a/UnitTests/tst_lexicalanalyzertest.cpp +++ b/UnitTests/tst_lexicalanalyzertest.cpp @@ -21,6 +21,7 @@ #include "pdfparser.h" #include "pdfconstants.h" +#include "pdfflatmap.h" #include @@ -42,6 +43,7 @@ private slots: void test_command(); void test_invalid_input(); void test_header_regexp(); + void test_flat_map(); private: void scanWholeStream(const char* stream); @@ -236,6 +238,62 @@ void LexicalAnalyzerTest::test_header_regexp() } } +void LexicalAnalyzerTest::test_flat_map() +{ + using Map = pdf::PDFFlatMap; + + struct Item + { + int order; + int number; + bool erase; + + bool operator<(const Item& other) const { return order < other.order; } + }; + + for (int count = 1; count < 5; ++count) + { + std::vector items; + items.reserve(2 * count); + + int order = 0; + for (int i = 0; i < count; ++i) + { + items.emplace_back(Item{order++, i, false}); + items.emplace_back(Item{order++, i, true}); + } + + do + { + std::set testSet; + Map testFlatMap; + + for (const Item& item : items) + { + if (!item.erase) + { + testSet.insert(item.number); + testFlatMap.insert(item.number); + } + else + { + 
testSet.erase(item.number); + testFlatMap.erase(item.number); + } + + QCOMPARE(testSet.size(), testFlatMap.size()); + QCOMPARE(testSet.empty(), testFlatMap.empty()); + + for (const int testInteger : testSet) + { + QVERIFY(testFlatMap.search(testInteger)); + } + } + + } while (std::next_permutation(items.begin(), items.end())); + } +} + void LexicalAnalyzerTest::scanWholeStream(const char* stream) { pdf::PDFLexicalAnalyzer analyzer(stream, stream + strlen(stream));