Object loading

This commit is contained in:
Jakub Melka 2018-11-25 14:48:08 +01:00
parent 8c93c82228
commit 89d4ee606b
12 changed files with 234 additions and 18 deletions

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE QtCreatorProject>
<!-- Written by QtCreator 4.7.2, 2018-11-20T19:38:56. -->
<!-- Written by QtCreator 4.7.2, 2018-11-24T18:51:27. -->
<qtcreator>
<data>
<variable>EnvironmentId</variable>
@ -67,7 +67,7 @@
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Desktop Qt 5.11.2 MSVC2017 64bit</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Desktop Qt 5.11.2 MSVC2017 64bit</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">qt.qt5.5112.win64_msvc2017_64_kit</value>
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">0</value>
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">1</value>
<value type="int" key="ProjectExplorer.Target.ActiveDeployConfiguration">0</value>
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">1</value>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.0">
@ -296,7 +296,7 @@
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">UnitTests/UnitTests.pro</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_debug/UnitTests/..</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_release/UnitTests/..</value>
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
@ -353,7 +353,7 @@
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">PdfForQtViewer/PdfForQtViewer.pro</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_debug/PdfForQtViewer/..</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_release/PdfForQtViewer/..</value>
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>

View File

@ -56,4 +56,8 @@ unix {
INSTALLS += target
}
CONFIG += force_debug_info
QMAKE_CXXFLAGS += /std:c++latest

View File

@ -50,6 +50,11 @@ static constexpr const char* PDF_XREF_TRAILER_XREFSTM = "XRefStm";
static constexpr const char* PDF_XREF_FREE = "f";
static constexpr const char* PDF_XREF_OCCUPIED = "n";
// objects
static constexpr const char* PDF_OBJECT_START_MARK = "obj";
static constexpr const char* PDF_OBJECT_END_MARK = "endobj";
} // namespace pdf
#endif // PDFCONSTANTS_H

View File

@ -20,14 +20,48 @@
#define PDFDOCUMENT_H
#include "pdfglobal.h"
#include "pdfobject.h"
namespace pdf
{
/// Storage for objects. This class is not thread safe for writing (calling non-const functions). Caller must ensure
/// locking, if this object is used from multiple threads. Calling const functions should be thread safe.
///
/// The class follows the Rule of Zero: it owns only a std::vector, so the compiler-generated
/// default/copy/move constructors and assignment operators are exactly what is needed. They are
/// intentionally not declared (declaring them constexpr is not valid for a std::vector member
/// before C++20).
class PDFObjectStorage
{
public:
    /// Single slot of the storage - an object together with its generation number.
    struct Entry
    {
        /// Creates an empty slot (generation 0, default constructed object)
        constexpr inline explicit Entry() = default;

        /// Creates a slot holding \p object with the given \p generation
        /// \param generation Generation number of the object
        /// \param object Object content (moved into the slot)
        inline explicit Entry(PDFInteger generation, PDFObject object) : generation(generation), object(std::move(object)) { }

        PDFInteger generation = 0;
        PDFObject object;
    };

    /// Container of all slots
    using PDFObjects = std::vector<Entry>;

private:
    PDFObjects m_pdfObjects;
};
/// PDF document. In the code visible here it only owns the storage of the document's objects.
class PDFDocument
{
public:
/// Constructs an empty document (the object storage is default constructed)
explicit PDFDocument() = default;
private:
/// Storage of objects
PDFObjectStorage m_pdfObjectStorage;
};
} // namespace pdf

View File

@ -26,6 +26,7 @@
#include <regex>
#include <cctype>
#include <algorithm>
#include <execution>
namespace pdf
{
@ -170,6 +171,96 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
PDFXRefTable xrefTable;
xrefTable.readXRefTable(nullptr, buffer, firstXrefTableOffset);
PDFParsingContext context;
// Parses the indirect object located at the given byte offset of the buffer and returns its content.
// The object must start with an integer object number, an integer generation and the object start
// mark, must be terminated by the object end mark, and the scanned number/generation must be equal
// to the expected reference. Any violation is reported by throwing PDFParserException; the cyclic
// reference guard is active for the whole duration of the parsing.
auto getObject = [&buffer, &context](PDFInteger offset, PDFObjectReference reference) -> PDFObject
{
    // Every failure reports the same message, so the exception is built at a single place
    const auto makeReadError = [offset]()
    {
        return PDFParserException(tr("Can't read object at position %1.").arg(offset));
    };

    PDFParsingContext::PDFParsingContextGuard guard(&context, reference);

    PDFParser parser(buffer, &context, PDFParser::AllowStreams);
    parser.seek(offset);

    // Header: <object number> <generation> <start mark>
    PDFObject parsedNumber = parser.getObject();
    PDFObject parsedGeneration = parser.getObject();
    const bool hasValidHeader = parsedNumber.isInt() && parsedGeneration.isInt() && parser.fetchCommand(PDF_OBJECT_START_MARK);
    if (!hasValidHeader)
    {
        throw makeReadError();
    }

    // Content followed by the end mark
    PDFObject content = parser.getObject();
    if (!parser.fetchCommand(PDF_OBJECT_END_MARK))
    {
        throw makeReadError();
    }

    // The object found at the offset must be the one we were asked for
    PDFObjectReference parsedReference(parsedNumber.getInteger(), parsedGeneration.getInteger());
    if (parsedReference != reference)
    {
        throw makeReadError();
    }

    return content;
};
// Resolves a reference using the cross reference table: an occupied entry is parsed from
// the buffer at the offset stored in the entry, anything else yields a default constructed
// object. The fetcher is then handed over to the parsing context.
auto objectFetcher = [&getObject, &xrefTable](PDFObjectReference reference) -> PDFObject
{
    const PDFXRefTable::Entry& tableEntry = xrefTable.getEntry(reference);

    if (tableEntry.type == PDFXRefTable::EntryType::Occupied)
    {
        Q_ASSERT(tableEntry.reference == reference);
        return getObject(tableEntry.offset, reference);
    }

    if (tableEntry.type != PDFXRefTable::EntryType::Free)
    {
        // Unexpected entry type
        Q_ASSERT(false);
    }

    return PDFObject();
};

context.setObjectFetcher(objectFetcher);
// Result storage - one slot per entry of the cross reference table, indexed by object number
PDFObjectStorage::PDFObjects objects;
objects.resize(xrefTable.getSize());

std::vector<PDFXRefTable::Entry> occupiedEntries = xrefTable.getOccupiedEntries();

// Parses a single occupied entry and stores the result into its own slot of 'objects'.
// Each entry writes only to the slot of its own object number. The first parsing error
// clears the success flag, so the remaining entries are skipped; the error message is
// written under the mutex, because several entries can fail at the same time.
auto processEntry = [this, &getObject, &objects](const PDFXRefTable::Entry& entry)
{
    Q_ASSERT(entry.type == PDFXRefTable::EntryType::Occupied);

    if (m_successfull)
    {
        try
        {
            objects[entry.reference.objectNumber] = PDFObjectStorage::Entry(entry.reference.generation, getObject(entry.offset, entry.reference));
        }
        catch (const PDFParserException& exception)
        {
            // Caught by const reference - avoids copying the exception
            QMutexLocker lock(&m_mutex);
            m_successfull = false;
            m_errorMessage = exception.getMessage();
        }
    }
};

// Now, we are ready to scan all objects
std::for_each(std::execution::par, occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry);
}
catch (PDFParserException parserException)
{

View File

@ -79,8 +79,12 @@ private:
/// \returns Position of string, or FIND_NOT_FOUND_RESULT
int findFromEnd(const char* what, const QByteArray& byteArray, int limit);
/// Mutex for access to variables of this reader from more threads
/// (providing thread safety)
QMutex m_mutex;
/// This bool flag is set, if pdf document was successfully read from the device
bool m_successfull;
std::atomic<bool> m_successfull;
/// In case if error occurs, it is stored here
QString m_errorMessage;

View File

@ -100,6 +100,7 @@ public:
inline PDFInteger getInteger() const { return std::get<PDFInteger>(m_data); }
QByteArray getString() const;
const PDFDictionary* getDictionary() const;
/// Returns the PDFObjectReference stored in m_data (obtained by std::get).
/// NOTE(review): presumably m_data is a std::variant, so calling this function on an object
/// which does not hold a reference would throw std::bad_variant_access - confirm.
PDFObjectReference getReference() const { return std::get<PDFObjectReference>(m_data); }
bool operator==(const PDFObject& other) const;
bool operator!=(const PDFObject& other) const { return !(*this == other); }

View File

@ -20,6 +20,7 @@
#include "pdfconstants.h"
#include <QFile>
#include <QThread>
#include <cctype>
#include <memory>
@ -125,9 +126,9 @@ PDFLexicalAnalyzer::Token PDFLexicalAnalyzer::fetch()
}
}
}
else if (isWhitespace(lookChar()))
else if (isWhitespace(lookChar()) || isDelimiter(lookChar()))
{
// Whitespace appeared - whitespaces delimits tokens - break
// Whitespace or delimiter appeared - both terminate the current token - break
break;
}
else
@ -587,31 +588,43 @@ constexpr bool PDFLexicalAnalyzer::isHexCharacter(const char character)
// Reports a lexical error by throwing PDFParserException. The message is prefixed with
// the distance of the current read position from the beginning of the analyzed data, so
// the user can locate the problem. This function never returns normally.
void PDFLexicalAnalyzer::error(const QString& message) const
{
    // The former unconditional throw of the bare message made the code below unreachable
    const std::size_t distance = std::distance(m_begin, m_current);
    throw PDFParserException(tr("Error near position %1. %2").arg(distance).arg(message));
}
// Dereferences the object, if it is a reference: the referenced object is obtained from
// the object fetcher. Any other object is returned unchanged (as a copy). The object
// fetcher must be set before a reference is resolved.
PDFObject PDFParsingContext::getObject(const PDFObject& object) const
{
    // The former stub (assert + return of an empty object) made the resolution unreachable
    if (object.isReference())
    {
        Q_ASSERT(m_objectFetcher);
        return m_objectFetcher(object.getReference());
    }

    return object;
}
// Marks the object as being parsed by the calling thread. If the same thread is already
// parsing this object, a cyclical reference was found and PDFParserException is thrown.
// The set of active objects is accessed under the mutex.
void PDFParsingContext::beginParsingObject(PDFObjectReference reference)
{
    // The lock must be a plain local, so it is held until the end of the function
    QMutexLocker lock(&m_mutex);

    Key key(QThread::currentThreadId(), reference);
    if (m_activeParsedObjectSet.count(key))
    {
        throw PDFParserException(tr("Cyclical reference found while parsing object %1 %2.").arg(reference.objectNumber).arg(reference.generation));
    }

    m_activeParsedObjectSet.insert(key);
}
// Removes the mark set by beginParsingObject for the calling thread. The object must be
// marked as being parsed by this thread (asserted). The set of active objects is accessed
// under the mutex.
void PDFParsingContext::endParsingObject(PDFObjectReference reference)
{
    // Lock first - the set must not be touched without the mutex being held
    QMutexLocker lock(&m_mutex);

    Key key(QThread::currentThreadId(), reference);
    Q_ASSERT(m_activeParsedObjectSet.count(key));
    m_activeParsedObjectSet.erase(key);
}
PDFParser::PDFParser(const QByteArray& data, PDFParsingContext* context, Features features) :
@ -786,6 +799,8 @@ PDFObject PDFParser::getObject()
error(tr("Length of the stream buffer is negative (%1). It must be a positive number.").arg(length));
}
// Skip the stream start, then fetch data of the stream
m_lexicalAnalyzer.skipStreamStart();
QByteArray buffer = m_lexicalAnalyzer.fetchByteArray(length);
// According to the PDF Reference 1.7, chapter 3.2.7, stream content can also be specified

View File

@ -23,10 +23,12 @@
#include "pdfobject.h"
#include <QtCore>
#include <QMutex>
#include <QVariant>
#include <QByteArray>
#include <set>
#include <functional>
namespace pdf
{
@ -199,12 +201,14 @@ private:
const char* m_end;
};
/// Parsing context. Used for example to detect cyclic reference errors.
/// Parsing context. Used for example to detect cyclic reference errors. Can handle multiple threads
/// simultaneously (i.e. the class is thread safe).
class PDFParsingContext
{
Q_DECLARE_TR_FUNCTIONS(pdf::PDFParsingContext)
public:
explicit PDFParsingContext() = default;
/// Guard guarding the cyclical references.
class PDFParsingContextGuard
@ -231,12 +235,32 @@ public:
/// then same object is returned.
PDFObject getObject(const PDFObject& object) const;
/// Sets function which provides object fetching
void setObjectFetcher(std::function<PDFObject(PDFObjectReference)> objectFetcher) { m_objectFetcher = std::move(objectFetcher); }
private:
void beginParsingObject(PDFObjectReference reference);
void endParsingObject(PDFObjectReference reference);
struct Key
{
constexpr inline Key() = default;
constexpr inline Key(Qt::HANDLE threadContext, PDFObjectReference reference) : threadContext(threadContext), reference(reference) { }
Qt::HANDLE threadContext = nullptr;
PDFObjectReference reference;
inline bool operator<(const Key& other) const { return std::tie(threadContext, reference) < std::tie(other.threadContext, other.reference); }
};
/// This function fetches object, if it is needed
std::function<PDFObject(PDFObjectReference)> m_objectFetcher;
/// Set containing objects currently being parsed.
std::set<PDFObjectReference> m_activeParsedObjectSet;
std::set<Key> m_activeParsedObjectSet;
/// Mutex protecting object for multiple thread access
QMutex m_mutex;
};
/// Class for parsing objects. Checks cyclical references. If

View File

@ -103,7 +103,10 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
entry.type = EntryType::Occupied;
}
m_entries[objectNumber] = std::move(entry);
if (m_entries[objectNumber].type == EntryType::Free)
{
m_entries[objectNumber] = std::move(entry);
}
}
}
@ -145,4 +148,29 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
}
}
std::vector<PDFXRefTable::Entry> PDFXRefTable::getOccupiedEntries() const
{
std::vector<PDFXRefTable::Entry> result;
// Suppose majority of items are occupied
result.reserve(m_entries.size());
std::copy_if(m_entries.cbegin(), m_entries.cend(), std::back_inserter(result), [](const Entry& entry) { return entry.type == EntryType::Occupied; });
return result;
}
// Looks up the entry for the given reference. The entry is returned only if the object
// number lies inside the table and the entry stored at that position has exactly the same
// reference (so the generation number is verified too). Otherwise a reference to a shared
// default constructed entry is returned.
const PDFXRefTable::Entry& PDFXRefTable::getEntry(PDFObjectReference reference) const
{
    const auto objectNumber = reference.objectNumber;
    const bool isInsideTable = objectNumber >= 0 && objectNumber < m_entries.size();

    if (isInsideTable)
    {
        const Entry& candidate = m_entries[objectNumber];
        if (candidate.reference == reference)
        {
            return candidate;
        }
    }

    // Fallback returned for unknown references
    static Entry dummy;
    return dummy;
}
} // namespace pdf

View File

@ -67,6 +67,16 @@ public:
/// \param startTableOffset Offset of first reference table
void readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset);
/// Filters only occupied entries and returns them
std::vector<Entry> getOccupiedEntries() const;
/// Returns size of the reference table
std::size_t getSize() const { return m_entries.size(); }
/// Gets the entry for given reference. If entry for given reference is not found,
/// then free entry is returned.
const Entry& getEntry(PDFObjectReference reference) const;
private:
/// Reference table entries
std::vector<Entry> m_entries;

View File

@ -37,7 +37,7 @@ void PDFViewerMainWindow::onActionOpenTriggered()
}
else
{
QMessageBox::information(this, tr("PDF Reader"), tr("Document read error: %1").arg(fileName));
QMessageBox::information(this, tr("PDF Reader"), tr("Document read error: %1").arg(reader.getErrorMessage()));
}
}
}