mirror of https://github.com/JakubMelka/PDF4QT.git
Object loading
This commit is contained in:
parent
8c93c82228
commit
89d4ee606b
|
@ -1,6 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE QtCreatorProject>
|
||||
<!-- Written by QtCreator 4.7.2, 2018-11-20T19:38:56. -->
|
||||
<!-- Written by QtCreator 4.7.2, 2018-11-24T18:51:27. -->
|
||||
<qtcreator>
|
||||
<data>
|
||||
<variable>EnvironmentId</variable>
|
||||
|
@ -67,7 +67,7 @@
|
|||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Desktop Qt 5.11.2 MSVC2017 64bit</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Desktop Qt 5.11.2 MSVC2017 64bit</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">qt.qt5.5112.win64_msvc2017_64_kit</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">1</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveDeployConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">1</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.0">
|
||||
|
@ -296,7 +296,7 @@
|
|||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">UnitTests/UnitTests.pro</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_debug/UnitTests/..</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_release/UnitTests/..</value>
|
||||
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
|
||||
|
@ -353,7 +353,7 @@
|
|||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">PdfForQtViewer/PdfForQtViewer.pro</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_debug/PdfForQtViewer/..</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_release/PdfForQtViewer/..</value>
|
||||
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
|
||||
|
|
|
@ -56,4 +56,8 @@ unix {
|
|||
INSTALLS += target
|
||||
}
|
||||
|
||||
|
||||
CONFIG += force_debug_info
|
||||
|
||||
|
||||
QMAKE_CXXFLAGS += /std:c++latest
|
||||
|
|
|
@ -50,6 +50,11 @@ static constexpr const char* PDF_XREF_TRAILER_XREFSTM = "XRefStm";
|
|||
static constexpr const char* PDF_XREF_FREE = "f";
|
||||
static constexpr const char* PDF_XREF_OCCUPIED = "n";
|
||||
|
||||
// objects
|
||||
|
||||
static constexpr const char* PDF_OBJECT_START_MARK = "obj";
|
||||
static constexpr const char* PDF_OBJECT_END_MARK = "endobj";
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFCONSTANTS_H
|
||||
|
|
|
@ -20,14 +20,48 @@
|
|||
#define PDFDOCUMENT_H
|
||||
|
||||
#include "pdfglobal.h"
|
||||
#include "pdfobject.h"
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
/// Storage for objects. This class is not thread safe for writing (calling non-const functions). Caller must ensure
|
||||
/// locking, if this object is used from multiple threads. Calling const functions should be thread safe.
|
||||
class PDFObjectStorage
|
||||
{
|
||||
public:
|
||||
constexpr inline PDFObjectStorage() = default;
|
||||
|
||||
constexpr inline PDFObjectStorage(const PDFObjectStorage&) = default;
|
||||
constexpr inline PDFObjectStorage(PDFObjectStorage&&) = default;
|
||||
|
||||
constexpr inline PDFObjectStorage& operator=(const PDFObjectStorage&) = default;
|
||||
constexpr inline PDFObjectStorage& operator=(PDFObjectStorage&&) = default;
|
||||
|
||||
|
||||
struct Entry
|
||||
{
|
||||
constexpr inline explicit Entry() = default;
|
||||
inline explicit Entry(PDFInteger generation, PDFObject object) : generation(generation), object(std::move(object)) { }
|
||||
|
||||
PDFInteger generation = 0;
|
||||
PDFObject object;
|
||||
};
|
||||
|
||||
using PDFObjects = std::vector<Entry>;
|
||||
|
||||
private:
|
||||
PDFObjects m_pdfObjects;
|
||||
};
|
||||
|
||||
class PDFDocument
|
||||
{
|
||||
public:
|
||||
explicit PDFDocument() = default;
|
||||
|
||||
private:
|
||||
/// Storage of objects
|
||||
PDFObjectStorage m_pdfObjectStorage;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <regex>
|
||||
#include <cctype>
|
||||
#include <algorithm>
|
||||
#include <execution>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
@ -170,6 +171,96 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
|
|||
PDFXRefTable xrefTable;
|
||||
xrefTable.readXRefTable(nullptr, buffer, firstXrefTableOffset);
|
||||
|
||||
PDFParsingContext context;
|
||||
|
||||
// This lambda function fetches object from the buffer from the specified offset.
|
||||
// Can throw exception, returns a pair of scanned reference and object content.
|
||||
auto getObject = [&buffer, &context](PDFInteger offset, PDFObjectReference reference) -> PDFObject
|
||||
{
|
||||
PDFParsingContext::PDFParsingContextGuard guard(&context, reference);
|
||||
|
||||
PDFParser parser(buffer, &context, PDFParser::AllowStreams);
|
||||
parser.seek(offset);
|
||||
|
||||
PDFObject objectNumber = parser.getObject();
|
||||
PDFObject generation = parser.getObject();
|
||||
|
||||
if (!objectNumber.isInt() || !generation.isInt())
|
||||
{
|
||||
throw PDFParserException(tr("Can't read object at position %1.").arg(offset));
|
||||
}
|
||||
|
||||
if (!parser.fetchCommand(PDF_OBJECT_START_MARK))
|
||||
{
|
||||
throw PDFParserException(tr("Can't read object at position %1.").arg(offset));
|
||||
}
|
||||
|
||||
PDFObject object = parser.getObject();
|
||||
|
||||
if (!parser.fetchCommand(PDF_OBJECT_END_MARK))
|
||||
{
|
||||
throw PDFParserException(tr("Can't read object at position %1.").arg(offset));
|
||||
}
|
||||
|
||||
PDFObjectReference scannedReference(objectNumber.getInteger(), generation.getInteger());
|
||||
if (scannedReference != reference)
|
||||
{
|
||||
throw PDFParserException(tr("Can't read object at position %1.").arg(offset));
|
||||
}
|
||||
|
||||
return object;
|
||||
};
|
||||
|
||||
// Resolves a reference to an object using the cross reference table. Occupied entries
// are read from the buffer, free entries resolve to a default constructed object.
auto objectFetcher = [&getObject, &xrefTable](PDFObjectReference reference) -> PDFObject
{
    const PDFXRefTable::Entry& xrefEntry = xrefTable.getEntry(reference);

    if (xrefEntry.type == PDFXRefTable::EntryType::Occupied)
    {
        Q_ASSERT(xrefEntry.reference == reference);
        return getObject(xrefEntry.offset, reference);
    }

    // Only free entries are expected here; any other entry type is a programming error
    Q_ASSERT(xrefEntry.type == PDFXRefTable::EntryType::Free);
    return PDFObject();
};
|
||||
context.setObjectFetcher(objectFetcher);
|
||||
|
||||
PDFObjectStorage::PDFObjects objects;
|
||||
objects.resize(xrefTable.getSize());
|
||||
|
||||
std::vector<PDFXRefTable::Entry> occupiedEntries = xrefTable.getOccupiedEntries();
|
||||
|
||||
// Reads the object described by a single occupied cross reference entry and stores it,
// together with its generation, into the slot given by its object number. The first
// parsing failure switches the reader to the failed state and stores the error message;
// entries processed after that are skipped.
auto processEntry = [this, &getObject, &objects](const PDFXRefTable::Entry& entry)
{
    Q_ASSERT(entry.type == PDFXRefTable::EntryType::Occupied);

    // A failure has already been reported, do not waste time on the remaining entries
    if (!m_successfull)
    {
        return;
    }

    try
    {
        // NOTE(review): presumably every occupied entry has a distinct object number, so
        // concurrent invocations write to different slots - confirm against PDFXRefTable.
        objects[entry.reference.objectNumber] = PDFObjectStorage::Entry(entry.reference.generation, getObject(entry.offset, entry.reference));
    }
    catch (const PDFParserException& exception) // by const reference: no copy, no slicing
    {
        // The error state is shared between invocations, so it is updated under the mutex
        QMutexLocker lock(&m_mutex);
        m_successfull = false;
        m_errorMessage = exception.getMessage();
    }
};
|
||||
|
||||
// Now, we are ready to scan all objects
|
||||
//std::for_each<std::execution::parallel_policy, std::vector<PDFXRefTable::Entry>::const_iterator, decltype(processEntry)>(occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry);
|
||||
std::for_each(std::execution::parallel_policy(), occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry);
|
||||
}
|
||||
catch (PDFParserException parserException)
|
||||
{
|
||||
|
|
|
@ -79,8 +79,12 @@ private:
|
|||
/// \returns Position of string, or FIND_NOT_FOUND_RESULT
|
||||
int findFromEnd(const char* what, const QByteArray& byteArray, int limit);
|
||||
|
||||
/// Mutex for access to variables of this reader from more threads
|
||||
/// (providing thread safety)
|
||||
QMutex m_mutex;
|
||||
|
||||
/// This bool flag is set, if pdf document was successfully read from the device
|
||||
bool m_successfull;
|
||||
std::atomic<bool> m_successfull;
|
||||
|
||||
/// In case if error occurs, it is stored here
|
||||
QString m_errorMessage;
|
||||
|
|
|
@ -100,6 +100,7 @@ public:
|
|||
inline PDFInteger getInteger() const { return std::get<PDFInteger>(m_data); }
|
||||
QByteArray getString() const;
|
||||
const PDFDictionary* getDictionary() const;
|
||||
PDFObjectReference getReference() const { return std::get<PDFObjectReference>(m_data); }
|
||||
|
||||
bool operator==(const PDFObject& other) const;
|
||||
bool operator!=(const PDFObject& other) const { return !(*this == other); }
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "pdfconstants.h"
|
||||
|
||||
#include <QFile>
|
||||
#include <QThread>
|
||||
|
||||
#include <cctype>
|
||||
#include <memory>
|
||||
|
@ -125,9 +126,9 @@ PDFLexicalAnalyzer::Token PDFLexicalAnalyzer::fetch()
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (isWhitespace(lookChar()))
|
||||
else if (isWhitespace(lookChar()) || isDelimiter(lookChar()))
|
||||
{
|
||||
// Whitespace appeared - whitespaces delimits tokens - break
|
||||
// Whitespace appeared - whitespaces/delimiters delimits tokens - break
|
||||
break;
|
||||
}
|
||||
else
|
||||
|
@ -587,31 +588,43 @@ constexpr bool PDFLexicalAnalyzer::isHexCharacter(const char character)
|
|||
|
||||
void PDFLexicalAnalyzer::error(const QString& message) const
|
||||
{
|
||||
throw PDFParserException(message);
|
||||
std::size_t distance = std::distance(m_begin, m_current);
|
||||
throw PDFParserException(tr("Error near position %1. %2").arg(distance).arg(message));
|
||||
}
|
||||
|
||||
/// Dereferences the object. If the object is a reference, the referenced object is
/// obtained from the object fetcher; any other object is returned unchanged.
/// \param object Object to be dereferenced
PDFObject PDFParsingContext::getObject(const PDFObject& object) const
{
    if (object.isReference())
    {
        // The fetcher must have been set before references can be resolved
        Q_ASSERT(m_objectFetcher);
        return m_objectFetcher(object.getReference());
    }

    return object;
}
|
||||
|
||||
/// Registers the object as being parsed by the current thread. If the same thread is
/// already parsing this object, a cyclical reference was found and PDFParserException
/// is thrown.
/// \param reference Reference of the object whose parsing begins
void PDFParsingContext::beginParsingObject(PDFObjectReference reference)
{
    // The set of active objects is shared, so every access is done under the mutex
    QMutexLocker lock(&m_mutex);

    // Objects are tracked per thread - the same object can be parsed by more threads at once
    Key key(QThread::currentThreadId(), reference);
    if (m_activeParsedObjectSet.count(key))
    {
        throw PDFParserException(tr("Cyclical reference found while parsing object %1 %2.").arg(reference.objectNumber).arg(reference.generation));
    }
    else
    {
        m_activeParsedObjectSet.insert(key);
    }
}
|
||||
|
||||
/// Unregisters the object from the set of objects being parsed by the current thread.
/// \param reference Reference of the object whose parsing ends
void PDFParsingContext::endParsingObject(PDFObjectReference reference)
{
    // The set of active objects is shared, so every access is done under the mutex
    QMutexLocker lock(&m_mutex);

    // The key must be the same one which was registered when the parsing began
    Key key(QThread::currentThreadId(), reference);
    Q_ASSERT(m_activeParsedObjectSet.count(key));
    m_activeParsedObjectSet.erase(key);
}
|
||||
|
||||
PDFParser::PDFParser(const QByteArray& data, PDFParsingContext* context, Features features) :
|
||||
|
@ -786,6 +799,8 @@ PDFObject PDFParser::getObject()
|
|||
error(tr("Length of the stream buffer is negative (%1). It must be a positive number.").arg(length));
|
||||
}
|
||||
|
||||
// Skip the stream start, then fetch data of the stream
|
||||
m_lexicalAnalyzer.skipStreamStart();
|
||||
QByteArray buffer = m_lexicalAnalyzer.fetchByteArray(length);
|
||||
|
||||
// According to the PDF Reference 1.7, chapter 3.2.7, stream content can also be specified
|
||||
|
|
|
@ -23,10 +23,12 @@
|
|||
#include "pdfobject.h"
|
||||
|
||||
#include <QtCore>
|
||||
#include <QMutex>
|
||||
#include <QVariant>
|
||||
#include <QByteArray>
|
||||
|
||||
#include <set>
|
||||
#include <functional>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
@ -199,12 +201,14 @@ private:
|
|||
const char* m_end;
|
||||
};
|
||||
|
||||
/// Parsing context. Used for example to detect cyclic reference errors.
|
||||
/// Parsing context. Used for example to detect cyclic reference errors. Can handle multiple threads
|
||||
/// simultaneously (e.g class is thread safe).
|
||||
class PDFParsingContext
|
||||
{
|
||||
Q_DECLARE_TR_FUNCTIONS(pdf::PDFParsingContext)
|
||||
|
||||
public:
|
||||
explicit PDFParsingContext() = default;
|
||||
|
||||
/// Guard guarding the cyclical references.
|
||||
class PDFParsingContextGuard
|
||||
|
@ -231,12 +235,32 @@ public:
|
|||
/// then same object is returned.
|
||||
PDFObject getObject(const PDFObject& object) const;
|
||||
|
||||
/// Sets function which provides object fetching
|
||||
void setObjectFetcher(std::function<PDFObject(PDFObjectReference)> objectFetcher) { m_objectFetcher = std::move(objectFetcher); }
|
||||
|
||||
private:
|
||||
void beginParsingObject(PDFObjectReference reference);
|
||||
void endParsingObject(PDFObjectReference reference);
|
||||
|
||||
struct Key
|
||||
{
|
||||
constexpr inline Key() = default;
|
||||
constexpr inline Key(Qt::HANDLE threadContext, PDFObjectReference reference) : threadContext(threadContext), reference(reference) { }
|
||||
|
||||
Qt::HANDLE threadContext = nullptr;
|
||||
PDFObjectReference reference;
|
||||
|
||||
inline bool operator<(const Key& other) const { return std::tie(threadContext, reference) < std::tie(other.threadContext, other.reference); }
|
||||
};
|
||||
|
||||
/// This function fetches object, if it is needed
|
||||
std::function<PDFObject(PDFObjectReference)> m_objectFetcher;
|
||||
|
||||
/// Set containing objects currently being parsed.
|
||||
std::set<PDFObjectReference> m_activeParsedObjectSet;
|
||||
std::set<Key> m_activeParsedObjectSet;
|
||||
|
||||
/// Mutex protecting object for multiple thread access
|
||||
QMutex m_mutex;
|
||||
};
|
||||
|
||||
/// Class for parsing objects. Checks cyclical references. If
|
||||
|
|
|
@ -103,7 +103,10 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
|
|||
entry.type = EntryType::Occupied;
|
||||
}
|
||||
|
||||
m_entries[objectNumber] = std::move(entry);
|
||||
if (m_entries[objectNumber].type == EntryType::Free)
|
||||
{
|
||||
m_entries[objectNumber] = std::move(entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -145,4 +148,29 @@ void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& b
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<PDFXRefTable::Entry> PDFXRefTable::getOccupiedEntries() const
|
||||
{
|
||||
std::vector<PDFXRefTable::Entry> result;
|
||||
|
||||
// Suppose majority of items are occupied
|
||||
result.reserve(m_entries.size());
|
||||
std::copy_if(m_entries.cbegin(), m_entries.cend(), std::back_inserter(result), [](const Entry& entry) { return entry.type == EntryType::Occupied; });
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns the entry for the given reference. If the object number is out of range, or
/// the entry stored at that position belongs to a different reference (for example
/// a different generation), a default constructed entry is returned instead.
/// \param reference Reference of the requested object
const PDFXRefTable::Entry& PDFXRefTable::getEntry(PDFObjectReference reference) const
{
    // Immutable fallback shared by all failed lookups
    static const Entry fallbackEntry{};

    // Reject negative object numbers before converting to an unsigned index
    if (reference.objectNumber < 0)
    {
        return fallbackEntry;
    }

    const std::size_t index = static_cast<std::size_t>(reference.objectNumber);
    if (index >= m_entries.size())
    {
        return fallbackEntry;
    }

    // We must also check generation number here. For this reason, we compare
    // the whole reference of the entry at the given position.
    const Entry& entry = m_entries[index];
    return (entry.reference == reference) ? entry : fallbackEntry;
}
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -67,6 +67,16 @@ public:
|
|||
/// \param startTableOffset Offset of first reference table
|
||||
void readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset);
|
||||
|
||||
/// Filters only occupied entries and returns them
|
||||
std::vector<Entry> getOccupiedEntries() const;
|
||||
|
||||
/// Returns size of the reference table
|
||||
std::size_t getSize() const { return m_entries.size(); }
|
||||
|
||||
/// Gets the entry for given reference. If entry for given reference is not found,
|
||||
/// then free entry is returned.
|
||||
const Entry& getEntry(PDFObjectReference reference) const;
|
||||
|
||||
private:
|
||||
/// Reference table entries
|
||||
std::vector<Entry> m_entries;
|
||||
|
|
|
@ -37,7 +37,7 @@ void PDFViewerMainWindow::onActionOpenTriggered()
|
|||
}
|
||||
else
|
||||
{
|
||||
QMessageBox::information(this, tr("PDF Reader"), tr("Document read error: %1").arg(fileName));
|
||||
QMessageBox::information(this, tr("PDF Reader"), tr("Document read error: %1").arg(reader.getErrorMessage()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue