mirror of https://github.com/JakubMelka/PDF4QT.git
Parsing X Reference table
This commit is contained in:
parent
58ad59e407
commit
8c93c82228
|
@ -19,5 +19,6 @@ TEMPLATE = subdirs
|
|||
|
||||
SUBDIRS += \
|
||||
PdfForQtLib \
|
||||
UnitTests
|
||||
UnitTests \
|
||||
PdfForQtViewer
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE QtCreatorProject>
|
||||
<!-- Written by QtCreator 4.7.2, 2018-11-17T16:30:34. -->
|
||||
<!-- Written by QtCreator 4.7.2, 2018-11-20T19:38:56. -->
|
||||
<qtcreator>
|
||||
<data>
|
||||
<variable>EnvironmentId</variable>
|
||||
|
@ -69,7 +69,7 @@
|
|||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">qt.qt5.5112.win64_msvc2017_64_kit</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveDeployConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">1</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.0">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">K:/Programming/PDF/PDF_For_Qt/bin_debug</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
|
@ -296,7 +296,7 @@
|
|||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">UnitTests/UnitTests.pro</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_debug/UnitTests/..</value>
|
||||
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
|
||||
|
@ -304,7 +304,64 @@
|
|||
<value type="bool" key="RunConfiguration.UseQmlDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseQmlDebuggerAuto">true</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.RunConfigurationCount">1</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.RunConfiguration.1">
|
||||
<value type="bool" key="Analyzer.QmlProfiler.AggregateTraces">false</value>
|
||||
<value type="bool" key="Analyzer.QmlProfiler.FlushEnabled">false</value>
|
||||
<value type="uint" key="Analyzer.QmlProfiler.FlushInterval">1000</value>
|
||||
<value type="QString" key="Analyzer.QmlProfiler.LastTraceFile"></value>
|
||||
<value type="bool" key="Analyzer.QmlProfiler.Settings.UseGlobalSettings">true</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.AddedSuppressionFiles"/>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectBusEvents">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectSystime">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableBranchSim">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableCacheSim">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableEventToolTips">true</value>
|
||||
<value type="double" key="Analyzer.Valgrind.Callgrind.MinimumCostRatio">0.01</value>
|
||||
<value type="double" key="Analyzer.Valgrind.Callgrind.VisualisationMinimumCostRatio">10</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.FilterExternalIssues">true</value>
|
||||
<value type="int" key="Analyzer.Valgrind.LeakCheckOnFinish">1</value>
|
||||
<value type="int" key="Analyzer.Valgrind.NumCallers">25</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.RemovedSuppressionFiles"/>
|
||||
<value type="int" key="Analyzer.Valgrind.SelfModifyingCodeDetection">1</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Settings.UseGlobalSettings">true</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.ShowReachable">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.TrackOrigins">true</value>
|
||||
<value type="QString" key="Analyzer.Valgrind.ValgrindExecutable">valgrind</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.VisibleErrorKinds">
|
||||
<value type="int">0</value>
|
||||
<value type="int">1</value>
|
||||
<value type="int">2</value>
|
||||
<value type="int">3</value>
|
||||
<value type="int">4</value>
|
||||
<value type="int">5</value>
|
||||
<value type="int">6</value>
|
||||
<value type="int">7</value>
|
||||
<value type="int">8</value>
|
||||
<value type="int">9</value>
|
||||
<value type="int">10</value>
|
||||
<value type="int">11</value>
|
||||
<value type="int">12</value>
|
||||
<value type="int">13</value>
|
||||
<value type="int">14</value>
|
||||
</valuelist>
|
||||
<value type="int" key="PE.EnvironmentAspect.Base">2</value>
|
||||
<valuelist type="QVariantList" key="PE.EnvironmentAspect.Changes"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">PdfForQtViewer</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:K:/Programming/PDF/PDF_For_Qt/PdfForQt/PdfForQtViewer/PdfForQtViewer.pro</value>
|
||||
<value type="bool" key="QmakeProjectManager.QmakeRunConfiguration.UseLibrarySearchPath">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">PdfForQtViewer/PdfForQtViewer.pro</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_debug/PdfForQtViewer/..</value>
|
||||
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
|
||||
<value type="bool" key="RunConfiguration.UseMultiProcess">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseQmlDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseQmlDebuggerAuto">true</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.RunConfigurationCount">2</value>
|
||||
</valuemap>
|
||||
</data>
|
||||
<data>
|
||||
|
|
|
@ -39,7 +39,8 @@ SOURCES += \
|
|||
sources/pdfobject.cpp \
|
||||
sources/pdfparser.cpp \
|
||||
sources/pdfdocument.cpp \
|
||||
sources/pdfdocumentreader.cpp
|
||||
sources/pdfdocumentreader.cpp \
|
||||
sources/pdfxreftable.cpp
|
||||
|
||||
HEADERS += \
|
||||
sources/pdfobject.h \
|
||||
|
@ -47,7 +48,8 @@ HEADERS += \
|
|||
sources/pdfglobal.h \
|
||||
sources/pdfconstants.h \
|
||||
sources/pdfdocument.h \
|
||||
sources/pdfdocumentreader.h
|
||||
sources/pdfdocumentreader.h \
|
||||
sources/pdfxreftable.h
|
||||
|
||||
unix {
|
||||
target.path = /usr/lib
|
||||
|
|
|
@ -42,6 +42,14 @@ static constexpr const char* PDF_STREAM_DICT_FILE_FILTER = "FFilter";
|
|||
static constexpr const char* PDF_STREAM_DICT_FDECODE_PARMS = "FDecodeParms";
|
||||
static constexpr const char* PDF_STREAM_DICT_DECODED_LENGTH = "DL";
|
||||
|
||||
// xref table constants
|
||||
static constexpr const char* PDF_XREF_HEADER = "xref";
|
||||
static constexpr const char* PDF_XREF_TRAILER = "trailer";
|
||||
static constexpr const char* PDF_XREF_TRAILER_PREVIOUS = "Prev";
|
||||
static constexpr const char* PDF_XREF_TRAILER_XREFSTM = "XRefStm";
|
||||
static constexpr const char* PDF_XREF_FREE = "f";
|
||||
static constexpr const char* PDF_XREF_OCCUPIED = "n";
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFCONSTANTS_H
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "pdfdocumentreader.h"
|
||||
#include "pdfparser.h"
|
||||
#include "pdfconstants.h"
|
||||
#include "pdfxreftable.h"
|
||||
|
||||
#include <QFile>
|
||||
|
||||
|
@ -114,6 +115,15 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
|
|||
throw PDFParserException(tr("Start of object reference table not found."));
|
||||
}
|
||||
|
||||
Q_ASSERT(startXRefPosition + std::strlen(PDF_START_OF_XREF_MARK) < buffer.size());
|
||||
PDFLexicalAnalyzer analyzer(buffer.constData() + startXRefPosition + std::strlen(PDF_START_OF_XREF_MARK), buffer.constData() + buffer.size());
|
||||
const PDFLexicalAnalyzer::Token token = analyzer.fetch();
|
||||
if (token.type != PDFLexicalAnalyzer::TokenType::Integer)
|
||||
{
|
||||
throw PDFParserException(tr("Start of object reference table not found."));
|
||||
}
|
||||
const PDFInteger firstXrefTableOffset = token.data.toLongLong();
|
||||
|
||||
// HEADER CHECKING
|
||||
// 1) Check if header is present
|
||||
// 2) Scan header version
|
||||
|
@ -123,7 +133,7 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
|
|||
// - %!PS-Adobe-y.y PDF-x.x
|
||||
// We will search for both of these formats.
|
||||
|
||||
std::regex headerRegExp("(%PDF-[[:digit:]]\\.[[:digit:]])|(%!PS-Adobe-[[:digit:]]\\.[[:digit:]] PDF-[[:digit:]]\\.[[:digit:]])");
|
||||
std::regex headerRegExp(PDF_FILE_HEADER_REGEXP);
|
||||
std::cmatch headerMatch;
|
||||
|
||||
auto itBegin = buffer.cbegin();
|
||||
|
@ -156,6 +166,9 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
|
|||
throw PDFParserException(tr("Version of the PDF file is not valid."));
|
||||
}
|
||||
|
||||
// Now, we are ready to scan xref table
|
||||
PDFXRefTable xrefTable;
|
||||
xrefTable.readXRefTable(nullptr, buffer, firstXrefTableOffset);
|
||||
|
||||
}
|
||||
catch (PDFParserException parserException)
|
||||
|
|
|
@ -31,13 +31,18 @@ namespace pdf
|
|||
/// This class is a reader of PDF document from various devices (file, io device,
|
||||
/// byte buffer). This class doesn't throw exceptions, to check errors, use
|
||||
/// appropriate functions.
|
||||
class PDFDocumentReader
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFDocumentReader
|
||||
{
|
||||
Q_DECLARE_TR_FUNCTIONS(pdf::PDFDocumentReader)
|
||||
|
||||
public:
|
||||
explicit PDFDocumentReader();
|
||||
|
||||
constexpr inline PDFDocumentReader(const PDFDocumentReader&) = delete;
|
||||
constexpr inline PDFDocumentReader(PDFDocumentReader&&) = delete;
|
||||
constexpr inline PDFDocumentReader& operator=(const PDFDocumentReader&) = delete;
|
||||
constexpr inline PDFDocumentReader& operator=(PDFDocumentReader&&) = delete;
|
||||
|
||||
/// Reads a PDF document from the specified file. If file doesn't exist,
|
||||
/// cannot be opened, or contains an invalid PDF, an empty PDF document is returned.
|
||||
/// No exception is thrown.
|
||||
|
@ -56,6 +61,9 @@ public:
|
|||
/// Returns true, if document was successfully read from device
|
||||
bool isSuccessfull() const { return m_successfull; }
|
||||
|
||||
/// Returns error message, if document reading was unsuccessful
|
||||
const QString& getErrorMessage() const { return m_errorMessage; }
|
||||
|
||||
private:
|
||||
static constexpr const int FIND_NOT_FOUND_RESULT = -1;
|
||||
|
||||
|
|
|
@ -30,6 +30,14 @@ QByteArray PDFObject::getString() const
|
|||
return string->getString();
|
||||
}
|
||||
|
||||
/// Returns the content of this object viewed as a dictionary.
/// The content pointer is read from m_data with std::get, so m_data must currently
/// hold a PDFObjectContentPointer. The dynamic type of the content is verified
/// only by Q_ASSERT; the returned pointer itself comes from an unchecked static_cast.
const PDFDictionary* PDFObject::getDictionary() const
{
    const auto* content = std::get<PDFObjectContentPointer>(m_data).get();

    // Sanity check that the stored content really is a dictionary
    Q_ASSERT(dynamic_cast<const PDFDictionary*>(content));
    return static_cast<const PDFDictionary*>(content);
}
|
||||
|
||||
bool PDFObject::operator==(const PDFObject &other) const
|
||||
{
|
||||
if (m_type == other.m_type)
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
|
||||
namespace pdf
|
||||
{
|
||||
class PDFDictionary;
|
||||
|
||||
/// This class represents a content of the PDF object. It can be
|
||||
/// array of objects, dictionary, content stream data, or string data.
|
||||
|
@ -98,6 +99,7 @@ public:
|
|||
|
||||
inline PDFInteger getInteger() const { return std::get<PDFInteger>(m_data); }
|
||||
QByteArray getString() const;
|
||||
const PDFDictionary* getDictionary() const;
|
||||
|
||||
bool operator==(const PDFObject& other) const;
|
||||
bool operator!=(const PDFObject& other) const { return !(*this == other); }
|
||||
|
|
|
@ -460,6 +460,19 @@ PDFLexicalAnalyzer::Token PDFLexicalAnalyzer::fetch()
|
|||
return Token(TokenType::EndOfFile);
|
||||
}
|
||||
|
||||
/// Moves the read position to \p offset bytes after the beginning of the input.
/// Valid offsets lie in the half-open range [0, length of input); any other value
/// is reported through error() and the current position is left unchanged.
/// \param offset Byte offset measured from the start of the input
void PDFLexicalAnalyzer::seek(PDFInteger offset)
{
    const PDFInteger inputLength = std::distance(m_begin, m_end);
    const bool isOffsetValid = offset >= 0 && offset < inputLength;

    if (!isOffsetValid)
    {
        error(tr("Trying to seek stream position to %1 bytes from the start, byte offset is invalid.").arg(offset));

        // Never reposition on an invalid offset, whatever error() does
        return;
    }

    m_current = std::next(m_begin, offset);
}
|
||||
|
||||
void PDFLexicalAnalyzer::skipWhitespaceAndComments()
|
||||
{
|
||||
bool isComment = false;
|
||||
|
@ -601,9 +614,19 @@ void PDFParsingContext::endParsingObject(PDFObjectReference reference)
|
|||
m_activeParsedObjectSet.erase(reference);
|
||||
}
|
||||
|
||||
PDFParser::PDFParser(const char* begin, const char* end, PDFParsingContext* context) :
|
||||
PDFParser::PDFParser(const QByteArray& data, PDFParsingContext* context, Features features) :
|
||||
m_context(context),
|
||||
m_lexicalAnalyzer(begin, end)
|
||||
m_lexicalAnalyzer(data.constData(), data.constData() + data.size()),
|
||||
m_features(features)
|
||||
{
|
||||
m_lookAhead1 = m_lexicalAnalyzer.fetch();
|
||||
m_lookAhead2 = m_lexicalAnalyzer.fetch();
|
||||
}
|
||||
|
||||
PDFParser::PDFParser(const char* begin, const char* end, PDFParsingContext* context, Features features) :
|
||||
m_context(context),
|
||||
m_lexicalAnalyzer(begin, end),
|
||||
m_features(features)
|
||||
{
|
||||
m_lookAhead1 = m_lexicalAnalyzer.fetch();
|
||||
m_lookAhead2 = m_lexicalAnalyzer.fetch();
|
||||
|
@ -611,13 +634,6 @@ PDFParser::PDFParser(const char* begin, const char* end, PDFParsingContext* cont
|
|||
|
||||
PDFObject PDFParser::getObject()
|
||||
{
|
||||
/*
|
||||
*
|
||||
// Complex PDF objects
|
||||
,
|
||||
Dictionary,
|
||||
Stream,
|
||||
*/
|
||||
switch (m_lookAhead1.type)
|
||||
{
|
||||
case PDFLexicalAnalyzer::TokenType::Boolean:
|
||||
|
@ -744,6 +760,11 @@ PDFObject PDFParser::getObject()
|
|||
if (m_lookAhead2.type == PDFLexicalAnalyzer::TokenType::Command &&
|
||||
m_lookAhead2.data.toByteArray() == PDF_STREAM_START_COMMAND)
|
||||
{
|
||||
if (!m_features.testFlag(AllowStreams))
|
||||
{
|
||||
error(tr("Streams are not allowed in this context."));
|
||||
}
|
||||
|
||||
// Read stream content. According to the PDF Reference 1.7, chapter 3.2.7, stream
|
||||
// content can be placed in the file. If this is the case, then try to load file
|
||||
// content in the memory. But even in this case, stream content should be skipped.
|
||||
|
@ -852,6 +873,27 @@ void PDFParser::error(const QString& message) const
|
|||
throw new PDFParserException(message);
|
||||
}
|
||||
|
||||
/// Repositions the underlying lexical analyzer to \p offset and reloads both
/// lookahead tokens from the new position.
/// \param offset Byte offset measured from the start of the input
void PDFParser::seek(PDFInteger offset)
{
    m_lexicalAnalyzer.seek(offset);

    // The buffered tokens were scanned at the old position, so they are stale now;
    // refill them in reading order.
    m_lookAhead1 = m_lexicalAnalyzer.fetch();
    m_lookAhead2 = m_lexicalAnalyzer.fetch();
}
|
||||
|
||||
bool PDFParser::fetchCommand(const char* command)
|
||||
{
|
||||
if (m_lookAhead1.type == PDFLexicalAnalyzer::TokenType::Command &&
|
||||
m_lookAhead1.data.toByteArray() == command)
|
||||
{
|
||||
shift();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void PDFParser::shift()
|
||||
{
|
||||
m_lookAhead1 = std::move(m_lookAhead2);
|
||||
|
|
|
@ -138,6 +138,10 @@ public:
|
|||
/// stream, then EndOfFile token is returned.
|
||||
Token fetch();
|
||||
|
||||
/// Seeks stream from the start. If stream cannot be seeked (position is invalid),
|
||||
/// then exception is thrown.
|
||||
void seek(PDFInteger offset);
|
||||
|
||||
/// Skips whitespace and comments
|
||||
void skipWhitespaceAndComments();
|
||||
|
||||
|
@ -242,14 +246,23 @@ class PDFParser
|
|||
Q_DECLARE_TR_FUNCTIONS(pdf::PDFParser)
|
||||
|
||||
public:
|
||||
explicit PDFParser(const char* begin, const char* end, PDFParsingContext* context);
|
||||
enum Feature
|
||||
{
|
||||
None = 0x0000,
|
||||
AllowStreams = 0x0001,
|
||||
};
|
||||
|
||||
Q_DECLARE_FLAGS(Features, Feature)
|
||||
|
||||
explicit PDFParser(const QByteArray& data, PDFParsingContext* context, Features features);
|
||||
explicit PDFParser(const char* begin, const char* end, PDFParsingContext* context, Features features);
|
||||
|
||||
/// Fetches single object from the stream. Does not check
|
||||
/// cyclical references. If object cannot be fetched, then
|
||||
/// exception is thrown.
|
||||
PDFObject getObject();
|
||||
|
||||
/// Fetches signle object from the stream. Performs check for
|
||||
/// Fetches single object from the stream. Performs check for
|
||||
/// cyclical references. If object cannot be fetched, then
|
||||
/// exception is thrown.
|
||||
PDFObject getObject(PDFObjectReference reference);
|
||||
|
@ -257,12 +270,27 @@ public:
|
|||
/// Throws an error exception
|
||||
void error(const QString& message) const;
|
||||
|
||||
/// Seeks stream from the start. If stream cannot be seeked (position is invalid),
|
||||
/// then exception is thrown.
|
||||
void seek(PDFInteger offset);
|
||||
|
||||
/// Returns currently scanned token
|
||||
const PDFLexicalAnalyzer::Token& lookahead() const { return m_lookAhead1; }
|
||||
|
||||
/// If current token is a command with same string, then eat this command
|
||||
/// and return true. Otherwise do nothing and return false.
|
||||
/// \param command Command to be fetched
|
||||
bool fetchCommand(const char* command);
|
||||
|
||||
private:
|
||||
void shift();
|
||||
|
||||
/// Parsing context (multiple parsers can share it)
|
||||
PDFParsingContext* m_context;
|
||||
|
||||
/// Enabled features
|
||||
Features m_features;
|
||||
|
||||
/// Lexical analyzer for scanning tokens
|
||||
PDFLexicalAnalyzer m_lexicalAnalyzer;
|
||||
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
// Copyright (C) 2018 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdfxreftable.h"
#include "pdfconstants.h"
#include "pdfparser.h"

#include <set>
#include <stack>
#include <utility>
#include <vector>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
/// Reads the cross-reference table located at \p startTableOffset and every older
/// table reachable through the "Prev" key of the trailer dictionaries.
///
/// Tables are visited from the one at \p startTableOffset towards older ones. The
/// first definition seen for an object number is kept, so an entry read from a
/// later visited (older) table never replaces an entry that was already read.
/// The trailer dictionary of the first visited table is stored.
///
/// \param context Parsing context handed over to the parser
/// \param byteArray Input byte array (containing the PDF file)
/// \param startTableOffset Offset of the first reference table
/// \throws PDFParserException if a table is malformed, if the tables reference
///         each other cyclically, or if a hybrid table ("XRefStm") is found
void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset)
{
    PDFParser parser(byteArray, context, PDFParser::None);

    // Start from a clean state, so the same instance can be used repeatedly
    m_entries.clear();
    m_trailerDictionary = PDFObject();

    // resolved[i] is true once object number i has been defined by an already
    // visited table (either as occupied, or as free).
    std::vector<bool> resolved;

    std::set<PDFInteger> processedOffsets;
    std::stack<PDFInteger> workSet;
    workSet.push(startTableOffset);

    while (!workSet.empty())
    {
        const PDFInteger currentOffset = workSet.top();
        workSet.pop();

        // Check, if we have cyclical references between tables
        if (!processedOffsets.insert(currentOffset).second)
        {
            throw PDFParserException(tr("Cyclic reference found in reference table."));
        }

        // Now, we are ready to scan the table. Seek to the start of the reference table.
        parser.seek(currentOffset);

        if (!parser.fetchCommand(PDF_XREF_HEADER))
        {
            throw PDFParserException(tr("Invalid format of reference table."));
        }

        while (!parser.fetchCommand(PDF_XREF_TRAILER))
        {
            // Each subsection starts with two numbers: number of the first object
            // and count of entries in the subsection.
            PDFObject firstObject = parser.getObject();
            PDFObject countObject = parser.getObject();

            if (!firstObject.isInt() || !countObject.isInt())
            {
                throw PDFParserException(tr("Invalid format of reference table."));
            }

            const PDFInteger firstObjectNumber = firstObject.getInteger();
            const PDFInteger count = countObject.getInteger();

            // Both numbers come from the file and are used to size and index the
            // table. Negative values are invalid, and the subsection cannot have
            // more entries than the file has bytes.
            if (firstObjectNumber < 0 || count < 0 || count > static_cast<PDFInteger>(byteArray.size()))
            {
                throw PDFParserException(tr("Invalid format of reference table."));
            }

            // Make sure, that the sum below cannot overflow and that the resulting
            // size can be represented by the container.
            const unsigned long long maximalSize = m_entries.max_size();
            const unsigned long long firstIndex = static_cast<unsigned long long>(firstObjectNumber);
            const unsigned long long entryCount = static_cast<unsigned long long>(count);

            if (entryCount > maximalSize || firstIndex > maximalSize - entryCount)
            {
                throw PDFParserException(tr("Invalid format of reference table."));
            }

            const std::vector<Entry>::size_type desiredSize = static_cast<std::vector<Entry>::size_type>(firstIndex + entryCount);

            if (m_entries.size() < desiredSize)
            {
                m_entries.resize(desiredSize);
            }

            if (resolved.size() < desiredSize)
            {
                resolved.resize(desiredSize, false);
            }

            // Now, read the records
            for (PDFInteger i = 0; i < count; ++i)
            {
                const PDFInteger objectNumber = firstObjectNumber + i;
                const std::vector<Entry>::size_type index = static_cast<std::vector<Entry>::size_type>(objectNumber);

                PDFObject offset = parser.getObject();
                PDFObject generation = parser.getObject();

                const bool occupied = parser.fetchCommand(PDF_XREF_OCCUPIED);
                if (!occupied && !parser.fetchCommand(PDF_XREF_FREE))
                {
                    throw PDFParserException(tr("Bad format of reference table entry."));
                }

                if (!offset.isInt() || !generation.isInt())
                {
                    throw PDFParserException(tr("Bad format of reference table entry."));
                }

                // An already visited table defined this object, that definition wins
                if (resolved[index])
                {
                    continue;
                }

                // Free entry keeps the default values of the Entry structure
                Entry entry;
                if (occupied)
                {
                    entry.reference = PDFObjectReference(objectNumber, generation.getInteger());
                    entry.offset = offset.getInteger();
                    entry.type = EntryType::Occupied;
                }

                m_entries[index] = std::move(entry);
                resolved[index] = true;
            }
        }

        PDFObject trailerDictionary = parser.getObject();
        if (!trailerDictionary.isDictionary())
        {
            throw PDFParserException(tr("Trailer dictionary is invalid."));
        }

        // Only the trailer dictionary of the first visited table is stored
        if (m_trailerDictionary.isNull())
        {
            m_trailerDictionary = trailerDictionary;
        }

        const PDFDictionary* dictionary = trailerDictionary.getDictionary();
        if (dictionary->hasKey(PDF_XREF_TRAILER_PREVIOUS))
        {
            PDFObject previousOffset = dictionary->get(PDF_XREF_TRAILER_PREVIOUS);

            if (!previousOffset.isInt())
            {
                throw PDFParserException(tr("Offset of previous reference table is invalid."));
            }

            // The previous table is scanned in a following iteration
            workSet.push(previousOffset.getInteger());
        }

        if (dictionary->hasKey(PDF_XREF_TRAILER_XREFSTM))
        {
            throw PDFParserException(tr("Hybrid reference tables not supported."));
        }
    }
}
|
||||
|
||||
} // namespace pdf
|
|
@ -0,0 +1,80 @@
|
|||
// Copyright (C) 2018 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef PDFXREFTABLE_H
|
||||
#define PDFXREFTABLE_H
|
||||
|
||||
#include "pdfglobal.h"
|
||||
#include "pdfobject.h"
|
||||
|
||||
#include <QtCore>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
class PDFParsingContext;
|
||||
|
||||
/// Represents table of references in the PDF file. It contains
|
||||
/// scanned table in the PDF file, together with information, if entry
|
||||
/// is occupied, or it is free.
|
||||
class PDFXRefTable
|
||||
{
|
||||
Q_DECLARE_TR_FUNCTIONS(pdf::PDFXRefTable)
|
||||
|
||||
public:
|
||||
constexpr inline explicit PDFXRefTable() = default;
|
||||
|
||||
// Enforce default copy constructor and default move constructor
|
||||
constexpr inline PDFXRefTable(const PDFXRefTable&) = default;
|
||||
constexpr inline PDFXRefTable(PDFXRefTable&&) = default;
|
||||
|
||||
// Enforce default copy assignment operator and move assignment operator
|
||||
constexpr inline PDFXRefTable& operator=(const PDFXRefTable&) = default;
|
||||
constexpr inline PDFXRefTable& operator=(PDFXRefTable&&) = default;
|
||||
|
||||
enum class EntryType
|
||||
{
|
||||
Free, ///< Entry represents a free item (no object)
|
||||
Occupied ///< Entry represents a occupied item (object)
|
||||
};
|
||||
|
||||
struct Entry
|
||||
{
|
||||
PDFObjectReference reference;
|
||||
PDFInteger offset = -1;
|
||||
EntryType type = EntryType::Free;
|
||||
};
|
||||
|
||||
/// Tries to read reference table from the byte array. If error occurs, then exception
|
||||
/// is raised. This fuction also checks redundant entries.
|
||||
/// \param context Current parsing context
|
||||
/// \param byteArray Input byte array (containing the PDF file)
|
||||
/// \param startTableOffset Offset of first reference table
|
||||
void readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset);
|
||||
|
||||
private:
|
||||
/// Reference table entries
|
||||
std::vector<Entry> m_entries;
|
||||
|
||||
/// Trailer dictionary
|
||||
PDFObject m_trailerDictionary;
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFXREFTABLE_H
|
|
@ -0,0 +1,48 @@
|
|||
#-------------------------------------------------
|
||||
#
|
||||
# Project created by QtCreator 2018-11-18T16:50:12
|
||||
#
|
||||
#-------------------------------------------------
|
||||
|
||||
QT += core gui
|
||||
|
||||
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
|
||||
|
||||
TARGET = PdfForQtViewer
|
||||
TEMPLATE = app
|
||||
|
||||
# The following define makes your compiler emit warnings if you use
|
||||
# any feature of Qt which has been marked as deprecated (the exact warnings
|
||||
# depend on your compiler). Please consult the documentation of the
|
||||
# deprecated API in order to know how to port your code away from it.
|
||||
DEFINES += QT_DEPRECATED_WARNINGS
|
||||
|
||||
# You can also make your code fail to compile if you use deprecated APIs.
|
||||
# In order to do so, uncomment the following line.
|
||||
# You can also select to disable deprecated APIs only up to a certain version of Qt.
|
||||
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
||||
|
||||
QMAKE_CXXFLAGS += /std:c++latest
|
||||
|
||||
INCLUDEPATH += $$PWD/../PDFForQtLib/Sources
|
||||
|
||||
DESTDIR = $$OUT_PWD/..
|
||||
|
||||
LIBS += -L$$OUT_PWD/..
|
||||
|
||||
LIBS += -lPDFForQtLib
|
||||
|
||||
SOURCES += \
|
||||
main.cpp \
|
||||
pdfviewermainwindow.cpp
|
||||
|
||||
HEADERS += \
|
||||
pdfviewermainwindow.h
|
||||
|
||||
FORMS += \
|
||||
pdfviewermainwindow.ui
|
||||
|
||||
# Default rules for deployment.
|
||||
qnx: target.path = /tmp/$${TARGET}/bin
|
||||
else: unix:!android: target.path = /opt/$${TARGET}/bin
|
||||
!isEmpty(target.path): INSTALLS += target
|
|
@ -0,0 +1,12 @@
|
|||
#include "pdfviewermainwindow.h"
|
||||
|
||||
#include <QApplication>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
QApplication a(argc, argv);
|
||||
pdfviewer::PDFViewerMainWindow w;
|
||||
w.show();
|
||||
|
||||
return a.exec();
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
#include "pdfviewermainwindow.h"
|
||||
#include "ui_pdfviewermainwindow.h"
|
||||
|
||||
#include "pdfdocumentreader.h"
|
||||
|
||||
#include <QFileDialog>
|
||||
#include <QMessageBox>
|
||||
|
||||
namespace pdfviewer
|
||||
{
|
||||
|
||||
/// Builds the window from the designer form and wires the "Open" action
/// to its handler.
/// \param parent Parent widget passed to QMainWindow
PDFViewerMainWindow::PDFViewerMainWindow(QWidget *parent)
    : QMainWindow(parent)
    , ui(new Ui::PDFViewerMainWindow)
{
    ui->setupUi(this);

    // Action "Open" lets the user pick and load a document
    connect(ui->actionOpen, &QAction::triggered, this, &PDFViewerMainWindow::onActionOpenTriggered);
}
|
||||
|
||||
/// Releases the form object allocated in the constructor.
PDFViewerMainWindow::~PDFViewerMainWindow()
{
    // ui is a raw owning pointer, so it is deleted by hand
    delete ui;
}
|
||||
|
||||
void PDFViewerMainWindow::onActionOpenTriggered()
|
||||
{
|
||||
QString fileName = QFileDialog::getOpenFileName(this, tr("Select PDF document"), "K:/Programming/PDF/testpdf", tr("PDF document (*.pdf)"));
|
||||
if (!fileName.isEmpty())
|
||||
{
|
||||
pdf::PDFDocumentReader reader;
|
||||
pdf::PDFDocument document = reader.readFromFile(fileName);
|
||||
|
||||
if (reader.isSuccessfull())
|
||||
{
|
||||
QMessageBox::information(this, tr("PDF Reader"), tr("Document '%1' was successfully loaded!").arg(fileName));
|
||||
}
|
||||
else
|
||||
{
|
||||
QMessageBox::information(this, tr("PDF Reader"), tr("Document read error: %1").arg(fileName));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace pdfviewer
|
|
@ -0,0 +1,30 @@
|
|||
#ifndef PDFVIEWERMAINWINDOW_H
|
||||
#define PDFVIEWERMAINWINDOW_H
|
||||
|
||||
#include <QMainWindow>
|
||||
|
||||
namespace Ui
|
||||
{
|
||||
class PDFViewerMainWindow;
|
||||
}
|
||||
|
||||
namespace pdfviewer
|
||||
{
|
||||
|
||||
class PDFViewerMainWindow : public QMainWindow
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
explicit PDFViewerMainWindow(QWidget *parent = nullptr);
|
||||
virtual ~PDFViewerMainWindow() override;
|
||||
|
||||
private:
|
||||
void onActionOpenTriggered();
|
||||
|
||||
Ui::PDFViewerMainWindow* ui;
|
||||
};
|
||||
|
||||
} // namespace pdfviewer
|
||||
|
||||
#endif // PDFVIEWERMAINWINDOW_H
|
|
@ -0,0 +1,55 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ui version="4.0">
|
||||
<class>PDFViewerMainWindow</class>
|
||||
<widget class="QMainWindow" name="PDFViewerMainWindow">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>400</width>
|
||||
<height>300</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle">
|
||||
<string>PDFViewerMainWindow</string>
|
||||
</property>
|
||||
<widget class="QWidget" name="centralWidget"/>
|
||||
<widget class="QMenuBar" name="menuBar">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>400</width>
|
||||
<height>21</height>
|
||||
</rect>
|
||||
</property>
|
||||
<widget class="QMenu" name="menuFile">
|
||||
<property name="title">
|
||||
<string>File</string>
|
||||
</property>
|
||||
<addaction name="actionOpen"/>
|
||||
</widget>
|
||||
<addaction name="menuFile"/>
|
||||
</widget>
|
||||
<widget class="QToolBar" name="mainToolBar">
|
||||
<attribute name="toolBarArea">
|
||||
<enum>TopToolBarArea</enum>
|
||||
</attribute>
|
||||
<attribute name="toolBarBreak">
|
||||
<bool>false</bool>
|
||||
</attribute>
|
||||
</widget>
|
||||
<widget class="QStatusBar" name="statusBar"/>
|
||||
<action name="actionOpen">
|
||||
<property name="text">
|
||||
<string>Open</string>
|
||||
</property>
|
||||
<property name="shortcut">
|
||||
<string>Ctrl+O</string>
|
||||
</property>
|
||||
</action>
|
||||
</widget>
|
||||
<layoutdefault spacing="6" margin="11"/>
|
||||
<resources/>
|
||||
<connections/>
|
||||
</ui>
|
|
@ -1,3 +1,21 @@
|
|||
# Copyright (C) 2018 Jakub Melka
|
||||
#
|
||||
# This file is part of PdfForQt.
|
||||
#
|
||||
# PdfForQt is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# PdfForQt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
QT += testlib
|
||||
QT -= gui
|
||||
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
# Copyright (C) 2018 Jakub Melka
|
||||
#
|
||||
# This file is part of PdfForQt.
|
||||
#
|
||||
# PdfForQt is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# PdfForQt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
QT += testlib
|
||||
QT -= gui
|
||||
|
||||
CONFIG += qt console warn_on depend_includepath testcase
|
||||
CONFIG -= app_bundle
|
||||
|
||||
TEMPLATE = app
|
||||
|
||||
INCLUDEPATH += $$PWD/../PDFForQtLib/Sources
|
||||
|
||||
DESTDIR = $$OUT_PWD/..
|
||||
|
||||
LIBS += -L$$OUT_PWD/..
|
||||
|
||||
LIBS += -lPDFForQtLib
|
||||
|
||||
QMAKE_CXXFLAGS += /std:c++latest
|
||||
|
||||
SOURCES += \
|
||||
tst_lexicalanalyzertest.cpp
|
Loading…
Reference in New Issue