mirror of https://github.com/JakubMelka/PDF4QT.git
Encoding tables
This commit is contained in:
parent
bc8617751e
commit
2e805b198c
|
@ -1,6 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE QtCreatorProject>
|
||||
<!-- Written by QtCreator 4.7.2, 2018-11-30T19:34:52. -->
|
||||
<!-- Written by QtCreator 4.7.2, 2018-12-02T11:05:49. -->
|
||||
<qtcreator>
|
||||
<data>
|
||||
<variable>EnvironmentId</variable>
|
||||
|
@ -67,7 +67,7 @@
|
|||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Desktop Qt 5.11.2 MSVC2017 64bit</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Desktop Qt 5.11.2 MSVC2017 64bit</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">qt.qt5.5112.win64_msvc2017_64_kit</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">1</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveDeployConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">1</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.0">
|
||||
|
@ -296,7 +296,7 @@
|
|||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">UnitTests/UnitTests.pro</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_debug/UnitTests/..</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_release/UnitTests/..</value>
|
||||
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
|
||||
|
@ -353,7 +353,7 @@
|
|||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">PdfForQtViewer/PdfForQtViewer.pro</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_debug/PdfForQtViewer/..</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">K:/Programming/PDF/PDF_For_Qt/bin_release/PdfForQtViewer/..</value>
|
||||
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
|
||||
|
|
|
@ -41,7 +41,8 @@ SOURCES += \
|
|||
sources/pdfdocument.cpp \
|
||||
sources/pdfdocumentreader.cpp \
|
||||
sources/pdfxreftable.cpp \
|
||||
sources/pdfvisitor.cpp
|
||||
sources/pdfvisitor.cpp \
|
||||
sources/pdfencoding.cpp
|
||||
|
||||
HEADERS += \
|
||||
sources/pdfobject.h \
|
||||
|
@ -52,7 +53,8 @@ HEADERS += \
|
|||
sources/pdfdocumentreader.h \
|
||||
sources/pdfxreftable.h \
|
||||
sources/pdfflatmap.h \
|
||||
sources/pdfvisitor.h
|
||||
sources/pdfvisitor.h \
|
||||
sources/pdfencoding.h
|
||||
|
||||
unix {
|
||||
target.path = /usr/lib
|
||||
|
@ -63,4 +65,4 @@ unix {
|
|||
CONFIG += force_debug_info
|
||||
|
||||
|
||||
QMAKE_CXXFLAGS += /std:c++latest
|
||||
QMAKE_CXXFLAGS += /std:c++latest /utf-8
|
||||
|
|
|
@ -17,4 +17,147 @@
|
|||
|
||||
|
||||
#include "pdfdocument.h"
|
||||
#include "pdfparser.h"
|
||||
#include "pdfencoding.h"
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
// Key under which the trailer dictionary stores the document info dictionary
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY                 = "Info";

// Keys of the text string entries of the document info dictionary
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TITLE           = "Title";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_AUTHOR          = "Author";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_SUBJECT         = "Subject";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_KEYWORDS        = "Keywords";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_CREATOR         = "Creator";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_PRODUCER        = "Producer";

// Keys of the date entries of the document info dictionary
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_CREATION_DATE   = "CreationDate";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_MODIFIED_DATE   = "ModDate";

// Key of the trapping entry and the names accepted as its value
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED         = "Trapped";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_TRUE    = "True";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_FALSE   = "False";
static constexpr const char* PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN = "Unknown";
|
||||
|
||||
void PDFDocument::init()
|
||||
{
|
||||
initInfo();
|
||||
}
|
||||
|
||||
void PDFDocument::initInfo()
|
||||
{
|
||||
const PDFObject& trailerDictionary = m_pdfObjectStorage.getTrailerDictionary();
|
||||
|
||||
// Trailer object should be dictionary here. It is verified in the document reader.
|
||||
Q_ASSERT(trailerDictionary.isDictionary());
|
||||
|
||||
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
|
||||
Q_ASSERT(dictionary);
|
||||
|
||||
if (dictionary->hasKey(PDF_DOCUMENT_INFO_ENTRY))
|
||||
{
|
||||
const PDFObject& info = getObject(dictionary->get(PDF_DOCUMENT_INFO_ENTRY));
|
||||
|
||||
if (info.isDictionary())
|
||||
{
|
||||
const PDFDictionary* infoDictionary = info.getDictionary();
|
||||
Q_ASSERT(infoDictionary);
|
||||
|
||||
auto readTextString = [this, infoDictionary](const char* entry, QString& fillEntry)
|
||||
{
|
||||
if (infoDictionary->hasKey(entry))
|
||||
{
|
||||
const PDFObject& stringObject = getObject(infoDictionary->get(entry));
|
||||
if (stringObject.isString())
|
||||
{
|
||||
// We have succesfully read the string, convert it according to encoding
|
||||
fillEntry = PDFEncoding::convertTextString(stringObject.getString());
|
||||
}
|
||||
else if (!stringObject.isNull())
|
||||
{
|
||||
throw PDFParserException(tr("Bad format of document info entry in trailer dictionary. String expected."));
|
||||
}
|
||||
}
|
||||
};
|
||||
readTextString(PDF_DOCUMENT_INFO_ENTRY_TITLE, m_info.title);
|
||||
readTextString(PDF_DOCUMENT_INFO_ENTRY_AUTHOR, m_info.author);
|
||||
readTextString(PDF_DOCUMENT_INFO_ENTRY_SUBJECT, m_info.subject);
|
||||
readTextString(PDF_DOCUMENT_INFO_ENTRY_KEYWORDS, m_info.keywords);
|
||||
readTextString(PDF_DOCUMENT_INFO_ENTRY_CREATOR, m_info.creator);
|
||||
readTextString(PDF_DOCUMENT_INFO_ENTRY_PRODUCER, m_info.producer);
|
||||
|
||||
auto readDate= [this, infoDictionary](const char* entry, QDateTime& fillEntry)
|
||||
{
|
||||
if (infoDictionary->hasKey(entry))
|
||||
{
|
||||
const PDFObject& stringObject = getObject(infoDictionary->get(entry));
|
||||
if (stringObject.isString())
|
||||
{
|
||||
// We have succesfully read the string, convert it to date time
|
||||
fillEntry = PDFEncoding::convertToDateTime(stringObject.getString());
|
||||
|
||||
if (!fillEntry.isValid())
|
||||
{
|
||||
throw PDFParserException(tr("Bad format of document info entry in trailer dictionary. String with date time format expected."));
|
||||
}
|
||||
}
|
||||
else if (!stringObject.isNull())
|
||||
{
|
||||
throw PDFParserException(tr("Bad format of document info entry in trailer dictionary. String with date time format expected."));
|
||||
}
|
||||
}
|
||||
};
|
||||
readDate(PDF_DOCUMENT_INFO_ENTRY_CREATION_DATE, m_info.creationDate);
|
||||
readDate(PDF_DOCUMENT_INFO_ENTRY_MODIFIED_DATE, m_info.modifiedDate);
|
||||
|
||||
if (infoDictionary->hasKey(PDF_DOCUMENT_INFO_ENTRY_TRAPPED))
|
||||
{
|
||||
const PDFObject& nameObject = getObject(infoDictionary->get(PDF_DOCUMENT_INFO_ENTRY_TRAPPED));
|
||||
if (nameObject.isName())
|
||||
{
|
||||
const QByteArray& name = nameObject.getString();
|
||||
if (name == PDF_DOCUMENT_INFO_ENTRY_TRAPPED_TRUE)
|
||||
{
|
||||
m_info.trapped = Info::Trapped::True;
|
||||
}
|
||||
else if (name == PDF_DOCUMENT_INFO_ENTRY_TRAPPED_FALSE)
|
||||
{
|
||||
m_info.trapped = Info::Trapped::False;
|
||||
}
|
||||
else if (name == PDF_DOCUMENT_INFO_ENTRY_TRAPPED_UNKNOWN)
|
||||
{
|
||||
m_info.trapped = Info::Trapped::Unknown;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw PDFParserException(tr("Bad format of document info entry in trailer dictionary. Trapping information expected"));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw PDFParserException(tr("Bad format of document info entry in trailer dictionary. Trapping information expected"));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!info.isNull()) // Info may be invalid...
|
||||
{
|
||||
throw PDFParserException(tr("Bad format of document info entry in trailer dictionary."));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Looks up the object addressed by the reference. The object is returned
/// only if the object number is inside the range of stored objects and the
/// stored generation equals the generation of the reference. In every other
/// case a shared default-constructed object is returned instead.
const PDFObject& PDFObjectStorage::getObject(PDFObjectReference reference) const
{
    const PDFInteger objectCount = static_cast<PDFInteger>(m_objects.size());
    const bool isNumberInRange = reference.objectNumber >= 0 && reference.objectNumber < objectCount;

    if (isNumberInRange)
    {
        const auto& entry = m_objects[reference.objectNumber];
        if (entry.generation == reference.generation)
        {
            return entry.object;
        }
    }

    // Fallback for references which cannot be resolved
    static const PDFObject dummy;
    return dummy;
}
|
||||
|
||||
} // namespace pdf
|
||||
|
|
|
@ -22,6 +22,9 @@
|
|||
#include "pdfglobal.h"
|
||||
#include "pdfobject.h"
|
||||
|
||||
#include <QtCore>
|
||||
#include <QDateTime>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
|
@ -56,6 +59,10 @@ public:
|
|||
|
||||
}
|
||||
|
||||
/// Returns object from the object storage. If invalid reference is passed,
|
||||
/// then null object is returned (no exception is thrown).
|
||||
const PDFObject& getObject(PDFObjectReference reference) const;
|
||||
|
||||
/// Returns array of objects stored in this storage
|
||||
const PDFObjects& getObjects() const { return m_objects; }
|
||||
|
||||
|
@ -70,24 +77,82 @@ private:
|
|||
/// PDF document main class.
|
||||
class PDFDocument
|
||||
{
|
||||
Q_DECLARE_TR_FUNCTIONS(pdf::PDFDocument)
|
||||
|
||||
public:
|
||||
explicit PDFDocument() = default;
|
||||
|
||||
const PDFObjectStorage& getStorage() const { return m_pdfObjectStorage; }
|
||||
|
||||
/// Info about the document. Title, Author, Keywords...
|
||||
struct Info
|
||||
{
|
||||
/// Indicates, that document was modified that it includes trapping information.
|
||||
/// See PDF Reference 1.7, Section 10.10.5 "Trapping Support".
|
||||
enum class Trapped
|
||||
{
|
||||
True, ///< Fully trapped
|
||||
False, ///< Not yet trapped
|
||||
Unknown ///< Either unknown, or it has been trapped partly, not fully
|
||||
};
|
||||
|
||||
QString title;
|
||||
QString author;
|
||||
QString subject;
|
||||
QString keywords;
|
||||
QString creator;
|
||||
QString producer;
|
||||
QDateTime creationDate;
|
||||
QDateTime modifiedDate;
|
||||
Trapped trapped = Trapped::Unknown;
|
||||
};
|
||||
|
||||
/// Returns info about the document (title, author, etc.)
|
||||
const Info* getInfo() const { return &m_info; }
|
||||
|
||||
private:
|
||||
friend class PDFDocumentReader;
|
||||
|
||||
explicit PDFDocument(PDFObjectStorage&& storage) :
|
||||
m_pdfObjectStorage(std::move(storage))
|
||||
{
|
||||
|
||||
init();
|
||||
}
|
||||
|
||||
/// Initialize data based on object in the storage.
|
||||
/// Can throw exception if error is detected.
|
||||
void init();
|
||||
|
||||
/// Initialize the document info from the trailer dictionary.
|
||||
/// If document info is not present, then default document
|
||||
/// info is used. If error is detected, exception is thrown.
|
||||
void initInfo();
|
||||
|
||||
/// If object is reference, the dereference attempt is performed
|
||||
/// and object is returned. If it is not a reference, then self
|
||||
/// is returned. If dereference attempt fails, then null object
|
||||
/// is returned (no exception is thrown).
|
||||
const PDFObject& getObject(const PDFObject& object) const;
|
||||
|
||||
/// Storage of objects
|
||||
PDFObjectStorage m_pdfObjectStorage;
|
||||
|
||||
/// Info about the PDF document
|
||||
Info m_info;
|
||||
};
|
||||
|
||||
inline
|
||||
const PDFObject& PDFDocument::getObject(const PDFObject& object) const
|
||||
{
|
||||
if (object.isReference())
|
||||
{
|
||||
// Try to dereference the object
|
||||
return m_pdfObjectStorage.getObject(object.getReference());
|
||||
}
|
||||
|
||||
return object;
|
||||
}
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFDOCUMENT_H
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,90 @@
|
|||
// Copyright (C) 2018 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef PDFENCODING_H
|
||||
#define PDFENCODING_H
|
||||
|
||||
#include <QString>
|
||||
#include <QDateTime>
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
namespace encoding
|
||||
{
|
||||
using EncodingTable = std::array<QChar, 256>;
|
||||
}
|
||||
|
||||
/// This class can convert byte stream to the QString in unicode encoding.
|
||||
/// PDF has several encodings, see PDF Reference 1.7, Appendix D.
|
||||
class PDFEncoding
|
||||
{
|
||||
public:
|
||||
explicit PDFEncoding() = delete;
|
||||
|
||||
enum class Encoding
|
||||
{
|
||||
Standard, ///< Appendix D, Section D.1, StandardEncoding
|
||||
MacRoman, ///< Appendix D, Section D.1, MacRomanEncoding
|
||||
WinAnsi, ///< Appendix D, Section D.1, WinAnsiEncoding
|
||||
PDFDoc, ///< Appendix D, Section D.1/D.2, PDFDocEncoding
|
||||
MacExpert, ///< Appendix D, Section D.3, MacExpertEncoding
|
||||
Symbol, ///< Appendix D, Section D.4, Symbol Set and Encoding
|
||||
ZapfDingbats ///< Appendix D, Section D.5, Zapf Dingbats Encoding
|
||||
};
|
||||
|
||||
/// Converts byte array to the unicode string using specified encoding
|
||||
/// \param stream Stream (byte array string) to be processed
|
||||
/// \param encoding Encoding used to convert to unicode string
|
||||
/// \returns Converted unicode string
|
||||
static QString convert(const QByteArray& stream, Encoding encoding);
|
||||
|
||||
/// Convert text string to the unicode string, using either PDFDocEncoding,
|
||||
/// or UTF-16BE encoding. Please see PDF Reference 1.7, Chapter 3.8.1. If
|
||||
/// UTF-16BE encoding is used, then leading bytes should be 0xFE and 0xFF
|
||||
/// \param Stream
|
||||
/// \returns Converted unicode string
|
||||
static QString convertTextString(const QByteArray& stream);
|
||||
|
||||
/// Converts byte array from UTF-16BE encoding to QString with same encoding.
|
||||
/// \param Stream
|
||||
/// \returns Converted unicode string
|
||||
static QString convertFromUnicode(const QByteArray& stream);
|
||||
|
||||
/// Convert stream to date time according to PDF Reference 1.7, Chapter 3.8.1.
|
||||
/// If date cannot be converted (string is invalid), then invalid QDateTime
|
||||
/// is returned.
|
||||
/// \param stream Stream, from which date/time is read
|
||||
static QDateTime convertToDateTime(const QByteArray& stream);
|
||||
|
||||
private:
|
||||
/// Returns conversion table for particular encoding
|
||||
/// \param encoding Encoding
|
||||
static const encoding::EncodingTable* getTableForEncoding(Encoding encoding);
|
||||
|
||||
/// Returns true, if byte array has UTF-16BE unicode marking bytes at the
|
||||
/// stream start. If they are present, then byte stream is probably encoded
|
||||
/// as unicode.
|
||||
/// \param stream Stream to be tested
|
||||
static bool hasUnicodeLeadMarkings(const QByteArray& stream);
|
||||
};
|
||||
|
||||
} // namespace pdf
|
||||
|
||||
#endif // PDFENCODING_H
|
|
@ -22,7 +22,7 @@ DEFINES += QT_DEPRECATED_WARNINGS
|
|||
# You can also select to disable deprecated APIs only up to a certain version of Qt.
|
||||
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
||||
|
||||
QMAKE_CXXFLAGS += /std:c++latest
|
||||
QMAKE_CXXFLAGS += /std:c++latest /utf-8
|
||||
|
||||
INCLUDEPATH += $$PWD/../PDFForQtLib/Sources
|
||||
|
||||
|
|
|
@ -103,6 +103,7 @@ void LexicalAnalyzerTest::test_strings()
|
|||
testTokens("(Text with special character: \\))", { Token(Type::String, QByteArray("Text with special character: )")) });
|
||||
testTokens("(Text with special character: \\\\)", { Token(Type::String, QByteArray("Text with special character: \\")) });
|
||||
testTokens("(\53)", { Token(Type::String, QByteArray("+")) });
|
||||
testTokens("(\376\377)", { Token(Type::String, QByteArray("\376\377")) });
|
||||
testTokens("(\0533)", { Token(Type::String, QByteArray("+3")) });
|
||||
testTokens("(\053)", { Token(Type::String, QByteArray("+")) });
|
||||
testTokens("(\053053)", { Token(Type::String, QByteArray("+053")) });
|
||||
|
|
Loading…
Reference in New Issue