Handling encrypted documents - basics

This commit is contained in:
Jakub Melka 2019-08-04 18:26:15 +02:00
parent 1d3e6231ec
commit 0a6e7bb866
5 changed files with 385 additions and 3 deletions

View File

@ -42,6 +42,7 @@ SOURCES += \
sources/pdfparser.cpp \
sources/pdfdocument.cpp \
sources/pdfdocumentreader.cpp \
sources/pdfsecurityhandler.cpp \
sources/pdfutils.cpp \
sources/pdfxreftable.cpp \
sources/pdfvisitor.cpp \
@ -70,6 +71,7 @@ HEADERS += \
sources/pdfconstants.h \
sources/pdfdocument.h \
sources/pdfdocumentreader.h \
sources/pdfsecurityhandler.h \
sources/pdfxreftable.h \
sources/pdfflatmap.h \
sources/pdfvisitor.h \

View File

@ -22,6 +22,7 @@
#include "pdfglobal.h"
#include "pdfobject.h"
#include "pdfcatalog.h"
#include "pdfsecurityhandler.h"
#include <QtCore>
#include <QDateTime>
@ -54,9 +55,10 @@ public:
using PDFObjects = std::vector<Entry>;
explicit PDFObjectStorage(PDFObjects&& objects, PDFObject&& trailerDictionary) :
explicit PDFObjectStorage(PDFObjects&& objects, PDFObject&& trailerDictionary, PDFSecurityHandlerPointer&& securityHandler) :
m_objects(std::move(objects)),
m_trailerDictionary(std::move(trailerDictionary))
m_trailerDictionary(std::move(trailerDictionary)),
m_securityHandler(std::move(securityHandler))
{
}
@ -71,9 +73,13 @@ public:
/// Returns trailer dictionary
const PDFObject& getTrailerDictionary() const { return m_trailerDictionary; }
/// Returns security handler associated with these objects
const PDFSecurityHandler* getSecurityHandler() const { return m_securityHandler.data(); }
private:
PDFObjects m_objects;
PDFObject m_trailerDictionary;
PDFSecurityHandlerPointer m_securityHandler;
};
/// Loads data from the object contained in the PDF document, such as integers,

View File

@ -267,6 +267,33 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
// Now, we are ready to scan all objects
std::for_each(std::execution::parallel_policy(), occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry);
// ------------------------------------------------------------------------------------------
// SECURITY - handle encrypted documents
// ------------------------------------------------------------------------------------------
const PDFObject& trailerDictionaryObject = xrefTable.getTrailerDictionary();
if (!trailerDictionaryObject.isDictionary())
{
throw PDFParserException(tr("Invalid trailer dictionary."));
}
const PDFDictionary* trailerDictionary = trailerDictionaryObject.getDictionary();
PDFObject encryptObject = trailerDictionary->get("Encrypt");
if (encryptObject.isReference())
{
PDFObjectReference encryptObjectReference = encryptObject.getReference();
if (encryptObjectReference.objectNumber < objects.size() && objects[encryptObjectReference.objectNumber].generation == encryptObjectReference.generation)
{
encryptObject = objects[encryptObjectReference.objectNumber].object;
}
}
// Read the security handler
PDFSecurityHandlerPointer securityHandler = PDFSecurityHandler::createSecurityHandler(encryptObject);
// ------------------------------------------------------------------------------------------
// SECURITY - security handler created
// ------------------------------------------------------------------------------------------
// Then process object streams
std::vector<PDFXRefTable::Entry> objectStreamEntries = xrefTable.getObjectStreamEntries();
std::set<PDFObjectReference> objectStreams;
@ -369,7 +396,7 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
// Now, we are ready to scan all object streams
std::for_each(std::execution::parallel_policy(), objectStreams.cbegin(), objectStreams.cend(), processObjectStream);
PDFObjectStorage storage(std::move(objects), PDFObject(xrefTable.getTrailerDictionary()));
PDFObjectStorage storage(std::move(objects), PDFObject(xrefTable.getTrailerDictionary()), std::move(securityHandler));
return PDFDocument(std::move(storage));
}
catch (PDFParserException parserException)

View File

@ -0,0 +1,221 @@
// Copyright (C) 2019 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdfsecurityhandler.h"
#include "pdfexception.h"
#include <openssl/rc4.h>
namespace pdf
{
/// Creates a security handler from the document's encryption dictionary.
///
/// A null object yields a PDFNoneSecurityHandler. Any other object must be
/// a dictionary describing the "Standard" security handler; its entries
/// V, Length, CF, StmF, StrF and EFF are parsed into a PDFStandardSecurityHandler.
/// The object is used as-is, indirect references are not resolved here.
/// \param encryptionDictionaryObject Encryption dictionary object (or null object)
/// \returns Shared pointer to the newly created security handler
/// \throws PDFParserException if the dictionary is malformed or uses unsupported features
PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObject& encryptionDictionaryObject)
{
    if (encryptionDictionaryObject.isNull())
    {
        return PDFSecurityHandlerPointer(new PDFNoneSecurityHandler());
    }

    if (!encryptionDictionaryObject.isDictionary())
    {
        throw PDFParserException(PDFTranslationContext::tr("Invalid encryption dictionary."));
    }

    const PDFDictionary* dictionary = encryptionDictionaryObject.getDictionary();

    // Reads a name entry. If the entry is not a name (this includes a missing/null
    // entry), then an exception is thrown for required entries, otherwise
    // the default value (or empty byte array) is returned.
    auto getName = [](const PDFDictionary* source, const char* key, bool required, const char* defaultValue = nullptr) -> QByteArray
    {
        const PDFObject& nameObject = source->get(key);

        if (nameObject.isName())
        {
            return nameObject.getString();
        }

        if (required)
        {
            throw PDFParserException(PDFTranslationContext::tr("Invalid value for entry '%1' in encryption dictionary. Name expected.").arg(QString::fromLatin1(key)));
        }

        return defaultValue ? QByteArray(defaultValue) : QByteArray();
    };

    // Reads an integer entry. If the entry is not an integer, then an exception
    // is thrown for required entries, otherwise the default value is returned.
    auto getInt = [](const PDFDictionary* source, const char* key, bool required, PDFInteger defaultValue = -1) -> PDFInteger
    {
        const PDFObject& intObject = source->get(key);

        if (!intObject.isInt())
        {
            if (required)
            {
                throw PDFParserException(PDFTranslationContext::tr("Invalid value for entry '%1' in encryption dictionary. Integer expected.").arg(QString::fromLatin1(key)));
            }

            return defaultValue;
        }

        return intObject.getInteger();
    };

    QByteArray filterName = getName(dictionary, "Filter", true);
    if (filterName != "Standard")
    {
        throw PDFParserException(PDFTranslationContext::tr("Unknown security handler."));
    }

    // Check V on the full-width integer first, so that a huge value cannot
    // wrap around into the valid range when it is narrowed to int.
    const PDFInteger versionValue = getInt(dictionary, "V", true);
    if (versionValue < 1 || versionValue > 5)
    {
        throw PDFParserException(PDFTranslationContext::tr("Unsupported version of document encryption (V = %1).").arg(static_cast<qlonglong>(versionValue)));
    }
    const int V = static_cast<int>(versionValue);

    // Entry 'Length' is read only for V == 2 or V == 3, otherwise we set
    // file encryption key length manually
    int Length = 40;
    switch (V)
    {
        case 1:
            Length = 40;
            break;

        case 2:
        case 3:
        {
            // According to the PDF specification, the key length shall be
            // a multiple of 8 in the range 40 to 128 bits (default 40).
            const PDFInteger lengthValue = getInt(dictionary, "Length", false, 40);
            if (lengthValue < 40 || lengthValue > 128 || lengthValue % 8 != 0)
            {
                throw PDFParserException(PDFTranslationContext::tr("Invalid encryption key length (Length = %1).").arg(static_cast<qlonglong>(lengthValue)));
            }
            Length = static_cast<int>(lengthValue);
            break;
        }

        case 4:
            Length = 128;
            break;

        case 5:
            Length = 256;
            break;

        default:
            // Unreachable, V has been validated above
            Q_ASSERT(false);
            break;
    }

    // Create standard security handler
    PDFStandardSecurityHandler handler;
    handler.m_V = V;
    handler.m_keyLength = Length;

    // Add "Identity" filter to the filters
    CryptFilter identityFilter;
    identityFilter.type = CryptFilterType::Identity;
    handler.m_cryptFilters["Identity"] = identityFilter;

    if (V == 4 || V == 5)
    {
        const PDFObject& cryptFilterObjects = dictionary->get("CF");
        if (cryptFilterObjects.isDictionary())
        {
            // Converts a single crypt filter dictionary to the CryptFilter structure
            auto parseCryptFilter = [&getName](const PDFObject& object) -> CryptFilter
            {
                if (!object.isDictionary())
                {
                    throw PDFParserException(PDFTranslationContext::tr("Crypt filter is not a dictionary!"));
                }
                const PDFDictionary* cryptFilterDictionary = object.getDictionary();

                CryptFilter filter;

                QByteArray CFMName = getName(cryptFilterDictionary, "CFM", false, "None");
                if (CFMName == "None")
                {
                    filter.type = CryptFilterType::None;
                }
                else if (CFMName == "V2")
                {
                    filter.type = CryptFilterType::V2;
                }
                else if (CFMName == "AESV2")
                {
                    filter.type = CryptFilterType::AESV2;
                }
                else if (CFMName == "AESV3")
                {
                    filter.type = CryptFilterType::AESV3;
                }
                else
                {
                    throw PDFParserException(PDFTranslationContext::tr("Unsupported encryption algorithm '%1'.").arg(QString::fromLatin1(CFMName)));
                }

                QByteArray authEventName = getName(cryptFilterDictionary, "AuthEvent", false, "DocOpen");
                if (authEventName == "DocOpen")
                {
                    filter.authEvent = AuthEvent::DocOpen;
                }
                else if (authEventName == "EFOpen")
                {
                    filter.authEvent = AuthEvent::EFOpen;
                }
                else
                {
                    throw PDFParserException(PDFTranslationContext::tr("Unsupported authorization event '%1'.").arg(QString::fromLatin1(authEventName)));
                }

                return filter;
            };

            const PDFDictionary* cryptFilters = cryptFilterObjects.getDictionary();
            for (size_t i = 0, cryptFilterCount = cryptFilters->getCount(); i < cryptFilterCount; ++i)
            {
                const QByteArray cryptFilterName = cryptFilters->getKey(i);

                // "Identity" is a standard crypt filter name. According to the PDF
                // specification, entries in 'CF' redefining it shall be ignored.
                if (cryptFilterName == "Identity")
                {
                    continue;
                }

                handler.m_cryptFilters[cryptFilterName] = parseCryptFilter(cryptFilters->getValue(i));
            }
        }

        // Now, add standard filters
        auto resolveFilter = [&handler](const QByteArray& name)
        {
            auto it = handler.m_cryptFilters.find(name);

            if (it == handler.m_cryptFilters.cend())
            {
                throw PDFParserException(PDFTranslationContext::tr("Unknown crypt filter '%1'.").arg(QString::fromLatin1(name)));
            }

            return it->second;
        };

        handler.m_filterStreams = resolveFilter(getName(dictionary, "StmF", false, "Identity"));
        handler.m_filterStrings = resolveFilter(getName(dictionary, "StrF", false, "Identity"));

        if (dictionary->hasKey("EFF"))
        {
            handler.m_filterEmbeddedFiles = resolveFilter(getName(dictionary, "EFF", true));
        }
        else
        {
            // According to the PDF specification, if 'EFF' entry is omitted, then filter
            // for streams is used.
            handler.m_filterEmbeddedFiles = handler.m_filterStreams;
        }
    }

    return PDFSecurityHandlerPointer(new PDFStandardSecurityHandler(qMove(handler)));
}
} // namespace pdf

View File

@ -0,0 +1,126 @@
// Copyright (C) 2019 Jakub Melka
//
// This file is part of PdfForQt.
//
// PdfForQt is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PdfForQt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#ifndef PDFSECURITYHANDLER_H
#define PDFSECURITYHANDLER_H
#include "pdfglobal.h"
#include "pdfobject.h"
#include <QByteArray>
#include <QSharedPointer>
#include <map>
namespace pdf
{
/// Kind of encryption reported by a security handler.
enum class EncryptionMode
{
    /// Document is not encrypted
    None,

    /// Document is encrypted and using standard security handler
    Standard,

    /// Document is encrypted and using custom security handler.
    /// Custom handlers must return this value.
    Custom
};
/// Method used by a crypt filter to process the data.
enum class CryptFilterType
{
    /// The application shall decrypt the data using the security handler
    None,

    /// Use file encryption key for RC4 algorithm
    V2,

    /// Use file encryption key for AES algorithm
    AESV2,

    /// Use file encryption key for AES 256 bit algorithm
    AESV3,

    /// Don't decrypt anything, use identity function
    Identity
};
/// Event which triggers the authorization for a crypt filter.
enum class AuthEvent
{
    /// Authorize on document open
    DocOpen,

    /// Authorize when accessing embedded file stream
    EFOpen
};
/// Kind of data to which a crypt filter is applied.
enum class CryptFilterApplication
{
    /// Apply filter to decrypt/encrypt strings
    String,

    /// Apply filter to decrypt/encrypt streams
    Stream,

    /// Apply filter to decrypt/encrypt embedded file streams
    EmbeddedFile
};
/// Description of a single crypt filter: how the data are processed
/// and which event triggers the authorization.
struct CryptFilter
{
    /// Processing method, defaults to CryptFilterType::None
    CryptFilterType type{CryptFilterType::None};

    /// Authorization event, defaults to AuthEvent::DocOpen
    AuthEvent authEvent{AuthEvent::DocOpen};
};
class PDFSecurityHandler;

/// Shared pointer to a security handler
using PDFSecurityHandlerPointer = QSharedPointer<PDFSecurityHandler>;

/// Abstract base class of security handlers. Concrete handlers report
/// their encryption mode by function getMode. Instances are created
/// by the static function createSecurityHandler.
class PDFSecurityHandler
{
public:
    explicit PDFSecurityHandler() = default;
    virtual ~PDFSecurityHandler() = default;

    /// Returns the encryption mode represented by this handler
    virtual EncryptionMode getMode() const = 0;

    /// Creates a security handler from the object. If object is null, then
    /// "None" security handler is created. If error occurs, then exception is thrown.
    /// \param encryptionDictionaryObject Encryption dictionary object
    static PDFSecurityHandlerPointer createSecurityHandler(const PDFObject& encryptionDictionaryObject);

private:
    /// Version of the encryption, shall be a number from 1 to 5, according the
    /// PDF specification. Other values are invalid.
    int m_V{0};

    /// Length of the key to encrypt/decrypt the document in bits. It is read
    /// from the encryption dictionary only for V = 2 or V = 3, for other versions
    /// it is set to a fixed value by createSecurityHandler.
    int m_keyLength{40};

    /// Map containing crypt filters, indexed by the crypt filter name.
    std::map<QByteArray, CryptFilter> m_cryptFilters;

    /// Crypt filter for decrypting strings
    CryptFilter m_filterStrings;

    /// Crypt filter for decrypting streams
    CryptFilter m_filterStreams;

    /// Crypt filter for decrypting embedded files
    CryptFilter m_filterEmbeddedFiles;
};
/// Specifies the security of unencrypted document. This handler
/// always reports the mode EncryptionMode::None.
class PDFNoneSecurityHandler : public PDFSecurityHandler
{
public:
    /// Returns EncryptionMode::None. Marked as override, so the compiler
    /// verifies, that the signature matches the base class function.
    virtual EncryptionMode getMode() const override { return EncryptionMode::None; }
};
/// Specifies the security using standard security handler (see PDF specification
/// for details). This handler always reports the mode EncryptionMode::Standard.
class PDFStandardSecurityHandler : public PDFSecurityHandler
{
public:
    /// Returns EncryptionMode::Standard. Marked as override, so the compiler
    /// verifies, that the signature matches the base class function.
    virtual EncryptionMode getMode() const override { return EncryptionMode::Standard; }
};
} // namespace pdf
#endif // PDFSECURITYHANDLER_H