Decryption of document data

This commit is contained in:
Jakub Melka 2019-08-13 11:45:36 +02:00
parent cfc9593d14
commit 2623ef6825
5 changed files with 428 additions and 5 deletions

View File

@ -268,6 +268,12 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
// Now, we are ready to scan all objects
std::for_each(std::execution::parallel_policy(), occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry);
if (m_result != Result::OK)
{
// Do not proceed further, if document loading failed
return PDFDocument();
}
// ------------------------------------------------------------------------------------------
// SECURITY - handle encrypted documents
// ------------------------------------------------------------------------------------------
@ -304,9 +310,11 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
}
}
PDFObjectReference encryptObjectReference;
PDFObject encryptObject = trailerDictionary->get("Encrypt");
if (encryptObject.isReference())
{
encryptObjectReference = encryptObject.getReference();
PDFObjectReference encryptObjectReference = encryptObject.getReference();
if (static_cast<size_t>(encryptObjectReference.objectNumber) < objects.size() && objects[encryptObjectReference.objectNumber].generation == encryptObjectReference.generation)
{
@ -330,6 +338,33 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer)
throw PDFParserException(PDFTranslationContext::tr("Authorization failed. Bad password provided."));
}
// Now, decrypt the document, if we are authorized. We must also check, if we have to decrypt the object.
// According to the PDF specification, following items are ommited from encryption:
// 1) Values for ID entry in the trailer dictionary
// 2) Any strings in Encrypt dictionary
// 3) String/streams in object streams (entire object streams are encrypted)
// 4) Hexadecimal strings in Content key in signature dictionary
//
// Trailer dictionary is not decrypted, because PDF specification provides no algorithm to decrypt it,
// because it needs object number and generation for generating the decrypt key. So 1) is handled
// automatically. 2) is handled in the code below. 3) is handled also automatically, because we do not
// decipher object streams here. 4) must be handled in the security handler.
if (securityHandler->getMode() != EncryptionMode::None)
{
auto decryptEntry = [encryptObjectReference, &securityHandler, &objects](const PDFXRefTable::Entry& entry)
{
if (encryptObjectReference.objectNumber != 0 && encryptObjectReference == entry.reference)
{
// 2) - Encrypt dictionary
return;
}
objects[entry.reference.objectNumber].object = securityHandler->decryptObject(objects[entry.reference.objectNumber].object, entry.reference);
};
std::for_each(std::execution::parallel_policy(), occupiedEntries.cbegin(), occupiedEntries.cend(), decryptEntry);
}
// ------------------------------------------------------------------------------------------
// SECURITY - security handler created
// ------------------------------------------------------------------------------------------

View File

@ -197,6 +197,7 @@ class PDFArray : public PDFObjectContent
{
public:
inline constexpr PDFArray() = default;
inline PDFArray(std::vector<PDFObject>&& objects) : m_objects(qMove(objects)) { }
virtual ~PDFArray() override = default;
virtual bool equals(const PDFObjectContent* other) const override;
@ -227,11 +228,11 @@ private:
/// map, because dictionaries are usually small.
class PDFDictionary : public PDFObjectContent
{
private:
public:
using DictionaryEntry = std::pair<QByteArray, PDFObject>;
public:
inline constexpr PDFDictionary() = default;
inline PDFDictionary(std::vector<DictionaryEntry>&& dictionary) : m_dictionary(qMove(dictionary)) { }
virtual ~PDFDictionary() override = default;
virtual bool equals(const PDFObjectContent* other) const override;

View File

@ -18,6 +18,8 @@
#include "pdfsecurityhandler.h"
#include "pdfexception.h"
#include "pdfencoding.h"
#include "pdfvisitor.h"
#include "pdfutils.h"
#include <openssl/rc4.h>
#include <openssl/md5.h>
@ -37,6 +39,153 @@ static constexpr std::array<uint8_t, 32> PDFPasswordPadding = {
0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
};
class PDFDecryptObjectVisitor : public PDFAbstractVisitor
{
public:
explicit PDFDecryptObjectVisitor(const PDFSecurityHandler* securityHandler, PDFObjectReference reference) :
m_securityHandler(securityHandler),
m_reference(reference)
{
m_objectStack.reserve(32);
}
virtual void visitNull() override;
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(const PDFString* string) override;
virtual void visitName(const PDFString* name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
virtual void visitReference(const PDFObjectReference reference) override;
PDFObject getDecryptedObject();
private:
const PDFSecurityHandler* m_securityHandler;
std::vector<PDFObject> m_objectStack;
PDFObjectReference m_reference;
};
void PDFDecryptObjectVisitor::visitNull()
{
m_objectStack.push_back(PDFObject::createNull());
}
void PDFDecryptObjectVisitor::visitBool(bool value)
{
m_objectStack.push_back(PDFObject::createBool(value));
}
void PDFDecryptObjectVisitor::visitInt(PDFInteger value)
{
m_objectStack.push_back(PDFObject::createInteger(value));
}
void PDFDecryptObjectVisitor::visitReal(PDFReal value)
{
m_objectStack.push_back(PDFObject::createReal(value));
}
void PDFDecryptObjectVisitor::visitString(const PDFString* string)
{
m_objectStack.push_back(PDFObject::createString(std::make_shared<PDFString>(m_securityHandler->decrypt(string->getString(), m_reference, PDFSecurityHandler::EncryptionScope::String))));
}
void PDFDecryptObjectVisitor::visitName(const PDFString* name)
{
m_objectStack.push_back(PDFObject::createName(std::make_shared<PDFString>(*name)));
}
void PDFDecryptObjectVisitor::visitArray(const PDFArray* array)
{
acceptArray(array);
// We have all objects on the stack
Q_ASSERT(array->getCount() <= m_objectStack.size());
auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount());
std::vector<PDFObject> objects(it, m_objectStack.cend());
PDFObject object = PDFObject::createArray(std::make_shared<PDFArray>(qMove(objects)));
m_objectStack.erase(it, m_objectStack.cend());
m_objectStack.push_back(object);
}
void PDFDecryptObjectVisitor::visitDictionary(const PDFDictionary* dictionary)
{
Q_ASSERT(dictionary);
// We must check, if it is or isn't a signature dictionary. If it is,
// then don't decrypt the Content value. We also don't check, if signature
// isn't indirectly referenced by reference. Hope it isn't...
const PDFObject& typeObject = dictionary->get("Type");
bool isSignatureObject = (typeObject.isName() && typeObject.getString() == "Sig");
std::vector<PDFDictionary::DictionaryEntry> entries;
entries.reserve(dictionary->getCount());
for (size_t i = 0, count = dictionary->getCount(); i < count; ++i)
{
if (isSignatureObject && dictionary->getKey(i) == "Contents")
{
entries.emplace_back(dictionary->getKey(i), dictionary->getValue(i));
}
else
{
dictionary->getValue(i).accept(this);
entries.emplace_back(dictionary->getKey(i), m_objectStack.back());
m_objectStack.pop_back();
}
}
m_objectStack.push_back(PDFObject::createDictionary(std::make_shared<PDFDictionary>(qMove(entries))));
}
void PDFDecryptObjectVisitor::visitStream(const PDFStream* stream)
{
// Don't decrypt, if it is a Metadata stream and Metadata encryption is turned off
const PDFDictionary* dictionary = stream->getDictionary();
const PDFObject& typeObject = dictionary->get("Type");
bool isMetadata = (typeObject.isName() && typeObject.getString() == "Metadata");
if (isMetadata && !m_securityHandler->isMetadataEncrypted())
{
m_objectStack.push_back(PDFObject::createStream(std::make_shared<PDFStream>(PDFDictionary(*dictionary), QByteArray(*stream->getContent()))));
return;
}
// Decrypt the dictionary
visitDictionary(dictionary);
PDFObject dictionaryObject = m_objectStack.back();
m_objectStack.pop_back();
// TODO: Handle Crypt filter
PDFDictionary decryptedDictionary(*dictionaryObject.getDictionary());
QByteArray decryptedData = m_securityHandler->decrypt(*stream->getContent(), m_reference, PDFSecurityHandler::EncryptionScope::Stream);
m_objectStack.push_back(PDFObject::createStream(std::make_shared<PDFStream>(qMove(decryptedDictionary), qMove(decryptedData))));
}
void PDFDecryptObjectVisitor::visitReference(const PDFObjectReference reference)
{
m_objectStack.push_back(PDFObject::createReference(reference));
}
PDFObject PDFDecryptObjectVisitor::getDecryptedObject()
{
Q_ASSERT(m_objectStack.size() == 1);
return qMove(m_objectStack.back());
}
PDFObject PDFSecurityHandler::decryptObject(const PDFObject& object, PDFObjectReference reference) const
{
PDFDecryptObjectVisitor visitor(this, reference);
object.accept(&visitor);
return visitor.getDecryptedObject();
}
PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObject& encryptionDictionaryObject, const QByteArray& id)
{
if (encryptionDictionaryObject.isNull())
@ -145,7 +294,7 @@ PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObj
const PDFObject& cryptFilterObjects = dictionary->get("CF");
if (cryptFilterObjects.isDictionary())
{
auto parseCryptFilter = [&getName](const PDFObject& object) -> CryptFilter
auto parseCryptFilter = [Length, &getName, &getInt](const PDFObject& object) -> CryptFilter
{
if (!object.isDictionary())
{
@ -191,6 +340,8 @@ PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObj
throw PDFParserException(PDFTranslationContext::tr("Unsupported authorization event '%1'.").arg(QString::fromLatin1(authEventName)));
}
filter.keyLength = getInt(cryptFilterDictionary, "Length", false, Length / 8);
return filter;
};
@ -236,6 +387,10 @@ PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObj
}
handler.m_R = R;
handler.m_filterDefault.authEvent = AuthEvent::DocOpen;
handler.m_filterDefault.keyLength = Length / 8;
handler.m_filterDefault.type = (R > 4) ? CryptFilterType::AESV3 : CryptFilterType::V2;
auto readByteArray = [dictionary](const char* key, int size)
{
QByteArray result;
@ -427,6 +582,169 @@ PDFSecurityHandler::AuthorizationResult PDFStandardSecurityHandler::authenticate
return AuthorizationResult::Cancelled;
}
QByteArray PDFStandardSecurityHandler::decryptUsingFilter(const QByteArray& data, CryptFilter filter, PDFObjectReference reference) const
{
QByteArray decryptedData;
Q_ASSERT(m_authorizationData.isAuthorized());
struct AES_data
{
QByteArray initializationVector;
QByteArray paddedData;
};
auto prepareAES_data = [](const QByteArray& data)
{
AES_data result;
result.initializationVector = data.left(AES_BLOCK_SIZE);
// This is an error. But to handle it, we resize the vector
// with arbitrary data.
if (result.initializationVector.size() < AES_BLOCK_SIZE)
{
result.initializationVector.resize(AES_BLOCK_SIZE);
}
result.paddedData = data.mid(AES_BLOCK_SIZE);
// Add padding remainder according to the specification
int size = result.paddedData.size();
int paddingRemainder = AES_BLOCK_SIZE - (size % AES_BLOCK_SIZE);
for (int i = 0; i < paddingRemainder; ++i)
{
result.paddedData.push_back(paddingRemainder);
}
return result;
};
switch (filter.type)
{
case CryptFilterType::None: // The application shall decrypt the data using the security handler
{
// This shouldn't occur, because in case the used filter has None value, then default filter
// is used and default filter can't have this value.
Q_ASSERT(false);
break;
}
case CryptFilterType::V2: // Use file encryption key for RC4 algorithm
{
std::vector<uint8_t> inputKeyData = convertByteArrayToVector(m_authorizationData.fileEncryptionKey);
uint32_t objectNumber = qToLittleEndian(static_cast<uint32_t>(reference.objectNumber));
uint32_t generation = qToLittleEndian(static_cast<uint32_t>(reference.generation));
inputKeyData.insert(inputKeyData.cend(), { uint8_t(objectNumber & 0xFF), uint8_t((objectNumber >> 8) && 0xFF), uint8_t((objectNumber >> 16) && 0xFF), uint8_t(generation & 0xFF), uint8_t((generation >> 8) && 0xFF), });
std::vector<uint8_t> objectEncryptionKey(MD5_DIGEST_LENGTH, uint8_t(0));
MD5(inputKeyData.data(), inputKeyData.size(), objectEncryptionKey.data());
// Use up to (n + 5) bytes, maximally 16, from the digest as object encryption key
size_t objectEncryptionKeySize = qMin(filter.keyLength + 5, MD5_DIGEST_LENGTH);
objectEncryptionKey.resize(objectEncryptionKeySize);
decryptedData.resize(data.size());
RC4_KEY key = { };
RC4_set_key(&key, static_cast<int>(objectEncryptionKey.size()), objectEncryptionKey.data());
RC4(&key, data.size(), convertByteArrayToUcharPtr(data), convertByteArrayToUcharPtr(decryptedData));
break;
}
case CryptFilterType::AESV2: // Use file encryption key for AES algorithm
{
std::vector<uint8_t> inputKeyData = convertByteArrayToVector(m_authorizationData.fileEncryptionKey);
uint32_t objectNumber = qToLittleEndian(static_cast<uint32_t>(reference.objectNumber));
uint32_t generation = qToLittleEndian(static_cast<uint32_t>(reference.generation));
inputKeyData.insert(inputKeyData.cend(), { uint8_t(objectNumber & 0xFF), uint8_t((objectNumber >> 8) && 0xFF), uint8_t((objectNumber >> 16) && 0xFF), uint8_t(generation & 0xFF), uint8_t((generation >> 8) && 0xFF), 0x73, 0x41, 0x6C, 0x54 });
std::vector<uint8_t> objectEncryptionKey(MD5_DIGEST_LENGTH, uint8_t(0));
MD5(inputKeyData.data(), inputKeyData.size(), objectEncryptionKey.data());
// For AES algorithm, always use 16 bytes key (128 bit encryption mode)
AES_KEY key = { };
AES_set_decrypt_key(objectEncryptionKey.data(), static_cast<int>(objectEncryptionKey.size()) * 8, &key);
AES_data aes_data = prepareAES_data(data);
if (!aes_data.paddedData.isEmpty())
{
decryptedData.resize(aes_data.paddedData.size());
AES_cbc_encrypt(convertByteArrayToUcharPtr(aes_data.paddedData), convertByteArrayToUcharPtr(decryptedData), aes_data.paddedData.length(), &key, convertByteArrayToUcharPtr(aes_data.initializationVector), AES_DECRYPT);
decryptedData = decryptedData.left(data.length() - AES_BLOCK_SIZE);
}
break;
}
case CryptFilterType::AESV3: // Use file encryption key for AES 256 bit algorithm
{
Q_ASSERT(m_authorizationData.fileEncryptionKey.size() == 32);
AES_KEY key = { };
AES_set_decrypt_key(convertByteArrayToUcharPtr(m_authorizationData.fileEncryptionKey.data()), static_cast<int>(m_authorizationData.fileEncryptionKey.size()) * 8, &key);
AES_data aes_data = prepareAES_data(data);
if (!aes_data.paddedData.isEmpty())
{
decryptedData.resize(aes_data.paddedData.size());
AES_cbc_encrypt(convertByteArrayToUcharPtr(aes_data.paddedData), convertByteArrayToUcharPtr(decryptedData), aes_data.paddedData.length(), &key, convertByteArrayToUcharPtr(aes_data.initializationVector), AES_DECRYPT);
decryptedData = decryptedData.left(data.length() - AES_BLOCK_SIZE);
}
break;
}
case CryptFilterType::Identity: // Don't decrypt anything, use identity function
{
decryptedData = data;
break;
}
}
return decryptedData;
}
QByteArray PDFStandardSecurityHandler::decrypt(const QByteArray& data, PDFObjectReference reference, PDFSecurityHandler::EncryptionScope encryptionScope) const
{
CryptFilter filter = m_filterDefault;
switch (encryptionScope)
{
case EncryptionScope::String:
{
if (m_filterStrings.type != CryptFilterType::None)
{
filter = m_filterStrings;
}
break;
}
case EncryptionScope::Stream:
{
if (m_filterStreams.type != CryptFilterType::None)
{
filter = m_filterStreams;
}
break;
}
case EncryptionScope::EmbeddedFile:
{
if (m_filterEmbeddedFiles.type != CryptFilterType::None)
{
filter = m_filterEmbeddedFiles;
}
break;
}
}
return decryptUsingFilter(data, filter, reference);
}
QByteArray PDFStandardSecurityHandler::createFileEncryptionKey(const QByteArray& password) const
{
QByteArray result;

View File

@ -63,6 +63,7 @@ struct CryptFilter
{
CryptFilterType type = CryptFilterType::None;
AuthEvent authEvent = AuthEvent::DocOpen;
int keyLength = 0; ///< Key length in bytes
};
class PDFSecurityHandler;
@ -82,9 +83,44 @@ public:
Cancelled
};
enum class EncryptionScope
{
String,
Stream,
EmbeddedFile
};
/// Retrieve encryption mode (none/standard encryption/custom)
virtual EncryptionMode getMode() const = 0;
/// Performs authentication of the document content access. First, algorithm should check,
/// if empty password allows document access (so, for example, only owner password is provided).
/// If this fails, function \p getPasswordCallback is called to retrieve user entered password.
/// This callback function also has pointer to bool parameter, which sets to false, if user wants
/// to cancel the authentication (and \p Cancelled authentication result is returned) or true,
/// to try provided password.
/// \param getPasswordCallback Callback to get user password
/// \returns Result of authentication
virtual AuthorizationResult authenticate(const std::function<QString(bool*)>& getPasswordCallback) = 0;
/// Decrypts the PDF object. This function works properly only (and only if)
/// \p authenticate function returns user/owner authorization code.
/// \param object Object to be decrypted
/// \param reference Reference of indirect object (some algorithms require to generate key also from reference)
/// \returns Decrypted object
PDFObject decryptObject(const PDFObject& object, PDFObjectReference reference) const;
/// Decrypts the PDF object data. This function works properly only (and only if)
/// \p authenticate function returns user/owner authorization code.
/// \param data Data to be decrypted
/// \param reference Reference of indirect object (some algorithms require to generate key also from reference)
/// \param encryptionScope Scope of the encryption (if it is string/stream/...)
/// \returns Decrypted object data
virtual QByteArray decrypt(const QByteArray& data, PDFObjectReference reference, EncryptionScope encryptionScope) const = 0;
/// Returns true, if metadata are encrypted
virtual bool isMetadataEncrypted() const = 0;
/// Creates a security handler from the object. If object is null, then
/// "None" security handler is created. If error occurs, then exception is thrown.
/// \param encryptionDictionaryObject Encryption dictionary object
@ -96,13 +132,15 @@ protected:
/// PDF specification. Other values are invalid.
int m_V = 0;
/// Length of the key to encrypt/decrypt the document in bits. Only valid
/// for V = 2 or V = 3, otherwise it is invalid.
/// Length of the key to encrypt/decrypt the document in bits.
int m_keyLength = 40;
/// Map containing crypt filters.
std::map<QByteArray, CryptFilter> m_cryptFilters;
/// Default filter
CryptFilter m_filterDefault;
/// Crypt filter for decrypting strings
CryptFilter m_filterStrings;
@ -119,6 +157,8 @@ class PDFNoneSecurityHandler : public PDFSecurityHandler
public:
virtual EncryptionMode getMode() const { return EncryptionMode::None; }
virtual AuthorizationResult authenticate(const std::function<QString(bool*)>&) override { return AuthorizationResult::OwnerAuthorized; }
virtual QByteArray decrypt(const QByteArray& data, PDFObjectReference, EncryptionScope) const override { return data; }
virtual bool isMetadataEncrypted() const override { return true; }
};
/// Specifies the security using standard security handler (see PDF specification
@ -128,6 +168,8 @@ class PDFStandardSecurityHandler : public PDFSecurityHandler
public:
virtual EncryptionMode getMode() const { return EncryptionMode::Standard; }
virtual AuthorizationResult authenticate(const std::function<QString(bool*)>& getPasswordCallback) override;
virtual QByteArray decrypt(const QByteArray& data, PDFObjectReference reference, EncryptionScope encryptionScope) const override;
virtual bool isMetadataEncrypted() const override { return m_encryptMetadata; }
struct AuthorizationData
{
@ -179,6 +221,13 @@ private:
/// Adjusts the password according to the PDF specification
QByteArray adjustPassword(const QString& password);
/// Decrypts data using specified filter. This function can be called only, if authorization was successfull.
/// \param data Data to be decrypted
/// \param filter Filter to be used for decryption
/// \param reference Object reference for key generation
/// \returns Decrypted data
QByteArray decryptUsingFilter(const QByteArray& data, CryptFilter filter, PDFObjectReference reference) const;
/// Returns true, if character with unicode code is non-ascii space character
/// according the RFC 3454, section C.1.2
/// \param unicode Unicode code to be tested

View File

@ -24,6 +24,8 @@
#include <QByteArray>
#include <QDataStream>
#include <vector>
namespace pdf
{
@ -110,6 +112,24 @@ static inline constexpr PDFReal interpolate(PDFReal x, PDFReal x_min, PDFReal x_
return y_min + (x - x_min) * (y_max - y_min) / (x_max - x_min);
}
inline
std::vector<uint8_t> convertByteArrayToVector(const QByteArray& data)
{
return std::vector<uint8_t>(reinterpret_cast<const uint8_t*>(data.constData()), reinterpret_cast<const uint8_t*>(data.constData()) + data.size());
}
inline
const unsigned char* convertByteArrayToUcharPtr(const QByteArray& data)
{
return reinterpret_cast<const unsigned char*>(data.constData());
}
inline
unsigned char* convertByteArrayToUcharPtr(QByteArray& data)
{
return reinterpret_cast<unsigned char*>(data.data());
}
} // namespace pdf
#endif // PDFUTILS_H