diff --git a/PdfForQtLib/sources/pdfdocumentreader.cpp b/PdfForQtLib/sources/pdfdocumentreader.cpp index 4869862..187f7a8 100644 --- a/PdfForQtLib/sources/pdfdocumentreader.cpp +++ b/PdfForQtLib/sources/pdfdocumentreader.cpp @@ -268,6 +268,12 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer) // Now, we are ready to scan all objects std::for_each(std::execution::parallel_policy(), occupiedEntries.cbegin(), occupiedEntries.cend(), processEntry); + if (m_result != Result::OK) + { + // Do not proceed further, if document loading failed + return PDFDocument(); + } + // ------------------------------------------------------------------------------------------ // SECURITY - handle encrypted documents // ------------------------------------------------------------------------------------------ @@ -304,9 +310,11 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer) } } + PDFObjectReference encryptObjectReference; PDFObject encryptObject = trailerDictionary->get("Encrypt"); if (encryptObject.isReference()) { + encryptObjectReference = encryptObject.getReference(); PDFObjectReference encryptObjectReference = encryptObject.getReference(); if (static_cast(encryptObjectReference.objectNumber) < objects.size() && objects[encryptObjectReference.objectNumber].generation == encryptObjectReference.generation) { @@ -330,6 +338,33 @@ PDFDocument PDFDocumentReader::readFromBuffer(const QByteArray& buffer) throw PDFParserException(PDFTranslationContext::tr("Authorization failed. Bad password provided.")); } + // Now, decrypt the document, if we are authorized. We must also check, if we have to decrypt the object. 
+ // According to the PDF specification, following items are omitted from encryption: + // 1) Values for ID entry in the trailer dictionary + // 2) Any strings in Encrypt dictionary + // 3) String/streams in object streams (entire object streams are encrypted) + // 4) Hexadecimal strings in Contents key in signature dictionary + // + // Trailer dictionary is not decrypted, because PDF specification provides no algorithm to decrypt it, + // because it needs object number and generation for generating the decrypt key. So 1) is handled + // automatically. 2) is handled in the code below. 3) is handled also automatically, because we do not + // decipher object streams here. 4) must be handled in the security handler. + if (securityHandler->getMode() != EncryptionMode::None) + { + auto decryptEntry = [encryptObjectReference, &securityHandler, &objects](const PDFXRefTable::Entry& entry) + { + if (encryptObjectReference.objectNumber != 0 && encryptObjectReference == entry.reference) + { + // 2) - Encrypt dictionary + return; + } + + objects[entry.reference.objectNumber].object = securityHandler->decryptObject(objects[entry.reference.objectNumber].object, entry.reference); + }; + + std::for_each(std::execution::parallel_policy(), occupiedEntries.cbegin(), occupiedEntries.cend(), decryptEntry); + } + // ------------------------------------------------------------------------------------------ // SECURITY - security handler created // ------------------------------------------------------------------------------------------ diff --git a/PdfForQtLib/sources/pdfobject.h b/PdfForQtLib/sources/pdfobject.h index 86adcab..c6ef347 100644 --- a/PdfForQtLib/sources/pdfobject.h +++ b/PdfForQtLib/sources/pdfobject.h @@ -197,6 +197,7 @@ class PDFArray : public PDFObjectContent { public: inline constexpr PDFArray() = default; + inline PDFArray(std::vector&& objects) : m_objects(qMove(objects)) { } virtual ~PDFArray() override = default; virtual bool equals(const PDFObjectContent* other) 
const override; @@ -227,11 +228,11 @@ private: /// map, because dictionaries are usually small. class PDFDictionary : public PDFObjectContent { -private: +public: using DictionaryEntry = std::pair; -public: inline constexpr PDFDictionary() = default; + inline PDFDictionary(std::vector&& dictionary) : m_dictionary(qMove(dictionary)) { } virtual ~PDFDictionary() override = default; virtual bool equals(const PDFObjectContent* other) const override; diff --git a/PdfForQtLib/sources/pdfsecurityhandler.cpp b/PdfForQtLib/sources/pdfsecurityhandler.cpp index c8da405..31b8d30 100644 --- a/PdfForQtLib/sources/pdfsecurityhandler.cpp +++ b/PdfForQtLib/sources/pdfsecurityhandler.cpp @@ -18,6 +18,8 @@ #include "pdfsecurityhandler.h" #include "pdfexception.h" #include "pdfencoding.h" +#include "pdfvisitor.h" +#include "pdfutils.h" #include #include @@ -37,6 +39,153 @@ static constexpr std::array PDFPasswordPadding = { 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A }; +class PDFDecryptObjectVisitor : public PDFAbstractVisitor +{ +public: + explicit PDFDecryptObjectVisitor(const PDFSecurityHandler* securityHandler, PDFObjectReference reference) : + m_securityHandler(securityHandler), + m_reference(reference) + { + m_objectStack.reserve(32); + } + + virtual void visitNull() override; + virtual void visitBool(bool value) override; + virtual void visitInt(PDFInteger value) override; + virtual void visitReal(PDFReal value) override; + virtual void visitString(const PDFString* string) override; + virtual void visitName(const PDFString* name) override; + virtual void visitArray(const PDFArray* array) override; + virtual void visitDictionary(const PDFDictionary* dictionary) override; + virtual void visitStream(const PDFStream* stream) override; + virtual void visitReference(const PDFObjectReference reference) override; + + PDFObject getDecryptedObject(); + +private: + const PDFSecurityHandler* m_securityHandler; + std::vector m_objectStack; + PDFObjectReference m_reference; +}; + + +void 
PDFDecryptObjectVisitor::visitNull() +{ + m_objectStack.push_back(PDFObject::createNull()); +} + +void PDFDecryptObjectVisitor::visitBool(bool value) +{ + m_objectStack.push_back(PDFObject::createBool(value)); +} + +void PDFDecryptObjectVisitor::visitInt(PDFInteger value) +{ + m_objectStack.push_back(PDFObject::createInteger(value)); +} + +void PDFDecryptObjectVisitor::visitReal(PDFReal value) +{ + m_objectStack.push_back(PDFObject::createReal(value)); +} + +void PDFDecryptObjectVisitor::visitString(const PDFString* string) +{ + m_objectStack.push_back(PDFObject::createString(std::make_shared(m_securityHandler->decrypt(string->getString(), m_reference, PDFSecurityHandler::EncryptionScope::String)))); +} + +void PDFDecryptObjectVisitor::visitName(const PDFString* name) +{ + m_objectStack.push_back(PDFObject::createName(std::make_shared(*name))); +} + +void PDFDecryptObjectVisitor::visitArray(const PDFArray* array) +{ + acceptArray(array); + + // We have all objects on the stack + Q_ASSERT(array->getCount() <= m_objectStack.size()); + + auto it = std::next(m_objectStack.cbegin(), m_objectStack.size() - array->getCount()); + std::vector objects(it, m_objectStack.cend()); + PDFObject object = PDFObject::createArray(std::make_shared(qMove(objects))); + m_objectStack.erase(it, m_objectStack.cend()); + m_objectStack.push_back(object); +} + +void PDFDecryptObjectVisitor::visitDictionary(const PDFDictionary* dictionary) +{ + Q_ASSERT(dictionary); + + // We must check, if it is or isn't a signature dictionary. If it is, + // then don't decrypt the Contents value. We also don't check, if signature + // isn't indirectly referenced by reference. Hope it isn't... 
+ const PDFObject& typeObject = dictionary->get("Type"); + bool isSignatureObject = (typeObject.isName() && typeObject.getString() == "Sig"); + + std::vector entries; + entries.reserve(dictionary->getCount()); + + for (size_t i = 0, count = dictionary->getCount(); i < count; ++i) + { + if (isSignatureObject && dictionary->getKey(i) == "Contents") + { + entries.emplace_back(dictionary->getKey(i), dictionary->getValue(i)); + } + else + { + dictionary->getValue(i).accept(this); + entries.emplace_back(dictionary->getKey(i), m_objectStack.back()); + m_objectStack.pop_back(); + } + } + + m_objectStack.push_back(PDFObject::createDictionary(std::make_shared(qMove(entries)))); +} + +void PDFDecryptObjectVisitor::visitStream(const PDFStream* stream) +{ + // Don't decrypt, if it is a Metadata stream and Metadata encryption is turned off + const PDFDictionary* dictionary = stream->getDictionary(); + + const PDFObject& typeObject = dictionary->get("Type"); + bool isMetadata = (typeObject.isName() && typeObject.getString() == "Metadata"); + + if (isMetadata && !m_securityHandler->isMetadataEncrypted()) + { + m_objectStack.push_back(PDFObject::createStream(std::make_shared(PDFDictionary(*dictionary), QByteArray(*stream->getContent())))); + return; + } + + // Decrypt the dictionary + visitDictionary(dictionary); + PDFObject dictionaryObject = m_objectStack.back(); + m_objectStack.pop_back(); + + // TODO: Handle Crypt filter + PDFDictionary decryptedDictionary(*dictionaryObject.getDictionary()); + QByteArray decryptedData = m_securityHandler->decrypt(*stream->getContent(), m_reference, PDFSecurityHandler::EncryptionScope::Stream); + m_objectStack.push_back(PDFObject::createStream(std::make_shared(qMove(decryptedDictionary), qMove(decryptedData)))); +} + +void PDFDecryptObjectVisitor::visitReference(const PDFObjectReference reference) +{ + m_objectStack.push_back(PDFObject::createReference(reference)); +} + +PDFObject PDFDecryptObjectVisitor::getDecryptedObject() +{ + 
Q_ASSERT(m_objectStack.size() == 1); + return qMove(m_objectStack.back()); +} + +PDFObject PDFSecurityHandler::decryptObject(const PDFObject& object, PDFObjectReference reference) const +{ + PDFDecryptObjectVisitor visitor(this, reference); + object.accept(&visitor); + return visitor.getDecryptedObject(); +} + PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObject& encryptionDictionaryObject, const QByteArray& id) { if (encryptionDictionaryObject.isNull()) @@ -145,7 +294,7 @@ PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObj const PDFObject& cryptFilterObjects = dictionary->get("CF"); if (cryptFilterObjects.isDictionary()) { - auto parseCryptFilter = [&getName](const PDFObject& object) -> CryptFilter + auto parseCryptFilter = [Length, &getName, &getInt](const PDFObject& object) -> CryptFilter { if (!object.isDictionary()) { @@ -191,6 +340,8 @@ PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObj throw PDFParserException(PDFTranslationContext::tr("Unsupported authorization event '%1'.").arg(QString::fromLatin1(authEventName))); } + filter.keyLength = getInt(cryptFilterDictionary, "Length", false, Length / 8); + return filter; }; @@ -236,6 +387,10 @@ PDFSecurityHandlerPointer PDFSecurityHandler::createSecurityHandler(const PDFObj } handler.m_R = R; + handler.m_filterDefault.authEvent = AuthEvent::DocOpen; + handler.m_filterDefault.keyLength = Length / 8; + handler.m_filterDefault.type = (R > 4) ? 
CryptFilterType::AESV3 : CryptFilterType::V2; + auto readByteArray = [dictionary](const char* key, int size) { QByteArray result; @@ -427,6 +582,169 @@ PDFSecurityHandler::AuthorizationResult PDFStandardSecurityHandler::authenticate return AuthorizationResult::Cancelled; } +QByteArray PDFStandardSecurityHandler::decryptUsingFilter(const QByteArray& data, CryptFilter filter, PDFObjectReference reference) const +{ + QByteArray decryptedData; + + Q_ASSERT(m_authorizationData.isAuthorized()); + + struct AES_data + { + QByteArray initializationVector; + QByteArray paddedData; + }; + + auto prepareAES_data = [](const QByteArray& data) + { + AES_data result; + + result.initializationVector = data.left(AES_BLOCK_SIZE); + + // This is an error. But to handle it, we resize the vector + // with arbitrary data. + if (result.initializationVector.size() < AES_BLOCK_SIZE) + { + result.initializationVector.resize(AES_BLOCK_SIZE); + } + + result.paddedData = data.mid(AES_BLOCK_SIZE); + + // Add padding remainder according to the specification + int size = result.paddedData.size(); + int paddingRemainder = AES_BLOCK_SIZE - (size % AES_BLOCK_SIZE); + + for (int i = 0; i < paddingRemainder; ++i) + { + result.paddedData.push_back(paddingRemainder); + } + + return result; + }; + + switch (filter.type) + { + case CryptFilterType::None: // The application shall decrypt the data using the security handler + { + // This shouldn't occur, because in case the used filter has None value, then default filter + // is used and default filter can't have this value. 
+ Q_ASSERT(false); + break; + } + + case CryptFilterType::V2: // Use file encryption key for RC4 algorithm; object key is MD5 of the file key extended by the low 3 bytes of object number and low 2 bytes of generation + { + std::vector inputKeyData = convertByteArrayToVector(m_authorizationData.fileEncryptionKey); + uint32_t objectNumber = qToLittleEndian(static_cast(reference.objectNumber)); + uint32_t generation = qToLittleEndian(static_cast(reference.generation)); + inputKeyData.insert(inputKeyData.cend(), { uint8_t(objectNumber & 0xFF), uint8_t((objectNumber >> 8) & 0xFF), uint8_t((objectNumber >> 16) & 0xFF), uint8_t(generation & 0xFF), uint8_t((generation >> 8) & 0xFF), }); + std::vector objectEncryptionKey(MD5_DIGEST_LENGTH, uint8_t(0)); + MD5(inputKeyData.data(), inputKeyData.size(), objectEncryptionKey.data()); + + // Use up to (n + 5) bytes, maximally 16, from the digest as object encryption key + size_t objectEncryptionKeySize = qMin(filter.keyLength + 5, MD5_DIGEST_LENGTH); + objectEncryptionKey.resize(objectEncryptionKeySize); + + decryptedData.resize(data.size()); + + RC4_KEY key = { }; + RC4_set_key(&key, static_cast(objectEncryptionKey.size()), objectEncryptionKey.data()); + RC4(&key, data.size(), convertByteArrayToUcharPtr(data), convertByteArrayToUcharPtr(decryptedData)); + + break; + } + + case CryptFilterType::AESV2: // Use file encryption key for AES algorithm; same key input as V2 plus the four bytes 0x73 0x41 0x6C 0x54 ("sAlT") + { + std::vector inputKeyData = convertByteArrayToVector(m_authorizationData.fileEncryptionKey); + uint32_t objectNumber = qToLittleEndian(static_cast(reference.objectNumber)); + uint32_t generation = qToLittleEndian(static_cast(reference.generation)); + inputKeyData.insert(inputKeyData.cend(), { uint8_t(objectNumber & 0xFF), uint8_t((objectNumber >> 8) & 0xFF), uint8_t((objectNumber >> 16) & 0xFF), uint8_t(generation & 0xFF), uint8_t((generation >> 8) & 0xFF), 0x73, 0x41, 0x6C, 0x54 }); + std::vector 
objectEncryptionKey(MD5_DIGEST_LENGTH, uint8_t(0)); + MD5(inputKeyData.data(), inputKeyData.size(), objectEncryptionKey.data()); + + // For AES algorithm, always use 16 bytes key (128 bit 
encryption mode) + + AES_KEY key = { }; + AES_set_decrypt_key(objectEncryptionKey.data(), static_cast(objectEncryptionKey.size()) * 8, &key); + + AES_data aes_data = prepareAES_data(data); + if (!aes_data.paddedData.isEmpty()) + { + decryptedData.resize(aes_data.paddedData.size()); + AES_cbc_encrypt(convertByteArrayToUcharPtr(aes_data.paddedData), convertByteArrayToUcharPtr(decryptedData), aes_data.paddedData.length(), &key, convertByteArrayToUcharPtr(aes_data.initializationVector), AES_DECRYPT); + decryptedData = decryptedData.left(data.length() - AES_BLOCK_SIZE); + } + + break; + } + + case CryptFilterType::AESV3: // Use file encryption key for AES 256 bit algorithm + { + Q_ASSERT(m_authorizationData.fileEncryptionKey.size() == 32); + AES_KEY key = { }; + AES_set_decrypt_key(convertByteArrayToUcharPtr(m_authorizationData.fileEncryptionKey.data()), static_cast(m_authorizationData.fileEncryptionKey.size()) * 8, &key); + + AES_data aes_data = prepareAES_data(data); + if (!aes_data.paddedData.isEmpty()) + { + decryptedData.resize(aes_data.paddedData.size()); + AES_cbc_encrypt(convertByteArrayToUcharPtr(aes_data.paddedData), convertByteArrayToUcharPtr(decryptedData), aes_data.paddedData.length(), &key, convertByteArrayToUcharPtr(aes_data.initializationVector), AES_DECRYPT); + decryptedData = decryptedData.left(data.length() - AES_BLOCK_SIZE); + } + + break; + } + + case CryptFilterType::Identity: // Don't decrypt anything, use identity function + { + decryptedData = data; + break; + } + } + + return decryptedData; +} + +QByteArray PDFStandardSecurityHandler::decrypt(const QByteArray& data, PDFObjectReference reference, PDFSecurityHandler::EncryptionScope encryptionScope) const +{ + CryptFilter filter = m_filterDefault; + + switch (encryptionScope) + { + case EncryptionScope::String: + { + if (m_filterStrings.type != CryptFilterType::None) + { + filter = m_filterStrings; + } + + break; + } + + case EncryptionScope::Stream: + { + if (m_filterStreams.type != 
CryptFilterType::None) + { + filter = m_filterStreams; + } + + break; + } + + case EncryptionScope::EmbeddedFile: + { + if (m_filterEmbeddedFiles.type != CryptFilterType::None) + { + filter = m_filterEmbeddedFiles; + } + + break; + } + } + + return decryptUsingFilter(data, filter, reference); +} + QByteArray PDFStandardSecurityHandler::createFileEncryptionKey(const QByteArray& password) const { QByteArray result; diff --git a/PdfForQtLib/sources/pdfsecurityhandler.h b/PdfForQtLib/sources/pdfsecurityhandler.h index fd8167b..53a4309 100644 --- a/PdfForQtLib/sources/pdfsecurityhandler.h +++ b/PdfForQtLib/sources/pdfsecurityhandler.h @@ -63,6 +63,7 @@ struct CryptFilter { CryptFilterType type = CryptFilterType::None; AuthEvent authEvent = AuthEvent::DocOpen; + int keyLength = 0; ///< Key length in bytes }; class PDFSecurityHandler; @@ -82,9 +83,44 @@ public: Cancelled }; + enum class EncryptionScope + { + String, + Stream, + EmbeddedFile + }; + + /// Retrieve encryption mode (none/standard encryption/custom) virtual EncryptionMode getMode() const = 0; + + /// Performs authentication of the document content access. First, algorithm should check, + /// if empty password allows document access (so, for example, only owner password is provided). + /// If this fails, function \p getPasswordCallback is called to retrieve user entered password. + /// This callback function also has pointer to bool parameter, which is set to false, if user wants + /// to cancel the authentication (and \p Cancelled authentication result is returned) or true, + /// to try provided password. + /// \param getPasswordCallback Callback to get user password + /// \returns Result of authentication virtual AuthorizationResult authenticate(const std::function& getPasswordCallback) = 0; + /// Decrypts the PDF object. This function works properly if (and only if) + /// \p authenticate function returns user/owner authorization code. 
+ /// \param object Object to be decrypted + /// \param reference Reference of indirect object (some algorithms require to generate key also from reference) + /// \returns Decrypted object + PDFObject decryptObject(const PDFObject& object, PDFObjectReference reference) const; + + /// Decrypts the PDF object data. This function works properly if (and only if) + /// \p authenticate function returns user/owner authorization code. + /// \param data Data to be decrypted + /// \param reference Reference of indirect object (some algorithms require to generate key also from reference) + /// \param encryptionScope Scope of the encryption (if it is string/stream/...) + /// \returns Decrypted object data + virtual QByteArray decrypt(const QByteArray& data, PDFObjectReference reference, EncryptionScope encryptionScope) const = 0; + + /// Returns true, if metadata are encrypted + virtual bool isMetadataEncrypted() const = 0; + /// Creates a security handler from the object. If object is null, then /// "None" security handler is created. If error occurs, then exception is thrown. /// \param encryptionDictionaryObject Encryption dictionary object @@ -96,13 +132,15 @@ protected: /// PDF specification. Other values are invalid. int m_V = 0; - /// Length of the key to encrypt/decrypt the document in bits. Only valid - /// for V = 2 or V = 3, otherwise it is invalid. + /// Length of the key to encrypt/decrypt the document in bits. int m_keyLength = 40; /// Map containing crypt filters. 
std::map m_cryptFilters; + /// Default filter + CryptFilter m_filterDefault; + /// Crypt filter for decrypting strings CryptFilter m_filterStrings; @@ -119,6 +157,8 @@ class PDFNoneSecurityHandler : public PDFSecurityHandler public: virtual EncryptionMode getMode() const { return EncryptionMode::None; } virtual AuthorizationResult authenticate(const std::function&) override { return AuthorizationResult::OwnerAuthorized; } + virtual QByteArray decrypt(const QByteArray& data, PDFObjectReference, EncryptionScope) const override { return data; } + virtual bool isMetadataEncrypted() const override { return true; } }; /// Specifies the security using standard security handler (see PDF specification @@ -128,6 +168,8 @@ class PDFStandardSecurityHandler : public PDFSecurityHandler public: virtual EncryptionMode getMode() const { return EncryptionMode::Standard; } virtual AuthorizationResult authenticate(const std::function& getPasswordCallback) override; + virtual QByteArray decrypt(const QByteArray& data, PDFObjectReference reference, EncryptionScope encryptionScope) const override; + virtual bool isMetadataEncrypted() const override { return m_encryptMetadata; } struct AuthorizationData { @@ -179,6 +221,13 @@ private: /// Adjusts the password according to the PDF specification QByteArray adjustPassword(const QString& password); + /// Decrypts data using specified filter. This function can be called only, if authorization was successful. 
+ /// \param data Data to be decrypted + /// \param filter Filter to be used for decryption + /// \param reference Object reference for key generation + /// \returns Decrypted data + QByteArray decryptUsingFilter(const QByteArray& data, CryptFilter filter, PDFObjectReference reference) const; + /// Returns true, if character with unicode code is non-ascii space character /// according the RFC 3454, section C.1.2 /// \param unicode Unicode code to be tested diff --git a/PdfForQtLib/sources/pdfutils.h b/PdfForQtLib/sources/pdfutils.h index 9955a1b..8a9a31a 100644 --- a/PdfForQtLib/sources/pdfutils.h +++ b/PdfForQtLib/sources/pdfutils.h @@ -24,6 +24,8 @@ #include #include +#include + namespace pdf { @@ -110,6 +112,24 @@ static inline constexpr PDFReal interpolate(PDFReal x, PDFReal x_min, PDFReal x_ return y_min + (x - x_min) * (y_max - y_min) / (x_max - x_min); } +inline +std::vector convertByteArrayToVector(const QByteArray& data) +{ + return std::vector(reinterpret_cast(data.constData()), reinterpret_cast(data.constData()) + data.size()); +} + +inline +const unsigned char* convertByteArrayToUcharPtr(const QByteArray& data) +{ + return reinterpret_cast(data.constData()); +} + +inline +unsigned char* convertByteArrayToUcharPtr(QByteArray& data) +{ + return reinterpret_cast(data.data()); +} + } // namespace pdf #endif // PDFUTILS_H