PDF4QT/Pdf4QtLib/sources/pdfstreamfilters.cpp

991 lines
31 KiB
C++
Raw Normal View History

2021-04-30 20:12:10 +02:00
// Copyright (C) 2018-2021 Jakub Melka
2018-12-29 18:22:13 +01:00
//
2020-12-20 19:03:58 +01:00
// This file is part of Pdf4Qt.
2018-12-29 18:22:13 +01:00
//
2020-12-20 19:03:58 +01:00
// Pdf4Qt is free software: you can redistribute it and/or modify
2018-12-29 18:22:13 +01:00
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
2021-04-30 20:12:10 +02:00
// with the written consent of the copyright owner, any later version.
2018-12-29 18:22:13 +01:00
//
2020-12-20 19:03:58 +01:00
// Pdf4Qt is distributed in the hope that it will be useful,
2018-12-29 18:22:13 +01:00
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
2020-12-20 19:03:58 +01:00
// along with Pdf4Qt. If not, see <https://www.gnu.org/licenses/>.
2018-12-29 18:22:13 +01:00
#include "pdfstreamfilters.h"
2019-04-29 17:03:19 +02:00
#include "pdfexception.h"
2019-06-28 18:11:05 +02:00
#include "pdfconstants.h"
2018-12-29 18:22:13 +01:00
#include "pdfparser.h"
2019-08-13 15:48:01 +02:00
#include "pdfsecurityhandler.h"
#include "pdfutils.h"
#include <zlib.h>
2018-12-29 18:22:13 +01:00
#include <QtEndian>
namespace pdf
{
2019-08-13 15:48:01 +02:00
/// Decodes data encoded by the ASCIIHexDecode filter. Data are read up to
/// the end-of-data marker '>' (or up to the end of the array, if the marker
/// is missing). Characters, which are not hexadecimal digits (for example
/// whitespace), are ignored. If the number of hexadecimal digits is odd,
/// then the last digit is treated as if it was followed by '0'.
/// \param data Encoded data
/// \param objectFetcher Unused by this filter
/// \param parameters Unused by this filter
/// \param securityHandler Unused by this filter
/// \returns Decoded byte array
QByteArray PDFAsciiHexDecodeFilter::apply(const QByteArray& data,
                                          const PDFObjectFetcher& objectFetcher,
                                          const PDFObject& parameters,
                                          const PDFSecurityHandler* securityHandler) const
{
    Q_UNUSED(objectFetcher);
    Q_UNUSED(parameters);
    Q_UNUSED(securityHandler);

    auto isHexDigit = [](char character) -> bool
    {
        return (character >= '0' && character <= '9') ||
               (character >= 'a' && character <= 'f') ||
               (character >= 'A' && character <= 'F');
    };

    // Only the data before the end-of-data marker belong to the stream
    const auto indexOfEnd = data.indexOf('>');
    const auto size = (indexOfEnd == -1) ? data.size() : indexOfEnd;

    // Collect hexadecimal digits only. The decision, whether the trailing zero
    // must be added, has to be based on the count of the digits, not on the raw
    // length of the data (which can contain whitespace between the digits).
    QByteArray hexDigits;
    hexDigits.reserve(size + 1);

    const char* const begin = data.constData();
    const char* const end = begin + size;
    for (const char* it = begin; it != end; ++it)
    {
        if (isHexDigit(*it))
        {
            hexDigits.push_back(*it);
        }
    }

    if (hexDigits.size() % 2 == 1)
    {
        // Odd count of digits - the last digit is followed by implicit zero
        hexDigits.push_back('0');
    }

    return QByteArray::fromHex(hexDigits);
}
2019-08-13 15:48:01 +02:00
/// Decodes data encoded by the ASCII85Decode filter. Whitespace is skipped,
/// reading stops at the end of the data or at the character '~'. Character 'z'
/// at the start of a group produces four zero bytes. Incomplete last group
/// is padded by the highest digit and produces one byte less, than is the number
/// of characters in the group.
/// \param data Encoded data
/// \param objectFetcher Unused by this filter
/// \param parameters Unused by this filter
/// \param securityHandler Unused by this filter
/// \returns Decoded byte array
QByteArray PDFAscii85DecodeFilter::apply(const QByteArray& data,
                                         const PDFObjectFetcher& objectFetcher,
                                         const PDFObject& parameters,
                                         const PDFSecurityHandler* securityHandler) const
{
    Q_UNUSED(objectFetcher);
    Q_UNUSED(parameters);
    Q_UNUSED(securityHandler);

    constexpr uint32_t END_OF_DATA = 0xFFFFFFFF;

    const unsigned char* cursor = reinterpret_cast<const unsigned char*>(data.constData());
    const unsigned char* const inputEnd = cursor + data.size();

    // Returns next non-whitespace character, or END_OF_DATA, if the input is
    // exhausted, or the end-of-data marker is found (cursor then stays in place,
    // so repeated calls return END_OF_DATA again).
    auto nextSymbol = [&cursor, inputEnd]() -> uint32_t
    {
        while (cursor != inputEnd && PDFLexicalAnalyzer::isWhitespace(*cursor))
        {
            ++cursor;
        }

        if (cursor == inputEnd || (*cursor == '~'))
        {
            return END_OF_DATA;
        }

        return *cursor++;
    };

    QByteArray decoded;
    decoded.reserve(data.size() * 4 / 5);

    for (uint32_t firstSymbol = nextSymbol(); firstSymbol != END_OF_DATA; firstSymbol = nextSymbol())
    {
        if (firstSymbol == 'z')
        {
            // Shortcut for group of four zero bytes
            decoded.append(4, static_cast<char>(0));
            continue;
        }

        // Digits of the group in base 85; missing digits of an incomplete
        // group keep the padding value 84 (character 'u').
        std::array<uint32_t, 5> digits = { firstSymbol - 33, 84, 84, 84, 84 };

        int outputByteCount = 0;
        for (std::size_t digitIndex = 1; digitIndex < digits.size(); ++digitIndex)
        {
            const uint32_t symbol = nextSymbol();
            if (symbol == END_OF_DATA)
            {
                break;
            }

            digits[digitIndex] = symbol - 33;
            ++outputByteCount;
        }

        // Combine the digits into single 32-bit number
        uint32_t packed = 0;
        for (const uint32_t digit : digits)
        {
            packed = packed * 85 + digit;
        }

        // Split the number into four bytes, most significant byte first
        const std::array<char, 4> bytes =
        {
            static_cast<char>((packed >> 24) & 0xFF),
            static_cast<char>((packed >> 16) & 0xFF),
            static_cast<char>((packed >> 8) & 0xFF),
            static_cast<char>(packed & 0xFF)
        };

        Q_ASSERT(outputByteCount <= static_cast<int>(bytes.size()));
        decoded.append(bytes.data(), outputByteCount);
    }

    return decoded;
}
/// Decoder of the LZW compressed data (used by the LZWDecode filter). The decoder
/// reads variable width codes from the input byte array and expands them into
/// byte sequences using a code table. The decoder stores only a reference to the
/// input byte array, so the array must outlive the decoder.
class PDFLzwStreamDecoder
{
public:
/// Constructs the decoder.
/// \param inputByteArray Compressed data (stored by reference, not copied)
/// \param early Value stored into m_early; it is added to the next code value,
///        when the decoder decides, if the code width should be increased
explicit PDFLzwStreamDecoder(const QByteArray& inputByteArray, uint32_t early);
/// Decompresses the whole input and returns the decoded data. Throws
/// PDFException, if a code, which is not in the table yet, is found.
QByteArray decompress();
private:
// Code, which resets the code table to its initial state
static constexpr const uint32_t CODE_TABLE_RESET = 256;
// Code, which marks end of the data (also returned by getCode, when input is exhausted)
static constexpr const uint32_t CODE_END_OF_STREAM = 257;
// Maximal code size is 12 bits. so we can have 2^12 = 4096 items
// in the table (some items are unused, for example 256, 257). We also
// need to initialize items under code 256, because we treat them specially,
// they are not initialized in the decompress.
static constexpr const uint32_t TABLE_SIZE = 4096;
/// Clears the input data table
void clearTable();
/// Returns a newly scanned code
uint32_t getCode();
/// Single item of the code table. Items form a linked list (from the last
/// character of the sequence to the first one); value TABLE_SIZE in the
/// member 'previous' terminates the list.
struct TableItem
{
uint32_t previous = TABLE_SIZE;
char character = 0;
};
std::array<TableItem, TABLE_SIZE> m_table; ///< Code table
std::array<char, TABLE_SIZE> m_sequence; ///< Buffer for the sequence decoded from the last code
uint32_t m_nextCode; ///< Next code value (to be written into the table)
uint32_t m_nextBits; ///< Number of bits of the next code
uint32_t m_early; ///< Early (see PDF 1.7 Specification, this constant is 0 or 1, based on the dictionary value)
uint32_t m_inputBuffer; ///< Input buffer, containing bits, which were read from the input byte array
uint32_t m_inputBits; ///< Number of bits in the input buffer.
std::array<char, TABLE_SIZE>::iterator m_currentSequenceEnd; ///< End of the valid data in m_sequence
bool m_first; ///< Are we reading from stream for first time after the reset
char m_newCharacter; ///< New character to be written
int m_position; ///< Position in the input array
const QByteArray& m_inputByteArray; ///< Compressed input data (not owned)
};
/// Constructs the decoder over the given input. Table items for the single
/// byte codes (0 - 255) are initialized here, because clearTable() does not
/// touch the table content.
/// \param inputByteArray Compressed data (stored by reference)
/// \param early Value of the early change parameter
PDFLzwStreamDecoder::PDFLzwStreamDecoder(const QByteArray& inputByteArray, uint32_t early) :
    m_table(),
    m_sequence(),
    m_nextCode(0),
    m_nextBits(0),
    m_early(early),
    m_inputBuffer(0),
    m_inputBits(0),
    m_currentSequenceEnd(m_sequence.begin()),
    m_first(false),
    m_newCharacter(0),
    m_position(0),
    m_inputByteArray(inputByteArray)
{
    // Each single byte code decodes to itself and has no predecessor
    for (uint32_t byteCode = 0; byteCode != 256; ++byteCode)
    {
        TableItem& item = m_table[byteCode];
        item.previous = TABLE_SIZE;
        item.character = static_cast<char>(byteCode);
    }

    // Sets the initial code value, code width and the "first code" flag
    clearTable();
}
/// Decompresses the input data. Codes are read by getCode() until the end
/// of stream code is returned. Code CODE_TABLE_RESET resets the decoder state.
/// \returns Decompressed data
/// \throws PDFException, if the stream contains a code, which can't be decoded
QByteArray PDFLzwStreamDecoder::decompress()
{
    QByteArray result;

    // Guess output byte array size - assume compress ratio is 2:1
    result.reserve(m_inputByteArray.size() * 2);

    uint32_t previousCode = TABLE_SIZE;
    while (true)
    {
        const uint32_t code = getCode();

        if (code == CODE_END_OF_STREAM)
        {
            // We are at end of stream
            break;
        }
        else if (code == CODE_TABLE_RESET)
        {
            // Just reset the table
            clearTable();
            continue;
        }

        // Normal operation code
        if (code < m_nextCode)
        {
            m_currentSequenceEnd = m_sequence.begin();

            for (uint32_t currentCode = code; currentCode != TABLE_SIZE; currentCode = m_table[currentCode].previous)
            {
                *m_currentSequenceEnd++ = m_table[currentCode].character;
            }

            // We must reverse the sequence, because we stored it in the
            // linked list, which we traversed from last to first item.
            std::reverse(m_sequence.begin(), m_currentSequenceEnd);
        }
        else if (code == m_nextCode && !m_first)
        {
            // We use the buffer from previous run, just add a new
            // character to the end. This is valid only if some sequence was
            // already decoded after the last reset. If we accepted this code
            // as the first one, the next iteration would create a table item,
            // which has itself as the predecessor, and traversal of such item
            // would never terminate (and would write behind the sequence buffer).
            *m_currentSequenceEnd++ = m_newCharacter;
        }
        else
        {
            // Unknown code
            throw PDFException(PDFTranslationContext::tr("Invalid code in the LZW stream."));
        }

        m_newCharacter = m_sequence.front();

        if (m_first)
        {
            // First code after the reset doesn't create a new table item
            m_first = false;
        }
        else
        {
            // Add a new word in the dictionary, if we have it
            if (m_nextCode < TABLE_SIZE)
            {
                m_table[m_nextCode].character = m_newCharacter;
                m_table[m_nextCode].previous = previousCode;
                ++m_nextCode;
            }

            // Change bit size of the code, if it is necessary
            switch (m_nextCode + m_early)
            {
                case 512:
                    m_nextBits = 10;
                    break;

                case 1024:
                    m_nextBits = 11;
                    break;

                case 2048:
                    m_nextBits = 12;
                    break;

                default:
                    break;
            }
        }

        previousCode = code;

        // Copy the decoded sequence to the output
        std::copy(m_sequence.begin(), m_currentSequenceEnd, std::back_inserter(result));
    }

    result.shrink_to_fit();
    return result;
}
/// Resets the decoder state to the state after the table reset code. Only
/// the scalar state is reset; content of the code table and content of the
/// sequence buffer are intentionally left as they are (for performance
/// reasons, correct input is assumed).
void PDFLzwStreamDecoder::clearTable()
{
    m_first = true;        // Next code is the first one after the reset
    m_newCharacter = 0;
    m_nextBits = 9;        // Codes start with 9 bits
    m_nextCode = 258;      // First free code (256 and 257 are reserved)
}
/// Reads next code of width m_nextBits from the input. If there is not
/// enough data in the input to read whole code, then CODE_END_OF_STREAM
/// is returned.
uint32_t PDFLzwStreamDecoder::getCode()
{
    // Refill the bit buffer by whole bytes, until it contains at least one code
    for (; m_inputBits < m_nextBits; m_inputBits += 8)
    {
        if (m_position == m_inputByteArray.size())
        {
            // End of the input array reached
            return CODE_END_OF_STREAM;
        }

        const unsigned char inputByte = static_cast<unsigned char>(m_inputByteArray[m_position++]);
        m_inputBuffer = (m_inputBuffer << 8) | inputByte;
    }

    // The code occupies the oldest m_nextBits unread bits of the buffer. Shift
    // drops the newer bits on the right (they remain in the buffer for the next
    // call), mask drops the already consumed bits on the left.
    m_inputBits -= m_nextBits;
    const uint32_t codeMask = ((1 << m_nextBits) - 1);
    return (m_inputBuffer >> m_inputBits) & codeMask;
}
2019-08-13 15:48:01 +02:00
/// Decodes data encoded by the LZWDecode filter. Entry 'EarlyChange' is read
/// from the parameter dictionary (default value is 1), predictor parameters
/// are processed by PDFStreamPredictor::createPredictor, and the predictor
/// is applied to the decompressed data.
/// \param data Encoded data
/// \param objectFetcher Function used to dereference objects
/// \param parameters Filter parameters (decode parameters dictionary, or null)
/// \param securityHandler Unused by this filter
/// \returns Decoded byte array
QByteArray PDFLzwDecodeFilter::apply(const QByteArray& data,
                                     const PDFObjectFetcher& objectFetcher,
                                     const PDFObject& parameters,
                                     const PDFSecurityHandler* securityHandler) const
{
    Q_UNUSED(securityHandler);

    uint32_t early = 1;

    const PDFObject& dereferencedParameters = objectFetcher(parameters);
    if (dereferencedParameters.isDictionary())
    {
        const PDFDictionary* dictionary = dereferencedParameters.getDictionary();

        // Entry 'Predictor' is not read here - all predictor parameters
        // are handled by PDFStreamPredictor::createPredictor below.
        const PDFObject& earlyChangeObject = objectFetcher(dictionary->get("EarlyChange"));
        if (earlyChangeObject.isInt())
        {
            early = static_cast<uint32_t>(earlyChangeObject.getInteger());
        }
    }

    PDFStreamPredictor predictor = PDFStreamPredictor::createPredictor(objectFetcher, parameters);
    PDFLzwStreamDecoder decoder(data, early);
    return predictor.apply(decoder.decompress());
}
2019-08-13 15:48:01 +02:00
/// Decodes data encoded by the FlateDecode filter. Data are decompressed
/// by function uncompress, and then the predictor created from the filter
/// parameters (see PDFStreamPredictor::createPredictor) is applied.
/// \param data Encoded data
/// \param objectFetcher Function used to dereference objects
/// \param parameters Filter parameters (decode parameters dictionary, or null)
/// \param securityHandler Unused by this filter
/// \returns Decoded byte array
QByteArray PDFFlateDecodeFilter::apply(const QByteArray& data,
                                       const PDFObjectFetcher& objectFetcher,
                                       const PDFObject& parameters,
                                       const PDFSecurityHandler* securityHandler) const
{
    Q_UNUSED(securityHandler);

    // Parameter dictionary is processed completely by the predictor factory,
    // this filter has no parameters of its own.
    PDFStreamPredictor predictor = PDFStreamPredictor::createPredictor(objectFetcher, parameters);
    return predictor.apply(uncompress(data));
}
2018-12-29 18:22:13 +01:00
2020-06-06 16:30:06 +02:00
/// Decompresses the data by function uncompress and compresses them again
/// using the compression level Z_BEST_COMPRESSION.
/// \param data Data compressed by flate method
/// \returns Recompressed data
/// \throws PDFException, if the data can't be decompressed, or if the compression fails
QByteArray PDFFlateDecodeFilter::recompress(const QByteArray& data)
{
    QByteArray plainData = uncompress(data);

    z_stream deflateStream = { };
    deflateStream.next_in = const_cast<Bytef*>(convertByteArrayToUcharPtr(plainData));
    deflateStream.avail_in = plainData.size();

    std::array<Bytef, 1024> chunk = { };

    int status = deflateInit(&deflateStream, Z_BEST_COMPRESSION);
    if (status != Z_OK)
    {
        throw PDFException(PDFTranslationContext::tr("Failed to initialize flate compression stream."));
    }

    // Compress by chunks; each pass offers the whole chunk buffer as the output
    // space and appends the produced bytes to the result.
    QByteArray compressedData;
    for (;;)
    {
        deflateStream.next_out = chunk.data();
        deflateStream.avail_out = static_cast<uInt>(chunk.size());

        status = deflate(&deflateStream, Z_FINISH);

        int producedBytes = int(chunk.size()) - deflateStream.avail_out;
        compressedData.append(reinterpret_cast<const char*>(chunk.data()), producedBytes);

        if (status != Z_OK)
        {
            break;
        }
    }

    // Message must be copied before the stream is released
    QString zlibMessage = deflateStream.msg ? QString::fromLatin1(deflateStream.msg) : QString();
    deflateEnd(&deflateStream);

    if (status != Z_STREAM_END)
    {
        if (zlibMessage.isEmpty())
        {
            zlibMessage = PDFTranslationContext::tr("zlib code: %1").arg(status);
        }

        throw PDFException(PDFTranslationContext::tr("Error decompressing by flate method: %1").arg(zlibMessage));
    }

    return compressedData;
}
/// Determines the length of the flate compressed data, which start at
/// the given offset. Data are decompressed (decompressed output is thrown
/// away) and the number of the consumed input bytes is returned.
/// \param data Buffer containing the compressed data
/// \param offset Offset of the first byte of the compressed data
/// \returns Number of input bytes consumed by the decompression, or -1, if
///          the offset is out of range, or the decompression doesn't reach
///          the end of the compressed stream
PDFInteger PDFFlateDecodeFilter::getStreamDataLength(const QByteArray& data, PDFInteger offset) const
{
    const bool isOffsetValid = (offset >= 0) && (offset < data.size());
    if (!isOffsetValid)
    {
        return -1;
    }

    z_stream inflateStream = { };
    inflateStream.next_in = const_cast<Bytef*>(convertByteArrayToUcharPtr(data) + offset);
    inflateStream.avail_in = data.size() - offset;

    std::array<Bytef, 1024> scratchBuffer = { };

    int status = inflateInit(&inflateStream);
    if (status != Z_OK)
    {
        return -1;
    }

    // Output is not needed, so the same scratch buffer is overwritten in each pass
    while (status == Z_OK)
    {
        inflateStream.next_out = scratchBuffer.data();
        inflateStream.avail_out = static_cast<uInt>(scratchBuffer.size());
        status = inflate(&inflateStream, Z_NO_FLUSH);
    }

    // Counter must be read before the stream is released
    const PDFInteger consumedBytes = inflateStream.total_in;
    inflateEnd(&inflateStream);

    if (status != Z_STREAM_END)
    {
        return -1;
    }

    return consumedBytes;
}
/// Decompresses data compressed by the flate method (zlib).
/// \param data Compressed data
/// \returns Decompressed data
/// \throws PDFException, if the decompression stream can't be initialized, or
///         if the decompression finishes with other result than Z_STREAM_END
QByteArray PDFFlateDecodeFilter::uncompress(const QByteArray& data)
{
    z_stream inflateStream = { };
    inflateStream.next_in = const_cast<Bytef*>(convertByteArrayToUcharPtr(data));
    inflateStream.avail_in = data.size();

    std::array<Bytef, 1024> chunk = { };

    int status = inflateInit(&inflateStream);
    if (status != Z_OK)
    {
        throw PDFException(PDFTranslationContext::tr("Failed to initialize flate decompression stream."));
    }

    // Decompress by chunks; each pass offers the whole chunk buffer as the output
    // space and appends the produced bytes to the result.
    QByteArray plainData;
    for (;;)
    {
        inflateStream.next_out = chunk.data();
        inflateStream.avail_out = static_cast<uInt>(chunk.size());

        status = inflate(&inflateStream, Z_NO_FLUSH);

        int producedBytes = int(chunk.size()) - inflateStream.avail_out;
        plainData.append(reinterpret_cast<const char*>(chunk.data()), producedBytes);

        if (status != Z_OK)
        {
            break;
        }
    }

    // Message must be copied before the stream is released
    QString zlibMessage = inflateStream.msg ? QString::fromLatin1(inflateStream.msg) : QString();
    inflateEnd(&inflateStream);

    if (status != Z_STREAM_END)
    {
        if (zlibMessage.isEmpty())
        {
            zlibMessage = PDFTranslationContext::tr("zlib code: %1").arg(status);
        }

        throw PDFException(PDFTranslationContext::tr("Error decompressing by flate method: %1").arg(zlibMessage));
    }

    return plainData;
}
2019-08-13 15:48:01 +02:00
/// Decodes data encoded by the RunLengthDecode filter. Each run starts with
/// a length byte: value 128 is the end of data marker, value n < 128 means,
/// that following n + 1 bytes are copied literally, and value n > 128 means,
/// that the following single byte is repeated 257 - n times. Truncated data
/// are handled gracefully - literal run is shortened to the available
/// bytes, and repeat run without the byte to be repeated is ignored.
/// \param data Encoded data
/// \param objectFetcher Unused by this filter
/// \param parameters Unused by this filter
/// \param securityHandler Unused by this filter
/// \returns Decoded byte array
QByteArray PDFRunLengthDecodeFilter::apply(const QByteArray& data,
                                           const PDFObjectFetcher& objectFetcher,
                                           const PDFObject& parameters,
                                           const PDFSecurityHandler* securityHandler) const
{
    Q_UNUSED(objectFetcher);
    Q_UNUSED(parameters);
    Q_UNUSED(securityHandler);

    QByteArray result;
    result.reserve(data.size() * 2);

    const char* it = data.constData();
    const char* const itEnd = it + data.size();

    while (it != itEnd)
    {
        const unsigned char current = static_cast<unsigned char>(*it++);
        if (current == 128)
        {
            // End of stream marker
            break;
        }
        else if (current < 128)
        {
            // Copy n + 1 characters from the input array literally. We must not
            // read behind the end of the input, if the last run is truncated.
            const int requestedCount = static_cast<int>(current) + 1;
            const auto availableCount = itEnd - it;
            const int count = (availableCount < requestedCount) ? static_cast<int>(availableCount) : requestedCount;

            if (count > 0)
            {
                result.append(it, count);
                it += count;
            }
        }
        else
        {
            if (it == itEnd)
            {
                // Byte to be repeated is missing (truncated data)
                break;
            }

            // Copy 257 - n copies of single character
            const int count = 257 - current;
            const char toBeCopied = *it++;
            result.append(count, toBeCopied);
        }
    }

    return result;
}
2019-02-09 18:40:56 +01:00
/// Returns the filter for the given name. The name is searched among
/// the registered filters first; if it is not found, then it is searched
/// among the abbreviations and the lookup is repeated with the full name.
/// \param filterName Name (or abbreviated name) of the filter
/// \returns Pointer to the filter, or nullptr, if the filter is not found
const PDFStreamFilter* PDFStreamFilterStorage::getFilter(const QByteArray& filterName)
{
    const PDFStreamFilterStorage* storage = getInstance();

    const auto filterIt = storage->m_filters.find(filterName);
    if (filterIt == storage->m_filters.cend())
    {
        // Not a full filter name, it can be an abbreviation
        const auto abbreviationIt = storage->m_abbreviations.find(filterName);
        if (abbreviationIt == storage->m_abbreviations.cend())
        {
            return nullptr;
        }

        return getFilter(abbreviationIt->second);
    }

    return filterIt->second.get();
}
2020-06-06 16:30:06 +02:00
/// Reads the filters and the filter parameters from the stream dictionary.
/// Filters are read from the entry PDF_STREAM_DICT_FILTER (or from
/// PDF_STREAM_DICT_FILE_FILTER, if the first one is not present), parameters
/// from PDF_STREAM_DICT_DECODE_PARMS (or PDF_STREAM_DICT_FDECODE_PARMS).
/// Unknown filter names are stored as nullptr. If the filter entry is
/// neither a name, nor an array of names, nor null, then the result is
/// marked as invalid and returned immediately. Otherwise the list of
/// the parameters is resized to the count of the filters, and both lists are
/// returned in the reversed order.
/// \param stream Stream
/// \param objectFetcher Function used to dereference objects
PDFStreamFilterStorage::StreamFilters PDFStreamFilterStorage::getStreamFilters(const PDFStream* stream, const PDFObjectFetcher& objectFetcher)
{
    const PDFDictionary* streamDictionary = stream->getDictionary();

    // Returns dereferenced value of the primary key, if the dictionary has it,
    // or value of the fallback key, if the dictionary has it, or null object.
    auto fetchFirstPresent = [streamDictionary, &objectFetcher](const auto& primaryKey, const auto& fallbackKey) -> PDFObject
    {
        if (streamDictionary->hasKey(primaryKey))
        {
            return objectFetcher(streamDictionary->get(primaryKey));
        }

        if (streamDictionary->hasKey(fallbackKey))
        {
            return objectFetcher(streamDictionary->get(fallbackKey));
        }

        return PDFObject();
    };

    const PDFObject filters = fetchFirstPresent(PDF_STREAM_DICT_FILTER, PDF_STREAM_DICT_FILE_FILTER);
    const PDFObject filterParameters = fetchFirstPresent(PDF_STREAM_DICT_DECODE_PARMS, PDF_STREAM_DICT_FDECODE_PARMS);

    StreamFilters result;

    // Collect the filters
    if (filters.isArray())
    {
        const PDFArray* filterArray = filters.getArray();
        for (size_t i = 0, count = filterArray->getCount(); i < count; ++i)
        {
            const PDFObject& filterNameObject = objectFetcher(filterArray->getItem(i));
            if (!filterNameObject.isName())
            {
                // Array item is not a name - filters collected so far are kept in the result
                result.valid = false;
                return result;
            }

            result.filterObjects.push_back(PDFStreamFilterStorage::getFilter(filterNameObject.getString()));
        }
    }
    else if (filters.isName())
    {
        result.filterObjects.push_back(PDFStreamFilterStorage::getFilter(filters.getString()));
    }
    else if (!filters.isNull())
    {
        result.valid = false;
        return result;
    }

    // Collect the filter parameters
    if (filterParameters.isArray())
    {
        const PDFArray* filterParameterArray = filterParameters.getArray();
        for (size_t i = 0, count = filterParameterArray->getCount(); i < count; ++i)
        {
            result.filterParameterObjects.push_back(objectFetcher(filterParameterArray->getItem(i)));
        }
    }
    else
    {
        // Single parameter object (can be also null) belongs to the first filter
        result.filterParameterObjects.push_back(filterParameters);
    }

    // Each filter must have its parameter object (missing ones are default constructed)
    result.filterParameterObjects.resize(result.filterObjects.size());

    // NOTE(review): both lists are reversed here, so a caller iterating from
    // the front processes the filter declared last in the dictionary first -
    // confirm that this is the order intended by the callers.
    std::reverse(result.filterObjects.begin(), result.filterObjects.end());
    std::reverse(result.filterParameterObjects.begin(), result.filterParameterObjects.end());

    return result;
}
2019-06-28 18:11:05 +02:00
2020-06-06 16:30:06 +02:00
/// Returns the content of the stream with the stream filters applied. Filters
/// are applied in the order, in which they are returned by getStreamFilters.
/// Filters, which are not found in the storage (null pointers), are skipped.
/// \param stream Stream
/// \param objectFetcher Function used to dereference objects
/// \param securityHandler Security handler passed to the filters
/// \returns Decoded data, or empty byte array, if the stream filters are invalid
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, const PDFObjectFetcher& objectFetcher, const PDFSecurityHandler* securityHandler)
{
    StreamFilters streamFilters = getStreamFilters(stream, objectFetcher);
    QByteArray decodedData = *stream->getContent();

    if (!streamFilters.valid)
    {
        // Stream filters are invalid
        return QByteArray();
    }

    const size_t filterCount = streamFilters.filterObjects.size();
    for (size_t filterIndex = 0; filterIndex < filterCount; ++filterIndex)
    {
        const PDFStreamFilter* filter = streamFilters.filterObjects[filterIndex];
        if (!filter)
        {
            // Unknown filter, data are passed unchanged to the next one
            continue;
        }

        const PDFObject& filterParameters = streamFilters.filterParameterObjects[filterIndex];
        decodedData = filter->apply(decodedData, objectFetcher, filterParameters, securityHandler);
    }

    return decodedData;
}
2019-08-13 15:48:01 +02:00
/// Returns the decoded content of the stream. This overload uses object
/// fetcher, which returns the passed object unchanged (nothing is dereferenced).
/// \param stream Stream
/// \param securityHandler Security handler passed to the filters
QByteArray PDFStreamFilterStorage::getDecodedStream(const PDFStream* stream, const PDFSecurityHandler* securityHandler)
{
    const auto identityFetcher = [](const PDFObject& object) -> const PDFObject& { return object; };
    return getDecodedStream(stream, identityFetcher, securityHandler);
}
/// Asks the filter of the given name for the length of the encoded data
/// starting at the given offset.
/// \param data Buffer containing the encoded data
/// \param filterName Name (or abbreviated name) of the filter
/// \param offset Offset of the first byte of the encoded data
/// \returns Value returned by the filter, or -1, if the filter is not found
PDFInteger PDFStreamFilterStorage::getStreamDataLength(const QByteArray& data, const QByteArray& filterName, PDFInteger offset)
{
    const PDFStreamFilter* filter = getFilter(filterName);
    if (!filter)
    {
        return -1;
    }

    return filter->getStreamDataLength(data, offset);
}
2019-02-09 18:40:56 +01:00
/// Constructs the storage - registers the filters implemented in this file
/// and the table of the abbreviated filter names. Abbreviations 'CCF' and 'DCT'
/// are translated to the full names, but no filter is registered here under
/// these names, so getFilter returns nullptr for them.
PDFStreamFilterStorage::PDFStreamFilterStorage()
{
    // Initialize map with the filters
    m_filters["ASCIIHexDecode"] = std::make_unique<PDFAsciiHexDecodeFilter>();
    m_filters["ASCII85Decode"] = std::make_unique<PDFAscii85DecodeFilter>();
    m_filters["LZWDecode"] = std::make_unique<PDFLzwDecodeFilter>();
    m_filters["FlateDecode"] = std::make_unique<PDFFlateDecodeFilter>();
    m_filters["RunLengthDecode"] = std::make_unique<PDFRunLengthDecodeFilter>();
    m_filters["Crypt"] = std::make_unique<PDFCryptFilter>();

    // Initialize map with the abbreviated names of the filters
    m_abbreviations["AHx"] = "ASCIIHexDecode";
    m_abbreviations["A85"] = "ASCII85Decode";
    m_abbreviations["LZW"] = "LZWDecode";
    m_abbreviations["Fl"] = "FlateDecode";
    m_abbreviations["RL"] = "RunLengthDecode";
    m_abbreviations["CCF"] = "CCITTFaxDecode"; // Name of the filter is spelled with double 'T'
    m_abbreviations["DCT"] = "DCTDecode";
}
/// Returns the single instance of the storage. The instance is a function
/// local static object, so it is constructed, when this function is called
/// for the first time.
const PDFStreamFilterStorage* PDFStreamFilterStorage::getInstance()
{
    static PDFStreamFilterStorage s_storage;
    return &s_storage;
}
2019-07-01 12:29:57 +02:00
/// Creates the predictor from the filter parameters. If the parameters are
/// not a dictionary, then default constructed predictor is returned. Otherwise
/// entries 'Predictor', 'Colors', 'BitsPerComponent' and 'Columns' are read;
/// missing (null) entry is replaced by its default value.
/// \param objectFetcher Function used to dereference objects
/// \param parameters Filter parameters
/// \throws PDFException, if some entry is not an integer, or is out of the allowed range
PDFStreamPredictor PDFStreamPredictor::createPredictor(const PDFObjectFetcher& objectFetcher, const PDFObject& parameters)
{
    const PDFObject& dereferencedParameters = objectFetcher(parameters);
    if (!dereferencedParameters.isDictionary())
    {
        return PDFStreamPredictor();
    }

    const PDFDictionary* dictionary = dereferencedParameters.getDictionary();

    // Reads integer entry of the dictionary and checks its range
    auto readBoundedInteger = [dictionary, &objectFetcher](const char* key, int min, int max, int defaultValue) -> int
    {
        const PDFObject& object = objectFetcher(dictionary->get(key));

        if (object.isNull())
        {
            return defaultValue;
        }

        if (!object.isInt())
        {
            throw PDFException(PDFTranslationContext::tr("Invalid property '%1' of the stream predictor parameters.").arg(QString::fromLatin1(key)));
        }

        const PDFInteger value = object.getInteger();
        if (value < min || value > max)
        {
            throw PDFException(PDFTranslationContext::tr("Property '%1' should be in range from %2 to %3.").arg(QString::fromLatin1(key)).arg(min).arg(max));
        }

        // Value is in the range <min, max>, so it fits into int
        return static_cast<int>(value);
    };

    const int predictor = readBoundedInteger("Predictor", 1, 15, 1);
    const int components = readBoundedInteger("Colors", 1, PDF_MAX_COLOR_COMPONENTS, 1);
    const int bitsPerComponent = readBoundedInteger("BitsPerComponent", 1, 16, 8);
    const int columns = readBoundedInteger("Columns", 1, std::numeric_limits<int>::max(), 1);

    return PDFStreamPredictor(static_cast<Predictor>(predictor), components, bitsPerComponent, columns);
}
/// Applies the predictor to the data. If no predictor is set, data are
/// returned unchanged. Predictor values 10 and above are handled by
/// the PNG predictor function.
/// \param data Data, on which the predictor is applied
/// \throws PDFException, if the predictor algorithm is not valid
QByteArray PDFStreamPredictor::apply(const QByteArray& data) const
{
    if (m_predictor == NoPredictor)
    {
        return data;
    }

    if (m_predictor == TIFF)
    {
        return applyTIFFPredictor(data);
    }

    if (m_predictor >= 10)
    {
        return applyPNGPredictor(data);
    }

    throw PDFException(PDFTranslationContext::tr("Invalid predictor algorithm."));
}
/// Applies PNG predictor to the data. Each row of the data starts with
/// a byte, which selects the predictor for the row (value of the byte plus
/// 10 is interpreted as Predictor), followed by m_stride bytes of the row.
/// If the last row is incomplete, missing bytes are treated as zeros.
/// \param data Data with the predictor bytes
/// \returns Decoded data (without the predictor bytes)
QByteArray PDFStreamPredictor::applyPNGPredictor(const QByteArray& data) const
{
    const int bytesPerPixel = (m_components * m_bitsPerComponent + 7) / 8;

    // Row buffers have bytesPerPixel extra zero bytes at the beginning. Byte at
    // index (i + bytesPerPixel) is the i-th byte of the row, so byte at index i is
    // its left neighbour (and zero for the first pixel, without any condition).
    const int rowBufferSize = m_stride + bytesPerPixel;
    std::vector<uint8_t> currentRow(rowBufferSize, 0);
    std::vector<uint8_t> previousRow(rowBufferSize, 0);

    QByteArray decodedData;
    decodedData.reserve(data.size());

    auto cursor = data.cbegin();
    const auto inputEnd = data.cend();

    // According to the PDF specification, incomplete line is completed. For this
    // reason, zero is returned, when the input is exhausted.
    auto nextByte = [&cursor, inputEnd]() -> uint8_t
    {
        if (cursor == inputEnd)
        {
            return 0;
        }

        return static_cast<uint8_t>(*cursor++);
    };

    while (cursor != inputEnd)
    {
        // Row starts with the predictor selector
        const Predictor rowPredictor = static_cast<Predictor>(nextByte() + 10);

        for (int column = 0; column < m_stride; ++column)
        {
            const uint8_t encodedByte = nextByte();
            const int targetIndex = column + bytesPerPixel;

            const int left = currentRow[column];
            const int upper = previousRow[targetIndex];
            const int upperLeft = previousRow[column];

            // Predicted value; zero for PNG_None and for unknown predictors
            int predicted = 0;
            if (rowPredictor == PNG_Sub)
            {
                predicted = left;
            }
            else if (rowPredictor == PNG_Up)
            {
                predicted = upper;
            }
            else if (rowPredictor == PNG_Average)
            {
                predicted = (upper + left) / 2;
            }
            else if (rowPredictor == PNG_Paeth)
            {
                // Select the neighbour, which is nearest to the estimate
                // left + upper - upperLeft (ties: left, then upper).
                const int estimate = left + upper - upperLeft;
                const int distanceLeft = std::abs(estimate - left);
                const int distanceUpper = std::abs(estimate - upper);
                const int distanceUpperLeft = std::abs(estimate - upperLeft);

                if (distanceLeft <= distanceUpper && distanceLeft <= distanceUpperLeft)
                {
                    predicted = left;
                }
                else if (distanceUpper <= distanceUpperLeft)
                {
                    predicted = upper;
                }
                else
                {
                    predicted = upperLeft;
                }
            }

            // Sum is truncated to single byte (arithmetic modulo 256)
            currentRow[targetIndex] = static_cast<uint8_t>(predicted + encodedByte);
            decodedData.push_back(static_cast<char>(currentRow[targetIndex]));
        }

        // Finished row becomes the upper row for the next one
        std::swap(currentRow, previousRow);
    }

    return decodedData;
}
/// Applies TIFF predictor to the data. Data are read as values of
/// m_bitsPerComponent bits; each value is added to the value of the same
/// component of the previous pixel in the row (the sum is masked by
/// the maximal value of the reader). The sums start from zero on each row.
/// \param data Data, on which the predictor is applied
/// \returns Decoded data
QByteArray PDFStreamPredictor::applyTIFFPredictor(const QByteArray& data) const
{
    PDFBitWriter writer(m_bitsPerComponent);
    PDFBitReader reader(&data, m_bitsPerComponent);
    writer.reserve(data.size());

    // Running sum for each color component of the current row
    std::vector<uint32_t> componentSums(m_components, 0);

    while (!reader.isAtEnd())
    {
        for (int column = 0; column < m_columns; ++column)
        {
            for (uint32_t& componentSum : componentSums)
            {
                componentSum = (componentSum + reader.read()) & reader.max();
                writer.write(componentSum);
            }
        }

        // Row is finished - reset the sums, and align both reader and writer
        componentSums.assign(componentSums.size(), 0);
        reader.alignToBytes();
        writer.finishLine();
    }

    return writer.takeByteArray();
}
2019-08-13 15:48:01 +02:00
/// Decrypts the data using the security handler. Name of the crypt filter
/// is read from the entry 'Name' of the parameter dictionary (identity filter
/// name is used, if the entry is not a name). Reference of the object is read
/// from the entry PDFSecurityHandler::OBJECT_REFERENCE_DICTIONARY_NAME; this
/// entry is intentionally not dereferenced, the reference itself is the value.
/// \param data Encrypted data
/// \param objectFetcher Function used to dereference objects
/// \param parameters Filter parameters
/// \param securityHandler Security handler, must not be null
/// \throws PDFException, if the security handler is not provided
QByteArray PDFCryptFilter::apply(const QByteArray& data,
                                 const PDFObjectFetcher& objectFetcher,
                                 const PDFObject& parameters,
                                 const PDFSecurityHandler* securityHandler) const
{
    if (securityHandler == nullptr)
    {
        throw PDFException(PDFTranslationContext::tr("Security handler required, but not provided."));
    }

    QByteArray cryptFilterName = PDFSecurityHandler::IDENTITY_FILTER_NAME;
    PDFObjectReference objectReference;

    const PDFObject& dereferencedParameters = objectFetcher(parameters);
    if (dereferencedParameters.isDictionary())
    {
        const PDFDictionary* dictionary = dereferencedParameters.getDictionary();

        const PDFObject& nameObject = objectFetcher(dictionary->get("Name"));
        if (nameObject.isName())
        {
            cryptFilterName = nameObject.getString();
        }

        const PDFObject& referenceObject = dictionary->get(PDFSecurityHandler::OBJECT_REFERENCE_DICTIONARY_NAME);
        if (referenceObject.isReference())
        {
            objectReference = referenceObject.getReference();
        }
    }

    return securityHandler->decryptByFilter(data, cryptFilterName, objectReference);
}
/// Default implementation for the filters, which are not able to determine
/// the length of the encoded data. Both parameters are ignored.
/// \returns Always -1
PDFInteger PDFStreamFilter::getStreamDataLength(const QByteArray& data, PDFInteger offset) const
{
    Q_UNUSED(offset);
    Q_UNUSED(data);

    return -1;
}
2018-12-29 18:22:13 +01:00
} // namespace pdf