Inplace strings - performance optimization

This commit is contained in:
Jakub Melka 2020-05-29 19:56:10 +02:00
parent 702ee98316
commit 91f1779053
12 changed files with 198 additions and 60 deletions

View File

@ -1104,19 +1104,19 @@ PDFColorSpacePointer PDFICCBasedColorSpace::createICCBasedColorSpace(const PDFDi
{
case 1:
{
alternateColorSpace = PDFAbstractColorSpace::createColorSpaceImpl(colorSpaceDictionary, document, PDFObject::createName(std::make_shared<PDFString>(std::move(QByteArray(COLOR_SPACE_NAME_DEVICE_GRAY)))), recursion);
alternateColorSpace = PDFAbstractColorSpace::createColorSpaceImpl(colorSpaceDictionary, document, PDFObject::createName(COLOR_SPACE_NAME_DEVICE_GRAY), recursion);
break;
}
case 3:
{
alternateColorSpace = PDFAbstractColorSpace::createColorSpaceImpl(colorSpaceDictionary, document, PDFObject::createName(std::make_shared<PDFString>(std::move(QByteArray(COLOR_SPACE_NAME_DEVICE_RGB)))), recursion);
alternateColorSpace = PDFAbstractColorSpace::createColorSpaceImpl(colorSpaceDictionary, document, PDFObject::createName(COLOR_SPACE_NAME_DEVICE_RGB), recursion);
break;
}
case 4:
{
alternateColorSpace = PDFAbstractColorSpace::createColorSpaceImpl(colorSpaceDictionary, document, PDFObject::createName(std::make_shared<PDFString>(std::move(QByteArray(COLOR_SPACE_NAME_DEVICE_CMYK)))), recursion);
alternateColorSpace = PDFAbstractColorSpace::createColorSpaceImpl(colorSpaceDictionary, document, PDFObject::createName(COLOR_SPACE_NAME_DEVICE_CMYK), recursion);
break;
}

View File

@ -82,8 +82,8 @@ public:
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(const PDFString* string) override;
virtual void visitName(const PDFString* name) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
@ -116,14 +116,14 @@ void PDFReplaceReferencesVisitor::visitReal(PDFReal value)
m_objectStack.push_back(PDFObject::createReal(value));
}
void PDFReplaceReferencesVisitor::visitString(const PDFString* string)
void PDFReplaceReferencesVisitor::visitString(PDFStringRef string)
{
m_objectStack.push_back(PDFObject::createString(std::make_shared<PDFString>(*string)));
m_objectStack.push_back(PDFObject::createString(string));
}
void PDFReplaceReferencesVisitor::visitName(const PDFString* name)
void PDFReplaceReferencesVisitor::visitName(PDFStringRef name)
{
m_objectStack.push_back(PDFObject::createName(std::make_shared<PDFString>(*name)));
m_objectStack.push_back(PDFObject::createName(name));
}
void PDFReplaceReferencesVisitor::visitArray(const PDFArray* array)
@ -355,7 +355,7 @@ PDFObjectFactory& PDFObjectFactory::operator<<(AnnotationBorderStyle style)
PDFObjectFactory& PDFObjectFactory::operator<<(const QDateTime& dateTime)
{
addObject(PDFObject::createString(std::make_shared<PDFString>(PDFEncoding::converDateTimeToString(dateTime))));
addObject(PDFObject::createString(PDFEncoding::converDateTimeToString(dateTime)));
return *this;
}
@ -417,7 +417,7 @@ PDFObjectFactory& PDFObjectFactory::operator<<(AnnotationLineEnding lineEnding)
PDFObjectFactory& PDFObjectFactory::operator<<(WrapString string)
{
addObject(PDFObject::createString(std::make_shared<PDFString>(qMove(string.string))));
addObject(PDFObject::createString(qMove(string.string)));
return *this;
}
@ -555,12 +555,12 @@ PDFObject PDFObjectFactory::createTextString(QString textString)
textStream << textString;
}
return PDFObject::createString(std::make_shared<PDFString>(qMove(ba)));
return PDFObject::createString(qMove(ba));
}
else
{
// Use PDF document encoding
return PDFObject::createString(std::make_shared<PDFString>(PDFEncoding::convertToEncoding(textString, PDFEncoding::Encoding::PDFDoc)));
return PDFObject::createString(PDFEncoding::convertToEncoding(textString, PDFEncoding::Encoding::PDFDoc));
}
}
@ -599,7 +599,7 @@ PDFObjectFactory& PDFObjectFactory::operator<<(WrapAnnotationColor color)
PDFObjectFactory& PDFObjectFactory::operator<<(WrapCurrentDateTime)
{
addObject(PDFObject::createString(std::make_shared<PDFString>(PDFEncoding::converDateTimeToString(QDateTime::currentDateTime()))));
addObject(PDFObject::createString(PDFEncoding::converDateTimeToString(QDateTime::currentDateTime())));
return *this;
}
@ -617,7 +617,7 @@ PDFObjectFactory& PDFObjectFactory::operator<<(int value)
PDFObjectFactory& PDFObjectFactory::operator<<(WrapName wrapName)
{
addObject(PDFObject::createName(std::make_shared<PDFString>(qMove(wrapName.name))));
addObject(PDFObject::createName(qMove(wrapName.name)));
return *this;
}

View File

@ -38,8 +38,8 @@ public:
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(const PDFString* string) override;
virtual void visitName(const PDFString* name) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
@ -85,9 +85,9 @@ void PDFWriteObjectVisitor::visitReal(PDFReal value)
m_device->write(" ");
}
void PDFWriteObjectVisitor::visitString(const PDFString* string)
void PDFWriteObjectVisitor::visitString(PDFStringRef string)
{
const QByteArray& data = string->getString();
QByteArray data = string.getString();
if (data.indexOf('(') != -1 ||
data.indexOf(')') != -1 ||
data.indexOf('\\') != -1)
@ -126,9 +126,9 @@ void PDFWriteObjectVisitor::writeName(const QByteArray& string)
m_device->write(" ");
}
void PDFWriteObjectVisitor::visitName(const PDFString* name)
void PDFWriteObjectVisitor::visitName(PDFStringRef name)
{
writeName(name->getString());
writeName(name.getString());
}
void PDFWriteObjectVisitor::visitArray(const PDFArray* array)

View File

@ -2078,7 +2078,7 @@ void PDFFormFieldCheckableButtonEditor::click()
parameters.invokingWidget = m_formWidget.getWidget();
parameters.invokingFormField = m_formWidget.getParent();
parameters.scope = PDFFormField::SetValueParameters::Scope::User;
parameters.value = PDFObject::createName(std::make_shared<PDFString>(qMove(newState)));
parameters.value = PDFObject::createName(qMove(newState));
m_formManager->setFormFieldValue(parameters);
}

View File

@ -1131,7 +1131,7 @@ PDF3DBackground PDF3DBackground::parse(const PDFObjectStorage* storage, PDFObjec
PDFObject colorSpace = dictionary->get("CS");
if (colorSpace.isNull())
{
colorSpace = PDFObject::createName(std::make_shared<PDFString>("DeviceRGB"));
colorSpace = PDFObject::createName("DeviceRGB");
}
std::vector<PDFReal> color = loader.readNumberArrayFromDictionary(dictionary, "C", { 1.0, 1.0, 1.0});
PDFArray array;

View File

@ -15,7 +15,6 @@
// You should have received a copy of the GNU Lesser General Public License
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
#include "pdfobject.h"
#include "pdfvisitor.h"
@ -24,11 +23,10 @@ namespace pdf
QByteArray PDFObject::getString() const
{
const PDFObjectContentPointer& objectContent = std::get<PDFObjectContentPointer>(m_data);
PDFStringRef stringRef = getStringObject();
Q_ASSERT(stringRef.inplaceString || stringRef.memoryString);
Q_ASSERT(dynamic_cast<PDFString*>(objectContent.get()));
const PDFString* string = static_cast<PDFString*>(objectContent.get());
return string->getString();
return stringRef.inplaceString ? stringRef.inplaceString->getString() : stringRef.memoryString->getString();
}
const PDFDictionary* PDFObject::getDictionary() const
@ -39,12 +37,19 @@ const PDFDictionary* PDFObject::getDictionary() const
return static_cast<const PDFDictionary*>(objectContent.get());
}
const PDFString* PDFObject::getStringObject() const
PDFStringRef PDFObject::getStringObject() const
{
const PDFObjectContentPointer& objectContent = std::get<PDFObjectContentPointer>(m_data);
if (std::holds_alternative<PDFInplaceString>(m_data))
{
return { &std::get<PDFInplaceString>(m_data) , nullptr };
}
else
{
const PDFObjectContentPointer& objectContent = std::get<PDFObjectContentPointer>(m_data);
Q_ASSERT(dynamic_cast<const PDFString*>(objectContent.get()));
return static_cast<const PDFString*>(objectContent.get());
Q_ASSERT(dynamic_cast<const PDFString*>(objectContent.get()));
return { nullptr, static_cast<const PDFString*>(objectContent.get()) };
}
}
const PDFStream* PDFObject::getStream() const
@ -135,6 +140,54 @@ void PDFObject::accept(PDFAbstractVisitor* visitor) const
}
}
/// Creates a name object from raw bytes. A name of at most
/// PDFInplaceString::MAX_STRING_SIZE bytes is stored inplace inside the object;
/// a longer name is moved into a PDFString created via std::make_shared.
/// \param name Bytes of the name
PDFObject PDFObject::createName(QByteArray name)
{
    const bool fitsInplace = name.size() <= PDFInplaceString::MAX_STRING_SIZE;
    if (fitsInplace)
    {
        return PDFObject(Type::Name, PDFInplaceString(qMove(name)));
    }

    // Too long for the inplace buffer, store it as a separate string object
    return PDFObject(Type::Name, std::make_shared<PDFString>(qMove(name)));
}
/// Creates a string object from raw bytes. A string of at most
/// PDFInplaceString::MAX_STRING_SIZE bytes is stored inplace inside the object;
/// a longer string is moved into a PDFString created via std::make_shared.
/// \param name Bytes of the string
PDFObject PDFObject::createString(QByteArray name)
{
    const bool fitsInplace = name.size() <= PDFInplaceString::MAX_STRING_SIZE;
    if (fitsInplace)
    {
        return PDFObject(Type::String, PDFInplaceString(qMove(name)));
    }

    // Too long for the inplace buffer, store it as a separate string object
    return PDFObject(Type::String, std::make_shared<PDFString>(qMove(name)));
}
/// Creates a name object from a string reference. When the reference has a
/// memory string, its bytes are copied into a newly created PDFString; otherwise
/// the referenced inplace string is copied into the new object.
/// \param name Reference to the source string; when memoryString is null,
///        inplaceString is dereferenced and therefore must not be null
PDFObject PDFObject::createName(PDFStringRef name)
{
    if (!name.memoryString)
    {
        // No memory string, so the inplace representation is used
        return PDFObject(Type::Name, *name.inplaceString);
    }

    return PDFObject(Type::Name, std::make_shared<PDFString>(name.getString()));
}
/// Creates a string object from a string reference. When the reference has a
/// memory string, its bytes are copied into a newly created PDFString; otherwise
/// the referenced inplace string is copied into the new object.
/// \param name Reference to the source string; when memoryString is null,
///        inplaceString is dereferenced and therefore must not be null
PDFObject PDFObject::createString(PDFStringRef name)
{
    if (!name.memoryString)
    {
        // No memory string, so the inplace representation is used
        return PDFObject(Type::String, *name.inplaceString);
    }

    return PDFObject(Type::String, std::make_shared<PDFString>(name.getString()));
}
bool PDFString::equals(const PDFObjectContent* other) const
{
Q_ASSERT(dynamic_cast<const PDFString*>(other));
@ -335,4 +388,9 @@ PDFObject PDFObjectManipulator::removeNullObjects(PDFObject object)
return merge(object, object, RemoveNullObjects);
}
/// Returns the bytes of the referenced string. The inplace string takes
/// precedence when it is set; otherwise the memory string is dereferenced,
/// so at least one of the two pointers must be non-null.
QByteArray PDFStringRef::getString() const
{
    if (inplaceString)
    {
        return inplaceString->getString();
    }

    return memoryString->getString();
}
} // namespace pdf

View File

@ -26,6 +26,7 @@
#include <memory>
#include <vector>
#include <variant>
#include <array>
#include <initializer_list>
namespace pdf
@ -52,6 +53,64 @@ public:
virtual void optimize() = 0;
};
/// This class represents inplace string in the PDF object. To avoid too much
/// memory allocation, we store small strings inplace as small objects, so
/// we do not use memory allocator, so this doesn't cause performance downgrade.
/// Very often, PDF document consists of large number of names and strings
/// objects, which will fit into this category.
struct PDFInplaceString
{
static constexpr const int MAX_STRING_SIZE = sizeof(PDFObjectReference) - 1;
constexpr PDFInplaceString() = default;
inline PDFInplaceString(const QByteArray& data)
{
Q_ASSERT(data.size() <= MAX_STRING_SIZE);
size = static_cast<uint8_t>(data.size());
std::copy(data.cbegin(), data.cend(), string.data());
}
inline bool operator==(const PDFInplaceString& other) const
{
if (size != other.size)
{
return false;
}
for (uint8_t i = 0; i < size; ++i)
{
if (string[i] != other.string[i])
{
return false;
}
}
return true;
}
inline bool operator !=(const PDFInplaceString& other) const
{
return !(*this == other);
}
QByteArray getString() const
{
return (size > 0) ? QByteArray(string.data(), size) : QByteArray();
}
uint8_t size = 0;
std::array<char, MAX_STRING_SIZE> string = { };
};
/// Reference to one of the two string implementations: a string stored inplace
/// (PDFInplaceString), or a string stored as separate PDFString object. The
/// struct holds plain pointers only and does not own the referenced string.
struct PDFStringRef
{
/// Pointer to the inplace stored string, nullptr if the string is not stored inplace
const PDFInplaceString* inplaceString = nullptr;
/// Pointer to the string object, nullptr if the string is stored inplace
const PDFString* memoryString = nullptr;
/// Returns the bytes of the referenced string as byte array
QByteArray getString() const;
};
class PDFFORQTLIBSHARED_EXPORT PDFObject
{
public:
@ -115,7 +174,7 @@ public:
QByteArray getString() const;
const PDFDictionary* getDictionary() const;
PDFObjectReference getReference() const { return std::get<PDFObjectReference>(m_data); }
const PDFString* getStringObject() const;
PDFStringRef getStringObject() const;
const PDFStream* getStream() const;
const PDFArray* getArray() const;
@ -137,15 +196,9 @@ public:
/// Creates an object with real number
static inline PDFObject createReal(PDFReal value) { return PDFObject(Type::Real, value); }
/// Creates a name object
static inline PDFObject createName(PDFObjectContentPointer&& value) { value->optimize(); return PDFObject(Type::Name, std::move(value)); }
/// Creates a reference object
static inline PDFObject createReference(const PDFObjectReference& reference) { return PDFObject(Type::Reference, reference); }
/// Creates a string object
static inline PDFObject createString(PDFObjectContentPointer&& value) { value->optimize(); return PDFObject(Type::String, std::move(value)); }
/// Creates an array object
static inline PDFObject createArray(PDFObjectContentPointer&& value) { value->optimize(); return PDFObject(Type::Array, std::move(value)); }
@ -155,17 +208,30 @@ public:
/// Creates a stream object
static inline PDFObject createStream(PDFObjectContentPointer&& value) { value->optimize(); return PDFObject(Type::Stream, std::move(value)); }
/// Creates a name object
static PDFObject createName(QByteArray name);
/// Creates a string object
static PDFObject createString(QByteArray name);
/// Creates a name object
static PDFObject createName(PDFStringRef name);
/// Creates a string object
static PDFObject createString(PDFStringRef name);
private:
template<typename T>
constexpr inline PDFObject(Type type, T&& value) :
m_type(type),
m_data(std::forward<T>(value))
m_data(std::forward<T>(value)),
m_type(type)
{
}
std::variant<typename std::monostate, bool, PDFInteger, PDFReal, PDFObjectReference, PDFObjectContentPointer, PDFInplaceString> m_data;
Type m_type;
std::variant<typename std::monostate, bool, PDFInteger, PDFReal, PDFObjectReference, PDFObjectContentPointer> m_data;
};
/// Represents raw string in the PDF file. No conversions are performed, this is

View File

@ -2118,7 +2118,7 @@ void PDFPageContentProcessor::operatorColorSetStrokingColorSpace(PDFPageContentP
return;
}
PDFColorSpacePointer colorSpace = PDFAbstractColorSpace::createColorSpace(m_colorSpaceDictionary, m_document, PDFObject::createName(std::make_shared<PDFString>(QByteArray(name.name))));
PDFColorSpacePointer colorSpace = PDFAbstractColorSpace::createColorSpace(m_colorSpaceDictionary, m_document, PDFObject::createName(name.name));
if (colorSpace)
{
// We must also set default color (it can depend on the color space)
@ -2141,7 +2141,7 @@ void PDFPageContentProcessor::operatorColorSetFillingColorSpace(PDFOperandName n
return;
}
PDFColorSpacePointer colorSpace = PDFAbstractColorSpace::createColorSpace(m_colorSpaceDictionary, m_document, PDFObject::createName(std::make_shared<PDFString>(QByteArray(name.name))));
PDFColorSpacePointer colorSpace = PDFAbstractColorSpace::createColorSpace(m_colorSpaceDictionary, m_document, PDFObject::createName(name.name));
if (colorSpace)
{
// We must also set default color (it can depend on the color space)

View File

@ -738,7 +738,7 @@ PDFObject PDFParser::getObject()
QByteArray array = m_lookAhead1.data.toByteArray();
array.shrink_to_fit();
shift();
return PDFObject::createString(std::make_shared<PDFString>(std::move(array)));
return PDFObject::createString(std::move(array));
}
case PDFLexicalAnalyzer::TokenType::Name:
@ -747,7 +747,7 @@ PDFObject PDFParser::getObject()
QByteArray array = m_lookAhead1.data.toByteArray();
array.shrink_to_fit();
shift();
return PDFObject::createName(std::make_shared<PDFString>(std::move(array)));
return PDFObject::createName(std::move(array));
}
case PDFLexicalAnalyzer::TokenType::ArrayStart:

View File

@ -53,8 +53,8 @@ public:
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(const PDFString* string) override;
virtual void visitName(const PDFString* name) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;
@ -88,14 +88,14 @@ void PDFDecryptObjectVisitor::visitReal(PDFReal value)
m_objectStack.push_back(PDFObject::createReal(value));
}
void PDFDecryptObjectVisitor::visitString(const PDFString* string)
void PDFDecryptObjectVisitor::visitString(PDFStringRef string)
{
m_objectStack.push_back(PDFObject::createString(std::make_shared<PDFString>(m_securityHandler->decrypt(string->getString(), m_reference, PDFSecurityHandler::EncryptionScope::String))));
m_objectStack.push_back(PDFObject::createString(m_securityHandler->decrypt(string.getString(), m_reference, PDFSecurityHandler::EncryptionScope::String)));
}
void PDFDecryptObjectVisitor::visitName(const PDFString* name)
void PDFDecryptObjectVisitor::visitName(PDFStringRef name)
{
m_objectStack.push_back(PDFObject::createName(std::make_shared<PDFString>(*name)));
m_objectStack.push_back(PDFObject::createName(name));
}
void PDFDecryptObjectVisitor::visitArray(const PDFArray* array)

View File

@ -80,16 +80,30 @@ void PDFStatisticsCollector::visitReal(PDFReal value)
collectStatisticsOfSimpleObject(PDFObject::Type::Real);
}
void PDFStatisticsCollector::visitString(const PDFString* string)
void PDFStatisticsCollector::visitString(PDFStringRef string)
{
Statistics& statistics = m_statistics[PDFObject::Type::String];
collectStatisticsOfString(string, statistics);
if (string.inplaceString)
{
collectStatisticsOfSimpleObject(PDFObject::Type::String);
}
else
{
collectStatisticsOfString(string.memoryString, statistics);
}
}
void PDFStatisticsCollector::visitName(const PDFString* name)
void PDFStatisticsCollector::visitName(PDFStringRef name)
{
Statistics& statistics = m_statistics[PDFObject::Type::Name];
collectStatisticsOfString(name, statistics);
if (name.inplaceString)
{
collectStatisticsOfSimpleObject(PDFObject::Type::Name);
}
else
{
collectStatisticsOfString(name.memoryString, statistics);
}
}
void PDFStatisticsCollector::visitArray(const PDFArray* array)

View File

@ -66,8 +66,8 @@ public:
virtual void visitBool(bool value) { Q_UNUSED(value); }
virtual void visitInt(PDFInteger value) { Q_UNUSED(value); }
virtual void visitReal(PDFReal value) { Q_UNUSED(value); }
virtual void visitString(const PDFString* string) { Q_UNUSED(string); }
virtual void visitName(const PDFString* name) { Q_UNUSED(name); }
virtual void visitString(PDFStringRef string) { Q_UNUSED(string); }
virtual void visitName(PDFStringRef name) { Q_UNUSED(name); }
virtual void visitArray(const PDFArray* array) { Q_UNUSED(array); }
virtual void visitDictionary(const PDFDictionary* dictionary) { Q_UNUSED(dictionary); }
virtual void visitStream(const PDFStream* stream) { Q_UNUSED(stream); }
@ -115,8 +115,8 @@ public:
virtual void visitBool(bool value) override;
virtual void visitInt(PDFInteger value) override;
virtual void visitReal(PDFReal value) override;
virtual void visitString(const PDFString* string) override;
virtual void visitName(const PDFString* name) override;
virtual void visitString(PDFStringRef string) override;
virtual void visitName(PDFStringRef name) override;
virtual void visitArray(const PDFArray* array) override;
virtual void visitDictionary(const PDFDictionary* dictionary) override;
virtual void visitStream(const PDFStream* stream) override;