diff --git a/PdfForQtLib/sources/pdfdocument.cpp b/PdfForQtLib/sources/pdfdocument.cpp
index c41d04c..d2a14b7 100644
--- a/PdfForQtLib/sources/pdfdocument.cpp
+++ b/PdfForQtLib/sources/pdfdocument.cpp
@@ -262,6 +262,17 @@ const PDFObject& PDFObjectStorage::getObject(PDFObjectReference reference) const
     }
 }
 
+QByteArray PDFDocumentDataLoaderDecorator::readName(const PDFObject& object) const
+{
+    const PDFObject& dereferencedObject = m_document->getObject(object);
+    if (dereferencedObject.isName())
+    {
+        return dereferencedObject.getString();
+    }
+
+    return QByteArray();
+}
+
 PDFInteger PDFDocumentDataLoaderDecorator::readInteger(const PDFObject& object, PDFInteger defaultValue) const
 {
     const PDFObject& dereferencedObject = m_document->getObject(object);
@@ -458,4 +469,14 @@ bool PDFDocumentDataLoaderDecorator::readBooleanFromDictionary(const PDFDictiona
     return defaultValue;
 }
 
+QByteArray PDFDocumentDataLoaderDecorator::readNameFromDictionary(const PDFDictionary* dictionary, const char* key) const
+{
+    if (dictionary->hasKey(key))
+    {
+        return readName(dictionary->get(key));
+    }
+
+    return QByteArray();
+}
+
 } // namespace pdf
diff --git a/PdfForQtLib/sources/pdfdocument.h b/PdfForQtLib/sources/pdfdocument.h
index 413da75..70f4b3f 100644
--- a/PdfForQtLib/sources/pdfdocument.h
+++ b/PdfForQtLib/sources/pdfdocument.h
@@ -87,6 +87,11 @@ public:
     inline explicit PDFDocumentDataLoaderDecorator(const PDFDocument* document) : m_document(document) { }
     inline ~PDFDocumentDataLoaderDecorator() = default;
 
+    /// Reads a name from the object. If the (dereferenced) object is not a name,
+    /// then an empty byte array is returned. Does not modify the loader.
+    /// \param object Object, can be an indirect reference to object (it is dereferenced)
+    QByteArray readName(const PDFObject& object) const;
+
     /// Reads an integer from the object, if it is possible.
     /// \param object Object, can be an indirect reference to object (it is dereferenced)
     /// \param defaultValue Default value
@@ -217,6 +222,11 @@ public:
     /// \param defaultValue Default value
     bool readBooleanFromDictionary(const PDFDictionary* dictionary, const char* key, bool defaultValue) const;
 
+    /// Reads a name from the dictionary. If the entry doesn't exist, or it is not a name, an empty byte array is returned.
+    /// \param dictionary Dictionary containing desired data (must not be null, it is accessed unconditionally)
+    /// \param key Entry key
+    QByteArray readNameFromDictionary(const PDFDictionary* dictionary, const char* key) const;
+
 private:
     const PDFDocument* m_document;
 };
diff --git a/PdfForQtLib/sources/pdfencoding.h b/PdfForQtLib/sources/pdfencoding.h
index 16363ae..0e64933 100644
--- a/PdfForQtLib/sources/pdfencoding.h
+++ b/PdfForQtLib/sources/pdfencoding.h
@@ -46,7 +46,11 @@ public:
         PDFDoc, ///< Appendix D, Section D.1/D.2, PDFDocEncoding
         MacExpert, ///< Appendix D, Section D.3, MacExpertEncoding
         Symbol, ///< Appendix D, Section D.4, Symbol Set and Encoding
-        ZapfDingbats ///< Appendix D, Section D.5, Zapf Dingbats Encoding
+        ZapfDingbats, ///< Appendix D, Section D.5, Zapf Dingbats Encoding
+
+        // The following encodings are for internal use only and are not a part of the PDF reference
+        Custom, ///< Encoding table assembled individually (e.g. a base encoding merged with font differences)
+        Invalid ///< No valid encoding
     };
 
     /// Converts byte array to the unicode string using specified encoding
@@ -73,11 +77,11 @@ public:
     /// \param stream Stream, from which date/time is read
     static QDateTime convertToDateTime(const QByteArray& stream);
 
-private:
     /// Returns conversion table for particular encoding
     /// \param encoding Encoding
     static const encoding::EncodingTable* getTableForEncoding(Encoding encoding);
 
+private:
     /// Returns true, if byte array has UTF-16BE unicode marking bytes at the
     /// stream start. If they are present, then byte stream is probably encoded
     /// as unicode.
diff --git a/PdfForQtLib/sources/pdffont.cpp b/PdfForQtLib/sources/pdffont.cpp
index 7a6d246..4368295 100644
--- a/PdfForQtLib/sources/pdffont.cpp
+++ b/PdfForQtLib/sources/pdffont.cpp
@@ -16,6 +16,9 @@
 // along with PDFForQt. If not, see .
 
 #include "pdffont.h"
+#include "pdfdocument.h"
+#include "pdfparser.h"
+#include "pdfnametounicode.h"
 
 namespace pdf
 {
@@ -25,4 +28,296 @@ PDFFont::PDFFont()
 
 }
 
+PDFFontPointer PDFFont::createFont(const PDFObject& object, const PDFDocument* document)
+{
+    const PDFObject& dereferencedFontDictionary = document->getObject(object);
+    if (!dereferencedFontDictionary.isDictionary())
+    {
+        throw PDFParserException(PDFTranslationContext::tr("Font object must be a dictionary."));
+    }
+
+    const PDFDictionary* fontDictionary = dereferencedFontDictionary.getDictionary();
+    PDFDocumentDataLoaderDecorator fontLoader(document);
+
+    // TODO: Fonts - implement all types of the font
+    // First, determine the font subtype
+    constexpr const std::array<std::pair<const char*, FontType>, 2> fontTypes = {
+        std::pair<const char*, FontType>{ "Type1", FontType::Type1 },
+        std::pair<const char*, FontType>{ "TrueType", FontType::TrueType }
+    };
+
+    const FontType fontType = fontLoader.readEnumByName(fontDictionary->get("Subtype"), fontTypes.cbegin(), fontTypes.cend(), FontType::Invalid);
+    if (fontType == FontType::Invalid)
+    {
+        throw PDFParserException(PDFTranslationContext::tr("Invalid font type."));
+    }
+
+    QByteArray name = fontLoader.readNameFromDictionary(fontDictionary, "Name");
+    QByteArray baseFont = fontLoader.readNameFromDictionary(fontDictionary, "BaseFont");
+    const PDFInteger firstChar = fontLoader.readIntegerFromDictionary(fontDictionary, "FirstChar", 0);
+    const PDFInteger lastChar = fontLoader.readIntegerFromDictionary(fontDictionary, "LastChar", 255);
+    std::vector<PDFInteger> widths = fontLoader.readIntegerArrayFromDictionary(fontDictionary, "Widths");
+
+    // Read standard font (stays StandardFontType::Invalid, if base font is not one of the 14 standard fonts)
+    constexpr const std::array<std::pair<const char*, StandardFontType>, 14> standardFonts = {
+        std::pair<const char*, StandardFontType>{ "Times-Roman", StandardFontType::TimesRoman },
+        std::pair<const char*, StandardFontType>{ "Times-Bold", StandardFontType::TimesRomanBold },
+        std::pair<const char*, StandardFontType>{ "Times-Italic", StandardFontType::TimesRomanItalics },
+        std::pair<const char*, StandardFontType>{ "Times-BoldItalic", StandardFontType::TimesRomanBoldItalics },
+        std::pair<const char*, StandardFontType>{ "Helvetica", StandardFontType::Helvetica },
+        std::pair<const char*, StandardFontType>{ "Helvetica-Bold", StandardFontType::HelveticaBold },
+        std::pair<const char*, StandardFontType>{ "Helvetica-Oblique", StandardFontType::HelveticaOblique },
+        std::pair<const char*, StandardFontType>{ "Helvetica-BoldOblique", StandardFontType::HelveticaBoldOblique },
+        std::pair<const char*, StandardFontType>{ "Courier", StandardFontType::Courier },
+        std::pair<const char*, StandardFontType>{ "Courier-Bold", StandardFontType::CourierBold },
+        std::pair<const char*, StandardFontType>{ "Courier-Oblique", StandardFontType::CourierOblique },
+        std::pair<const char*, StandardFontType>{ "Courier-BoldOblique", StandardFontType::CourierBoldOblique },
+        std::pair<const char*, StandardFontType>{ "Symbol", StandardFontType::Symbol },
+        std::pair<const char*, StandardFontType>{ "ZapfDingbats", StandardFontType::ZapfDingbats }
+    };
+    const StandardFontType standardFont = fontLoader.readEnumByName(fontDictionary->get("BaseFont"), standardFonts.cbegin(), standardFonts.cend(), StandardFontType::Invalid);
+
+    // Read Font Descriptor
+    // TODO: Read font descriptor
+
+    // Read Font Encoding
+    // The font encoding for the simple font is determined by this algorithm:
+    //    1) Try to use Encoding dictionary to determine base encoding
+    //       (it can be MacRomanEncoding, MacExpertEncoding, WinAnsiEncoding or StandardEncoding)
+    //    2) If it is not present, then try to obtain built-in encoding from the font file (usually, this is not possible)
+    //    3) Use default encoding for the font depending on the font type
+    //        - one of the 14 base fonts - use builtin encoding for the font type
+    //        - TrueType - use WinAnsiEncoding
+    //        - all others - use StandardEncoding
+    //    4) Merge with Differences, if present
+    //    5) Fill missing characters from StandardEncoding
+
+    // TODO: Read font encoding from the font file
+    PDFEncoding::Encoding encoding = PDFEncoding::Encoding::Invalid;
+    encoding::EncodingTable simpleFontEncodingTable = { };
+    switch (fontType)
+    {
+        case FontType::Type1:
+        case FontType::TrueType:
+        {
+            bool hasDifferences = false;
+            encoding::EncodingTable differences = { };
+
+            if (fontDictionary->hasKey("Encoding"))
+            {
+                constexpr const std::array<std::pair<const char*, PDFEncoding::Encoding>, 4> encodings = {
+                    std::pair<const char*, PDFEncoding::Encoding>{ "MacRomanEncoding", PDFEncoding::Encoding::MacRoman },
+                    std::pair<const char*, PDFEncoding::Encoding>{ "MacExpertEncoding", PDFEncoding::Encoding::MacExpert },
+                    std::pair<const char*, PDFEncoding::Encoding>{ "WinAnsiEncoding", PDFEncoding::Encoding::WinAnsi },
+                    std::pair<const char*, PDFEncoding::Encoding>{ "StandardEncoding", PDFEncoding::Encoding::Standard }
+                };
+
+                const PDFObject& encodingObject = document->getObject(fontDictionary->get("Encoding"));
+                if (encodingObject.isName())
+                {
+                    // Decode name of the encoding
+                    encoding = fontLoader.readEnumByName(encodingObject, encodings.cbegin(), encodings.cend(), PDFEncoding::Encoding::Invalid);
+                }
+                else if (encodingObject.isDictionary())
+                {
+                    // Dictionary with base encoding and differences (all optional)
+                    const PDFDictionary* encodingDictionary = encodingObject.getDictionary();
+                    if (encodingDictionary->hasKey("BaseEncoding"))
+                    {
+                        encoding = fontLoader.readEnumByName(encodingDictionary->get("BaseEncoding"), encodings.cbegin(), encodings.cend(), PDFEncoding::Encoding::Invalid);
+                    }
+                    else
+                    {
+                        // We get encoding for the standard font. If we have invalid standard font,
+                        // then we get standard encoding. So we shouldn't test it.
+                        encoding = getEncodingForStandardFont(standardFont);
+                    }
+
+                    if (encodingDictionary->hasKey("Differences"))
+                    {
+                        const PDFObject& differencesArray = document->getObject(encodingDictionary->get("Differences"));
+                        if (differencesArray.isArray())
+                        {
+                            hasDifferences = true;
+                            const PDFArray* array = differencesArray.getArray();
+                            size_t currentOffset = 0;
+                            for (size_t i = 0, count = array->getCount(); i < count; ++i)
+                            {
+                                const PDFObject& item = document->getObject(array->getItem(i));
+                                if (item.isInt())
+                                {
+                                    currentOffset = static_cast<size_t>(item.getInteger());
+                                }
+                                else if (item.isName())
+                                {
+                                    if (currentOffset >= differences.size())
+                                    {
+                                        throw PDFParserException(PDFTranslationContext::tr("Invalid differences in encoding entry of the font."));
+                                    }
+
+                                    QChar character = PDFNameToUnicode::getUnicodeForName(item.getString());
+
+                                    // Try ZapfDingbats, if this fails
+                                    if (character.isNull())
+                                    {
+                                        character = PDFNameToUnicode::getUnicodeForNameZapfDingbats(item.getString());
+                                    }
+                                    differences[currentOffset] = character;
+
+                                    ++currentOffset;
+                                }
+                                else
+                                {
+                                    throw PDFParserException(PDFTranslationContext::tr("Invalid differences in encoding entry of the font."));
+                                }
+                            }
+                        }
+                        else
+                        {
+                            throw PDFParserException(PDFTranslationContext::tr("Invalid differences in encoding entry of the font."));
+                        }
+                    }
+                }
+                else
+                {
+                    throw PDFParserException(PDFTranslationContext::tr("Invalid encoding entry of the font."));
+                }
+            }
+            else
+            {
+                // We get encoding for the standard font. If we have invalid standard font,
+                // then we get standard encoding. So we shouldn't test it.
+                encoding = getEncodingForStandardFont(standardFont);
+            }
+
+            if (encoding == PDFEncoding::Encoding::Invalid)
+            {
+                throw PDFParserException(PDFTranslationContext::tr("Invalid encoding entry of the font."));
+            }
+
+            simpleFontEncodingTable = *PDFEncoding::getTableForEncoding(encoding);
+
+            // Fill in differences
+            if (hasDifferences)
+            {
+                for (size_t i = 0; i < differences.size(); ++i)
+                {
+                    if (!differences[i].isNull())
+                    {
+                        simpleFontEncodingTable[i] = differences[i];
+                    }
+                }
+
+                // Set the encoding to custom
+                encoding = PDFEncoding::Encoding::Custom;
+            }
+
+            // Fill in missing characters from standard encoding
+            const encoding::EncodingTable& standardEncoding = *PDFEncoding::getTableForEncoding(PDFEncoding::Encoding::Standard);
+            for (size_t i = 0; i < standardEncoding.size(); ++i)
+            {
+                if ((simpleFontEncodingTable[i].isNull() || simpleFontEncodingTable[i] == QChar(QChar::SpecialCharacter::ReplacementCharacter)) &&
+                    (!standardEncoding[i].isNull() && standardEncoding[i] != QChar(QChar::SpecialCharacter::ReplacementCharacter)))
+                {
+                    simpleFontEncodingTable[i] = standardEncoding[i];
+                }
+            }
+
+            break;
+        }
+
+        default:
+        {
+            Q_ASSERT(false);
+            break;
+        }
+    }
+
+    switch (fontType)
+    {
+        case FontType::Type1:
+            return PDFFontPointer(new PDFType1Font(qMove(name), qMove(baseFont), firstChar, lastChar, qMove(widths), encoding, simpleFontEncodingTable, standardFont));
+
+        case FontType::TrueType:
+            return PDFFontPointer(new PDFTrueTypeFont(qMove(name), qMove(baseFont), firstChar, lastChar, qMove(widths), encoding, simpleFontEncodingTable));
+
+        default:
+        {
+            Q_ASSERT(false);
+            break;
+        }
+    }
+
+    // Read To Unicode
+    // TODO: Read To Unicode
+
+    // Read Embedded fonts
+    // TODO: Read embedded fonts
+    return PDFFontPointer();
+}
+
+PDFSimpleFont::PDFSimpleFont(QByteArray name,
+                             QByteArray baseFont,
+                             PDFInteger firstChar,
+                             PDFInteger lastChar,
+                             std::vector<PDFInteger> widths,
+                             PDFEncoding::Encoding encodingType,
+                             encoding::EncodingTable encoding) :
+    m_name(qMove(name)),
+    m_baseFont(qMove(baseFont)),
+    m_firstChar(firstChar),
+    m_lastChar(lastChar),
+    m_widths(qMove(widths)),
+    m_encodingType(encodingType),
+    m_encoding(encoding)
+{
+
+}
+
+QRawFont PDFSimpleFont::getRealizedFont(PDFReal fontSize) const
+{
+    // TODO: Fix font creation to use also embedded fonts, font descriptor, etc.
+    QFont font(m_baseFont);
+    font.setHintingPreference(QFont::PreferNoHinting);
+    font.setPixelSize(fontSize);
+    return QRawFont::fromFont(font, QFontDatabase::Any);
+}
+
+QString PDFSimpleFont::getTextUsingEncoding(const QByteArray& byteArray) const
+{
+    QString string;
+    string.resize(byteArray.size(), QChar());
+
+    for (int i = 0, count = byteArray.size(); i < count; ++i)
+    {
+        string[i] = m_encoding[static_cast<unsigned char>(byteArray[i])]; // unsigned, so bytes >= 0x80 index the table correctly
+    }
+
+    return string;
+}
+
+PDFType1Font::PDFType1Font(QByteArray name,
+                           QByteArray baseFont,
+                           PDFInteger firstChar,
+                           PDFInteger lastChar,
+                           std::vector<PDFInteger> widths,
+                           PDFEncoding::Encoding encodingType,
+                           encoding::EncodingTable encoding,
+                           StandardFontType standardFontType) :
+    PDFSimpleFont(qMove(name), qMove(baseFont), firstChar, lastChar, qMove(widths), encodingType, encoding),
+    m_standardFontType(standardFontType)
+{
+
+}
+
+FontType PDFType1Font::getFontType() const
+{
+    return FontType::Type1;
+}
+
+FontType PDFTrueTypeFont::getFontType() const
+{
+    return FontType::TrueType;
+}
+
 } // namespace pdf
diff --git a/PdfForQtLib/sources/pdffont.h b/PdfForQtLib/sources/pdffont.h
index b5dd734..da92116 100644
--- a/PdfForQtLib/sources/pdffont.h
+++ b/PdfForQtLib/sources/pdffont.h
@@ -19,11 +19,15 @@
 #define PDFFONT_H
 
 #include "pdfglobal.h"
+#include "pdfencoding.h"
+#include "pdfobject.h"
 
+#include
 #include
 
 namespace pdf
 {
+class PDFDocument;
 
 enum class TextRenderingMode
 {
@@ -82,13 +86,135 @@ constexpr bool isTextRenderingModeClipped(TextRenderingMode mode)
     }
 }
 
+enum class FontType
+{
+    Invalid,
+    Type1,
+    TrueType
+};
+
+/// Standard Type1 fonts
+enum class StandardFontType
+{
+    Invalid,
+    TimesRoman,
+    TimesRomanBold,
+    TimesRomanItalics,
+    TimesRomanBoldItalics,
+    Helvetica,
+    HelveticaBold,
+    HelveticaOblique,
+    HelveticaBoldOblique,
+    Courier,
+    CourierBold,
+    CourierOblique,
+    CourierBoldOblique,
+    Symbol,
+    ZapfDingbats
+};
+
+/// Returns builtin encoding for the standard font (standard encoding for all fonts except Symbol and ZapfDingbats, including Invalid)
+static constexpr PDFEncoding::Encoding getEncodingForStandardFont(StandardFontType standardFont)
+{
+    switch (standardFont)
+    {
+        case StandardFontType::Symbol:
+            return PDFEncoding::Encoding::Symbol;
+
+        case StandardFontType::ZapfDingbats:
+            return PDFEncoding::Encoding::ZapfDingbats;
+
+        default:
+            return PDFEncoding::Encoding::Standard;
+    }
+}
+
+class PDFFont;
+
+using PDFFontPointer = QSharedPointer<PDFFont>;
+
+/// Base class representing font in the PDF file
 class PDFFont
 {
 public:
-    PDFFont();
+    explicit PDFFont();
+    virtual ~PDFFont() = default;
+
+    /// Returns the font type
+    virtual FontType getFontType() const = 0;
+
+    /// Realizes the font (physical materialization of the font using pixel size,
+    /// if font can't be realized, then empty QRawFont is returned).
+    /// \param fontSize Size of the font
+    virtual QRawFont getRealizedFont(PDFReal fontSize) const = 0;
+
+    /// Returns text using the font encoding
+    /// \param byteArray Byte array with encoded string
+    virtual QString getTextUsingEncoding(const QByteArray& byteArray) const = 0;
+
+    /// Creates font from the font dictionary. Throws PDFParserException, if the object
+    /// is not a dictionary, font subtype is not supported, or font encoding is invalid.
+    /// \param object Font dictionary, can be an indirect reference to object (it is dereferenced)
+    /// \param document Document, which is used to dereference objects
+    static PDFFontPointer createFont(const PDFObject& object, const PDFDocument* document);
 };
 
-using PDFFontPointer = QSharedPointer;
+/// Simple font, see PDF reference 1.7, chapter 5.5. Simple fonts have encoding table,
+/// which maps single-byte character to the glyph in the font.
+class PDFSimpleFont : public PDFFont
+{
+public:
+    explicit PDFSimpleFont(QByteArray name,
+                           QByteArray baseFont,
+                           PDFInteger firstChar,
+                           PDFInteger lastChar,
+                           std::vector<PDFInteger> widths,
+                           PDFEncoding::Encoding encodingType,
+                           encoding::EncodingTable encoding);
+    virtual ~PDFSimpleFont() override = default;
+
+    virtual QRawFont getRealizedFont(PDFReal fontSize) const override;
+    virtual QString getTextUsingEncoding(const QByteArray& byteArray) const override;
+
+protected:
+    QByteArray m_name;
+    QByteArray m_baseFont;
+    PDFInteger m_firstChar;
+    PDFInteger m_lastChar;
+    std::vector<PDFInteger> m_widths;
+    PDFEncoding::Encoding m_encodingType;
+    encoding::EncodingTable m_encoding;
+};
+
+class PDFType1Font : public PDFSimpleFont
+{
+public:
+    explicit PDFType1Font(QByteArray name,
+                          QByteArray baseFont,
+                          PDFInteger firstChar,
+                          PDFInteger lastChar,
+                          std::vector<PDFInteger> widths,
+                          PDFEncoding::Encoding encodingType,
+                          encoding::EncodingTable encoding,
+                          StandardFontType standardFontType);
+    virtual ~PDFType1Font() override = default;
+
+    virtual FontType getFontType() const override;
+
+    /// Returns the assigned standard font (or invalid, if font is not standard)
+    StandardFontType getStandardFontType() const { return m_standardFontType; }
+
+private:
+    StandardFontType m_standardFontType; ///< Type of the standard font (or invalid, if it is not a standard font)
+};
+
+class PDFTrueTypeFont : public PDFSimpleFont
+{
+public:
+    using PDFSimpleFont::PDFSimpleFont;
+
+    virtual FontType getFontType() const override;
+};
 
 } // namespace pdf
 
diff --git a/PdfForQtLib/sources/pdfpagecontentprocessor.cpp b/PdfForQtLib/sources/pdfpagecontentprocessor.cpp
index 12b23e1..69ac380 100644
--- a/PdfForQtLib/sources/pdfpagecontentprocessor.cpp
+++ b/PdfForQtLib/sources/pdfpagecontentprocessor.cpp
@@ -153,20 +153,29 @@ PDFPageContentProcessor::PDFPageContentProcessor(const PDFPage* page, const PDFD
     m_page(page),
     m_document(document),
     m_colorSpaceDictionary(nullptr),
+    m_fontDictionary(nullptr),
     m_textBeginEndState(0)
 {
     Q_ASSERT(page);
     Q_ASSERT(document);
 
-    const PDFObject& resources = m_document->getObject(m_page->getResources());
-    if (resources.isDictionary() && resources.getDictionary()->hasKey(COLOR_SPACE_DICTIONARY))
+    auto getDictionary = [this](const char* resourceName) -> const pdf::PDFDictionary*
     {
-        const PDFObject& colorSpace = m_document->getObject(resources.getDictionary()->get(COLOR_SPACE_DICTIONARY));
-        if (colorSpace.isDictionary())
+        const PDFObject& resources = m_document->getObject(m_page->getResources());
+        if (resources.isDictionary() && resources.getDictionary()->hasKey(resourceName))
         {
-            m_colorSpaceDictionary = colorSpace.getDictionary();
+            const PDFObject& resourceDictionary = m_document->getObject(resources.getDictionary()->get(resourceName));
+            if (resourceDictionary.isDictionary())
+            {
+                return resourceDictionary.getDictionary();
+            }
         }
-    }
+
+        return nullptr;
+    };
+
+    m_colorSpaceDictionary = getDictionary(COLOR_SPACE_DICTIONARY);
+    m_fontDictionary = getDictionary("Font");
 }
 
 PDFPageContentProcessor::~PDFPageContentProcessor()
@@ -260,17 +269,21 @@ void PDFPageContentProcessor::performClipping(const QPainterPath& path, Qt::Fill
     Q_UNUSED(fillRule);
 }
 
-void PDFPageContentProcessor::performUpdateGraphicsState(const PDFPageContentProcessor::PDFPageContentProcessorState& state)
+void PDFPageContentProcessor::performUpdateGraphicsState(const PDFPageContentProcessorState& state)
 {
-    Q_UNUSED(state);
+    if (state.getStateFlags().testFlag(PDFPageContentProcessorState::StateTextFont) ||
+        state.getStateFlags().testFlag(PDFPageContentProcessorState::StateTextFontSize))
+    {
+        m_realizedFont.dirty();
+    }
 }
 
-void PDFPageContentProcessor::performSaveGraphicState(PDFPageContentProcessor::ProcessOrder order)
+void PDFPageContentProcessor::performSaveGraphicState(ProcessOrder order)
 {
     Q_UNUSED(order);
 }
 
-void PDFPageContentProcessor::performRestoreGraphicState(PDFPageContentProcessor::ProcessOrder order)
+void PDFPageContentProcessor::performRestoreGraphicState(ProcessOrder order)
 {
     Q_UNUSED(order);
 }
@@ -689,6 +702,34 @@ void PDFPageContentProcessor::processCommand(const QByteArray& command)
             break;
         }
 
+        case Operator::TextShowTextString:
+        {
+            // Tj, show text string
+            invokeOperator(&PDFPageContentProcessor::operatorTextShowTextString);
+            break;
+        }
+
+        case Operator::TextShowTextIndividualSpacing:
+        {
+            // TJ, show text, allow individual text spacing
+            invokeOperator(&PDFPageContentProcessor::operatorTextShowTextIndividualSpacing);
+            break;
+        }
+
+        case Operator::TextNextLineShowText:
+        {
+            // ', move to the next line and show text ("string '" is equivalent to "T* string Tj")
+            invokeOperator(&PDFPageContentProcessor::operatorTextNextLineShowText);
+            break;
+        }
+
+        case Operator::TextSetSpacingAndShowText:
+        {
+            // ", move to the next line, set spacing and show text (equivalent to sequence "w1 Tw w2 Tc string '")
+            invokeOperator(&PDFPageContentProcessor::operatorTextSetSpacingAndShowText);
+            break;
+        }
+
         case Operator::Invalid:
         {
             m_errorList.append(PDFRenderError(RenderErrorType::Error, PDFTranslationContext::tr("Unknown operator '%1'.").arg(QString::fromLatin1(command))));
@@ -902,7 +943,7 @@ void PDFPageContentProcessor::operatorSetLineDashPattern()
     updateGraphicState();
 }
 
-void PDFPageContentProcessor::operatorSetRenderingIntent(PDFName intent)
+void PDFPageContentProcessor::operatorSetRenderingIntent(PDFOperandName intent)
 {
     m_graphicState.setRenderingIntent(intent.name);
     updateGraphicState();
@@ -915,7 +956,7 @@ void PDFPageContentProcessor::operatorSetFlatness(PDFReal flatness)
     updateGraphicState();
 }
 
-void PDFPageContentProcessor::operatorSetGraphicState(PDFName dictionaryName)
+void PDFPageContentProcessor::operatorSetGraphicState(PDFOperandName dictionaryName)
 {
     const PDFObject& resources = m_page->getResources();
     if (resources.isDictionary())
@@ -1101,7 +1142,7 @@ PDFInteger PDFPageContentProcessor::readOperand(size_t index) const
 }
 
 template<>
-PDFPageContentProcessor::PDFName PDFPageContentProcessor::readOperand(size_t index) const
+PDFPageContentProcessor::PDFOperandName PDFPageContentProcessor::readOperand<PDFPageContentProcessor::PDFOperandName>(size_t index) const
 {
     if (index < m_operands.size())
     {
@@ -1110,7 +1151,7 @@ PDFPageContentProcessor::PDFName PDFPageContentProcessor::readOperand
+template<>
+PDFPageContentProcessor::PDFOperandString PDFPageContentProcessor::readOperand<PDFPageContentProcessor::PDFOperandString>(size_t index) const
+{
+    if (index < m_operands.size())
+    {
+        const PDFLexicalAnalyzer::Token& token = m_operands[index];
+
+        switch (token.type)
+        {
+            case PDFLexicalAnalyzer::TokenType::String:
+                return PDFOperandString{ token.data.toByteArray() };
+
+            default:
+                throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Can't read operand (string) on index %1. Operand is of type '%2'.").arg(index + 1).arg(PDFLexicalAnalyzer::getStringFromOperandType(token.type)));
+        }
+    }
+    else
+    {
+        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Can't read operand (string) on index %1. Only %2 operands provided.").arg(index + 1).arg(m_operands.size()));
+    }
+
+    return PDFOperandString();
 }
 
 void PDFPageContentProcessor::operatorMoveCurrentPoint(PDFReal x, PDFReal y)
@@ -1275,7 +1341,7 @@ void PDFPageContentProcessor::operatorClipEvenOdd()
     }
 }
 
-void PDFPageContentProcessor::operatorColorSetStrokingColorSpace(PDFPageContentProcessor::PDFName name)
+void PDFPageContentProcessor::operatorColorSetStrokingColorSpace(PDFPageContentProcessor::PDFOperandName name)
 {
     PDFColorSpacePointer colorSpace = PDFAbstractColorSpace::createColorSpace(m_colorSpaceDictionary, m_document, PDFObject::createName(std::make_shared(QByteArray(name.name))));
     if (colorSpace)
@@ -1291,7 +1357,7 @@ void PDFPageContentProcessor::operatorColorSetStrokingColorSpace(PDFPageContentP
     }
 }
 
-void PDFPageContentProcessor::operatorColorSetFillingColorSpace(PDFName name)
+void PDFPageContentProcessor::operatorColorSetFillingColorSpace(PDFOperandName name)
 {
     PDFColorSpacePointer colorSpace = PDFAbstractColorSpace::createColorSpace(m_colorSpaceDictionary, m_document, PDFObject::createName(std::make_shared(QByteArray(name.name))));
     if (colorSpace)
@@ -1458,13 +1524,27 @@ void PDFPageContentProcessor::operatorTextSetLeading(PDFReal leading)
     updateGraphicState();
 }
 
-void PDFPageContentProcessor::operatorTextSetFontAndFontSize(PDFPageContentProcessor::PDFName fontName, PDFReal fontSize)
+void PDFPageContentProcessor::operatorTextSetFontAndFontSize(PDFOperandName fontName, PDFReal fontSize)
 {
-    Q_UNUSED(fontName);
-    Q_UNUSED(fontSize);
+    if (m_fontDictionary)
+    {
+        if (m_fontDictionary->hasKey(fontName.name))
+        {
+            PDFFontPointer font = PDFFont::createFont(m_fontDictionary->get(fontName.name), m_document);
 
-    // TODO: Implement this operator
-    throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Set font not implemented."));
+            m_graphicState.setTextFont(qMove(font));
+            m_graphicState.setTextFontSize(fontSize);
+            updateGraphicState();
+        }
+        else
+        {
+            throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Font '%1' not found in font dictionary.").arg(QString::fromLatin1(fontName.name)));
+        }
+    }
+    else
+    {
+        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid font dictionary."));
+    }
 }
 
 void PDFPageContentProcessor::operatorTextSetRenderMode(PDFInteger mode)
@@ -1533,6 +1613,105 @@ void PDFPageContentProcessor::operatorTextMoveByLeading()
     operatorTextMoveByOffset(0.0, m_graphicState.getTextLeading());
 }
 
+void PDFPageContentProcessor::operatorTextShowTextString(PDFOperandString text)
+{
+    if (m_graphicState.getTextFont())
+    {
+        QString textDecoded = m_graphicState.getTextFont()->getTextUsingEncoding(text.string);
+        drawText(TextSequence::fromString(textDecoded));
+    }
+    else
+    {
+        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid font, text can't be printed."));
+    }
+}
+
+void PDFPageContentProcessor::operatorTextShowTextIndividualSpacing()
+{
+    // Operand stack must be of this form [ ... text, number, text, number, number, text ... ]. We check it.
+
+    if (m_operands.size() < 2)
+    {
+        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid parameters of text operator with individual character spacing."));
+    }
+
+    // Now, we have at least 2 arguments. Check that they are enclosed in array start/end tokens
+    if (m_operands[0].type != PDFLexicalAnalyzer::TokenType::ArrayStart ||
+        m_operands[m_operands.size() - 1].type != PDFLexicalAnalyzer::TokenType::ArrayEnd)
+    {
+        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid parameters of text operator with individual character spacing."));
+    }
+
+    if (m_graphicState.getTextFont())
+    {
+        TextSequence textSequence;
+
+        // We use simple heuristic to ensure reallocation doesn't occur too often
+        textSequence.items.reserve(m_operands.size() * 4);
+
+        for (size_t i = 1, lastIndex = m_operands.size() - 1; i < lastIndex; ++i)
+        {
+            switch (m_operands[i].type)
+            {
+                case PDFLexicalAnalyzer::TokenType::Integer:
+                {
+                    textSequence.items.push_back(TextSequenceItem(m_operands[i].data.value<PDFInteger>()));
+                    break;
+                }
+
+                case PDFLexicalAnalyzer::TokenType::String:
+                {
+                    QString string = m_graphicState.getTextFont()->getTextUsingEncoding(m_operands[i].data.toByteArray());
+                    std::transform(string.cbegin(), string.cend(), std::back_inserter(textSequence.items), [](const QChar character) { return TextSequenceItem(character); });
+                    break;
+                }
+
+                default:
+                {
+                    // Error - we have operand of different type
+                    throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid operand of text show operator."));
+                }
+            }
+        }
+
+        drawText(textSequence);
+    }
+    else
+    {
+        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid font, text can't be printed."));
+    }
+}
+
+void PDFPageContentProcessor::operatorTextNextLineShowText(PDFOperandString text)
+{
+    operatorTextMoveByLeading();
+    operatorTextShowTextString(qMove(text));
+}
+
+void PDFPageContentProcessor::operatorTextSetSpacingAndShowText(PDFReal t_w, PDFReal t_c, PDFOperandString text)
+{
+    m_graphicState.setTextWordSpacing(t_w);
+    m_graphicState.setTextCharacterSpacing(t_c);
+    updateGraphicState();
+
+    operatorTextNextLineShowText(qMove(text));
+}
+
+void PDFPageContentProcessor::drawText(const TextSequence& textSequence)
+{
+    Q_UNUSED(textSequence);
+}
+
+QRawFont PDFPageContentProcessor::getRealizedFontImpl() const
+{
+    if (m_graphicState.getTextFont())
+    {
+        return m_graphicState.getTextFont()->getRealizedFont(m_graphicState.getTextFontSize());
+    }
+
+    return QRawFont();
+}
+
 PDFPageContentProcessor::PDFPageContentProcessorState::PDFPageContentProcessorState() :
     m_currentTransformationMatrix(),
     m_fillColorSpace(),
@@ -1810,4 +1989,12 @@ void PDFPageContentProcessor::PDFPageContentProcessorState::setTextCharacterSpac
     }
 }
 
+PDFPageContentProcessor::TextSequence PDFPageContentProcessor::TextSequence::fromString(const QString& string)
+{
+    TextSequence result;
+    result.items.reserve(string.size());
+    std::transform(string.cbegin(), string.cend(), std::back_inserter(result.items), [](const QChar character) { return TextSequenceItem(character); });
+    return result;
+}
+
 } // namespace pdf
diff --git a/PdfForQtLib/sources/pdfpagecontentprocessor.h b/PdfForQtLib/sources/pdfpagecontentprocessor.h
index 91fe9f4..931d12b 100644
--- a/PdfForQtLib/sources/pdfpagecontentprocessor.h
+++ b/PdfForQtLib/sources/pdfpagecontentprocessor.h
@@ -22,8 +22,10 @@
 #include "pdfparser.h"
 #include "pdfcolorspaces.h"
 #include "pdffont.h"
+#include "pdfutils.h"
 
 #include
+#include
 #include
 #include
 
@@ -348,6 +350,28 @@ protected:
         StateFlags m_stateFlags;
     };
 
+    /// Item of the text sequence (either single character, or advance)
+    struct TextSequenceItem
+    {
+        inline explicit TextSequenceItem() = default;
+        inline explicit TextSequenceItem(QChar character) : character(character), advance(0) { }
+        inline explicit TextSequenceItem(PDFInteger advance) : character(), advance(advance) { }
+
+        inline bool isCharacter() const { return !character.isNull(); }
+        inline bool isAdvance() const { return advance != 0; }
+        inline bool isNull() const { return !isCharacter() && !isAdvance(); }
+
+        QChar character;
+        PDFInteger advance = 0;
+    };
+
+    struct TextSequence
+    {
+        static TextSequence fromString(const QString& string);
+
+        std::vector<TextSequenceItem> items;
+    };
+
     enum class ProcessOrder
     {
         BeforeOperation,
@@ -398,11 +422,17 @@ private:
     void processCommand(const QByteArray& command);
 
     /// Wrapper for PDF Name
-    struct PDFName
+    struct PDFOperandName
     {
         QByteArray name;
     };
 
+    /// Wrapper for PDF String
+    struct PDFOperandString
+    {
+        QByteArray string;
+    };
+
     template
     T readOperand(size_t index) const;
 
@@ -413,7 +443,10 @@ private:
     PDFInteger readOperand(size_t index) const;
 
     template<>
-    PDFName readOperand(size_t index) const;
+    PDFOperandName readOperand<PDFOperandName>(size_t index) const;
+
+    template<>
+    PDFOperandString readOperand<PDFOperandString>(size_t index) const;
 
     template
     inline T readOperand() const { return readOperand(index); }
@@ -484,9 +517,9 @@ private:
     void operatorSetLineJoin(PDFInteger lineJoin); ///< j, sets the line join
     void operatorSetMitterLimit(PDFReal mitterLimit); ///< M, sets the mitter limit
     void operatorSetLineDashPattern(); ///< d, sets the line dash pattern
-    void operatorSetRenderingIntent(PDFName intent); ///< ri, sets the rendering intent
+    void operatorSetRenderingIntent(PDFOperandName intent); ///< ri, sets the rendering intent
     void operatorSetFlatness(PDFReal flatness); ///< i, sets the flattness (number in range from 0 to 100)
-    void operatorSetGraphicState(PDFName dictionaryName); ///< gs, sets the whole graphic state (stored in resource dictionary)
+    void operatorSetGraphicState(PDFOperandName dictionaryName); ///< gs, sets the whole graphic state (stored in resource dictionary)
 
     // Special graphic state: q, Q, cm
     void operatorSaveGraphicState(); ///< q, saves the graphic state
@@ -518,8 +551,8 @@ private:
     void operatorClipEvenOdd(); ///< W*, modify current clipping path by intersecting it with current path using "Even-odd rule"
 
     // Color: CS, cs, SC, SCN, sc, scn, G, g, RG, rg, K, k
-    void operatorColorSetStrokingColorSpace(PDFName name); ///< CS, set current color space for stroking operations
-    void operatorColorSetFillingColorSpace(PDFName name); ///< cs, set current color space for filling operations
+    void operatorColorSetStrokingColorSpace(PDFOperandName name); ///< CS, set current color space for stroking operations
+    void operatorColorSetFillingColorSpace(PDFOperandName name); ///< cs, set current color space for filling operations
     void operatorColorSetStrokingColor(); ///< SC, set current stroking color
     void operatorColorSetStrokingColorN(); ///< SCN, same as SC, but also supports Pattern, Separation, DeviceN and ICCBased color spaces
     void operatorColorSetFillingColor(); ///< sc, set current filling color
@@ -540,7 +573,7 @@ private:
     void operatorTextSetWordSpacing(PDFReal wordSpacing); ///< Tw, set text word spacing
     void operatorTextSetHorizontalScale(PDFReal horizontalScaling); ///< Tz, set text horizontal scaling (in percents, 100% = normal scaling)
     void operatorTextSetLeading(PDFReal leading); ///< TL, set text leading
-    void operatorTextSetFontAndFontSize(PDFName fontName, PDFReal fontSize); ///< Tf, set text font (name from dictionary) and its size
+    void operatorTextSetFontAndFontSize(PDFOperandName fontName, PDFReal fontSize); ///< Tf, set text font (name from dictionary) and its size
     void operatorTextSetRenderMode(PDFInteger mode); ///< Tr, set text render mode
     void operatorTextSetRise(PDFReal rise); ///< Ts, set text rise
 
@@ -550,9 +583,25 @@ private:
     void operatorTextSetMatrix(PDFReal a, PDFReal b, PDFReal c, PDFReal d, PDFReal e, PDFReal f); ///< Tm, set text matrix
     void operatorTextMoveByLeading(); ///< T*, moves text by leading, equivalent to 0 leading Td
 
+    // Text showing: Tj, TJ, ', "
+    void operatorTextShowTextString(PDFOperandString text); ///< Tj, show text string
+    void operatorTextShowTextIndividualSpacing(); ///< TJ, show text, allow individual text spacing
+    void operatorTextNextLineShowText(PDFOperandString text); ///< ', move to the next line and show text ("string '" is equivalent to "T* string Tj")
+    void operatorTextSetSpacingAndShowText(PDFReal t_w, PDFReal t_c, PDFOperandString text); ///< ", move to the next line, set spacing and show text (equivalent to sequence "w1 Tw w2 Tc string '")
+
+    /// Draws the text using the text sequence (characters and advances)
+    void drawText(const TextSequence& textSequence);
+
+    /// Returns realized font for the current text font and font size (cached in m_realizedFont)
+    const QRawFont& getRealizedFont() { return m_realizedFont.get(this, &PDFPageContentProcessor::getRealizedFontImpl); }
+
+    /// Returns realized font (or empty font, if no font is set in the graphic state)
+    QRawFont getRealizedFontImpl() const;
+
     const PDFPage* m_page;
     const PDFDocument* m_document;
     const PDFDictionary* m_colorSpaceDictionary;
+    const PDFDictionary* m_fontDictionary;
 
     // Default color spaces
     PDFColorSpacePointer m_deviceGrayColorSpace;
@@ -576,6 +625,9 @@ private:
 
     /// Nesting level of the begin/end of text object
     int m_textBeginEndState;
+
+    /// Cached realized (physical) font, marked dirty when text font or font size changes
+    PDFCachedItem<QRawFont> m_realizedFont;
 };
 
 } // namespace pdf
diff --git a/PdfForQtLib/sources/pdfpainter.cpp b/PdfForQtLib/sources/pdfpainter.cpp
index 4ee56be..bf84782 100644
--- a/PdfForQtLib/sources/pdfpainter.cpp
+++ b/PdfForQtLib/sources/pdfpainter.cpp
@@ -102,6 +102,8 @@ void PDFPainter::performUpdateGraphicsState(const PDFPageContentProcessorState&
     {
         m_currentBrush.dirty();
     }
+
+    PDFPageContentProcessor::performUpdateGraphicsState(state);
 }
 
 void PDFPainter::performSaveGraphicState(ProcessOrder order)