From b01a135dfd114a0fa6d2e09d10377049634df9da Mon Sep 17 00:00:00 2001 From: Jakub Melka Date: Sun, 10 Feb 2019 18:32:15 +0100 Subject: [PATCH] Operator list --- PdfForQtLib/sources/pdfflatarray.h | 18 +++++ PdfForQtLib/sources/pdfparser.h | 3 +- PdfForQtLib/sources/pdfrenderer.cpp | 61 ++++++++++++++- PdfForQtLib/sources/pdfrenderer.h | 112 ++++++++++++++++++++++++++++ 4 files changed, 191 insertions(+), 3 deletions(-) diff --git a/PdfForQtLib/sources/pdfflatarray.h b/PdfForQtLib/sources/pdfflatarray.h index aa269d5..f3ed76c 100644 --- a/PdfForQtLib/sources/pdfflatarray.h +++ b/PdfForQtLib/sources/pdfflatarray.h @@ -104,6 +104,24 @@ public: } } + void clear() + { + m_flatBlockEndIterator = m_flatBlock.begin(); + m_variableBlock.clear(); + } + + void push_back(T object) + { + if (m_flatBlockEndIterator != m_flatBlock.cend()) + { + *m_flatBlockEndIterator++ = std::move(object); + } + else + { + m_variableBlock.emplace_back(std::move(object)); + } + } + private: size_t getFlatBlockSize() const { return std::distance(m_flatBlock.cbegin(), std::array::const_iterator(m_flatBlockEndIterator)); } diff --git a/PdfForQtLib/sources/pdfparser.h b/PdfForQtLib/sources/pdfparser.h index c7a0ebd..5ecfeca 100644 --- a/PdfForQtLib/sources/pdfparser.h +++ b/PdfForQtLib/sources/pdfparser.h @@ -201,8 +201,7 @@ private: const char* m_end; }; -/// Parsing context. Used for example to detect cyclic reference errors. Can handle multiple threads -/// simultaneously (e.g class is thread safe). +/// Parsing context. Used for example to detect cyclic reference errors. class PDFParsingContext { Q_DECLARE_TR_FUNCTIONS(pdf::PDFParsingContext) diff --git a/PdfForQtLib/sources/pdfrenderer.cpp b/PdfForQtLib/sources/pdfrenderer.cpp index 9601d85..6904079 100644 --- a/PdfForQtLib/sources/pdfrenderer.cpp +++ b/PdfForQtLib/sources/pdfrenderer.cpp @@ -21,6 +21,27 @@ namespace pdf { +// Graphic state operators - mapping from PDF name to the enum, split into groups. 
+// Please see Table 4.1 in PDF Reference 1.7, chapter 4.1 - Graphic Objects. +// +// General graphic state: w, J, j, M, d, ri, i, gs +// Special graphic state: q, Q, cm +// Path construction: m, l, c, v, y, h, re +// Path painting: S, s, F, f, f*, B, B*, b, b*, n +// Clipping paths: W, W* +// Text object: BT, ET +// Text state: Tc, Tw, Tz, TL, Tf, Tr, Ts +// Text positioning: Td, TD, Tm, T* +// Text showing: Tj, TJ, ', " +// Type 3 font: d0, d1 +// Color: CS, cs, SC, SCN, sc, scn, G, g, RG, rg, K, k +// Shading pattern: sh +// Inline images: BI, ID, EI +// XObject: Do +// Marked content: MP, DP, BMC, BDC, EMC +// Compatibility: BX, EX + + PDFRenderer::PDFRenderer(const PDFDocument* document) : m_document(document), m_features(Antialasing | TextAntialiasing) @@ -94,7 +115,45 @@ QList PDFPageContentProcessor::processContentStream(const PDFStr { QByteArray content = m_document->getDecodedStream(stream); - return QList(); + PDFLexicalAnalyzer parser(content.constBegin(), content.constEnd()); + + QList errors; + while (!parser.isAtEnd()) + { + try + { + PDFLexicalAnalyzer::Token token = parser.fetch(); + switch (token.type) + { + case PDFLexicalAnalyzer::TokenType::Command: + { + // Process the command, then clear the operand stack + processCommand(token.data.toByteArray(), errors); + m_operands.clear(); + break; + } + + case PDFLexicalAnalyzer::TokenType::EndOfFile: + { + // Do nothing, just break, we are at the end + break; + } + + default: + { + // Push the operand onto the operand stack + m_operands.push_back(std::move(token)); + break; + } + } + } + catch (const PDFParserException& exception) + { + errors.append(PDFRenderError(RenderErrorType::Error, exception.getMessage())); + } + } + + return errors; } PDFPageContentProcessor::PDFPageContentProcessorState::PDFPageContentProcessorState() : diff --git a/PdfForQtLib/sources/pdfrenderer.h b/PdfForQtLib/sources/pdfrenderer.h index fade81e..e924971 100644 --- a/PdfForQtLib/sources/pdfrenderer.h +++ 
b/PdfForQtLib/sources/pdfrenderer.h @@ -19,6 +19,7 @@ #define PDFRENDERER_H #include "pdfpage.h" +#include "pdfparser.h" #include "pdfcolorspaces.h" #include @@ -83,6 +84,114 @@ class PDFPageContentProcessor public: explicit PDFPageContentProcessor(const PDFPage* page, const PDFDocument* document); + enum class Operator + { + // General graphic state: w, J, j, M, d, ri, i, gs + SetLineWidth, ///< w, sets the line width + SetLineCap, ///< J, sets the line cap + SetLineJoin, ///< j, sets the line join + SetMitterLimit, ///< M, sets the miter limit + SetLineDashPattern, ///< d, sets the line dash pattern + SetRenderingIntent, ///< ri, sets the rendering intent + SetFlatness, ///< i, sets the flatness (number in range from 0 to 100) + SetGraphicState, ///< gs, sets the whole graphic state (stored in resource dictionary) + + // Special graphic state: q, Q, cm + SaveGraphicState, ///< q, saves the graphic state + RestoreGraphicState, ///< Q, restores the graphic state + AdjustCurrentTransformationMatrix, ///< cm, modify the current transformation matrix by matrix multiplication + + // Path construction: m, l, c, v, y, h, re (NOTE(review): this group declares no enumerator for 're' - confirm whether that is intentional) + MoveCurrentPoint, ///< m, begin a new subpath by moving to the desired point + LineTo, ///< l, appends a straight line segment to the subpath + Bezier123To, ///< c, appends a Bézier curve with control points 1, 2, 3 + Bezier23To, ///< v, appends a Bézier curve with control points 2, 3 + Bezier13To, ///< y, appends a Bézier curve with control points 1, 3 + EndSubpath, ///< h, ends current subpath by adding straight line segment from the last point to the beginning + + // Path painting: S, s, f, F, f*, B, B*, b, b*, n + StrokePath, ///< S, stroke the path + CloseAndStrokePath, ///< s, close the path and then stroke (equivalent of operators h S) + FillPathWinding, ///< f, close the path, and then fill the path using "Non zero winding number rule" + FillPathWinding2, ///< F, same as previous, see PDF Reference 1.7, Table 4.10 + FillPathEvenOdd, ///< 
f*, fill the path using "Even-odd rule" + StrokeAndFillWinding, ///< B, stroke and fill path, using "Non zero winding number rule" + StrokeAndFillEvenOdd, ///< B*, stroke and fill path, using "Even-odd rule" + CloseAndStrokeAndFillWinding, ///< b, close, stroke and fill path, using "Non zero winding number rule", equivalent of operators h B + CloseAndStrokeAndFillEvenOdd, ///< b*, close, stroke and fill path, using "Even-odd rule", equivalent of operators h B* + ClearPath, ///< n, clear path (close current path), "no-operation", used with clipping + + // Clipping paths: W, W* + ClipWinding, ///< W, modify current clipping path by intersecting it with current path using "Non zero winding number rule" + ClipEvenOdd, ///< W*, modify current clipping path by intersecting it with current path using "Even-odd rule" + + // Text object: BT, ET + TextBegin, ///< BT, begin text object, initialize text matrices, cannot be nested + TextEnd, ///< ET, end text object, cannot be nested + + // Text state: Tc, Tw, Tz, TL, Tf, Tr, Ts + TextSetCharacterSpacing, ///< Tc, set text character spacing + TextSetWordSpacing, ///< Tw, set text word spacing + TextSetHorizontalScale, ///< Tz, set text horizontal scaling (in percent, 100% = normal scaling) + TextSetLeading, ///< TL, set text leading + TextSetFontAndFontSize, ///< Tf, set text font (name from dictionary) and its size + TextSetRenderMode, ///< Tr, set text render mode + TextSetRise, ///< Ts, set text rise + + // Text positioning: Td, TD, Tm, T* + TextMoveByOffset, ///< Td, move by offset + TextSetLeadingAndMoveByOffset, ///< TD, sets the text leading and moves by offset, x y TD is equivalent to sequence -y TL x y Td + TextSetMatrix, ///< Tm, set text matrix + TextMoveByLeading, ///< T*, moves text by leading, equivalent to 0 leading Td + + // Text showing: Tj, TJ, ', " + TextShowTextString, ///< Tj, show text string + TextShowTextIndividualSpacing, ///< TJ, show text, allow individual text spacing + TextNextLineShowText, ///< ', 
move to the next line and show text ("string '" is equivalent to "T* string Tj") + TextSetSpacingAndShowText, ///< ", move to the next line, set spacing and show text (equivalent to sequence "w1 Tw w2 Tc string '") + + // Type 3 font: d0, d1 + Type3FontSetOffset, ///< d0, set width information, see PDF 1.7 Reference, Table 5.10 + Type3FontSetOffsetAndBB, ///< d1, set offset and glyph bounding box + + // Color: CS, cs, SC, SCN, sc, scn, G, g, RG, rg, K, k + ColorSetStrokingColorSpace, ///< CS, set current color space for stroking operations + ColorSetFillingColorSpace, ///< cs, set current color space for filling operations + ColorSetStrokingColor, ///< SC, set current stroking color + ColorSetStrokingColorN, ///< SCN, same as SC, but also supports Pattern, Separation, DeviceN and ICCBased color spaces + ColorSetFillingColor, ///< sc, set current filling color + ColorSetFillingColorN, ///< scn, same as sc, but also supports Pattern, Separation, DeviceN and ICCBased color spaces + ColorSetDeviceGrayStroking, ///< G, set DeviceGray color space for stroking color and set color + ColorSetDeviceGrayFilling, ///< g, set DeviceGray color space for filling color and set color + ColorSetDeviceRGBStroking, ///< RG, set DeviceRGB color space for stroking color and set color + ColorSetDeviceRGBFilling, ///< rg, set DeviceRGB color space for filling color and set color + ColorSetDeviceCMYKStroking, ///< K, set DeviceCMYK color space for stroking color and set color + ColorSetDeviceCMYKFilling, ///< k, set DeviceCMYK color space for filling color and set color + + // Shading pattern: sh + ShadingPaintShape, ///< sh, paint shape + + // Inline images: BI, ID, EI + InlineImageBegin, ///< BI, begin inline image + InlineImageData, ///< ID, inline image data + InlineImageEnd, ///< EI, end of inline image + + // XObject: Do + PaintXObject, ///< Do, paint the X Object (image, form, ...) 
+ + // Marked content: MP, DP, BMC, BDC, EMC + MarkedContentPoint, ///< MP, marked content point + MarkedContentPointWithProperties, ///< DP, marked content point with properties + MarkedContentBegin, ///< BMC, begin of sequence of marked content + MarkedContentBeginWithProperties, ///< BDC, begin of sequence of marked content with properties + MarkedContentEnd, ///< EMC, end of marked content sequence + + // Compatibility: BX, EX + CompatibilityBegin, ///< BX, Compatibility mode begin (unrecognized operators are ignored) + CompatibilityEnd, ///< EX, Compatibility mode end + Invalid ///< Invalid operator, use for error reporting + }; + /// Process the contents of the page QList processContents(); @@ -118,6 +227,9 @@ private: const PDFPage* m_page; const PDFDocument* m_document; + /// Array with current operand arguments + PDFFlatArray m_operands; + /// Stack with current graphic states std::stack m_stack; };