mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-01-07 22:14:40 +01:00
696 lines
19 KiB
C++
696 lines
19 KiB
C++
//    Copyright (C) 2020-2021 Jakub Melka
//
//    This file is part of PDF4QT.
//
//    PDF4QT is free software: you can redistribute it and/or modify
//    it under the terms of the GNU Lesser General Public License as published by
//    the Free Software Foundation, either version 3 of the License, or
//    with the written consent of the copyright owner, any later version.
//
//    PDF4QT is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
#include "pdfoutputformatter.h"
|
|
|
|
#include <QMutex>
|
|
#include <QTextStream>
|
|
#include <QXmlStreamWriter>
|
|
#include <QCoreApplication>
|
|
#include <QDataStream>
|
|
#include <QStringEncoder>
|
|
|
|
#include <stack>
|
|
|
|
#ifdef Q_OS_WIN
|
|
#include "Windows.h"
|
|
#endif
|
|
|
|
namespace pdftool
|
|
{
|
|
|
|
class PDFOutputFormatterImpl
|
|
{
|
|
public:
|
|
explicit PDFOutputFormatterImpl() = default;
|
|
virtual ~PDFOutputFormatterImpl() = default;
|
|
|
|
/// Starts a new element in structure tree. Each call of this function must be
|
|
/// accompanied with matching call to \p endElement function. Element can have
|
|
/// internal name (in xml file), text description for user (as string), and reference
|
|
/// number. Also alignment can also be specified. Element type and name must
|
|
/// always be specified.
|
|
/// \param type Element type
|
|
/// \param name Internal element name (for example, xml tag if style is XML)
|
|
/// \param description Text description for user
|
|
/// \param alignment Cell alignment in table
|
|
/// \param reference Reference number
|
|
virtual void beginElement(PDFOutputFormatter::Element type, QString name, QString description = QString(), Qt::Alignment alignment = Qt::AlignLeft, int reference = 0) = 0;
|
|
|
|
/// Ends current element. Must match with a call of \p beginElement
|
|
virtual void endElement() = 0;
|
|
|
|
/// Get result string in unicode.
|
|
virtual QString getString() const = 0;
|
|
|
|
/// Ends current line (for formatters, that support it)
|
|
virtual void endl() { }
|
|
};
|
|
|
|
class PDFTextOutputFormatterImpl : public PDFOutputFormatterImpl
|
|
{
|
|
public:
|
|
PDFTextOutputFormatterImpl();
|
|
|
|
virtual void beginElement(PDFOutputFormatter::Element type, QString name, QString description, Qt::Alignment alignment, int reference) override;
|
|
virtual void endElement() override;
|
|
virtual QString getString() const override;
|
|
virtual void endl() override;
|
|
|
|
private:
|
|
static constexpr const int INDENT_STEP = 2;
|
|
|
|
void writeIndent();
|
|
|
|
struct TableCell
|
|
{
|
|
QString text;
|
|
Qt::Alignment alignment;
|
|
};
|
|
|
|
const TableCell& getTableCell(size_t row, size_t column) const;
|
|
|
|
QString m_string;
|
|
QTextStream m_streamWriter;
|
|
int m_indent;
|
|
std::stack<PDFOutputFormatter::Element> m_elementStack;
|
|
std::vector<std::vector<TableCell>> m_table;
|
|
};
|
|
|
|
class PDFXmlOutputFormatterImpl : public PDFOutputFormatterImpl
|
|
{
|
|
public:
|
|
PDFXmlOutputFormatterImpl();
|
|
|
|
virtual void beginElement(PDFOutputFormatter::Element type, QString name, QString description, Qt::Alignment alignment, int reference) override;
|
|
virtual void endElement() override;
|
|
virtual QString getString() const override;
|
|
|
|
private:
|
|
QString m_string;
|
|
QString m_namespace;
|
|
QString m_prefix;
|
|
QXmlStreamWriter m_streamWriter;
|
|
int m_depth;
|
|
std::stack<PDFOutputFormatter::Element> m_elementStack;
|
|
};
|
|
|
|
class PDFHtmlOutputFormatterImpl : public PDFOutputFormatterImpl
|
|
{
|
|
public:
|
|
PDFHtmlOutputFormatterImpl();
|
|
|
|
virtual void beginElement(PDFOutputFormatter::Element type, QString name, QString description, Qt::Alignment alignment, int reference) override;
|
|
virtual void endElement() override;
|
|
virtual QString getString() const override;
|
|
virtual void endl() override;
|
|
|
|
private:
|
|
QString m_string;
|
|
QXmlStreamWriter m_streamWriter;
|
|
int m_depth;
|
|
int m_headerDepth;
|
|
std::stack<PDFOutputFormatter::Element> m_elementStack;
|
|
};
|
|
|
|
/// Creates text formatter with empty output and zero indentation. The text
/// stream writes directly into \p m_string; the remaining members
/// (output string, element stack, table buffer) are default constructed.
PDFTextOutputFormatterImpl::PDFTextOutputFormatterImpl() :
    m_streamWriter(&m_string, QIODevice::WriteOnly),
    m_indent(0)
{
}
|
|
|
|
/// Opens a new element in the text output. Root, header, text and table
/// elements print their description on a separate, indented line and increase
/// the indentation. Table rows and cells are only buffered; they are printed
/// when the enclosing table is closed.
/// \param type Element type
/// \param name Internal element name (not used by text output)
/// \param description Text printed to the user
/// \param alignment Alignment of the table cell
/// \param reference Reference number (not used by text output)
void PDFTextOutputFormatterImpl::beginElement(PDFOutputFormatter::Element type, QString name, QString description, Qt::Alignment alignment, int reference)
{
    Q_UNUSED(name);
    Q_UNUSED(reference);

    m_elementStack.push(type);

    switch (type)
    {
        case PDFOutputFormatter::Element::Root:
        case PDFOutputFormatter::Element::Header:
        case PDFOutputFormatter::Element::Text:
        case PDFOutputFormatter::Element::Table:
        {
            // Caption line, everything nested is indented by one more step
            writeIndent();
            m_streamWriter << description << Qt::endl;
            m_indent += INDENT_STEP;
            break;
        }

        case PDFOutputFormatter::Element::TableHeaderRow:
        case PDFOutputFormatter::Element::TableRow:
        {
            // Start a new, yet empty, buffered row
            m_table.resize(m_table.size() + 1);
            break;
        }

        case PDFOutputFormatter::Element::TableColumn:
        case PDFOutputFormatter::Element::TableHeaderColumn:
        {
            // Cell must be enclosed in a row
            Q_ASSERT(!m_table.empty());

            // Cell text is padded by single space from both sides
            std::vector<TableCell>& currentRow = m_table.back();
            currentRow.push_back(TableCell{ QString(" %1 ").arg(description), alignment });
            break;
        }

        default:
        {
            Q_ASSERT(false);
            break;
        }
    }
}
|
|
|
|
void PDFTextOutputFormatterImpl::endElement()
|
|
{
|
|
PDFOutputFormatter::Element type = m_elementStack.top();
|
|
m_elementStack.pop();
|
|
|
|
switch (type)
|
|
{
|
|
case PDFOutputFormatter::Element::Text:
|
|
case PDFOutputFormatter::Element::Root:
|
|
{
|
|
m_indent -= INDENT_STEP;
|
|
break;
|
|
}
|
|
|
|
case PDFOutputFormatter::Element::Table:
|
|
{
|
|
// Print the table
|
|
const size_t rows = m_table.size();
|
|
const size_t columns = (*std::max_element(m_table.cbegin(), m_table.cend(), [](const auto& l, const auto& r) { return l.size() < r.size(); })).size();
|
|
|
|
// Detect maximal column size
|
|
std::vector<int> columnSize(columns, 0);
|
|
for (size_t row = 0; row < rows; ++row)
|
|
{
|
|
for (size_t column = 0; column < columns; ++column)
|
|
{
|
|
const TableCell& tableCell = getTableCell(row, column);
|
|
columnSize[column] = qMax(columnSize[column], tableCell.text.size());
|
|
}
|
|
}
|
|
|
|
// Print cells
|
|
m_streamWriter.setPadChar(QChar(QChar::Space));
|
|
for (size_t row = 0; row < rows; ++row)
|
|
{
|
|
for (size_t column = 0; column < columns; ++column)
|
|
{
|
|
m_streamWriter.setFieldWidth(0);
|
|
writeIndent();
|
|
|
|
const TableCell& tableCell = getTableCell(row, column);
|
|
m_streamWriter.setFieldWidth(columnSize[column]);
|
|
|
|
if (tableCell.alignment.testFlag(Qt::AlignLeft))
|
|
{
|
|
m_streamWriter.setFieldAlignment(QTextStream::AlignLeft);
|
|
}
|
|
else if (tableCell.alignment.testFlag(Qt::AlignCenter))
|
|
{
|
|
m_streamWriter.setFieldAlignment(QTextStream::AlignCenter);
|
|
}
|
|
else if (tableCell.alignment.testFlag(Qt::AlignRight))
|
|
{
|
|
m_streamWriter.setFieldAlignment(QTextStream::AlignRight);
|
|
}
|
|
|
|
m_streamWriter << tableCell.text;
|
|
}
|
|
|
|
m_streamWriter.setFieldWidth(0);
|
|
m_streamWriter << Qt::endl;
|
|
}
|
|
|
|
m_indent -= INDENT_STEP;
|
|
m_table.clear();
|
|
break;
|
|
}
|
|
|
|
case PDFOutputFormatter::Element::Header:
|
|
{
|
|
m_indent -= INDENT_STEP;
|
|
break;
|
|
}
|
|
|
|
case PDFOutputFormatter::Element::TableRow:
|
|
case PDFOutputFormatter::Element::TableHeaderRow:
|
|
case PDFOutputFormatter::Element::TableHeaderColumn:
|
|
case PDFOutputFormatter::Element::TableColumn:
|
|
{
|
|
break;
|
|
}
|
|
|
|
default:
|
|
{
|
|
Q_ASSERT(false);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns a copy of the text output buffer.
QString PDFTextOutputFormatterImpl::getString() const
{
    return m_string;
}
|
|
|
|
void PDFTextOutputFormatterImpl::endl()
|
|
{
|
|
m_streamWriter << Qt::endl;
|
|
}
|
|
|
|
void PDFTextOutputFormatterImpl::writeIndent()
|
|
{
|
|
QString str(m_indent, QChar(QChar::Space));
|
|
m_streamWriter << str;
|
|
}
|
|
|
|
/// Returns buffered table cell at given position. If the position is outside
/// of the buffered table (row index too large, or the row is shorter than
/// requested column), then reference to a shared default constructed cell
/// is returned.
/// \param row Row index
/// \param column Column index
const PDFTextOutputFormatterImpl::TableCell& PDFTextOutputFormatterImpl::getTableCell(size_t row, size_t column) const
{
    static const TableCell emptyCell;

    if (row >= m_table.size())
    {
        return emptyCell;
    }

    const std::vector<TableCell>& rowCells = m_table[row];
    if (column >= rowCells.size())
    {
        return emptyCell;
    }

    return rowCells[column];
}
|
|
|
|
/// Creates html formatter with empty output. The xml writer writes directly
/// into \p m_string. Header depth starts at one, the output string and the
/// element stack are default constructed.
PDFHtmlOutputFormatterImpl::PDFHtmlOutputFormatterImpl() :
    m_streamWriter(&m_string),
    m_depth(0),
    m_headerDepth(1)
{
}
|
|
|
|
/// Opens a new element in the html output. Root element writes the document
/// head and a top level heading; header and text elements write complete
/// paragraphs; table elements are mapped to table/tr/th/td tags. Table cells,
/// texts and headers are written completely here, so nothing is left to be
/// closed for them in \p endElement.
/// \param type Element type
/// \param name Internal element name (not used by html output)
/// \param description Text shown to the user
/// \param alignment Alignment of the table cell
/// \param reference Reference number (not used by html output)
void PDFHtmlOutputFormatterImpl::beginElement(PDFOutputFormatter::Element type, QString name, QString description, Qt::Alignment alignment, int reference)
{
    Q_UNUSED(name);
    Q_UNUSED(reference);

    m_elementStack.push(type);

    // Writes complete table cell with given tag. At most one "align" attribute
    // is written (left takes precedence over center, center over right), because
    // repeated attribute would make the markup malformed. The precedence
    // is the same as in the text formatter.
    auto writeTableCell = [this, &description, alignment](const QString& tag)
    {
        m_streamWriter.writeStartElement(tag);

        if (alignment.testFlag(Qt::AlignLeft))
        {
            m_streamWriter.writeAttribute("align", "left");
        }
        else if (alignment.testFlag(Qt::AlignCenter))
        {
            m_streamWriter.writeAttribute("align", "center");
        }
        else if (alignment.testFlag(Qt::AlignRight))
        {
            m_streamWriter.writeAttribute("align", "right");
        }

        m_streamWriter.writeCharacters(description);
        m_streamWriter.writeEndElement();
    };

    switch (type)
    {
        case PDFOutputFormatter::Element::Root:
        {
            m_streamWriter.writeStartDocument();

            QString title = QString("%1 - Processed by %2 %3").arg(description, QCoreApplication::applicationName(), QCoreApplication::applicationVersion());
            m_streamWriter.writeStartElement("html");
            m_streamWriter.writeStartElement("head");
            m_streamWriter.writeTextElement("title", title);
            m_streamWriter.writeEndElement();

            // Tags "html" and "body" are left opened, they are closed in endElement
            m_streamWriter.writeStartElement("body");
            m_streamWriter.writeStartElement("p");
            m_streamWriter.writeTextElement("h1", title);
            m_streamWriter.writeEndElement();
            break;
        }

        case PDFOutputFormatter::Element::Header:
        {
            // Just print single paragraph with header. Heading level follows
            // the nesting of headers and is limited to h1 - h6.
            ++m_headerDepth;
            int headerTagDepth = qBound(1, m_headerDepth, 6);
            QString headerTag = QString("h%1").arg(headerTagDepth);
            m_streamWriter.writeStartElement("p");
            m_streamWriter.writeTextElement(headerTag, description);
            m_streamWriter.writeEndElement();
            break;
        }

        case PDFOutputFormatter::Element::Text:
        {
            m_streamWriter.writeTextElement("p", description);
            break;
        }

        case PDFOutputFormatter::Element::Table:
        {
            m_streamWriter.writeStartElement("table");
            break;
        }

        case PDFOutputFormatter::Element::TableHeaderRow:
        case PDFOutputFormatter::Element::TableRow:
        {
            m_streamWriter.writeStartElement("tr");
            break;
        }

        case PDFOutputFormatter::Element::TableHeaderColumn:
        {
            writeTableCell("th");
            break;
        }

        case PDFOutputFormatter::Element::TableColumn:
        {
            writeTableCell("td");
            break;
        }

        default:
        {
            Q_ASSERT(false);
            break;
        }
    }

    // Increment depth by one
    ++m_depth;
}
|
|
|
|
void PDFHtmlOutputFormatterImpl::endElement()
|
|
{
|
|
PDFOutputFormatter::Element type = m_elementStack.top();
|
|
m_elementStack.pop();
|
|
--m_depth;
|
|
|
|
switch (type)
|
|
{
|
|
case PDFOutputFormatter::Element::Root:
|
|
{
|
|
Q_ASSERT(m_depth == 0);
|
|
m_streamWriter.writeEndElement();
|
|
m_streamWriter.writeEndElement();
|
|
m_streamWriter.writeEndDocument();
|
|
break;
|
|
}
|
|
|
|
case PDFOutputFormatter::Element::Header:
|
|
{
|
|
// Just decrement header depth
|
|
--m_headerDepth;
|
|
break;
|
|
}
|
|
case PDFOutputFormatter::Element::Table:
|
|
case PDFOutputFormatter::Element::TableHeaderRow:
|
|
case PDFOutputFormatter::Element::TableRow:
|
|
{
|
|
m_streamWriter.writeEndElement();
|
|
break;
|
|
}
|
|
|
|
case PDFOutputFormatter::Element::TableHeaderColumn:
|
|
case PDFOutputFormatter::Element::TableColumn:
|
|
case PDFOutputFormatter::Element::Text:
|
|
{
|
|
// Do nothing...
|
|
break;
|
|
}
|
|
|
|
default:
|
|
{
|
|
Q_ASSERT(false);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns the html output. The xml declaration written by the stream writer
/// (everything up to and including the first "?>") is removed, and a doctype
/// declaration is put at the beginning instead. If the output contains
/// no xml declaration, nothing is removed.
QString PDFHtmlOutputFormatterImpl::getString() const
{
    QString html = m_string;

    // indexOf returns -1, if the declaration is missing (for example, if the
    // root element was not written yet). Removing unconditionally would cut
    // off the first character of the output in such case.
    const auto declarationEnd = html.indexOf("?>");
    if (declarationEnd >= 0)
    {
        html.remove(0, declarationEnd + 2);
    }

    html.prepend("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">");
    return html;
}
|
|
|
|
void PDFHtmlOutputFormatterImpl::endl()
|
|
{
|
|
m_streamWriter.writeStartElement("br");
|
|
m_streamWriter.writeEndElement();
|
|
}
|
|
|
|
/// Creates xml formatter with empty output. The xml writer writes directly
/// into \p m_string and is set up to format the output automatically,
/// using indentation of two spaces.
PDFXmlOutputFormatterImpl::PDFXmlOutputFormatterImpl() :
    m_namespace("https://github.com/JakubMelka/PDF4QT"),
    m_prefix("pdftool"),
    m_streamWriter(&m_string),
    m_depth(0)
{
    m_streamWriter.setAutoFormattingIndent(2);
    m_streamWriter.setAutoFormatting(true);
}
|
|
|
|
/// Opens a new element in the xml output. Root element starts the document,
/// writes a comment with application name and version, and declares the
/// namespace. Text and table cell elements are written completely (start tag,
/// description as text content, end tag). All other elements are left opened
/// and carry the description as an attribute.
/// \param type Element type
/// \param name Name of the xml tag
/// \param description Text content (or "description" attribute) of the element
/// \param alignment Alignment (not used by xml output)
/// \param reference Reference number, written as "ref" attribute
void PDFXmlOutputFormatterImpl::beginElement(PDFOutputFormatter::Element type, QString name, QString description, Qt::Alignment alignment, int reference)
{
    Q_UNUSED(alignment);

    m_elementStack.push(type);

    const bool isLeafElement = type == PDFOutputFormatter::Element::Text ||
                               type == PDFOutputFormatter::Element::TableColumn ||
                               type == PDFOutputFormatter::Element::TableHeaderColumn;

    if (type == PDFOutputFormatter::Element::Root)
    {
        m_streamWriter.writeStartDocument();

        const QString processedBy = QString("Processed by %1 %2").arg(QCoreApplication::applicationName(), QCoreApplication::applicationVersion());
        m_streamWriter.writeComment(processedBy);

        m_streamWriter.writeNamespace(m_namespace, m_prefix);
        m_streamWriter.writeStartElement(m_namespace, name);
    }
    else if (isLeafElement)
    {
        // Complete element with text content. Reference attribute
        // is written for any nonzero reference number.
        m_streamWriter.writeStartElement(m_namespace, name);

        if (reference != 0)
        {
            m_streamWriter.writeAttribute(m_namespace, "ref", QString::number(reference));
        }

        m_streamWriter.writeCharacters(description);
        m_streamWriter.writeEndElement();
    }
    else
    {
        // Container element, it is closed in endElement. Note, that here
        // the reference attribute is written for positive numbers only.
        m_streamWriter.writeStartElement(m_namespace, name);

        if (!description.isEmpty())
        {
            m_streamWriter.writeAttribute(m_namespace, "description", description);
        }

        if (reference > 0)
        {
            m_streamWriter.writeAttribute(m_namespace, "ref", QString::number(reference));
        }
    }

    // One more element is opened
    ++m_depth;
}
|
|
|
|
void PDFXmlOutputFormatterImpl::endElement()
|
|
{
|
|
PDFOutputFormatter::Element type = m_elementStack.top();
|
|
m_elementStack.pop();
|
|
--m_depth;
|
|
|
|
switch (type)
|
|
{
|
|
case PDFOutputFormatter::Element::Text:
|
|
case PDFOutputFormatter::Element::TableColumn:
|
|
case PDFOutputFormatter::Element::TableHeaderColumn:
|
|
break;
|
|
|
|
default:
|
|
m_streamWriter.writeEndElement();
|
|
break;
|
|
}
|
|
|
|
// Do we finish the document? If yes, then tell stream writer to end the document
|
|
if (m_depth == 0)
|
|
{
|
|
m_streamWriter.writeEndDocument();
|
|
}
|
|
}
|
|
|
|
/// Returns a copy of the xml output buffer.
QString PDFXmlOutputFormatterImpl::getString() const
{
    return m_string;
}
|
|
|
|
/// Creates formatter producing output in given style. A matching implementation
/// object is allocated here; it is deleted in the destructor.
/// \param style Style of the output (text, xml or html)
PDFOutputFormatter::PDFOutputFormatter(Style style) :
    m_impl(nullptr)
{
    if (style == Style::Text)
    {
        m_impl = new PDFTextOutputFormatterImpl();
    }
    else if (style == Style::Xml)
    {
        m_impl = new PDFXmlOutputFormatterImpl();
    }
    else if (style == Style::Html)
    {
        m_impl = new PDFHtmlOutputFormatterImpl();
    }

    // Unknown style leaves the formatter without implementation
    Q_ASSERT(m_impl);
}
|
|
|
|
/// Destroys the formatter together with the implementation
/// object allocated in the constructor.
PDFOutputFormatter::~PDFOutputFormatter()
{
    delete m_impl;
}
|
|
|
|
/// Opens a new element. The call is passed, with all arguments unchanged,
/// to the implementation selected by the output style.
/// \param type Element type
/// \param name Internal element name
/// \param description Text description for user
/// \param alignment Cell alignment in table
/// \param reference Reference number
void PDFOutputFormatter::beginElement(PDFOutputFormatter::Element type,
                                      QString name,
                                      QString description,
                                      Qt::Alignment alignment,
                                      int reference)
{
    m_impl->beginElement(type, name, description, alignment, reference);
}
|
|
|
|
void PDFOutputFormatter::endElement()
|
|
{
|
|
m_impl->endElement();
|
|
}
|
|
|
|
void PDFOutputFormatter::endl()
|
|
{
|
|
m_impl->endl();
|
|
}
|
|
|
|
/// Returns output string, as provided by the implementation
/// selected by the output style.
QString PDFOutputFormatter::getString() const
{
    return m_impl->getString();
}
|
|
|
|
/// Writes text to the standard output. On Windows, the text is written as
/// unicode directly to the console; if this fails (standard output is not
/// a console screen buffer, but for example a file or a pipe), the text is
/// converted to \p encoding and written as raw bytes. On other systems, the
/// text is written via QTextStream and \p encoding is not used.
/// \param text Text to be written
/// \param encoding Encoding used, when the output is redirected (Windows only)
void PDFConsole::writeText(QString text, QStringConverter::Encoding encoding)
{
#ifdef Q_OS_WIN
    HANDLE outputHandle = GetStdHandle(STD_OUTPUT_HANDLE);
    if (!WriteConsoleW(outputHandle, text.utf16(), static_cast<DWORD>(text.size()), nullptr, nullptr))
    {
        // Write console failed. This can happen only, if outputHandle is not handle
        // to console screen buffer, but, for example a file or a pipe.

        QStringEncoder encoder(encoding);
        QByteArray encodedData = encoder.encode(text);

        // Number of written bytes must be received, if the write is not
        // overlapped - WriteFile doesn't allow both pointers to be null.
        DWORD bytesWritten = 0;
        WriteFile(outputHandle, encodedData.constData(), static_cast<DWORD>(encodedData.size()), &bytesWritten, nullptr);
    }
#else
    Q_UNUSED(encoding);
    QTextStream(stdout) << text;
#endif
}
|
|
|
|
/// Mutex locked by PDFConsole::writeError (via QMutexLocker)
/// for the whole duration of writing of an error message.
QMutex s_writeErrorMutex;
|
|
|
|
/// Writes error message, followed by single end of line, to the standard error
/// output. Empty message is ignored. Writing is guarded by a mutex, so the
/// function can be called from several threads. On Windows, the text is written
/// as unicode directly to the console; if this fails (standard error is not
/// a console screen buffer, but for example a file or a pipe), the text is
/// converted to \p encoding and written as raw bytes. On other systems, the
/// text is written via QTextStream and \p encoding is not used.
/// \param text Error message
/// \param encoding Encoding used, when the output is redirected (Windows only)
void PDFConsole::writeError(QString text, QStringConverter::Encoding encoding)
{
    if (text.isEmpty())
    {
        return;
    }

    QMutexLocker lock(&s_writeErrorMutex);

    text += "\n";

#ifdef Q_OS_WIN
    HANDLE outputHandle = GetStdHandle(STD_ERROR_HANDLE);
    if (!WriteConsoleW(outputHandle, text.utf16(), static_cast<DWORD>(text.size()), nullptr, nullptr))
    {
        // Write console failed. This can happen only, if outputHandle is not handle
        // to console screen buffer, but, for example a file or a pipe.

        QStringEncoder encoder(encoding);
        QByteArray encodedData = encoder.encode(text);

        // Number of written bytes must be received, if the write is not
        // overlapped - WriteFile doesn't allow both pointers to be null.
        DWORD bytesWritten = 0;
        WriteFile(outputHandle, encodedData.constData(), static_cast<DWORD>(encodedData.size()), &bytesWritten, nullptr);
    }
#else
    Q_UNUSED(encoding);

    // Errors belong to the standard error output, as on Windows. End of line
    // is already appended to the text, so the stream is only flushed.
    QTextStream stream(stderr);
    stream << text << Qt::flush;
#endif
}
|
|
|
|
void PDFConsole::writeData(const QByteArray& data)
|
|
{
|
|
if (!data.isEmpty())
|
|
{
|
|
QTextStream stream(stdout);
|
|
stream.device()->write(data);
|
|
}
|
|
}
|
|
|
|
} // pdftool
|