mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
Tool for exporting the internal format of a PDF to an XML file
This commit is contained in:
@@ -18,6 +18,7 @@
|
||||
#include "pdfencoding.h"
|
||||
|
||||
#include <QTimeZone>
|
||||
#include <QTextCodec>
|
||||
|
||||
#include <cctype>
|
||||
|
||||
@@ -2187,6 +2188,21 @@ bool PDFEncoding::canConvertToEncoding(const QString& string, PDFEncoding::Encod
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PDFEncoding::canConvertFromEncoding(const QByteArray& stream, PDFEncoding::Encoding encoding)
|
||||
{
|
||||
const encoding::EncodingTable* table = getTableForEncoding(encoding);
|
||||
for (const unsigned char index : stream)
|
||||
{
|
||||
QChar character = (*table)[index];
|
||||
if (character == QChar(0xfffd))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
QString PDFEncoding::convertTextString(const QByteArray& stream)
|
||||
{
|
||||
if (hasUnicodeLeadMarkings(stream))
|
||||
@@ -2333,6 +2349,60 @@ const encoding::EncodingTable* PDFEncoding::getTableForEncoding(Encoding encodin
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Tries to convert a byte string to a unicode string using heuristics.
/// The attempts are made in this order:
///   1. UTF-16 (if the stream has UTF-16 lead markings), byte order chosen by the mark,
///   2. UTF-8 (if the stream has UTF-8 lead markings),
///   3. PDFDocEncoding (if every byte has a defined character),
///   4. fallback - upper case hexadecimal dump of the stream.
/// \param stream Byte stream to be converted
/// \param isBinary If not null, it is set to false at entry and to true only
///        when the hexadecimal fallback is returned
/// \returns Decoded unicode string, or hexadecimal representation of the stream
QString PDFEncoding::convertSmartFromByteStringToUnicode(const QByteArray& stream, bool* isBinary)
{
    if (isBinary)
    {
        *isBinary = false;
    }

    // Decodes the whole stream with the named codec. Every attempt gets its
    // own converter state, so invalid character counters and flags left by
    // a failed attempt cannot spoil the following one. A missing codec is
    // treated as a failed attempt instead of dereferencing a null pointer.
    auto tryDecode = [&stream](const char* codecName, QString& decoded) -> bool
    {
        QTextCodec* codec = QTextCodec::codecForName(codecName);
        if (!codec)
        {
            return false;
        }

        QTextCodec::ConverterState state = { };
        decoded = codec->toUnicode(stream.constData(), stream.length(), &state);
        return state.invalidChars == 0;
    };

    QString text;

    if (hasUnicodeLeadMarkings(stream))
    {
        // Byte order mark FF FE denotes little endian data; everything else
        // (including the standard FE FF mark) is tried as big endian first.
        const bool hasLittleEndianMark = stream.size() >= 2 &&
                                         static_cast<unsigned char>(stream[0]) == 0xFF &&
                                         static_cast<unsigned char>(stream[1]) == 0xFE;
        const char* preferredCodec = hasLittleEndianMark ? "UTF-16LE" : "UTF-16BE";
        const char* fallbackCodec = hasLittleEndianMark ? "UTF-16BE" : "UTF-16LE";

        if (tryDecode(preferredCodec, text) || tryDecode(fallbackCodec, text))
        {
            return text;
        }
    }

    if (hasUTF8LeadMarkings(stream) && tryDecode("UTF-8", text))
    {
        return text;
    }

    if (canConvertFromEncoding(stream, Encoding::PDFDoc))
    {
        return convert(stream, Encoding::PDFDoc);
    }

    // Nothing matched - report the stream as binary and return its hex dump.
    if (isBinary)
    {
        *isBinary = true;
    }
    return QString::fromLatin1(stream.toHex()).toUpper();
}
|
||||
|
||||
bool PDFEncoding::hasUnicodeLeadMarkings(const QByteArray& stream)
|
||||
{
|
||||
if (stream.size() >= 2)
|
||||
|
@@ -77,6 +77,13 @@ public:
|
||||
/// \param encoding Encoding used in verification of conversion
|
||||
static bool canConvertToEncoding(const QString& string, Encoding encoding);
|
||||
|
||||
/// Checks, if stream can be converted to string using encoding (i.e. all
|
||||
/// characters are defined). If all characters are valid, then true is
|
||||
/// returned. This is only a guess.
|
||||
/// \param stream Stream
|
||||
/// \param encoding Source encoding, in which the stream is assumed to be stored
|
||||
static bool canConvertFromEncoding(const QByteArray& stream, Encoding encoding);
|
||||
|
||||
/// Convert text string to the unicode string, using either PDFDocEncoding,
|
||||
/// or UTF-16BE encoding. Please see PDF Reference 1.7, Chapter 3.8.1. If
|
||||
/// UTF-16BE encoding is used, then leading bytes should be 0xFE and 0xFF
|
||||
@@ -104,6 +111,15 @@ public:
|
||||
/// \param encoding Encoding
|
||||
static const encoding::EncodingTable* getTableForEncoding(Encoding encoding);
|
||||
|
||||
/// Tries to convert stream to unicode string. Stream can be binary.
|
||||
/// If this is the case, then hexadecimal representation of stream is returned.
|
||||
/// Function checks heuristically whether the stream can be converted to
|
||||
/// unicode, so the result is not always reliable.
|
||||
/// \param stream Stream
|
||||
/// \param isBinary If specified, it is set to true if conversion failed (hexadecimal representation is returned), false otherwise
|
||||
/// \returns Unicode string or string converted to hexadecimal representation
|
||||
static QString convertSmartFromByteStringToUnicode(const QByteArray& stream, bool* isBinary);
|
||||
|
||||
private:
|
||||
/// Returns true, if byte array has UTF-16BE/LE unicode marking bytes at the
|
||||
/// stream start. If they are present, then byte stream is probably encoded
|
||||
|
@@ -111,7 +111,7 @@ struct PDFInplaceString
|
||||
};
|
||||
|
||||
/// Reference to the string implementations
|
||||
struct PDFStringRef
|
||||
struct PDFFORQTLIBSHARED_EXPORT PDFStringRef
|
||||
{
|
||||
const PDFInplaceString* inplaceString = nullptr;
|
||||
const PDFString* memoryString = nullptr;
|
||||
|
@@ -32,7 +32,7 @@ namespace pdf
|
||||
{
|
||||
|
||||
/// Abstract visitor, can iterate through object tree
|
||||
class PDFAbstractVisitor
|
||||
class PDFFORQTLIBSHARED_EXPORT PDFAbstractVisitor
|
||||
{
|
||||
public:
|
||||
|
||||
|
Reference in New Issue
Block a user