properly decode HTML data according to declared charset for lite browser

This commit is contained in:
Martin Rotter 2023-12-13 10:44:36 +01:00
parent 8b91d4709f
commit 9c7f550942
5 changed files with 65 additions and 17 deletions

View File

@ -19,10 +19,13 @@
#include <QContextMenuEvent>
#include <QFileIconProvider>
#include <QScrollBar>
#include <QTextCodec>
#include <QTimer>
#include <QtConcurrent>
TextBrowserViewer::TextBrowserViewer(QWidget* parent)
: QTextBrowser(parent), m_resourcesEnabled(false), m_resourceDownloader(new Downloader(this)), m_loadedResources({}),
: QTextBrowser(parent), m_resourcesEnabled(false), m_resourceDownloader(new Downloader()),
m_resourceDownloaderThread(new QThread(this)), m_loadedResources({}),
m_placeholderImage(qApp->icons()->miscPixmap(QSL("image-placeholder"))),
m_placeholderImageError(qApp->icons()->miscPixmap(QSL("image-placeholder-error"))),
m_downloader(new Downloader(this)), m_document(new TextBrowserDocument(this)) {
@ -38,17 +41,28 @@ TextBrowserViewer::TextBrowserViewer(QWidget* parent)
setResourcesEnabled(qApp->settings()->value(GROUP(Messages), SETTING(Messages::ShowResourcesInArticles)).toBool());
setDocument(m_document.data());
m_resourceDownloader->moveToThread(m_resourceDownloaderThread);
m_resourceDownloaderThread->start();
connect(this, &TextBrowserViewer::reloadDocument, this, [this]() {
const auto scr = verticalScrollBarPosition();
setHtmlPrivate(html(), m_currentUrl);
setVerticalScrollBarPosition(scr);
});
connect(m_resourceDownloader.data(), &Downloader::completed, this, &TextBrowserViewer::resourceDownloaded);
connect(m_resourceDownloader, &Downloader::completed, this, &TextBrowserViewer::resourceDownloaded);
connect(this, &QTextBrowser::anchorClicked, this, &TextBrowserViewer::onAnchorClicked);
connect(this, QOverload<const QUrl&>::of(&QTextBrowser::highlighted), this, &TextBrowserViewer::linkMouseHighlighted);
}
// Shuts down the worker thread that hosts the resource downloader and
// releases the downloader itself.
TextBrowserViewer::~TextBrowserViewer() {
  // The downloader has no QObject parent and was moved to the worker thread,
  // so it must be destroyed in that thread. Post the deferred delete before
  // asking the event loop to exit; Qt processes pending deferred deletes when
  // the thread finishes.
  // NOTE(review): if the thread never started, the deferred delete is not
  // processed - same as before this change; confirm whether that can happen.
  m_resourceDownloader->deleteLater();

  if (m_resourceDownloaderThread->isRunning()) {
    m_resourceDownloaderThread->quit();

    // quit() only requests that the event loop exits. The QThread object is a
    // child of this viewer and gets destroyed right after this destructor, so
    // block until the thread has really finished; destroying a QThread that is
    // still running is a fatal error in Qt.
    m_resourceDownloaderThread->wait();
  }
}
QSize TextBrowserViewer::sizeHint() const {
auto doc_size = document()->size().toSize();
@ -172,13 +186,17 @@ void TextBrowserViewer::setUrl(const QUrl& url) {
else {
QEventLoop loop;
connect(m_downloader.data(), &Downloader::completed, &loop, &QEventLoop::quit);
connect(m_downloader.data(),
&Downloader::completed,
&loop,
&QEventLoop::quit,
Qt::ConnectionType(Qt::ConnectionType::UniqueConnection | Qt::ConnectionType::AutoConnection));
m_downloader->manipulateData(url.toString(), QNetworkAccessManager::Operation::GetOperation, {}, 5000);
loop.exec();
const auto net_error = m_downloader->lastOutputError();
const QString content_type = m_downloader->lastContentType().toString();
const QString content_type = m_downloader->lastContentType();
if (net_error != QNetworkReply::NetworkError::NoError) {
is_error = true;
@ -189,7 +207,7 @@ void TextBrowserViewer::setUrl(const QUrl& url) {
html_str = QSL("<img src=\"%1\">").arg(nonconst_url.toString());
}
else {
html_str = QString::fromUtf8(m_downloader->lastOutputData());
html_str = decodeHtmlData(m_downloader->lastOutputData(), content_type);
}
}
}
@ -199,6 +217,22 @@ void TextBrowserViewer::setUrl(const QUrl& url) {
emit loadingFinished(!is_error);
}
// Decodes raw downloaded bytes into a QString using the charset declared in
// the given Content-Type value.
//
// data          raw payload as received from the network.
// content_type  value of the Content-Type header, e.g.
//               "text/html; charset=windows-1250".
//
// Returns the decoded text. If no charset is declared, or no codec is
// available for it, the data is decoded as UTF-8.
QString TextBrowserViewer::decodeHtmlData(const QByteArray& data, const QString& content_type) const {
  // Media type parameter names are case-insensitive and the value may be
  // quoted (charset="utf-8"), so match leniently. The hyphen is placed last in
  // the character class so it cannot be read as a range. The expression is
  // compiled only once.
  static const QRegularExpression charset_regex(QSL("charset=[\"']?([0-9a-zA-Z_-]+)"),
                                                QRegularExpression::PatternOption::CaseInsensitiveOption);

  const QString found_charset = charset_regex.match(content_type).captured(1);

  // Codec names are plain ASCII, no need to go through the locale encoding.
  QTextCodec* codec = found_charset.isEmpty() ? nullptr : QTextCodec::codecForName(found_charset.toLatin1());

  if (codec == nullptr) {
    // No suitable codec for this encoding was found.
    // Use UTF-8.
    qWarningNN << LOGSEC_GUI << "Did not find charset for content-type" << QUOTE_W_SPACE_DOT(content_type);
    return QString::fromUtf8(data);
  }
  else {
    qDebugNN << LOGSEC_GUI << "Found charset for content-type" << QUOTE_W_SPACE_DOT(content_type);
    return codec->toUnicode(data);
  }
}
// Returns the HTML string currently stored in m_currentHtml.
QString TextBrowserViewer::html() const {
return m_currentHtml;
}
@ -468,10 +502,20 @@ void TextBrowserViewer::downloadNextNeededResource() {
else {
QUrl res = m_neededResources.takeFirst();
m_resourceDownloader.data()->manipulateData(qApp->web()->unescapeHtml(res.toString()),
QNetworkAccessManager::Operation::GetOperation,
{},
5000);
QMetaObject::invokeMethod(m_resourceDownloader,
"manipulateData",
Qt::ConnectionType::QueuedConnection,
qApp->web()->unescapeHtml(res.toString()),
QNetworkAccessManager::Operation::GetOperation,
QByteArray(),
5000);
/*
m_resourceDownloader.data()->manipulateData(qApp->web()->unescapeHtml(res.toString()),
QNetworkAccessManager::Operation::GetOperation,
{},
5000);
*/
}
}

View File

@ -40,6 +40,7 @@ class TextBrowserViewer : public QTextBrowser, public WebViewer {
public:
explicit TextBrowserViewer(QWidget* parent = nullptr);
virtual ~TextBrowserViewer();
QVariant loadOneResource(int type, const QUrl& name);
@ -78,7 +79,7 @@ class TextBrowserViewer : public QTextBrowser, public WebViewer {
void resourceDownloaded(const QUrl& url,
QNetworkReply::NetworkError status,
int http_code,
const QByteArray &contents = QByteArray());
const QByteArray& contents = QByteArray());
signals:
void reloadDocument();
@ -96,11 +97,14 @@ class TextBrowserViewer : public QTextBrowser, public WebViewer {
void setHtmlPrivate(const QString& html, const QUrl& base_url);
BlockingResult blockedWithAdblock(const QUrl& url);
QString decodeHtmlData(const QByteArray& data, const QString& content_type) const;
private:
QScopedPointer<Downloader> m_downloader;
bool m_resourcesEnabled;
QList<QUrl> m_neededResources; // All URLs here must be resolved.
QScopedPointer<Downloader> m_resourceDownloader;
Downloader* m_resourceDownloader;
QThread* m_resourceDownloaderThread;
QMap<QUrl, QByteArray> m_loadedResources; // All URLs here must be resolved.
QPixmap m_placeholderImage;
QPixmap m_placeholderImageError;

View File

@ -233,7 +233,7 @@ void Downloader::finished() {
m_lastCookies = {};
}
m_lastContentType = reply->header(QNetworkRequest::KnownHeaders::ContentTypeHeader);
m_lastContentType = reply->header(QNetworkRequest::KnownHeaders::ContentTypeHeader).toString();
m_lastOutputError = reply->error();
m_lastHttpStatusCode = reply->attribute(QNetworkRequest::Attribute::HttpStatusCodeAttribute).toInt();
m_lastHeaders.clear();
@ -382,7 +382,7 @@ QList<QNetworkCookie> Downloader::lastCookies() const {
return m_lastCookies;
}
QVariant Downloader::lastContentType() const {
QString Downloader::lastContentType() const {
return m_lastContentType;
}

View File

@ -27,7 +27,7 @@ class Downloader : public QObject {
QByteArray lastOutputData() const;
QNetworkReply::NetworkError lastOutputError() const;
QList<HttpResponse> lastOutputMultipartData() const;
QVariant lastContentType() const;
QString lastContentType() const;
QList<QNetworkCookie> lastCookies() const;
int lastHttpStatusCode() const;
QMap<QString, QString> lastHeaders() const;
@ -114,7 +114,7 @@ class Downloader : public QObject {
QList<HttpResponse> m_lastOutputMultipartData;
QNetworkReply::NetworkError m_lastOutputError;
int m_lastHttpStatusCode;
QVariant m_lastContentType;
QString m_lastContentType;
QList<QNetworkCookie> m_lastCookies;
QMap<QString, QString> m_lastHeaders;
};

View File

@ -294,7 +294,7 @@ NetworkResult NetworkFactory::performNetworkOperation(const QString& url,
output = downloader.lastOutputData();
result.m_networkError = downloader.lastOutputError();
result.m_contentType = downloader.lastContentType().toString();
result.m_contentType = downloader.lastContentType();
result.m_cookies = downloader.lastCookies();
result.m_httpCode = downloader.lastHttpStatusCode();
result.m_headers = downloader.lastHeaders();
@ -335,7 +335,7 @@ NetworkResult NetworkFactory::performNetworkOperation(const QString& url,
output = downloader.lastOutputMultipartData();
result.m_networkError = downloader.lastOutputError();
result.m_contentType = downloader.lastContentType().toString();
result.m_contentType = downloader.lastContentType();
result.m_cookies = downloader.lastCookies();
result.m_httpCode = downloader.lastHttpStatusCode();
result.m_headers = downloader.lastHeaders();