own implementation of tld

This commit is contained in:
Martin Rotter 2021-04-28 09:24:08 +02:00
parent df6368d3d5
commit c13cf31654
4 changed files with 13777 additions and 4 deletions

View File

@ -5,6 +5,8 @@
<file>text/COPYING_MIT</file>
<file>text/COPYING_GNU_GPL</file>
<file>text/COPYING_GNU_GPL_HTML</file>
<file>scripts/public_suffix_list.dat</file>
<file>graphics/rssguard.ico</file>

File diff suppressed because it is too large Load Diff

View File

@ -52,6 +52,8 @@
#include "network-web/adblock/adblockrequestinfo.h"
#include "network-web/adblock/adblocksubscription.h"
#include "network-web/urltld.cpp"
#include <QRegularExpression>
#include <QString>
#include <QStringList>
@ -59,14 +61,14 @@
#include <QWebEnginePage>
static QString toSecondLevelDomain(const QUrl& url) {
const QString topLevelDomain = url.topLevelDomain();
const QString tld = topLevelDomain(url);
const QString urlHost = url.host();
if (topLevelDomain.isEmpty() || urlHost.isEmpty()) {
if (tld.isEmpty() || urlHost.isEmpty()) {
return QString();
}
QString domain = urlHost.left(urlHost.size() - topLevelDomain.size());
QString domain = urlHost.left(urlHost.size() - tld.size());
if (domain.count(QL1C('.')) == 0) {
return urlHost;
@ -76,7 +78,7 @@ static QString toSecondLevelDomain(const QUrl& url) {
domain = domain.mid(domain.indexOf(QL1C('.')) + 1);
}
return domain + topLevelDomain;
return domain + tld;
}
AdBlockRule::AdBlockRule(const QString& filter, AdBlockSubscription* subscription)

View File

@ -0,0 +1,89 @@
// For license of this file, see <project-root-folder>/LICENSE.md.
#include <QHash>
#include <QUrl>
#include <QFile>
#include <QStringList>
#include <QStringLiteral>
#include <QRegularExpression>
// Rules read from the bundled public suffix list. Starts empty; containsTldEntry()
// fills it through loadTlds() on first use and sorts it so it can be binary-searched.
static QStringList s_tlds = {};
static void loadTlds() {
QFile fl(QStringLiteral(":/scripts/public_suffix_list.dat"));
QByteArray data;
if (fl.open(QIODevice::OpenModeFlag::Text | QIODevice::OpenModeFlag::Unbuffered | QIODevice::OpenModeFlag::ReadOnly)) {
data = fl.readAll();
fl.close();
}
QString str_data = QString::fromUtf8(data);
s_tlds << str_data.split(QStringLiteral("\n"), Qt::SplitBehaviorFlags::SkipEmptyParts).filter(QRegularExpression("^[^/].+$"));
}
// Returns true if the rule table contains exactly the given entry.
//
// While the table is empty, the rules are loaded and sorted first, so that the
// lookup itself can be done with binary search.
static bool containsTldEntry(const QString& entry) {
  const bool table_is_empty = s_tlds.isEmpty();

  if (table_is_empty) {
    loadTlds();

    const auto ordered_before = [](const QString& first, const QString& second) {
      return first.compare(second) < 0;
    };

    std::sort(s_tlds.begin(), s_tlds.end(), ordered_before);
  }

  const bool found = std::binary_search(s_tlds.begin(), s_tlds.end(), entry);

  return found;
}
static bool isEffectiveTld(const QString& domain) {
// for domain 'foo.bar.com':
// 1. return if TLD table contains 'foo.bar.com'
if (containsTldEntry(domain)) {
return true;
}
if (domain.contains(QLatin1Char('.'))) {
int count = domain.size() - domain.indexOf(QLatin1Char('.'));
QString wild_card_domain;
wild_card_domain.reserve(count + 1);
wild_card_domain.append(QLatin1Char('*'));
wild_card_domain.append(domain.rightRef(count));
// 2. if table contains '*.bar.com',
// test if table contains '!foo.bar.com'
if (containsTldEntry(wild_card_domain)) {
QString exception_domain;
exception_domain.reserve(domain.size() + 1);
exception_domain.append(QLatin1Char('!'));
exception_domain.append(domain);
return !containsTldEntry(exception_domain);
}
}
return false;
}
// Returns the effective top-level domain of the URL's host, including the leading
// dot (e.g. ".co.uk"), or an empty string if the host is empty or no rule matches.
//
// Only the host is examined. Splitting the whole URL string on '.' would mix the
// scheme, path and query into the labels (e.g. "com/index" or "html"), so rules
// would fail to match or match the wrong text. The result must also be a suffix
// of url.host(), because toSecondLevelDomain() subtracts its length from the host.
static QString topLevelDomain(const QUrl& url) {
  const QString host = url.host().toLower();
  const QStringList sections = host.split(QLatin1Char('.'), Qt::SplitBehaviorFlags::SkipEmptyParts);

  if (sections.isEmpty()) {
    return QString();
  }

  QString level, tld;

  // Grow the candidate suffix label by label from the right and remember the last
  // one that matched, so the longest matching suffix wins.
  for (int j = sections.count() - 1; j >= 0; j--) {
    level.prepend(QLatin1Char('.') + sections.at(j));

    // Look up the candidate without its leading dot.
    if (isEffectiveTld(level.mid(1))) {
      tld = level;
    }
  }

  return tld;
}