save work

This commit is contained in:
Martin Rotter 2023-10-18 14:57:36 +02:00
parent e8090ce68c
commit 114dbef367
14 changed files with 683 additions and 49 deletions

View File

@ -2,4 +2,10 @@
#include "exceptions/feedrecognizedbutfailedexception.h"
// Creates the exception with the given message only; the message is forwarded
// to the ApplicationException base class.
FeedRecognizedButFailedException::FeedRecognizedButFailedException(const QString &message) : ApplicationException(message) {}
// Creates the exception with the given message and an arbitrary payload.
//
// The message is forwarded to the ApplicationException base class, the payload
// is stored and can be read back later via arbitraryData().
FeedRecognizedButFailedException::FeedRecognizedButFailedException(const QString& message,
const QVariant& arbitrary_data)
: ApplicationException(message), m_arbitraryData(arbitrary_data) {}
// Returns a copy of the payload which was passed to the constructor.
QVariant FeedRecognizedButFailedException::arbitraryData() const {
return m_arbitraryData;
}

View File

@ -5,9 +5,16 @@
#include "exceptions/applicationexception.h"
#include <QVariant>
// Exception derived from ApplicationException which can additionally carry
// an arbitrary QVariant payload for whoever catches it.
class FeedRecognizedButFailedException : public ApplicationException {
public:
// Constructs the exception from message only.
explicit FeedRecognizedButFailedException(const QString& message = {});
// Constructs the exception from message and optional payload.
explicit FeedRecognizedButFailedException(const QString& message = {}, const QVariant& arbitrary_data = {});
// Returns the payload stored in this exception.
QVariant arbitraryData() const;
private:
// Payload handed over in the constructor.
QVariant m_arbitraryData;
};
#endif // UNRECOGNIZEDFEEDFORMATEXCEPTION_H

View File

@ -66,7 +66,7 @@ QNetworkReply* BaseNetworkAccessManager::createRequest(QNetworkAccessManager::Op
// new_request.setMaximumRedirectsAllowed(0);
new_request.setRawHeader(HTTP_HEADERS_COOKIE, QSL("JSESSIONID= ").toLocal8Bit());
/*
auto custom_ua = qApp->web()->customUserAgent();
if (custom_ua.isEmpty()) {
@ -75,6 +75,9 @@ QNetworkReply* BaseNetworkAccessManager::createRequest(QNetworkAccessManager::Op
else {
new_request.setRawHeader(HTTP_HEADERS_USER_AGENT, custom_ua.toLocal8Bit());
}
*/
new_request.setRawHeader(HTTP_HEADERS_USER_AGENT, " ");
auto reply = QNetworkAccessManager::createRequest(op, new_request, outgoingData);
return reply;

View File

@ -6,4 +6,17 @@
#define FEED_INITIAL_OPML_PATTERN "feeds-%1.opml"
#define DEFAULT_ENCLOSURE_MIME_TYPE "image/jpg"
#define ADVANCED_FEED_ADD_DIALOG_CODE 64
// Matches a whole HTML <link> tag whose type attribute is "application/rss+xml".
#define RSS_REGEX_MATCHER "<link[^>]+type=\"application\\/(?:rss\\+xml)\"[^>]*>"
// Captures the value of a double-quoted href attribute (capture group 1).
#define RSS_HREF_REGEX_MATCHER "href=\"([^\"]+)\""
// Matches a whole HTML <link> tag whose type attribute is "application/feed+json"
// or "application/json".
#define JSON_REGEX_MATCHER "<link[^>]+type=\"application\\/(?:feed\\+json|json)\"[^>]*>"
// Captures the value of a double-quoted href attribute (capture group 1).
#define JSON_HREF_REGEX_MATCHER "href=\"([^\"]+)\""
// Matches a whole HTML <link> tag whose type attribute is "application/atom+xml"
// or "application/rss+xml".
#define ATOM_REGEX_MATCHER "<link[^>]+type=\"application\\/(?:atom\\+xml|rss\\+xml)\"[^>]*>"
// Captures the value of a double-quoted href attribute (capture group 1).
#define ATOM_HREF_REGEX_MATCHER "href=\"([^\"]+)\""
// Matches "github.com/<owner>/<repo>" where both path segments consist of word
// characters only; the segments are exposed as capture groups 1 and 2.
#define GITHUB_URL_REGEX "github\\.com\\/(\\w+)\\/(\\w+)"
#endif // STANDARD_DEFINITIONS_H

View File

@ -2,11 +2,13 @@
#include "services/standard/gui/formdiscoverfeeds.h"
#include "3rd-party/boolinq/boolinq.h"
#include "gui/guiutilities.h"
#include "miscellaneous/application.h"
#include "miscellaneous/iconfactory.h"
#include "services/abstract/category.h"
#include "services/abstract/serviceroot.h"
#include "services/standard/definitions.h"
#include "services/standard/standardfeed.h"
#include "services/standard/parsers/atomparser.h"
@ -28,14 +30,20 @@ FormDiscoverFeeds::FormDiscoverFeeds(ServiceRoot* service_root,
m_parsers = {new AtomParser({}), new RssParser({}), new RdfParser({}), new JsonParser({}), new SitemapParser({})};
m_btnGoAdvanced = m_ui.m_buttonBox->addButton(tr("Close && &advanced mode"), QDialogButtonBox::ButtonRole::NoRole);
m_btnImportSelectedFeeds =
m_ui.m_buttonBox->addButton(tr("Import selected feeds"), QDialogButtonBox::ButtonRole::ActionRole);
m_btnGoAdvanced
->setToolTip(tr("Close this dialog and display dialog for adding individual feeds with advanced options."));
m_btnGoAdvanced->setIcon(qApp->icons()->fromTheme(QSL("system-upgrade")));
m_btnImportSelectedFeeds->setIcon(qApp->icons()->fromTheme(QSL("document-import")));
m_ui.m_btnDiscover->setIcon(qApp->icons()->fromTheme(QSL("system-search")));
connect(m_ui.m_txtUrl->lineEdit(), &QLineEdit::textChanged, this, &FormDiscoverFeeds::onUrlChanged);
connect(m_btnImportSelectedFeeds, &QPushButton::clicked, this, &FormDiscoverFeeds::importSelectedFeeds);
connect(m_btnGoAdvanced, &QPushButton::clicked, this, &FormDiscoverFeeds::userWantsAdvanced);
connect(m_ui.m_btnDiscover, &QPushButton::clicked, this, &FormDiscoverFeeds::discoverFeeds);
connect(&m_watcherLookup, &QFutureWatcher<QList<StandardFeed*>>::progressValueChanged, this, [=](int prog) {
@ -145,6 +153,11 @@ void FormDiscoverFeeds::addSingleFeed(StandardFeed* feed) {
// Slot connected to the "Import selected feeds" button.
// NOTE: The body is empty, so pressing the button currently does nothing.
void FormDiscoverFeeds::importSelectedFeeds() {}
void FormDiscoverFeeds::userWantsAdvanced() {
setResult(ADVANCED_FEED_ADD_DIALOG_CODE);
close();
}
void FormDiscoverFeeds::loadDiscoveredFeeds(const QList<StandardFeed*>& feeds) {
m_ui.m_pbDiscovery->setVisible(false);
m_discoveredModel->setDiscoveredFeeds(feeds);
@ -166,16 +179,27 @@ QVariant DiscoveredFeedsModel::data(const QModelIndex& index, int role) const {
switch (role) {
case Qt::ItemDataRole::DisplayRole: {
if (index.column() == 0) {
return m_discoveredFeeds.at(index.row())->title();
return m_discoveredFeeds.at(index.row()).m_feed->title();
}
else {
return StandardFeed::typeToString(m_discoveredFeeds.at(index.row())->type());
return StandardFeed::typeToString(m_discoveredFeeds.at(index.row()).m_feed->type());
}
}
case Qt::ItemDataRole::CheckStateRole: {
if (index.column() == 0) {
return m_discoveredFeeds.at(index.row()).m_isChecked ? Qt::CheckState::Checked : Qt::CheckState::Unchecked;
}
else {
return {};
}
break;
}
case Qt::ItemDataRole::DecorationRole: {
if (index.column() == 0) {
return m_discoveredFeeds.at(index.row())->fullIcon();
return m_discoveredFeeds.at(index.row()).m_feed->fullIcon();
}
}
@ -184,12 +208,18 @@ QVariant DiscoveredFeedsModel::data(const QModelIndex& index, int role) const {
}
}
QList<StandardFeed*> DiscoveredFeedsModel::discoveredFeeds() const {
QList<DiscoveredFeedsModel::FeedItem> DiscoveredFeedsModel::discoveredFeeds() const {
return m_discoveredFeeds;
}
void DiscoveredFeedsModel::setDiscoveredFeeds(const QList<StandardFeed*>& newDiscoveredFeeds) {
m_discoveredFeeds = newDiscoveredFeeds;
void DiscoveredFeedsModel::setDiscoveredFeeds(const QList<StandardFeed*>& feeds) {
auto std_feeds = boolinq::from(feeds)
.select([](StandardFeed* fd) {
return FeedItem{false, fd};
})
.toStdList();
m_discoveredFeeds = FROM_STD_LIST(QList<FeedItem>, std_feeds);
emit layoutAboutToBeChanged();
emit layoutChanged();
@ -208,3 +238,17 @@ QVariant DiscoveredFeedsModel::headerData(int section, Qt::Orientation orientati
return {};
}
// Returns item flags for the given index.
//
// All items get the default flags of the base model, items in the first
// column are additionally checkable by the user.
Qt::ItemFlags DiscoveredFeedsModel::flags(const QModelIndex& index) const {
  Qt::ItemFlags item_flags = QAbstractListModel::flags(index);

  if (index.column() == 0) {
    item_flags |= Qt::ItemFlag::ItemIsUserCheckable;
  }

  return item_flags;
}
bool DiscoveredFeedsModel::setData(const QModelIndex& index, const QVariant& value, int role) {
if (role == Qt::ItemDataRole::CheckStateRole && index.column() == 0) {
m_discoveredFeeds[index.row()].m_isChecked = value.value<Qt::CheckState>() == Qt::CheckState::Checked;
return true;
}
return QAbstractListModel::setData(index, value, role);
}

View File

@ -19,18 +19,25 @@ class DiscoveredFeedsModel : public QAbstractListModel {
Q_OBJECT
public:
// One row of the model: discovered feed plus its check state.
// NOTE: Instances are created via aggregate initialization, so the order of
// members matters.
struct FeedItem {
// True if the row is checked, this is what the model reports for Qt::CheckStateRole.
bool m_isChecked;
// The discovered feed shown in this row.
StandardFeed* m_feed;
};
explicit DiscoveredFeedsModel(QObject* parent = {});
virtual QVariant headerData(int section, Qt::Orientation orientation, int role) const;
virtual int rowCount(const QModelIndex& parent) const;
virtual int columnCount(const QModelIndex& parent) const;
virtual QVariant data(const QModelIndex& index, int role) const;
virtual bool setData(const QModelIndex& index, const QVariant& value, int role);
virtual Qt::ItemFlags flags(const QModelIndex& index) const;
QList<StandardFeed*> discoveredFeeds() const;
void setDiscoveredFeeds(const QList<StandardFeed*>& newDiscoveredFeeds);
QList<FeedItem> discoveredFeeds() const;
void setDiscoveredFeeds(const QList<StandardFeed*>& feeds);
private:
QList<StandardFeed*> m_discoveredFeeds;
QList<FeedItem> m_discoveredFeeds;
};
class FormDiscoverFeeds : public QDialog {
@ -50,12 +57,14 @@ class FormDiscoverFeeds : public QDialog {
void importSelectedFeeds();
private:
void userWantsAdvanced();
void loadDiscoveredFeeds(const QList<StandardFeed*>& feeds);
void loadCategories(const QList<Category*>& categories, RootItem* root_item);
private:
Ui::FormDiscoverFeeds m_ui;
QPushButton* m_btnImportSelectedFeeds;
QPushButton* m_btnGoAdvanced;
ServiceRoot* m_serviceRoot;
QList<FeedParser*> m_parsers;
QFutureWatcher<QList<StandardFeed*>> m_watcherLookup;

View File

@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>406</width>
<height>334</height>
<width>513</width>
<height>360</height>
</rect>
</property>
<property name="windowTitle">

View File

@ -4,6 +4,8 @@
#include "definitions/definitions.h"
#include "exceptions/applicationexception.h"
#include "miscellaneous/application.h"
#include "miscellaneous/settings.h"
#include "miscellaneous/textfactory.h"
#include "services/standard/definitions.h"
#include "services/standard/standardfeed.h"
@ -24,7 +26,178 @@ AtomParser::AtomParser(const QString& data) : FeedParser(data) {
AtomParser::~AtomParser() {}
// Tries to discover ATOM feeds which are reachable from the given URL.
//
// Probing order:
//   1. The URL itself. If it is directly a feed, then it is returned as the only result.
//   2. Feed links embedded in <link> tags of the downloaded HTML data.
//   3. "URL/feed" endpoint.
//   4. "URL/atom" endpoint.
//   5. If URL is Github repository, then also:
//        https://github.com/:owner/:repo/releases.atom
//        https://github.com/:owner/:repo/commits.atom
//        https://github.com/:owner/:repo/tags.atom
//
// All requests go through the network proxy of "root". Every returned feed has its
// source set to the URL it was downloaded from.
QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
  QString my_url = url.toString();
  QList<StandardFeed*> feeds;
  int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();

  // All probes are plain GET requests which differ only in target URL and output buffer.
  auto download = [&](const QString& target, QByteArray& output) {
    return NetworkFactory::performNetworkOperation(target,
                                                   timeout,
                                                   {},
                                                   output,
                                                   QNetworkAccessManager::Operation::GetOperation,
                                                   {},
                                                   {},
                                                   {},
                                                   {},
                                                   root->networkProxy());
  };

  // Download URL.
  QByteArray data;
  auto res = download(my_url, data);

  if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
    try {
      // 1.
      auto guessed_feed = guessFeed(data, res.m_contentType);

      guessed_feed.first->setSource(my_url);
      return {guessed_feed.first};
    }
    catch (...) {
      qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
    }

    // 2.
    QRegularExpression rx(QSL(ATOM_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
    QRegularExpression rx_href(QSL(ATOM_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);

    rx_href.optimize();

    QRegularExpressionMatchIterator it_rx = rx.globalMatch(QString::fromUtf8(data));

    while (it_rx.hasNext()) {
      QString link_tag = it_rx.next().captured();
      QString feed_link = rx_href.match(link_tag).captured(1);

      if (feed_link.isEmpty()) {
        // Tag has no usable href, there is nothing to download.
        continue;
      }

      if (feed_link.startsWith(QL1S("//"))) {
        feed_link = QSL(URI_SCHEME_HTTP) + feed_link.mid(2);
      }
      else if (feed_link.startsWith(QL1C('/'))) {
        feed_link = url.toString(QUrl::UrlFormattingOption::RemovePath | QUrl::UrlFormattingOption::RemoveQuery |
                                 QUrl::UrlFormattingOption::StripTrailingSlash) +
                    feed_link;
      }

      QByteArray link_data;
      auto link_res = download(feed_link, link_data);

      if (link_res.m_networkError == QNetworkReply::NetworkError::NoError) {
        try {
          auto guessed_feed = guessFeed(link_data, link_res.m_contentType);

          guessed_feed.first->setSource(feed_link);
          feeds.append(guessed_feed.first);
        }
        catch (const ApplicationException& ex) {
          qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(feed_link)
                   << " should be direct link to feed file but was not recognized:" << QUOTE_W_SPACE_DOT(ex.message());
        }
      }
    }
  }

  // 3. and 4.
  const QString base_url = url.toString(QUrl::UrlFormattingOption::StripTrailingSlash);
  QStringList endpoints = {QSL("/feed"), QSL("/atom")};

  // 5.
  // NOTE: hasMatch() must be used here, isValid() is true even if the URL does not match.
  if (QRegularExpression(QSL(GITHUB_URL_REGEX)).match(base_url).hasMatch()) {
    endpoints << QSL("/releases.atom") << QSL("/commits.atom") << QSL("/tags.atom");
  }

  for (const QString& endpoint : endpoints) {
    my_url = base_url + endpoint;
    res = download(my_url, data);

    if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
      try {
        auto guessed_feed = guessFeed(data, res.m_contentType);

        guessed_feed.first->setSource(my_url);
        feeds.append(guessed_feed.first);
      }
      catch (...) {
        qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
      }
    }
  }

  return feeds;
}
QPair<StandardFeed*, QList<IconLocation>> AtomParser::guessFeed(const QByteArray& content,

View File

@ -6,6 +6,7 @@
#include "definitions/typedefs.h"
#include "exceptions/applicationexception.h"
#include "exceptions/feedrecognizedbutfailedexception.h"
#include "miscellaneous/settings.h"
#include "miscellaneous/textfactory.h"
#include "services/standard/definitions.h"
#include "services/standard/standardfeed.h"
@ -19,7 +20,89 @@ JsonParser::JsonParser(const QString& data) : FeedParser(data, false) {}
JsonParser::~JsonParser() {}
// Tries to discover JSON feeds which are reachable from the given URL.
//
// Probing order:
//   1. The URL itself. If it is directly a feed, then it is returned as the only result.
//   2. Feed links embedded in <link> tags of the downloaded HTML data.
//
// All requests go through the network proxy of "root". Every returned feed has its
// source set to the URL it was downloaded from.
QList<StandardFeed*> JsonParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
  QString my_url = url.toString();
  QList<StandardFeed*> feeds;
  int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();

  // All probes are plain GET requests which differ only in target URL and output buffer.
  auto download = [&](const QString& target, QByteArray& output) {
    return NetworkFactory::performNetworkOperation(target,
                                                   timeout,
                                                   {},
                                                   output,
                                                   QNetworkAccessManager::Operation::GetOperation,
                                                   {},
                                                   {},
                                                   {},
                                                   {},
                                                   root->networkProxy());
  };

  // Download URL.
  QByteArray data;
  auto res = download(my_url, data);

  if (res.m_networkError != QNetworkReply::NetworkError::NoError) {
    return feeds;
  }

  try {
    // 1.
    auto guessed_feed = guessFeed(data, res.m_contentType);

    guessed_feed.first->setSource(my_url);
    return {guessed_feed.first};
  }
  catch (...) {
    qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
  }

  // 2.
  QRegularExpression rx(QSL(JSON_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
  QRegularExpression rx_href(QSL(JSON_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);

  rx_href.optimize();

  QRegularExpressionMatchIterator it_rx = rx.globalMatch(QString::fromUtf8(data));

  while (it_rx.hasNext()) {
    QString link_tag = it_rx.next().captured();
    QString feed_link = rx_href.match(link_tag).captured(1);

    if (feed_link.isEmpty()) {
      // Tag has no usable href, there is nothing to download.
      continue;
    }

    if (feed_link.startsWith(QL1S("//"))) {
      feed_link = QSL(URI_SCHEME_HTTP) + feed_link.mid(2);
    }
    else if (feed_link.startsWith(QL1C('/'))) {
      feed_link = url.toString(QUrl::UrlFormattingOption::RemovePath | QUrl::UrlFormattingOption::RemoveQuery |
                               QUrl::UrlFormattingOption::StripTrailingSlash) +
                  feed_link;
    }

    QByteArray link_data;
    auto link_res = download(feed_link, link_data);

    if (link_res.m_networkError == QNetworkReply::NetworkError::NoError) {
      try {
        auto guessed_feed = guessFeed(link_data, link_res.m_contentType);

        guessed_feed.first->setSource(feed_link);
        feeds.append(guessed_feed.first);
      }
      catch (const ApplicationException& ex) {
        qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(feed_link)
                 << " should be direct link to feed file but was not recognized:" << QUOTE_W_SPACE_DOT(ex.message());
      }
    }
  }

  return feeds;
}
QPair<StandardFeed*, QList<IconLocation>> JsonParser::guessFeed(const QByteArray& content,

View File

@ -3,6 +3,7 @@
#include "services/standard/parsers/rdfparser.h"
#include "exceptions/applicationexception.h"
#include "miscellaneous/settings.h"
#include "miscellaneous/textfactory.h"
#include "services/standard/definitions.h"
#include "services/standard/standardfeed.h"
@ -18,7 +19,141 @@ RdfParser::RdfParser(const QString& data)
RdfParser::~RdfParser() {}
// Tries to discover RDF feeds which are reachable from the given URL.
//
// Probing order:
//   1. The URL itself. If it is directly a feed, then it is returned as the only result.
//   2. Feed links embedded in <link> tags of the downloaded HTML data.
//   3. "URL/feed" endpoint.
//   4. "URL/rdf" endpoint.
//
// All requests go through the network proxy of "root". Every returned feed has its
// source set to the URL it was downloaded from.
QList<StandardFeed*> RdfParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
  QString my_url = url.toString();
  QList<StandardFeed*> feeds;
  int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();

  // All probes are plain GET requests which differ only in target URL and output buffer.
  auto download = [&](const QString& target, QByteArray& output) {
    return NetworkFactory::performNetworkOperation(target,
                                                   timeout,
                                                   {},
                                                   output,
                                                   QNetworkAccessManager::Operation::GetOperation,
                                                   {},
                                                   {},
                                                   {},
                                                   {},
                                                   root->networkProxy());
  };

  // Download URL.
  QByteArray data;
  auto res = download(my_url, data);

  if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
    try {
      // 1.
      auto guessed_feed = guessFeed(data, res.m_contentType);

      guessed_feed.first->setSource(my_url);
      return {guessed_feed.first};
    }
    catch (...) {
      qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
    }

    // 2.
    QRegularExpression rx(QSL(RSS_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
    QRegularExpression rx_href(QSL(RSS_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);

    rx_href.optimize();

    QRegularExpressionMatchIterator it_rx = rx.globalMatch(QString::fromUtf8(data));

    while (it_rx.hasNext()) {
      QString link_tag = it_rx.next().captured();
      QString feed_link = rx_href.match(link_tag).captured(1);

      if (feed_link.isEmpty()) {
        // Tag has no usable href, there is nothing to download.
        continue;
      }

      if (feed_link.startsWith(QL1S("//"))) {
        feed_link = QSL(URI_SCHEME_HTTP) + feed_link.mid(2);
      }
      else if (feed_link.startsWith(QL1C('/'))) {
        feed_link = url.toString(QUrl::UrlFormattingOption::RemovePath | QUrl::UrlFormattingOption::RemoveQuery |
                                 QUrl::UrlFormattingOption::StripTrailingSlash) +
                    feed_link;
      }

      QByteArray link_data;
      auto link_res = download(feed_link, link_data);

      if (link_res.m_networkError == QNetworkReply::NetworkError::NoError) {
        try {
          auto guessed_feed = guessFeed(link_data, link_res.m_contentType);

          guessed_feed.first->setSource(feed_link);
          feeds.append(guessed_feed.first);
        }
        catch (const ApplicationException& ex) {
          qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(feed_link)
                   << " should be direct link to feed file but was not recognized:" << QUOTE_W_SPACE_DOT(ex.message());
        }
      }
    }
  }

  // 3. and 4.
  const QString base_url = url.toString(QUrl::UrlFormattingOption::StripTrailingSlash);
  const QList<QString> endpoints = {QSL("/feed"), QSL("/rdf")};

  for (const QString& endpoint : endpoints) {
    my_url = base_url + endpoint;
    res = download(my_url, data);

    if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
      try {
        auto guessed_feed = guessFeed(data, res.m_contentType);

        guessed_feed.first->setSource(my_url);
        feeds.append(guessed_feed.first);
      }
      catch (...) {
        qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
      }
    }
  }

  return feeds;
}
QPair<StandardFeed*, QList<IconLocation>> RdfParser::guessFeed(const QByteArray& content,

View File

@ -19,12 +19,18 @@ RssParser::RssParser(const QString& data) : FeedParser(data) {}
RssParser::~RssParser() {}
QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
QString my_url = url.toString();
QList<StandardFeed*> feeds;
// 1. Test direct URL for a feed.
// 2. Test embedded RSS feed links from HTML data.
// 3. Test "URL/feed" endpoint.
// 4. Test "URL/rss" endpoint.
// Download URL.
int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();
QByteArray data;
auto res = NetworkFactory::performNetworkOperation(url.toString(),
auto res = NetworkFactory::performNetworkOperation(my_url,
timeout,
{},
data,
@ -36,20 +42,21 @@ QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url
root->networkProxy());
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
// Parse result, might be HTML or directly the feed file.
try {
// 1.
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(url.toString());
guessed_feed.first->setSource(my_url);
return {guessed_feed.first};
}
catch (...) {
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(url) << "is not a direct feed file.";
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
}
QRegularExpression rx(QSL(FEED_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
QRegularExpression rx_href(QSL(FEED_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
// 2.
QRegularExpression rx(QSL(RSS_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
QRegularExpression rx_href(QSL(RSS_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
rx_href.optimize();
@ -82,22 +89,70 @@ QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url
root->networkProxy());
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
// Parse result, might be HTML or directly the feed file.
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(url.toString());
guessed_feed.first->setSource(feed_link);
feeds.append(guessed_feed.first);
}
catch (const ApplicationException& ex) {
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(url)
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(feed_link)
<< " should be direct link to feed file but was not recognized:" << QUOTE_W_SPACE_DOT(ex.message());
}
}
}
}
// 3.
my_url = url.toString(QUrl::UrlFormattingOption::StripTrailingSlash) + QSL("/feed");
res = NetworkFactory::performNetworkOperation(my_url,
timeout,
{},
data,
QNetworkAccessManager::Operation::GetOperation,
{},
{},
{},
{},
root->networkProxy());
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
}
}
// 4.
my_url = url.toString(QUrl::UrlFormattingOption::StripTrailingSlash) + QSL("/rss");
res = NetworkFactory::performNetworkOperation(my_url,
timeout,
{},
data,
QNetworkAccessManager::Operation::GetOperation,
{},
{},
{},
{},
root->networkProxy());
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
}
}
return feeds;
}

View File

@ -9,6 +9,7 @@
#include "definitions/definitions.h"
#include "exceptions/applicationexception.h"
#include "exceptions/feedrecognizedbutfailedexception.h"
#include "miscellaneous/settings.h"
#include "miscellaneous/textfactory.h"
#include "services/standard/definitions.h"
@ -21,7 +22,108 @@ SitemapParser::SitemapParser(const QString& data) : FeedParser(data) {}
SitemapParser::~SitemapParser() {}
// Tries to discover sitemaps which are reachable from the given URL.
//
// Sources of candidate sitemap URLs:
//   1. "Sitemap:" entries of "URL/robots.txt" file.
//   2. "Sitemap:" entries of "URLHOST/robots.txt" file.
//   3. The URL itself.
//   4. "URL/sitemap.xml" endpoint.
//   5. "URL/sitemap.xml.gz" endpoint.
//
// If a candidate turns out to be a sitemap index, then the locations listed in it are
// queued as candidates too. Every candidate URL is downloaded at most once.
//
// All requests go through the network proxy of "root". Every returned feed has its
// source set to the URL it was downloaded from.
QList<StandardFeed*> SitemapParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
  QHash<QString, StandardFeed*> feeds;
  QStringList to_process_sitemaps;
  QStringList processed_sitemaps;
  int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();

  // All probes are plain GET requests which differ only in target URL and output buffer.
  auto download = [&](const QString& target, QByteArray& output) {
    return NetworkFactory::performNetworkOperation(target,
                                                   timeout,
                                                   {},
                                                   output,
                                                   QNetworkAccessManager::Operation::GetOperation,
                                                   {},
                                                   {},
                                                   {},
                                                   {},
                                                   root->networkProxy());
  };

  const QString base_url = url.toString(QUrl::UrlFormattingOption::StripTrailingSlash);

  // 1.
  // 2.
  QStringList to_process_robots = {base_url + QSL("/robots.txt"),
                                   url.toString(QUrl::UrlFormattingOption::RemovePath |
                                                QUrl::UrlFormattingOption::RemoveQuery) +
                                     QSL("/robots.txt")};

  to_process_robots.removeDuplicates();

  for (const QString& robots_url : to_process_robots) {
    QByteArray robots_data;
    auto robots_res = download(robots_url, robots_data);

    if (robots_res.m_networkError != QNetworkReply::NetworkError::NoError) {
      continue;
    }

    QRegularExpression rx(QSL("Sitemap: ?([^\\r\\n]+)"),
                          QRegularExpression::PatternOption::CaseInsensitiveOption |
                            QRegularExpression::PatternOption::MultilineOption);
    QRegularExpressionMatchIterator it_rx = rx.globalMatch(QString::fromUtf8(robots_data));

    while (it_rx.hasNext()) {
      // Capture runs to the end of line, so it can carry surrounding whitespace.
      to_process_sitemaps.append(it_rx.next().captured(1).trimmed());
    }
  }

  // 3.
  to_process_sitemaps.append(url.toString());

  // 4.
  to_process_sitemaps.append(base_url + QSL("/sitemap.xml"));

  // 5.
  to_process_sitemaps.append(base_url + QSL("/sitemap.xml.gz"));

  while (!to_process_sitemaps.isEmpty()) {
    QString my_url = to_process_sitemaps.takeFirst();

    // Sitemap indices can reference each other (or themselves), remembering every
    // visited URL is what guarantees that this loop terminates.
    if (my_url.isEmpty() || processed_sitemaps.contains(my_url)) {
      continue;
    }

    processed_sitemaps.append(my_url);

    // Download URL.
    QByteArray data;
    auto res = download(my_url, data);

    if (res.m_networkError != QNetworkReply::NetworkError::NoError) {
      continue;
    }

    try {
      auto guessed_feed = guessFeed(data, res.m_contentType);

      guessed_feed.first->setSource(my_url);
      feeds.insert(my_url, guessed_feed.first);
    }
    catch (const FeedRecognizedButFailedException& ex) {
      // This is index, its payload is converted to a list of strings which are queued as candidates.
      to_process_sitemaps.append(ex.arbitraryData().toStringList());
    }
    catch (const ApplicationException&) {
      qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct sitemap file.";
    }
  }

  return feeds.values();
}
QPair<StandardFeed*, QList<IconLocation>> SitemapParser::guessFeed(const QByteArray& content,
@ -73,7 +175,14 @@ QPair<StandardFeed*, QList<IconLocation>> SitemapParser::guessFeed(const QByteAr
QDomElement root_element = xml_document.documentElement();
if (root_element.tagName() == QSL("sitemapindex")) {
throw FeedRecognizedButFailedException(QObject::tr("sitemap indices are not supported"));
QStringList locs;
int i = 0;
for (QDomNodeList ndl = root_element.elementsByTagNameNS(sitemapNamespace(), QSL("loc")); i < ndl.size(); i++) {
locs << ndl.at(i).toElement().text();
}
throw FeedRecognizedButFailedException(QObject::tr("sitemap indices are not supported"), locs);
}
if (root_element.tagName() != QSL("urlset")) {
@ -180,5 +289,5 @@ QList<Enclosure> SitemapParser::xmlMessageEnclosures(const QDomElement& msg_elem
}
// Returns true if the given data start with gzip magic bytes (0x1f 0x8b).
//
// Data shorter than two bytes are never considered to be gzip, the size check
// also guards the indexed reads below.
bool SitemapParser::isGzip(const QByteArray& content) {
  return content.size() >= 2 && ((content[0] & 0xFF) == 0x1f) && ((content[1] & 0xFF) == 0x8b);
}

View File

@ -140,16 +140,14 @@ void StandardServiceRoot::addNewFeed(RootItem* selected_item, const QString& url
url,
qApp->mainFormWidget()));
form_discover->exec();
/*
if (form_discover->exec() == ADVANCED_FEED_ADD_DIALOG_CODE) {
QScopedPointer<FormStandardFeedDetails> form_pointer(new FormStandardFeedDetails(this,
selected_item,
url,
qApp->mainFormWidget()));
form_pointer->addEditFeed<StandardFeed>();
*/
}
qApp->feedUpdateLock()->unlock();
}

View File

@ -48,7 +48,6 @@ class StandardServiceRoot : public ServiceRoot {
void exportFeeds();
private:
// Takes structure residing under given root item and adds feeds/categories from
// it to active structure.
// NOTE: This is used for import/export of the model.