save work
This commit is contained in:
parent
e8090ce68c
commit
114dbef367
|
@ -2,4 +2,10 @@
|
|||
|
||||
#include "exceptions/feedrecognizedbutfailedexception.h"
|
||||
|
||||
FeedRecognizedButFailedException::FeedRecognizedButFailedException(const QString &message) : ApplicationException(message) {}
|
||||
// Creates the exception with a textual message (forwarded to ApplicationException)
// and an arbitrary payload which is stored and handed back by arbitraryData().
FeedRecognizedButFailedException::FeedRecognizedButFailedException(const QString& message,
|
||||
const QVariant& arbitrary_data)
|
||||
: ApplicationException(message), m_arbitraryData(arbitrary_data) {}
|
||||
|
||||
// Returns a copy of the payload that was passed to the constructor.
QVariant FeedRecognizedButFailedException::arbitraryData() const {
|
||||
return m_arbitraryData;
|
||||
}
|
||||
|
|
|
@ -5,9 +5,16 @@
|
|||
|
||||
#include "exceptions/applicationexception.h"
|
||||
|
||||
#include <QVariant>
|
||||
|
||||
class FeedRecognizedButFailedException : public ApplicationException {
|
||||
public:
|
||||
explicit FeedRecognizedButFailedException(const QString& message = {});
|
||||
explicit FeedRecognizedButFailedException(const QString& message = {}, const QVariant& arbitrary_data = {});
|
||||
|
||||
QVariant arbitraryData() const;
|
||||
|
||||
private:
|
||||
QVariant m_arbitraryData;
|
||||
};
|
||||
|
||||
#endif // UNRECOGNIZEDFEEDFORMATEXCEPTION_H
|
||||
|
|
|
@ -66,15 +66,18 @@ QNetworkReply* BaseNetworkAccessManager::createRequest(QNetworkAccessManager::Op
|
|||
// new_request.setMaximumRedirectsAllowed(0);
|
||||
|
||||
new_request.setRawHeader(HTTP_HEADERS_COOKIE, QSL("JSESSIONID= ").toLocal8Bit());
|
||||
/*
|
||||
auto custom_ua = qApp->web()->customUserAgent();
|
||||
|
||||
auto custom_ua = qApp->web()->customUserAgent();
|
||||
if (custom_ua.isEmpty()) {
|
||||
new_request.setRawHeader(HTTP_HEADERS_USER_AGENT, HTTP_COMPLETE_USERAGENT);
|
||||
}
|
||||
else {
|
||||
new_request.setRawHeader(HTTP_HEADERS_USER_AGENT, custom_ua.toLocal8Bit());
|
||||
}
|
||||
*/
|
||||
|
||||
if (custom_ua.isEmpty()) {
|
||||
new_request.setRawHeader(HTTP_HEADERS_USER_AGENT, HTTP_COMPLETE_USERAGENT);
|
||||
}
|
||||
else {
|
||||
new_request.setRawHeader(HTTP_HEADERS_USER_AGENT, custom_ua.toLocal8Bit());
|
||||
}
|
||||
new_request.setRawHeader(HTTP_HEADERS_USER_AGENT, " ");
|
||||
|
||||
auto reply = QNetworkAccessManager::createRequest(op, new_request, outgoingData);
|
||||
return reply;
|
||||
|
|
|
@ -6,4 +6,17 @@
|
|||
#define FEED_INITIAL_OPML_PATTERN "feeds-%1.opml"
|
||||
#define DEFAULT_ENCLOSURE_MIME_TYPE "image/jpg"
|
||||
|
||||
#define ADVANCED_FEED_ADD_DIALOG_CODE 64
|
||||
|
||||
#define RSS_REGEX_MATCHER "<link[^>]+type=\"application\\/(?:rss\\+xml)\"[^>]*>"
|
||||
#define RSS_HREF_REGEX_MATCHER "href=\"([^\"]+)\""
|
||||
|
||||
#define JSON_REGEX_MATCHER "<link[^>]+type=\"application\\/(?:feed\\+json|json)\"[^>]*>"
|
||||
#define JSON_HREF_REGEX_MATCHER "href=\"([^\"]+)\""
|
||||
|
||||
#define ATOM_REGEX_MATCHER "<link[^>]+type=\"application\\/(?:atom\\+xml|rss\\+xml)\"[^>]*>"
|
||||
#define ATOM_HREF_REGEX_MATCHER "href=\"([^\"]+)\""
|
||||
|
||||
// Matches "github.com/<owner>/<repository>". Owner and repository names may
// contain hyphens and dots in addition to word characters, so plain "\w+" is
// not enough (e.g. "github.com/some-user/my.repo").
#define GITHUB_URL_REGEX "github\\.com\\/([\\w.-]+)\\/([\\w.-]+)"
|
||||
|
||||
#endif // STANDARD_DEFINITIONS_H
|
||||
|
|
|
@ -2,11 +2,13 @@
|
|||
|
||||
#include "services/standard/gui/formdiscoverfeeds.h"
|
||||
|
||||
#include "3rd-party/boolinq/boolinq.h"
|
||||
#include "gui/guiutilities.h"
|
||||
#include "miscellaneous/application.h"
|
||||
#include "miscellaneous/iconfactory.h"
|
||||
#include "services/abstract/category.h"
|
||||
#include "services/abstract/serviceroot.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
||||
#include "services/standard/parsers/atomparser.h"
|
||||
|
@ -28,14 +30,20 @@ FormDiscoverFeeds::FormDiscoverFeeds(ServiceRoot* service_root,
|
|||
|
||||
m_parsers = {new AtomParser({}), new RssParser({}), new RdfParser({}), new JsonParser({}), new SitemapParser({})};
|
||||
|
||||
m_btnGoAdvanced = m_ui.m_buttonBox->addButton(tr("Close && &advanced mode"), QDialogButtonBox::ButtonRole::NoRole);
|
||||
m_btnImportSelectedFeeds =
|
||||
m_ui.m_buttonBox->addButton(tr("Import selected feeds"), QDialogButtonBox::ButtonRole::ActionRole);
|
||||
|
||||
m_btnGoAdvanced
|
||||
->setToolTip(tr("Close this dialog and display dialog for adding individual feeds with advanced options."));
|
||||
|
||||
m_btnGoAdvanced->setIcon(qApp->icons()->fromTheme(QSL("system-upgrade")));
|
||||
m_btnImportSelectedFeeds->setIcon(qApp->icons()->fromTheme(QSL("document-import")));
|
||||
m_ui.m_btnDiscover->setIcon(qApp->icons()->fromTheme(QSL("system-search")));
|
||||
|
||||
connect(m_ui.m_txtUrl->lineEdit(), &QLineEdit::textChanged, this, &FormDiscoverFeeds::onUrlChanged);
|
||||
connect(m_btnImportSelectedFeeds, &QPushButton::clicked, this, &FormDiscoverFeeds::importSelectedFeeds);
|
||||
connect(m_btnGoAdvanced, &QPushButton::clicked, this, &FormDiscoverFeeds::userWantsAdvanced);
|
||||
connect(m_ui.m_btnDiscover, &QPushButton::clicked, this, &FormDiscoverFeeds::discoverFeeds);
|
||||
|
||||
connect(&m_watcherLookup, &QFutureWatcher<QList<StandardFeed*>>::progressValueChanged, this, [=](int prog) {
|
||||
|
@ -145,6 +153,11 @@ void FormDiscoverFeeds::addSingleFeed(StandardFeed* feed) {
|
|||
|
||||
void FormDiscoverFeeds::importSelectedFeeds() {}
|
||||
|
||||
void FormDiscoverFeeds::userWantsAdvanced() {
|
||||
setResult(ADVANCED_FEED_ADD_DIALOG_CODE);
|
||||
close();
|
||||
}
|
||||
|
||||
void FormDiscoverFeeds::loadDiscoveredFeeds(const QList<StandardFeed*>& feeds) {
|
||||
m_ui.m_pbDiscovery->setVisible(false);
|
||||
m_discoveredModel->setDiscoveredFeeds(feeds);
|
||||
|
@ -166,16 +179,27 @@ QVariant DiscoveredFeedsModel::data(const QModelIndex& index, int role) const {
|
|||
switch (role) {
|
||||
case Qt::ItemDataRole::DisplayRole: {
|
||||
if (index.column() == 0) {
|
||||
return m_discoveredFeeds.at(index.row())->title();
|
||||
return m_discoveredFeeds.at(index.row()).m_feed->title();
|
||||
}
|
||||
else {
|
||||
return StandardFeed::typeToString(m_discoveredFeeds.at(index.row())->type());
|
||||
return StandardFeed::typeToString(m_discoveredFeeds.at(index.row()).m_feed->type());
|
||||
}
|
||||
}
|
||||
|
||||
case Qt::ItemDataRole::CheckStateRole: {
|
||||
if (index.column() == 0) {
|
||||
return m_discoveredFeeds.at(index.row()).m_isChecked ? Qt::CheckState::Checked : Qt::CheckState::Unchecked;
|
||||
}
|
||||
else {
|
||||
return {};
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case Qt::ItemDataRole::DecorationRole: {
|
||||
if (index.column() == 0) {
|
||||
return m_discoveredFeeds.at(index.row())->fullIcon();
|
||||
return m_discoveredFeeds.at(index.row()).m_feed->fullIcon();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -184,12 +208,18 @@ QVariant DiscoveredFeedsModel::data(const QModelIndex& index, int role) const {
|
|||
}
|
||||
}
|
||||
|
||||
QList<StandardFeed*> DiscoveredFeedsModel::discoveredFeeds() const {
|
||||
QList<DiscoveredFeedsModel::FeedItem> DiscoveredFeedsModel::discoveredFeeds() const {
|
||||
return m_discoveredFeeds;
|
||||
}
|
||||
|
||||
void DiscoveredFeedsModel::setDiscoveredFeeds(const QList<StandardFeed*>& newDiscoveredFeeds) {
|
||||
m_discoveredFeeds = newDiscoveredFeeds;
|
||||
void DiscoveredFeedsModel::setDiscoveredFeeds(const QList<StandardFeed*>& feeds) {
|
||||
auto std_feeds = boolinq::from(feeds)
|
||||
.select([](StandardFeed* fd) {
|
||||
return FeedItem{false, fd};
|
||||
})
|
||||
.toStdList();
|
||||
|
||||
m_discoveredFeeds = FROM_STD_LIST(QList<FeedItem>, std_feeds);
|
||||
|
||||
emit layoutAboutToBeChanged();
|
||||
emit layoutChanged();
|
||||
|
@ -208,3 +238,17 @@ QVariant DiscoveredFeedsModel::headerData(int section, Qt::Orientation orientati
|
|||
|
||||
return {};
|
||||
}
|
||||
|
||||
// Returns the default list-model flags; items in the first column are
// additionally marked as user-checkable.
Qt::ItemFlags DiscoveredFeedsModel::flags(const QModelIndex& index) const {
  const Qt::ItemFlags base_flags = QAbstractListModel::flags(index);

  if (index.column() != 0) {
    return base_flags;
  }

  return Qt::ItemFlag::ItemIsUserCheckable | base_flags;
}
|
||||
|
||||
bool DiscoveredFeedsModel::setData(const QModelIndex& index, const QVariant& value, int role) {
|
||||
if (role == Qt::ItemDataRole::CheckStateRole && index.column() == 0) {
|
||||
m_discoveredFeeds[index.row()].m_isChecked = value.value<Qt::CheckState>() == Qt::CheckState::Checked;
|
||||
return true;
|
||||
}
|
||||
|
||||
return QAbstractListModel::setData(index, value, role);
|
||||
}
|
||||
|
|
|
@ -19,18 +19,25 @@ class DiscoveredFeedsModel : public QAbstractListModel {
|
|||
Q_OBJECT
|
||||
|
||||
public:
|
||||
// One row of the model: a discovered feed together with its check-box state.
struct FeedItem {
|
||||
// True when the row is checked (reported via Qt::CheckStateRole).
bool m_isChecked;
|
||||
// The discovered feed shown in this row.
// NOTE(review): ownership of the pointer is not visible here - confirm who deletes it.
StandardFeed* m_feed;
|
||||
};
|
||||
|
||||
explicit DiscoveredFeedsModel(QObject* parent = {});
|
||||
|
||||
virtual QVariant headerData(int section, Qt::Orientation orientation, int role) const;
|
||||
virtual int rowCount(const QModelIndex& parent) const;
|
||||
virtual int columnCount(const QModelIndex& parent) const;
|
||||
virtual QVariant data(const QModelIndex& index, int role) const;
|
||||
virtual bool setData(const QModelIndex& index, const QVariant& value, int role);
|
||||
virtual Qt::ItemFlags flags(const QModelIndex& index) const;
|
||||
|
||||
QList<StandardFeed*> discoveredFeeds() const;
|
||||
void setDiscoveredFeeds(const QList<StandardFeed*>& newDiscoveredFeeds);
|
||||
QList<FeedItem> discoveredFeeds() const;
|
||||
void setDiscoveredFeeds(const QList<StandardFeed*>& feeds);
|
||||
|
||||
private:
|
||||
QList<StandardFeed*> m_discoveredFeeds;
|
||||
QList<FeedItem> m_discoveredFeeds;
|
||||
};
|
||||
|
||||
class FormDiscoverFeeds : public QDialog {
|
||||
|
@ -50,12 +57,14 @@ class FormDiscoverFeeds : public QDialog {
|
|||
void importSelectedFeeds();
|
||||
|
||||
private:
|
||||
void userWantsAdvanced();
|
||||
void loadDiscoveredFeeds(const QList<StandardFeed*>& feeds);
|
||||
void loadCategories(const QList<Category*>& categories, RootItem* root_item);
|
||||
|
||||
private:
|
||||
Ui::FormDiscoverFeeds m_ui;
|
||||
QPushButton* m_btnImportSelectedFeeds;
|
||||
QPushButton* m_btnGoAdvanced;
|
||||
ServiceRoot* m_serviceRoot;
|
||||
QList<FeedParser*> m_parsers;
|
||||
QFutureWatcher<QList<StandardFeed*>> m_watcherLookup;
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>406</width>
|
||||
<height>334</height>
|
||||
<width>513</width>
|
||||
<height>360</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle">
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
#include "definitions/definitions.h"
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "miscellaneous/application.h"
|
||||
#include "miscellaneous/settings.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
@ -24,7 +26,178 @@ AtomParser::AtomParser(const QString& data) : FeedParser(data) {
|
|||
AtomParser::~AtomParser() {}
|
||||
|
||||
// Tries to discover ATOM feeds reachable from "url".
//
// Steps:
//   1. Test direct URL for a feed. If it is a feed, it is the only result.
//   2. Test embedded ATOM feed links from HTML data.
//   3. Test "URL/feed" endpoint.
//   4. Test "URL/atom" endpoint.
//   5. If URL is Github repository, test for:
//        https://github.com/:owner/:repo/releases.atom
//        https://github.com/:owner/:repo/commits.atom
//        https://github.com/:user/:repo/tags.atom
//
// "root" supplies the network proxy used for all downloads.
// Returns all feeds which were recognized by guessFeed(), each with its source
// set to the URL it was downloaded from.
QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
  QList<StandardFeed*> feeds;
  const int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();

  // Plain GET of "link" into "output".
  const auto download = [&](const QString& link, QByteArray& output) {
    return NetworkFactory::performNetworkOperation(link,
                                                   timeout,
                                                   {},
                                                   output,
                                                   QNetworkAccessManager::Operation::GetOperation,
                                                   {},
                                                   {},
                                                   {},
                                                   {},
                                                   root->networkProxy());
  };

  // Downloads a well-known endpoint and adds it to the results if it is a feed.
  const auto probe_endpoint = [&](const QString& link) {
    QByteArray payload;
    const auto result = download(link, payload);

    if (result.m_networkError != QNetworkReply::NetworkError::NoError) {
      return;
    }

    try {
      auto guessed_feed = guessFeed(payload, result.m_contentType);

      guessed_feed.first->setSource(link);
      feeds.append(guessed_feed.first);
    }
    catch (...) {
      qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(link) << "is not a direct feed file.";
    }
  };

  // Download URL.
  const QString direct_url = url.toString();
  QByteArray page_data;
  const auto page_result = download(direct_url, page_data);

  if (page_result.m_networkError == QNetworkReply::NetworkError::NoError) {
    try {
      // 1.
      auto guessed_feed = guessFeed(page_data, page_result.m_contentType);

      guessed_feed.first->setSource(direct_url);

      return {guessed_feed.first};
    }
    catch (...) {
      qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(direct_url) << "is not a direct feed file.";
    }

    // 2.
    QRegularExpression rx(QSL(ATOM_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
    QRegularExpression rx_href(QSL(ATOM_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);

    rx_href.optimize();

    QRegularExpressionMatchIterator it_rx = rx.globalMatch(QString::fromUtf8(page_data));

    while (it_rx.hasNext()) {
      const QString link_tag = it_rx.next().captured();
      QString feed_link = rx_href.match(link_tag).captured(1);

      if (feed_link.startsWith(QL1S("//"))) {
        // Protocol-relative link.
        feed_link = QSL(URI_SCHEME_HTTP) + feed_link.mid(2);
      }
      else if (feed_link.startsWith(QL1C('/'))) {
        // Host-relative link.
        feed_link = url.toString(QUrl::UrlFormattingOption::RemovePath | QUrl::UrlFormattingOption::RemoveQuery |
                                 QUrl::UrlFormattingOption::StripTrailingSlash) +
                    feed_link;
      }

      QByteArray link_data;
      const auto link_result = download(feed_link, link_data);

      if (link_result.m_networkError == QNetworkReply::NetworkError::NoError) {
        try {
          auto guessed_feed = guessFeed(link_data, link_result.m_contentType);

          guessed_feed.first->setSource(feed_link);
          feeds.append(guessed_feed.first);
        }
        catch (const ApplicationException& ex) {
          qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(feed_link)
                   << " should be direct link to feed file but was not recognized:" << QUOTE_W_SPACE_DOT(ex.message());
        }
      }
    }
  }

  const QString site_url = url.toString(QUrl::UrlFormattingOption::StripTrailingSlash);

  // 3.
  probe_endpoint(site_url + QSL("/feed"));

  // 4.
  probe_endpoint(site_url + QSL("/atom"));

  // 5.
  // NOTE: hasMatch() is required here; isValid() only says that the pattern
  // itself is valid and would therefore be true for every URL.
  if (QRegularExpression(QSL(GITHUB_URL_REGEX)).match(site_url).hasMatch()) {
    const QStringList github_feeds = {QSL("releases.atom"), QSL("commits.atom"), QSL("tags.atom")};

    for (const QString& github_feed : github_feeds) {
      probe_endpoint(site_url + QL1C('/') + github_feed);
    }
  }

  return feeds;
}
|
||||
|
||||
QPair<StandardFeed*, QList<IconLocation>> AtomParser::guessFeed(const QByteArray& content,
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "definitions/typedefs.h"
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "exceptions/feedrecognizedbutfailedexception.h"
|
||||
#include "miscellaneous/settings.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
@ -19,7 +20,89 @@ JsonParser::JsonParser(const QString& data) : FeedParser(data, false) {}
|
|||
JsonParser::~JsonParser() {}
|
||||
|
||||
// Tries to discover JSON feeds reachable from "url".
//
// Steps:
//   1. Test direct URL for a feed. If it is a feed, it is the only result.
//   2. Test embedded JSON feed links from HTML data.
//
// "root" supplies the network proxy used for all downloads.
// Returns all feeds which were recognized by guessFeed(), each with its source
// set to the URL it was downloaded from.
QList<StandardFeed*> JsonParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
  QList<StandardFeed*> feeds;
  const int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();

  // Plain GET of "link" into "output".
  const auto download = [&](const QString& link, QByteArray& output) {
    return NetworkFactory::performNetworkOperation(link,
                                                   timeout,
                                                   {},
                                                   output,
                                                   QNetworkAccessManager::Operation::GetOperation,
                                                   {},
                                                   {},
                                                   {},
                                                   {},
                                                   root->networkProxy());
  };

  // Download URL.
  const QString direct_url = url.toString();
  QByteArray page_data;
  const auto page_result = download(direct_url, page_data);

  if (page_result.m_networkError != QNetworkReply::NetworkError::NoError) {
    return feeds;
  }

  try {
    // 1.
    auto guessed_feed = guessFeed(page_data, page_result.m_contentType);

    guessed_feed.first->setSource(direct_url);

    return {guessed_feed.first};
  }
  catch (...) {
    qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(direct_url) << "is not a direct feed file.";
  }

  // 2.
  QRegularExpression rx(QSL(JSON_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
  QRegularExpression rx_href(QSL(JSON_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);

  rx_href.optimize();

  QRegularExpressionMatchIterator it_rx = rx.globalMatch(QString::fromUtf8(page_data));

  while (it_rx.hasNext()) {
    const QString link_tag = it_rx.next().captured();
    QString feed_link = rx_href.match(link_tag).captured(1);

    if (feed_link.startsWith(QL1S("//"))) {
      // Protocol-relative link.
      feed_link = QSL(URI_SCHEME_HTTP) + feed_link.mid(2);
    }
    else if (feed_link.startsWith(QL1C('/'))) {
      // Host-relative link.
      feed_link = url.toString(QUrl::UrlFormattingOption::RemovePath | QUrl::UrlFormattingOption::RemoveQuery |
                               QUrl::UrlFormattingOption::StripTrailingSlash) +
                  feed_link;
    }

    QByteArray link_data;
    const auto link_result = download(feed_link, link_data);

    if (link_result.m_networkError == QNetworkReply::NetworkError::NoError) {
      try {
        auto guessed_feed = guessFeed(link_data, link_result.m_contentType);

        guessed_feed.first->setSource(feed_link);
        feeds.append(guessed_feed.first);
      }
      catch (const ApplicationException& ex) {
        qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(feed_link)
                 << " should be direct link to feed file but was not recognized:" << QUOTE_W_SPACE_DOT(ex.message());
      }
    }
  }

  return feeds;
}
|
||||
|
||||
QPair<StandardFeed*, QList<IconLocation>> JsonParser::guessFeed(const QByteArray& content,
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "services/standard/parsers/rdfparser.h"
|
||||
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "miscellaneous/settings.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
@ -18,7 +19,141 @@ RdfParser::RdfParser(const QString& data)
|
|||
RdfParser::~RdfParser() {}
|
||||
|
||||
// Tries to discover RDF feeds reachable from "url".
//
// Steps:
//   1. Test direct URL for a feed. If it is a feed, it is the only result.
//   2. Test embedded RDF feed links from HTML data.
//   3. Test "URL/feed" endpoint.
//   4. Test "URL/rdf" endpoint.
//
// "root" supplies the network proxy used for all downloads.
// Returns all feeds which were recognized by guessFeed(), each with its source
// set to the URL it was downloaded from.
QList<StandardFeed*> RdfParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
  QList<StandardFeed*> feeds;
  const int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();

  // Plain GET of "link" into "output".
  const auto download = [&](const QString& link, QByteArray& output) {
    return NetworkFactory::performNetworkOperation(link,
                                                   timeout,
                                                   {},
                                                   output,
                                                   QNetworkAccessManager::Operation::GetOperation,
                                                   {},
                                                   {},
                                                   {},
                                                   {},
                                                   root->networkProxy());
  };

  // Downloads a well-known endpoint and adds it to the results if it is a feed.
  const auto probe_endpoint = [&](const QString& link) {
    QByteArray payload;
    const auto result = download(link, payload);

    if (result.m_networkError != QNetworkReply::NetworkError::NoError) {
      return;
    }

    try {
      auto guessed_feed = guessFeed(payload, result.m_contentType);

      guessed_feed.first->setSource(link);
      feeds.append(guessed_feed.first);
    }
    catch (...) {
      qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(link) << "is not a direct feed file.";
    }
  };

  // Download URL.
  const QString direct_url = url.toString();
  QByteArray page_data;
  const auto page_result = download(direct_url, page_data);

  if (page_result.m_networkError == QNetworkReply::NetworkError::NoError) {
    try {
      // 1.
      auto guessed_feed = guessFeed(page_data, page_result.m_contentType);

      guessed_feed.first->setSource(direct_url);

      return {guessed_feed.first};
    }
    catch (...) {
      qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(direct_url) << "is not a direct feed file.";
    }

    // 2.
    QRegularExpression rx(QSL(RSS_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
    QRegularExpression rx_href(QSL(RSS_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);

    rx_href.optimize();

    QRegularExpressionMatchIterator it_rx = rx.globalMatch(QString::fromUtf8(page_data));

    while (it_rx.hasNext()) {
      const QString link_tag = it_rx.next().captured();
      QString feed_link = rx_href.match(link_tag).captured(1);

      if (feed_link.startsWith(QL1S("//"))) {
        // Protocol-relative link.
        feed_link = QSL(URI_SCHEME_HTTP) + feed_link.mid(2);
      }
      else if (feed_link.startsWith(QL1C('/'))) {
        // Host-relative link.
        feed_link = url.toString(QUrl::UrlFormattingOption::RemovePath | QUrl::UrlFormattingOption::RemoveQuery |
                                 QUrl::UrlFormattingOption::StripTrailingSlash) +
                    feed_link;
      }

      QByteArray link_data;
      const auto link_result = download(feed_link, link_data);

      if (link_result.m_networkError == QNetworkReply::NetworkError::NoError) {
        try {
          auto guessed_feed = guessFeed(link_data, link_result.m_contentType);

          guessed_feed.first->setSource(feed_link);
          feeds.append(guessed_feed.first);
        }
        catch (const ApplicationException& ex) {
          qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(feed_link)
                   << " should be direct link to feed file but was not recognized:" << QUOTE_W_SPACE_DOT(ex.message());
        }
      }
    }
  }

  const QString site_url = url.toString(QUrl::UrlFormattingOption::StripTrailingSlash);

  // 3.
  probe_endpoint(site_url + QSL("/feed"));

  // 4.
  probe_endpoint(site_url + QSL("/rdf"));

  return feeds;
}
|
||||
|
||||
QPair<StandardFeed*, QList<IconLocation>> RdfParser::guessFeed(const QByteArray& content,
|
||||
|
|
|
@ -19,12 +19,18 @@ RssParser::RssParser(const QString& data) : FeedParser(data) {}
|
|||
RssParser::~RssParser() {}
|
||||
|
||||
QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
|
||||
QString my_url = url.toString();
|
||||
QList<StandardFeed*> feeds;
|
||||
|
||||
// 1. Test direct URL for a feed.
|
||||
// 2. Test embedded RSS feed links from HTML data.
|
||||
// 3. Test "URL/feed" endpoint.
|
||||
// 4. Test "URL/rss" endpoint.
|
||||
|
||||
// Download URL.
|
||||
int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();
|
||||
QByteArray data;
|
||||
auto res = NetworkFactory::performNetworkOperation(url.toString(),
|
||||
auto res = NetworkFactory::performNetworkOperation(my_url,
|
||||
timeout,
|
||||
{},
|
||||
data,
|
||||
|
@ -36,20 +42,21 @@ QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url
|
|||
root->networkProxy());
|
||||
|
||||
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
|
||||
// Parse result, might be HTML or directly the feed file.
|
||||
try {
|
||||
// 1.
|
||||
auto guessed_feed = guessFeed(data, res.m_contentType);
|
||||
|
||||
guessed_feed.first->setSource(url.toString());
|
||||
guessed_feed.first->setSource(my_url);
|
||||
|
||||
return {guessed_feed.first};
|
||||
}
|
||||
catch (...) {
|
||||
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(url) << "is not a direct feed file.";
|
||||
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
|
||||
}
|
||||
|
||||
QRegularExpression rx(QSL(FEED_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
|
||||
QRegularExpression rx_href(QSL(FEED_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
|
||||
// 2.
|
||||
QRegularExpression rx(QSL(RSS_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
|
||||
QRegularExpression rx_href(QSL(RSS_HREF_REGEX_MATCHER), QRegularExpression::PatternOption::CaseInsensitiveOption);
|
||||
|
||||
rx_href.optimize();
|
||||
|
||||
|
@ -82,22 +89,70 @@ QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url
|
|||
root->networkProxy());
|
||||
|
||||
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
|
||||
// Parse result, might be HTML or directly the feed file.
|
||||
try {
|
||||
auto guessed_feed = guessFeed(data, res.m_contentType);
|
||||
|
||||
guessed_feed.first->setSource(url.toString());
|
||||
|
||||
guessed_feed.first->setSource(feed_link);
|
||||
feeds.append(guessed_feed.first);
|
||||
}
|
||||
catch (const ApplicationException& ex) {
|
||||
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(url)
|
||||
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(feed_link)
|
||||
<< " should be direct link to feed file but was not recognized:" << QUOTE_W_SPACE_DOT(ex.message());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3.
|
||||
my_url = url.toString(QUrl::UrlFormattingOption::StripTrailingSlash) + QSL("/feed");
|
||||
res = NetworkFactory::performNetworkOperation(my_url,
|
||||
timeout,
|
||||
{},
|
||||
data,
|
||||
QNetworkAccessManager::Operation::GetOperation,
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
root->networkProxy());
|
||||
|
||||
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
|
||||
try {
|
||||
auto guessed_feed = guessFeed(data, res.m_contentType);
|
||||
|
||||
guessed_feed.first->setSource(my_url);
|
||||
feeds.append(guessed_feed.first);
|
||||
}
|
||||
catch (...) {
|
||||
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
|
||||
}
|
||||
}
|
||||
|
||||
// 4.
|
||||
my_url = url.toString(QUrl::UrlFormattingOption::StripTrailingSlash) + QSL("/rss");
|
||||
res = NetworkFactory::performNetworkOperation(my_url,
|
||||
timeout,
|
||||
{},
|
||||
data,
|
||||
QNetworkAccessManager::Operation::GetOperation,
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
root->networkProxy());
|
||||
|
||||
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
|
||||
try {
|
||||
auto guessed_feed = guessFeed(data, res.m_contentType);
|
||||
|
||||
guessed_feed.first->setSource(my_url);
|
||||
feeds.append(guessed_feed.first);
|
||||
}
|
||||
catch (...) {
|
||||
qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct feed file.";
|
||||
}
|
||||
}
|
||||
|
||||
return feeds;
|
||||
}
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "definitions/definitions.h"
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "exceptions/feedrecognizedbutfailedexception.h"
|
||||
#include "miscellaneous/settings.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
|
||||
|
@ -21,7 +22,108 @@ SitemapParser::SitemapParser(const QString& data) : FeedParser(data) {}
|
|||
SitemapParser::~SitemapParser() {}
|
||||
|
||||
// Tries to discover sitemaps reachable from "url".
//
// Steps:
//   1. Process "URL/robots.txt" file.
//   2. Process "URLHOST/robots.txt" file.
//   3. Direct URL test. If sitemap index, process its children.
//   4. Test "URL/sitemap.xml" endpoint.
//   5. Test "URL/sitemap.xml.gz" endpoint.
//
// "root" supplies the network proxy used for all downloads.
// Returns one feed per distinct sitemap URL recognized by guessFeed().
QList<StandardFeed*> SitemapParser::discoverFeeds(ServiceRoot* root, const QUrl& url) const {
  QHash<QString, StandardFeed*> feeds;
  QStringList to_process_sitemaps;

  // URLs which were already downloaded. Without this, a sitemap index which
  // (directly or indirectly) references itself would be re-queued forever.
  QStringList processed_sitemaps;

  const int timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();

  // Plain GET of "link" into "output".
  const auto download = [&](const QString& link, QByteArray& output) {
    return NetworkFactory::performNetworkOperation(link,
                                                   timeout,
                                                   {},
                                                   output,
                                                   QNetworkAccessManager::Operation::GetOperation,
                                                   {},
                                                   {},
                                                   {},
                                                   {},
                                                   root->networkProxy());
  };

  const QString base_url =
    url.toString(QUrl::UrlFormattingOption::StripTrailingSlash).replace(QRegularExpression(QSL("\\/$")), QString());

  // 1.
  // 2.
  QStringList to_process_robots = {base_url + QSL("/robots.txt"),
                                   url.toString(QUrl::UrlFormattingOption::RemovePath |
                                                QUrl::UrlFormattingOption::RemoveQuery) +
                                     QSL("/robots.txt")};

  to_process_robots.removeDuplicates();

  const QRegularExpression rx_sitemap(QSL("Sitemap: ?([^\\r\\n]+)"),
                                      QRegularExpression::PatternOption::CaseInsensitiveOption |
                                        QRegularExpression::PatternOption::MultilineOption);

  for (const QString& robots_url : to_process_robots) {
    QByteArray robots_data;
    const auto robots_result = download(robots_url, robots_data);

    if (robots_result.m_networkError != QNetworkReply::NetworkError::NoError) {
      continue;
    }

    QRegularExpressionMatchIterator it_rx = rx_sitemap.globalMatch(QString::fromUtf8(robots_data));

    while (it_rx.hasNext()) {
      // The capture may carry surrounding whitespace (e.g. "Sitemap:   URL").
      const QString sitemap_link = it_rx.next().captured(1).trimmed();

      if (!sitemap_link.isEmpty()) {
        to_process_sitemaps.append(sitemap_link);
      }
    }
  }

  // 3.
  to_process_sitemaps.append(url.toString());

  // 4.
  to_process_sitemaps.append(base_url + QSL("/sitemap.xml"));

  // 5.
  to_process_sitemaps.append(base_url + QSL("/sitemap.xml.gz"));

  while (!to_process_sitemaps.isEmpty()) {
    const QString my_url = to_process_sitemaps.takeFirst();

    if (processed_sitemaps.contains(my_url)) {
      continue;
    }

    processed_sitemaps.append(my_url);

    QByteArray data;
    const auto res = download(my_url, data);

    if (res.m_networkError != QNetworkReply::NetworkError::NoError) {
      continue;
    }

    try {
      auto guessed_feed = guessFeed(data, res.m_contentType);

      guessed_feed.first->setSource(my_url);
      feeds.insert(my_url, guessed_feed.first);
    }
    catch (const FeedRecognizedButFailedException& ex) {
      // This is index, its payload carries the child sitemaps to visit.
      to_process_sitemaps.append(ex.arbitraryData().toStringList());
    }
    catch (const ApplicationException&) {
      qDebugNN << LOGSEC_CORE << QUOTE_W_SPACE(my_url) << "is not a direct sitemap file.";
    }
  }

  return feeds.values();
}
|
||||
|
||||
QPair<StandardFeed*, QList<IconLocation>> SitemapParser::guessFeed(const QByteArray& content,
|
||||
|
@ -73,7 +175,14 @@ QPair<StandardFeed*, QList<IconLocation>> SitemapParser::guessFeed(const QByteAr
|
|||
QDomElement root_element = xml_document.documentElement();
|
||||
|
||||
if (root_element.tagName() == QSL("sitemapindex")) {
|
||||
throw FeedRecognizedButFailedException(QObject::tr("sitemap indices are not supported"));
|
||||
QStringList locs;
|
||||
int i = 0;
|
||||
|
||||
for (QDomNodeList ndl = root_element.elementsByTagNameNS(sitemapNamespace(), QSL("loc")); i < ndl.size(); i++) {
|
||||
locs << ndl.at(i).toElement().text();
|
||||
}
|
||||
|
||||
throw FeedRecognizedButFailedException(QObject::tr("sitemap indices are not supported"), locs);
|
||||
}
|
||||
|
||||
if (root_element.tagName() != QSL("urlset")) {
|
||||
|
@ -180,5 +289,5 @@ QList<Enclosure> SitemapParser::xmlMessageEnclosures(const QDomElement& msg_elem
|
|||
}
|
||||
|
||||
bool SitemapParser::isGzip(const QByteArray& content) {
|
||||
return ((content[0] & 0xFF) == 0x1f) && ((content[1] & 0xFF) == 0x8b);
|
||||
return content.size() >= 2 && ((content[0] & 0xFF) == 0x1f) && ((content[1] & 0xFF) == 0x8b);
|
||||
}
|
||||
|
|
|
@ -140,16 +140,14 @@ void StandardServiceRoot::addNewFeed(RootItem* selected_item, const QString& url
|
|||
url,
|
||||
qApp->mainFormWidget()));
|
||||
|
||||
form_discover->exec();
|
||||
if (form_discover->exec() == ADVANCED_FEED_ADD_DIALOG_CODE) {
|
||||
QScopedPointer<FormStandardFeedDetails> form_pointer(new FormStandardFeedDetails(this,
|
||||
selected_item,
|
||||
url,
|
||||
qApp->mainFormWidget()));
|
||||
|
||||
/*
|
||||
QScopedPointer<FormStandardFeedDetails> form_pointer(new FormStandardFeedDetails(this,
|
||||
selected_item,
|
||||
url,
|
||||
qApp->mainFormWidget()));
|
||||
|
||||
form_pointer->addEditFeed<StandardFeed>();
|
||||
*/
|
||||
form_pointer->addEditFeed<StandardFeed>();
|
||||
}
|
||||
|
||||
qApp->feedUpdateLock()->unlock();
|
||||
}
|
||||
|
|
|
@ -15,10 +15,10 @@ class FeedsImportExportModel;
|
|||
class QMenu;
|
||||
|
||||
class StandardServiceRoot : public ServiceRoot {
|
||||
Q_OBJECT
|
||||
Q_OBJECT
|
||||
|
||||
friend class FormStandardFeedDetails;
|
||||
friend class FormStandardImportExport;
|
||||
friend class FormStandardFeedDetails;
|
||||
friend class FormStandardImportExport;
|
||||
|
||||
public:
|
||||
explicit StandardServiceRoot(RootItem* parent = nullptr);
|
||||
|
@ -48,7 +48,6 @@ class StandardServiceRoot : public ServiceRoot {
|
|||
void exportFeeds();
|
||||
|
||||
private:
|
||||
|
||||
// Takes structure residing under given root item and adds feeds/categories from
|
||||
// it to active structure.
|
||||
// NOTE: This is used for import/export of the model.
|
||||
|
|
Loading…
Reference in New Issue