mirror of
https://github.com/martinrotter/rssguard.git
synced 2025-02-03 02:37:46 +01:00
support for sitemaps
This commit is contained in:
parent
fe72cd1564
commit
8720fe663d
@ -10,7 +10,7 @@ RSS Guard
|
||||
|
||||
### [Discord server](https://discord.gg/7xbVMPPNqH) | [Downloads](https://github.com/martinrotter/rssguard/releases) | [Development builds](https://github.com/martinrotter/rssguard/releases/tag/devbuild) | [Documentation](https://rssguard.readthedocs.io)
|
||||
|
||||
RSS Guard is a simple RSS/ATOM feed reader for Windows, Linux, BSD, OS/2 or macOS which can work with RSS/ATOM/JSON feeds as well as many online feed services:
|
||||
RSS Guard is a simple RSS/ATOM feed reader for Windows, Linux, BSD, OS/2 or macOS which can work with RSS/ATOM/JSON/Sitemap feeds as well as many online feed services:
|
||||
* [Feedly](https://feedly.com)
|
||||
* [Gmail](https://developers.google.com/gmail/api)
|
||||
* Google Reader API ([Bazqux](https://bazqux.com), [FreshRSS](https://freshrss.org), [Inoreader](https://www.inoreader.com), [Miniflux](https://miniflux.app), [Reedah](http://reedah.com), [The Old Reader](https://theoldreader.com) and more)
|
||||
|
@ -1,6 +1,6 @@
|
||||
Supported Feed Readers
|
||||
======================
|
||||
RSS Guard is a multi-account application and supports many web-based feed readers via built-in plugins. One of the plugins, of course, provides support for the standard list of `RSS/ATOM/JSON` feeds with the set of features everyone would expect from a classic feed reader.
|
||||
RSS Guard is a multi-account application and supports many web-based feed readers via built-in plugins. One of the plugins, of course, provides support for the standard list of `RSS/ATOM/JSON/Sitemap` feeds with the set of features everyone would expect from a classic feed reader.
|
||||
|
||||
I organized the supported web-based feed readers into an elegant table:
|
||||
|
||||
|
@ -54,8 +54,6 @@ StandardFeedDetails::StandardFeedDetails(QWidget* parent) : QWidget(parent) {
|
||||
QVariant::fromValue(int(StandardFeed::Type::Json)));
|
||||
m_ui.m_cmbType->addItem(StandardFeed::typeToString(StandardFeed::Type::Sitemap),
|
||||
QVariant::fromValue(int(StandardFeed::Type::Sitemap)));
|
||||
m_ui.m_cmbType->addItem(StandardFeed::typeToString(StandardFeed::Type::SitemapIndex),
|
||||
QVariant::fromValue(int(StandardFeed::Type::SitemapIndex)));
|
||||
|
||||
// Load available encodings.
|
||||
const QList<QByteArray> encodings = QTextCodec::availableCodecs();
|
||||
|
@ -180,7 +180,7 @@ QString AtomParser::xmlMessageUrl(const QDomElement& msg_element) const {
|
||||
|
||||
QList<Enclosure> AtomParser::xmlMessageEnclosures(const QDomElement& msg_element) const {
|
||||
QList<Enclosure> enclosures;
|
||||
QDomNodeList elem_links = msg_element.toElement().elementsByTagNameNS(m_atomNamespace, QSL("link"));
|
||||
QDomNodeList elem_links = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("link"));
|
||||
|
||||
for (int i = 0; i < elem_links.size(); i++) {
|
||||
QDomElement link = elem_links.at(i).toElement();
|
||||
|
@ -9,8 +9,8 @@
|
||||
#include "definitions/definitions.h"
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "exceptions/feedrecognizedbutfailedexception.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
||||
#include <QDomDocument>
|
||||
#include <QTextCodec>
|
||||
@ -68,7 +68,11 @@ QPair<StandardFeed*, QList<IconLocation>> SitemapParser::guessFeed(const QByteAr
|
||||
|
||||
QDomElement root_element = xml_document.documentElement();
|
||||
|
||||
if (root_element.tagName() != QSL("urlset") && root_element.tagName() != QSL("sitemapindex")) {
|
||||
if (root_element.tagName() == QSL("sitemapindex")) {
|
||||
throw FeedRecognizedButFailedException(QObject::tr("sitemap indices are not supported"));
|
||||
}
|
||||
|
||||
if (root_element.tagName() != QSL("urlset")) {
|
||||
throw ApplicationException(QObject::tr("not a Sitemap"));
|
||||
}
|
||||
|
||||
@ -76,17 +80,8 @@ QPair<StandardFeed*, QList<IconLocation>> SitemapParser::guessFeed(const QByteAr
|
||||
QList<IconLocation> icon_possible_locations;
|
||||
|
||||
feed->setEncoding(xml_schema_encoding);
|
||||
|
||||
if (root_element.tagName() == QSL("urlset")) {
|
||||
// Sitemap.
|
||||
feed->setType(StandardFeed::Type::Sitemap);
|
||||
feed->setTitle(StandardFeed::typeToString(StandardFeed::Type::Sitemap));
|
||||
}
|
||||
else {
|
||||
// Sitemap index.
|
||||
feed->setType(StandardFeed::Type::SitemapIndex);
|
||||
feed->setTitle(StandardFeed::typeToString(StandardFeed::Type::SitemapIndex));
|
||||
}
|
||||
feed->setType(StandardFeed::Type::Sitemap);
|
||||
feed->setTitle(StandardFeed::typeToString(StandardFeed::Type::Sitemap));
|
||||
|
||||
return {feed, icon_possible_locations};
|
||||
}
|
||||
@ -108,45 +103,76 @@ QString SitemapParser::sitemapVideoNamespace() const {
|
||||
}
|
||||
|
||||
QDomNodeList SitemapParser::xmlMessageElements() {
|
||||
return {};
|
||||
return m_xml.elementsByTagNameNS(sitemapNamespace(), QSL("url"));
|
||||
}
|
||||
|
||||
// TODO: implement
|
||||
|
||||
QString SitemapParser::xmlMessageTitle(const QDomElement& msg_element) const {
|
||||
return {};
|
||||
QString str_title = msg_element.elementsByTagNameNS(sitemapNewsNamespace(), QSL("title")).at(0).toElement().text();
|
||||
|
||||
if (str_title.isEmpty()) {
|
||||
str_title = msg_element.elementsByTagNameNS(sitemapVideoNamespace(), QSL("title")).at(0).toElement().text();
|
||||
}
|
||||
|
||||
return str_title;
|
||||
}
|
||||
|
||||
QString SitemapParser::xmlMessageUrl(const QDomElement& msg_element) const {
|
||||
return {};
|
||||
return msg_element.elementsByTagNameNS(sitemapNamespace(), QSL("loc")).at(0).toElement().text();
|
||||
}
|
||||
|
||||
QString SitemapParser::xmlMessageDescription(const QDomElement& msg_element) const {
|
||||
return {};
|
||||
}
|
||||
|
||||
QString SitemapParser::xmlMessageAuthor(const QDomElement& msg_element) const {
|
||||
return {};
|
||||
return xmlRawChild(msg_element.elementsByTagNameNS(sitemapVideoNamespace(), QSL("description")).at(0).toElement());
|
||||
}
|
||||
|
||||
QDateTime SitemapParser::xmlMessageDateCreated(const QDomElement& msg_element) const {
|
||||
return {};
|
||||
QString str_date = msg_element.elementsByTagNameNS(sitemapNamespace(), QSL("lastmod")).at(0).toElement().text();
|
||||
|
||||
if (str_date.isEmpty()) {
|
||||
str_date =
|
||||
msg_element.elementsByTagNameNS(sitemapNewsNamespace(), QSL("publication_date")).at(0).toElement().text();
|
||||
}
|
||||
|
||||
return TextFactory::parseDateTime(str_date);
|
||||
}
|
||||
|
||||
QString SitemapParser::xmlMessageId(const QDomElement& msg_element) const {
|
||||
return {};
|
||||
return xmlMessageUrl(msg_element);
|
||||
}
|
||||
|
||||
QList<Enclosure> SitemapParser::xmlMessageEnclosures(const QDomElement& msg_element) const {
|
||||
return {};
|
||||
}
|
||||
QList<Enclosure> enclosures;
|
||||
|
||||
QList<MessageCategory> SitemapParser::xmlMessageCategories(const QDomElement& msg_element) const {
|
||||
return {};
|
||||
}
|
||||
// sitemap-image
|
||||
QDomNodeList elem_links = msg_element.elementsByTagNameNS(sitemapImageNamespace(), QSL("image"));
|
||||
|
||||
QString SitemapParser::xmlMessageRawContents(const QDomElement& msg_element) const {
|
||||
return {};
|
||||
for (int i = 0; i < elem_links.size(); i++) {
|
||||
QDomElement link = elem_links.at(i).toElement();
|
||||
QString loc = link.elementsByTagNameNS(sitemapImageNamespace(), QSL("loc")).at(0).toElement().text();
|
||||
|
||||
if (!loc.isEmpty()) {
|
||||
// NOTE: The MIME is made up.
|
||||
enclosures.append(Enclosure(loc, QSL("image/png")));
|
||||
}
|
||||
}
|
||||
|
||||
// sitemap-video
|
||||
elem_links = msg_element.elementsByTagNameNS(sitemapVideoNamespace(), QSL("video"));
|
||||
|
||||
for (int i = 0; i < elem_links.size(); i++) {
|
||||
QDomElement link = elem_links.at(i).toElement();
|
||||
QString loc = link.elementsByTagNameNS(sitemapVideoNamespace(), QSL("player_loc")).at(0).toElement().text();
|
||||
|
||||
if (loc.isEmpty()) {
|
||||
loc = link.elementsByTagNameNS(sitemapVideoNamespace(), QSL("content_loc")).at(0).toElement().text();
|
||||
}
|
||||
|
||||
if (!loc.isEmpty()) {
|
||||
// NOTE: The MIME is made up.
|
||||
enclosures.append(Enclosure(loc, QSL("video/mpeg")));
|
||||
}
|
||||
}
|
||||
|
||||
return enclosures;
|
||||
}
|
||||
|
||||
bool SitemapParser::isGzip(const QByteArray& content) {
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#include "services/standard/parsers/feedparser.h"
|
||||
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
||||
class SitemapParser : public FeedParser {
|
||||
public:
|
||||
explicit SitemapParser(const QString& data);
|
||||
@ -20,12 +22,9 @@ class SitemapParser : public FeedParser {
|
||||
virtual QString xmlMessageTitle(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageUrl(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageDescription(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageAuthor(const QDomElement& msg_element) const;
|
||||
virtual QDateTime xmlMessageDateCreated(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageId(const QDomElement& msg_element) const;
|
||||
virtual QList<Enclosure> xmlMessageEnclosures(const QDomElement& msg_element) const;
|
||||
virtual QList<MessageCategory> xmlMessageCategories(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageRawContents(const QDomElement& msg_element) const;
|
||||
|
||||
private:
|
||||
QString sitemapNamespace() const;
|
||||
|
@ -159,9 +159,6 @@ QString StandardFeed::typeToString(StandardFeed::Type type) {
|
||||
case Type::Sitemap:
|
||||
return QSL("Sitemap");
|
||||
|
||||
case Type::SitemapIndex:
|
||||
return QSL("Sitemap Index");
|
||||
|
||||
case Type::Rss2X:
|
||||
default:
|
||||
return QSL("RSS 2.0/2.0.1");
|
||||
|
@ -36,8 +36,7 @@ class StandardFeed : public Feed {
|
||||
Rdf = 2, // Sometimes denoted as RSS 1.0.
|
||||
Atom10 = 3,
|
||||
Json = 4,
|
||||
SitemapIndex = 5,
|
||||
Sitemap = 6
|
||||
Sitemap = 5
|
||||
};
|
||||
|
||||
explicit StandardFeed(RootItem* parent_item = nullptr);
|
||||
|
@ -161,10 +161,6 @@ bool FeedsImportExportModel::exportToOMPL20(QByteArray& result, bool export_icon
|
||||
outline_feed.setAttribute(QSL("version"), QSL("Sitemap"));
|
||||
break;
|
||||
|
||||
case StandardFeed::Type::SitemapIndex:
|
||||
outline_feed.setAttribute(QSL("version"), QSL("SitemapIndex"));
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -22,11 +22,16 @@
|
||||
#include "services/standard/parsers/jsonparser.h"
|
||||
#include "services/standard/parsers/rdfparser.h"
|
||||
#include "services/standard/parsers/rssparser.h"
|
||||
#include "services/standard/parsers/sitemapparser.h"
|
||||
#include "services/standard/standardcategory.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
#include "services/standard/standardfeedsimportexportmodel.h"
|
||||
#include "services/standard/standardserviceentrypoint.h"
|
||||
|
||||
#if defined(ENABLE_COMPRESSED_SITEMAP)
|
||||
#include "3rd-party/qcompressor/qcompressor.h"
|
||||
#endif
|
||||
|
||||
#include <QAction>
|
||||
#include <QClipboard>
|
||||
#include <QSqlTableModel>
|
||||
@ -180,6 +185,20 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed,
|
||||
throw FeedFetchException(Feed::Status::NetworkError, NetworkFactory::networkErrorText(network_result));
|
||||
}
|
||||
|
||||
// Sitemap parser supports gzip-encoded data too.
|
||||
if (SitemapParser::isGzip(feed_contents)) {
|
||||
#if defined(ENABLE_COMPRESSED_SITEMAP)
|
||||
qWarningNN << LOGSEC_CORE << "Decompressing gzipped feed data.";
|
||||
|
||||
QByteArray uncompressed_feed_contents;
|
||||
QCompressor::gzipDecompress(feed_contents, uncompressed_feed_contents);
|
||||
|
||||
feed_contents = uncompressed_feed_contents;
|
||||
#else
|
||||
qWarningNN << LOGSEC_CORE << "This feed is gzipped.";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Encode downloaded data for further parsing.
|
||||
QTextCodec* codec = QTextCodec::codecForName(f->encoding().toLocal8Bit());
|
||||
|
||||
@ -243,6 +262,9 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed,
|
||||
messages = JsonParser(formatted_feed_contents).messages();
|
||||
break;
|
||||
|
||||
case StandardFeed::Type::Sitemap:
|
||||
messages = SitemapParser(formatted_feed_contents).messages();
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user