make RDF parser inherit from FeedParser, enhance ATOM and RDF detection reliability
This commit is contained in:
parent
b0a11288cb
commit
3b9cdb477b
@ -20,8 +20,6 @@ AtomParser::AtomParser(const QString& data) : FeedParser(data) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AtomParser::~AtomParser() = default;
|
|
||||||
|
|
||||||
QString AtomParser::feedAuthor() const {
|
QString AtomParser::feedAuthor() const {
|
||||||
QDomNodeList top_level_nodes = m_xml.documentElement().childNodes();
|
QDomNodeList top_level_nodes = m_xml.documentElement().childNodes();
|
||||||
QStringList author_str;
|
QStringList author_str;
|
||||||
@ -148,6 +146,10 @@ QString AtomParser::messageAuthor(const QDomElement& msg_element) const {
|
|||||||
return author_str.join(", ");
|
return author_str.join(", ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
QString AtomParser::atomNamespace() const {
|
||||||
|
return m_atomNamespace;
|
||||||
|
}
|
||||||
|
|
||||||
QDomNodeList AtomParser::messageElements() {
|
QDomNodeList AtomParser::messageElements() {
|
||||||
return m_xml.elementsByTagNameNS(m_atomNamespace, QSL("entry"));
|
return m_xml.elementsByTagNameNS(m_atomNamespace, QSL("entry"));
|
||||||
}
|
}
|
||||||
|
@ -13,7 +13,8 @@
|
|||||||
class AtomParser : public FeedParser {
|
class AtomParser : public FeedParser {
|
||||||
public:
|
public:
|
||||||
explicit AtomParser(const QString& data);
|
explicit AtomParser(const QString& data);
|
||||||
virtual ~AtomParser();
|
|
||||||
|
QString atomNamespace() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
QDomNodeList messageElements();
|
QDomNodeList messageElements();
|
||||||
|
@ -19,8 +19,6 @@ FeedParser::FeedParser(QString data) : m_xmlData(std::move(data)), m_mrssNamespa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FeedParser::~FeedParser() = default;
|
|
||||||
|
|
||||||
QList<Message> FeedParser::messages() {
|
QList<Message> FeedParser::messages() {
|
||||||
QString feed_author = feedAuthor();
|
QString feed_author = feedAuthor();
|
||||||
QList<Message> messages;
|
QList<Message> messages;
|
||||||
|
@ -12,7 +12,6 @@
|
|||||||
class FeedParser {
|
class FeedParser {
|
||||||
public:
|
public:
|
||||||
explicit FeedParser(QString data);
|
explicit FeedParser(QString data);
|
||||||
virtual ~FeedParser();
|
|
||||||
|
|
||||||
virtual QList<Message> messages();
|
virtual QList<Message> messages();
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include "services/standard/rdfparser.h"
|
#include "services/standard/rdfparser.h"
|
||||||
|
|
||||||
|
#include "exceptions/applicationexception.h"
|
||||||
#include "miscellaneous/application.h"
|
#include "miscellaneous/application.h"
|
||||||
#include "miscellaneous/textfactory.h"
|
#include "miscellaneous/textfactory.h"
|
||||||
#include "network-web/webfactory.h"
|
#include "network-web/webfactory.h"
|
||||||
@ -9,33 +10,32 @@
|
|||||||
|
|
||||||
#include <QDomDocument>
|
#include <QDomDocument>
|
||||||
|
|
||||||
RdfParser::RdfParser() = default;
|
RdfParser::RdfParser(const QString& data)
|
||||||
|
: FeedParser(data),
|
||||||
|
m_rdfNamespace(QSL("http://www.w3.org/1999/02/22-rdf-syntax-ns#")),
|
||||||
|
m_rssNamespace(QSL("http://purl.org/rss/1.0/")) {}
|
||||||
|
|
||||||
RdfParser::~RdfParser() = default;
|
QDomNodeList RdfParser::messageElements() {
|
||||||
|
|
||||||
QList<Message> RdfParser::parseXmlData(const QString& data) {
|
|
||||||
QList<Message> messages;
|
|
||||||
QDomDocument xml_file;
|
QDomDocument xml_file;
|
||||||
QDateTime current_time = QDateTime::currentDateTime();
|
|
||||||
|
|
||||||
xml_file.setContent(data, true);
|
xml_file.setContent(m_xmlData, true);
|
||||||
|
|
||||||
// Pull out all messages.
|
// Pull out all messages.
|
||||||
QDomNodeList messages_in_xml = xml_file.elementsByTagName(QSL("item"));
|
return xml_file.elementsByTagName(QSL("item"));
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < messages_in_xml.size(); i++) {
|
Message RdfParser::extractMessage(const QDomElement& msg_element, QDateTime current_time) const {
|
||||||
QDomNode message_item = messages_in_xml.item(i);
|
|
||||||
Message new_message;
|
Message new_message;
|
||||||
|
|
||||||
// Deal with title and description.
|
// Deal with title and description.
|
||||||
QString elem_title = message_item.namedItem(QSL("title")).toElement().text().simplified();
|
QString elem_title = msg_element.namedItem(QSL("title")).toElement().text().simplified();
|
||||||
QString elem_description = message_item.namedItem(QSL("description")).toElement().text();
|
QString elem_description = rawXmlChild(msg_element.namedItem(QSL("description")).toElement());
|
||||||
|
|
||||||
// Now we obtained maximum of information for title & description.
|
// Now we obtained maximum of information for title & description.
|
||||||
if (elem_title.isEmpty()) {
|
if (elem_title.isEmpty()) {
|
||||||
if (elem_description.isEmpty()) {
|
if (elem_description.isEmpty()) {
|
||||||
// BOTH title and description are empty, skip this message.
|
// BOTH title and description are empty, skip this message.
|
||||||
continue;
|
throw ApplicationException(QSL("Not enough data for the message."));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// Title is empty but description is not.
|
// Title is empty but description is not.
|
||||||
@ -54,18 +54,18 @@ QList<Message> RdfParser::parseXmlData(const QString& data) {
|
|||||||
|
|
||||||
str.setCodec(DEFAULT_FEED_ENCODING);
|
str.setCodec(DEFAULT_FEED_ENCODING);
|
||||||
|
|
||||||
message_item.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream);
|
msg_element.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream);
|
||||||
new_message.m_rawContents = raw_contents;
|
new_message.m_rawContents = raw_contents;
|
||||||
|
|
||||||
// Deal with link and author.
|
// Deal with link and author.
|
||||||
new_message.m_url = message_item.namedItem(QSL("link")).toElement().text();
|
new_message.m_url = msg_element.namedItem(QSL("link")).toElement().text();
|
||||||
new_message.m_author = message_item.namedItem(QSL("creator")).toElement().text();
|
new_message.m_author = msg_element.namedItem(QSL("creator")).toElement().text();
|
||||||
|
|
||||||
// Deal with creation date.
|
// Deal with creation date.
|
||||||
QString elem_updated = message_item.namedItem(QSL("date")).toElement().text();
|
QString elem_updated = msg_element.namedItem(QSL("date")).toElement().text();
|
||||||
|
|
||||||
if (elem_updated.isEmpty()) {
|
if (elem_updated.isEmpty()) {
|
||||||
elem_updated = message_item.namedItem(QSL("dc:date")).toElement().text();
|
elem_updated = msg_element.namedItem(QSL("dc:date")).toElement().text();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Deal with creation date.
|
// Deal with creation date.
|
||||||
@ -85,8 +85,13 @@ QList<Message> RdfParser::parseXmlData(const QString& data) {
|
|||||||
new_message.m_url = "";
|
new_message.m_url = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
messages.append(new_message);
|
return new_message;
|
||||||
}
|
}
|
||||||
|
|
||||||
return messages;
|
QString RdfParser::rssNamespace() const {
|
||||||
|
return m_rssNamespace;
|
||||||
|
}
|
||||||
|
|
||||||
|
QString RdfParser::rdfNamespace() const {
|
||||||
|
return m_rdfNamespace;
|
||||||
}
|
}
|
||||||
|
@ -3,16 +3,25 @@
|
|||||||
#ifndef RDFPARSER_H
|
#ifndef RDFPARSER_H
|
||||||
#define RDFPARSER_H
|
#define RDFPARSER_H
|
||||||
|
|
||||||
|
#include "services/standard/feedparser.h"
|
||||||
|
|
||||||
#include "core/message.h"
|
#include "core/message.h"
|
||||||
|
|
||||||
#include <QList>
|
#include <QList>
|
||||||
|
|
||||||
class RdfParser {
|
class RdfParser : public FeedParser {
|
||||||
public:
|
public:
|
||||||
explicit RdfParser();
|
explicit RdfParser(const QString& data);
|
||||||
virtual ~RdfParser();
|
|
||||||
|
|
||||||
QList<Message> parseXmlData(const QString& data);
|
QString rdfNamespace() const;
|
||||||
|
QString rssNamespace() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
QDomNodeList messageElements();
|
||||||
|
Message extractMessage(const QDomElement& msg_element, QDateTime current_time) const;
|
||||||
|
|
||||||
|
QString m_rdfNamespace;
|
||||||
|
QString m_rssNamespace;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // RDFPARSER_H
|
#endif // RDFPARSER_H
|
||||||
|
@ -14,8 +14,6 @@
|
|||||||
|
|
||||||
RssParser::RssParser(const QString& data) : FeedParser(data) {}
|
RssParser::RssParser(const QString& data) : FeedParser(data) {}
|
||||||
|
|
||||||
RssParser::~RssParser() = default;
|
|
||||||
|
|
||||||
QDomNodeList RssParser::messageElements() {
|
QDomNodeList RssParser::messageElements() {
|
||||||
QDomNode channel_elem = m_xml.namedItem(QSL("rss")).namedItem(QSL("channel"));
|
QDomNode channel_elem = m_xml.namedItem(QSL("rss")).namedItem(QSL("channel"));
|
||||||
|
|
||||||
|
@ -12,7 +12,6 @@
|
|||||||
class RssParser : public FeedParser {
|
class RssParser : public FeedParser {
|
||||||
public:
|
public:
|
||||||
explicit RssParser(const QString& data);
|
explicit RssParser(const QString& data);
|
||||||
virtual ~RssParser();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
QDomNodeList messageElements();
|
QDomNodeList messageElements();
|
||||||
|
@ -352,6 +352,7 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
|
|||||||
int error_line, error_column;
|
int error_line, error_column;
|
||||||
|
|
||||||
if (!xml_document.setContent(xml_contents_encoded,
|
if (!xml_document.setContent(xml_contents_encoded,
|
||||||
|
true,
|
||||||
&error_msg,
|
&error_msg,
|
||||||
&error_line,
|
&error_line,
|
||||||
&error_column)) {
|
&error_column)) {
|
||||||
@ -362,23 +363,24 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
|
|||||||
feed->setEncoding(encod);
|
feed->setEncoding(encod);
|
||||||
|
|
||||||
QDomElement root_element = xml_document.documentElement();
|
QDomElement root_element = xml_document.documentElement();
|
||||||
QString root_tag_name = root_element.tagName();
|
RdfParser rdf(QSL("<a/>"));
|
||||||
|
AtomParser atom(QSL("<a/>"));
|
||||||
|
|
||||||
if (root_tag_name == QL1S("rdf:RDF")) {
|
if (root_element.namespaceURI() == rdf.rdfNamespace()) {
|
||||||
// We found RDF feed.
|
// We found RDF feed.
|
||||||
QDomElement channel_element = root_element.namedItem(QSL("channel")).toElement();
|
QDomElement channel_element = root_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("channel")).at(0).toElement();
|
||||||
|
|
||||||
feed->setType(Type::Rdf);
|
feed->setType(Type::Rdf);
|
||||||
feed->setTitle(channel_element.namedItem(QSL("title")).toElement().text());
|
feed->setTitle(channel_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("title")).at(0).toElement().text());
|
||||||
feed->setDescription(channel_element.namedItem(QSL("description")).toElement().text());
|
feed->setDescription(channel_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("description")).at(0).toElement().text());
|
||||||
|
|
||||||
QString home_page = channel_element.namedItem(QSL("link")).toElement().text();
|
QString home_page = channel_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("link")).at(0).toElement().text();
|
||||||
|
|
||||||
if (!home_page.isEmpty()) {
|
if (!home_page.isEmpty()) {
|
||||||
icon_possible_locations.prepend({ home_page, false });
|
icon_possible_locations.prepend({ home_page, false });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (root_tag_name == QL1S("rss")) {
|
else if (root_element.tagName() == QL1S("rss")) {
|
||||||
// We found RSS 0.91/0.92/0.93/2.0/2.0.1 feed.
|
// We found RSS 0.91/0.92/0.93/2.0/2.0.1 feed.
|
||||||
QString rss_type = root_element.attribute("version", "2.0");
|
QString rss_type = root_element.attribute("version", "2.0");
|
||||||
|
|
||||||
@ -410,7 +412,7 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
|
|||||||
icon_possible_locations.prepend({ home_page, false });
|
icon_possible_locations.prepend({ home_page, false });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (root_tag_name == QL1S("feed")) {
|
else if (root_element.namespaceURI() == atom.atomNamespace()) {
|
||||||
// We found ATOM feed.
|
// We found ATOM feed.
|
||||||
feed->setType(Type::Atom10);
|
feed->setType(Type::Atom10);
|
||||||
feed->setTitle(root_element.namedItem(QSL("title")).toElement().text());
|
feed->setTitle(root_element.namedItem(QSL("title")).toElement().text());
|
||||||
|
@ -244,7 +244,7 @@ QList<Message> StandardServiceRoot::obtainNewMessages(const QList<Feed*>& feeds)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case StandardFeed::Type::Rdf:
|
case StandardFeed::Type::Rdf:
|
||||||
messages = RdfParser().parseXmlData(formatted_feed_contents);
|
messages = RdfParser(formatted_feed_contents).messages();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case StandardFeed::Type::Atom10:
|
case StandardFeed::Type::Atom10:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user