diff --git a/src/librssguard/services/standard/atomparser.cpp b/src/librssguard/services/standard/atomparser.cpp index cbb5f6bfa..20bb05049 100755 --- a/src/librssguard/services/standard/atomparser.cpp +++ b/src/librssguard/services/standard/atomparser.cpp @@ -20,8 +20,6 @@ AtomParser::AtomParser(const QString& data) : FeedParser(data) { } } -AtomParser::~AtomParser() = default; - QString AtomParser::feedAuthor() const { QDomNodeList top_level_nodes = m_xml.documentElement().childNodes(); QStringList author_str; @@ -148,6 +146,10 @@ QString AtomParser::messageAuthor(const QDomElement& msg_element) const { return author_str.join(", "); } +QString AtomParser::atomNamespace() const { + return m_atomNamespace; +} + QDomNodeList AtomParser::messageElements() { return m_xml.elementsByTagNameNS(m_atomNamespace, QSL("entry")); } diff --git a/src/librssguard/services/standard/atomparser.h b/src/librssguard/services/standard/atomparser.h index 1a46a8d61..e538d180d 100755 --- a/src/librssguard/services/standard/atomparser.h +++ b/src/librssguard/services/standard/atomparser.h @@ -13,7 +13,8 @@ class AtomParser : public FeedParser { public: explicit AtomParser(const QString& data); - virtual ~AtomParser(); + + QString atomNamespace() const; private: QDomNodeList messageElements(); diff --git a/src/librssguard/services/standard/feedparser.cpp b/src/librssguard/services/standard/feedparser.cpp index 487b4d97a..b925c886f 100755 --- a/src/librssguard/services/standard/feedparser.cpp +++ b/src/librssguard/services/standard/feedparser.cpp @@ -19,8 +19,6 @@ FeedParser::FeedParser(QString data) : m_xmlData(std::move(data)), m_mrssNamespa } } -FeedParser::~FeedParser() = default; - QList FeedParser::messages() { QString feed_author = feedAuthor(); QList messages; diff --git a/src/librssguard/services/standard/feedparser.h b/src/librssguard/services/standard/feedparser.h index a1bc6c236..d8c7a83ea 100755 --- a/src/librssguard/services/standard/feedparser.h +++ b/src/librssguard/services/standard/feedparser.h @@ -12,7 +12,6 @@ class FeedParser { public: explicit FeedParser(QString data); - virtual ~FeedParser(); virtual QList messages(); diff --git a/src/librssguard/services/standard/rdfparser.cpp b/src/librssguard/services/standard/rdfparser.cpp index b0adeb37b..90f434416 100644 --- a/src/librssguard/services/standard/rdfparser.cpp +++ b/src/librssguard/services/standard/rdfparser.cpp @@ -2,6 +2,7 @@ #include "services/standard/rdfparser.h" +#include "exceptions/applicationexception.h" #include "miscellaneous/application.h" #include "miscellaneous/textfactory.h" #include "network-web/webfactory.h" @@ -9,84 +10,88 @@ #include -RdfParser::RdfParser() = default; +RdfParser::RdfParser(const QString& data) + : FeedParser(data), + m_rdfNamespace(QSL("http://www.w3.org/1999/02/22-rdf-syntax-ns#")), + m_rssNamespace(QSL("http://purl.org/rss/1.0/")) {} -RdfParser::~RdfParser() = default; - -QList RdfParser::parseXmlData(const QString& data) { - QList messages; +QDomNodeList RdfParser::messageElements() { QDomDocument xml_file; - QDateTime current_time = QDateTime::currentDateTime(); - xml_file.setContent(data, true); + xml_file.setContent(m_xmlData, true); // Pull out all messages. - QDomNodeList messages_in_xml = xml_file.elementsByTagName(QSL("item")); + return xml_file.elementsByTagName(QSL("item")); +} - for (int i = 0; i < messages_in_xml.size(); i++) { - QDomNode message_item = messages_in_xml.item(i); - Message new_message; +Message RdfParser::extractMessage(const QDomElement& msg_element, QDateTime current_time) const { + Message new_message; - // Deal with title and description. - QString elem_title = message_item.namedItem(QSL("title")).toElement().text().simplified(); - QString elem_description = message_item.namedItem(QSL("description")).toElement().text(); + // Deal with title and description. + QString elem_title = msg_element.namedItem(QSL("title")).toElement().text().simplified(); + QString elem_description = rawXmlChild(msg_element.namedItem(QSL("description")).toElement()); - // Now we obtained maximum of information for title & description. - if (elem_title.isEmpty()) { - if (elem_description.isEmpty()) { - // BOTH title and description are empty, skip this message. - continue; - } - else { - // Title is empty but description is not. - new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_description.simplified())); - new_message.m_contents = elem_description; - } + // Now we obtained maximum of information for title & description. + if (elem_title.isEmpty()) { + if (elem_description.isEmpty()) { + // BOTH title and description are empty, skip this message. + throw ApplicationException(QSL("Not enough data for the message.")); } else { - // Title is really not empty, description does not matter. - new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_title)); + // Title is empty but description is not. + new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_description.simplified())); new_message.m_contents = elem_description; } - - QString raw_contents; - QTextStream str(&raw_contents); - - str.setCodec(DEFAULT_FEED_ENCODING); - - message_item.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream); - new_message.m_rawContents = raw_contents; - - // Deal with link and author. - new_message.m_url = message_item.namedItem(QSL("link")).toElement().text(); - new_message.m_author = message_item.namedItem(QSL("creator")).toElement().text(); - - // Deal with creation date. - QString elem_updated = message_item.namedItem(QSL("date")).toElement().text(); - - if (elem_updated.isEmpty()) { - elem_updated = message_item.namedItem(QSL("dc:date")).toElement().text(); - } - - // Deal with creation date. - new_message.m_created = TextFactory::parseDateTime(elem_updated); - new_message.m_createdFromFeed = !new_message.m_created.isNull(); - - if (!new_message.m_createdFromFeed) { - // Date was NOT obtained from the feed, set current date as creation date for the message. - new_message.m_created = current_time; - } - - if (new_message.m_author.isNull()) { - new_message.m_author = ""; - } - - if (new_message.m_url.isNull()) { - new_message.m_url = ""; - } - - messages.append(new_message); + } + else { + // Title is really not empty, description does not matter. + new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_title)); + new_message.m_contents = elem_description; } - return messages; + QString raw_contents; + QTextStream str(&raw_contents); + + str.setCodec(DEFAULT_FEED_ENCODING); + + msg_element.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream); + new_message.m_rawContents = raw_contents; + + // Deal with link and author. + new_message.m_url = msg_element.namedItem(QSL("link")).toElement().text(); + new_message.m_author = msg_element.namedItem(QSL("creator")).toElement().text(); + + // Deal with creation date. + QString elem_updated = msg_element.namedItem(QSL("date")).toElement().text(); + + if (elem_updated.isEmpty()) { + elem_updated = msg_element.namedItem(QSL("dc:date")).toElement().text(); + } + + // Deal with creation date. + new_message.m_created = TextFactory::parseDateTime(elem_updated); + new_message.m_createdFromFeed = !new_message.m_created.isNull(); + + if (!new_message.m_createdFromFeed) { + // Date was NOT obtained from the feed, set current date as creation date for the message. + new_message.m_created = current_time; + } + + if (new_message.m_author.isNull()) { + new_message.m_author = ""; + } + + if (new_message.m_url.isNull()) { + new_message.m_url = ""; + } + + return new_message; +} + +QString RdfParser::rssNamespace() const { + return m_rssNamespace; +} + +QString RdfParser::rdfNamespace() const { + return m_rdfNamespace; } diff --git a/src/librssguard/services/standard/rdfparser.h b/src/librssguard/services/standard/rdfparser.h index cb3d3c112..1b9147cf6 100644 --- a/src/librssguard/services/standard/rdfparser.h +++ b/src/librssguard/services/standard/rdfparser.h @@ -3,16 +3,25 @@ #ifndef RDFPARSER_H #define RDFPARSER_H +#include "services/standard/feedparser.h" + #include "core/message.h" #include -class RdfParser { +class RdfParser : public FeedParser { public: - explicit RdfParser(); - virtual ~RdfParser(); + explicit RdfParser(const QString& data); - QList parseXmlData(const QString& data); + QString rdfNamespace() const; + QString rssNamespace() const; + + private: + QDomNodeList messageElements(); + Message extractMessage(const QDomElement& msg_element, QDateTime current_time) const; + + QString m_rdfNamespace; + QString m_rssNamespace; }; #endif // RDFPARSER_H diff --git a/src/librssguard/services/standard/rssparser.cpp b/src/librssguard/services/standard/rssparser.cpp index 6ec75be90..504e8b329 100644 --- a/src/librssguard/services/standard/rssparser.cpp +++ b/src/librssguard/services/standard/rssparser.cpp @@ -14,8 +14,6 @@ RssParser::RssParser(const QString& data) : FeedParser(data) {} -RssParser::~RssParser() = default; - QDomNodeList RssParser::messageElements() { QDomNode channel_elem = m_xml.namedItem(QSL("rss")).namedItem(QSL("channel")); diff --git a/src/librssguard/services/standard/rssparser.h b/src/librssguard/services/standard/rssparser.h index f5a4eea5c..e1c014530 100644 --- a/src/librssguard/services/standard/rssparser.h +++ b/src/librssguard/services/standard/rssparser.h @@ -12,7 +12,6 @@ class RssParser : public FeedParser { public: explicit RssParser(const QString& data); - virtual ~RssParser(); private: QDomNodeList messageElements(); diff --git a/src/librssguard/services/standard/standardfeed.cpp b/src/librssguard/services/standard/standardfeed.cpp index 62964c23b..a8254023b 100644 --- a/src/librssguard/services/standard/standardfeed.cpp +++ b/src/librssguard/services/standard/standardfeed.cpp @@ -352,6 +352,7 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type, int error_line, error_column; if (!xml_document.setContent(xml_contents_encoded, + true, &error_msg, &error_line, &error_column)) { @@ -362,23 +363,24 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type, feed->setEncoding(encod); QDomElement root_element = xml_document.documentElement(); - QString root_tag_name = root_element.tagName(); + RdfParser rdf(QSL("")); + AtomParser atom(QSL("")); - if (root_tag_name == QL1S("rdf:RDF")) { + if (root_element.namespaceURI() == rdf.rdfNamespace()) { // We found RDF feed. - QDomElement channel_element = root_element.namedItem(QSL("channel")).toElement(); + QDomElement channel_element = root_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("channel")).at(0).toElement(); feed->setType(Type::Rdf); - feed->setTitle(channel_element.namedItem(QSL("title")).toElement().text()); - feed->setDescription(channel_element.namedItem(QSL("description")).toElement().text()); + feed->setTitle(channel_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("title")).at(0).toElement().text()); + feed->setDescription(channel_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("description")).at(0).toElement().text()); - QString home_page = channel_element.namedItem(QSL("link")).toElement().text(); + QString home_page = channel_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("link")).at(0).toElement().text(); if (!home_page.isEmpty()) { icon_possible_locations.prepend({ home_page, false }); } } - else if (root_tag_name == QL1S("rss")) { + else if (root_element.tagName() == QL1S("rss")) { // We found RSS 0.91/0.92/0.93/2.0/2.0.1 feed. QString rss_type = root_element.attribute("version", "2.0"); @@ -410,7 +412,7 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type, icon_possible_locations.prepend({ home_page, false }); } } - else if (root_tag_name == QL1S("feed")) { + else if (root_element.namespaceURI() == atom.atomNamespace()) { // We found ATOM feed. feed->setType(Type::Atom10); feed->setTitle(root_element.namedItem(QSL("title")).toElement().text()); diff --git a/src/librssguard/services/standard/standardserviceroot.cpp b/src/librssguard/services/standard/standardserviceroot.cpp index 5f86292d0..6f50c58a1 100644 --- a/src/librssguard/services/standard/standardserviceroot.cpp +++ b/src/librssguard/services/standard/standardserviceroot.cpp @@ -244,7 +244,7 @@ QList StandardServiceRoot::obtainNewMessages(const QList& feeds) break; case StandardFeed::Type::Rdf: - messages = RdfParser().parseXmlData(formatted_feed_contents); + messages = RdfParser(formatted_feed_contents).messages(); break; case StandardFeed::Type::Atom10: