mirror of
https://github.com/martinrotter/rssguard.git
synced 2025-02-06 04:14:22 +01:00
unified abstraction in RSS/RDF/ATOM parsers, improved support for RDF/modules
This commit is contained in:
parent
3052b45726
commit
91573ec11d
@ -21,119 +21,17 @@ AtomParser::AtomParser(const QString& data) : FeedParser(data) {
|
||||
}
|
||||
|
||||
QString AtomParser::feedAuthor() const {
|
||||
QDomNodeList top_level_nodes = m_xml.documentElement().childNodes();
|
||||
QStringList author_str;
|
||||
auto authors = m_xml.documentElement().elementsByTagNameNS(m_atomNamespace, QSL("author"));
|
||||
|
||||
for (int i = 0; i < top_level_nodes.size(); i++) {
|
||||
auto elem = top_level_nodes.at(i).toElement();
|
||||
for (int i = 0; i < authors.size(); i++) {
|
||||
QDomNode auth = authors.at(i);
|
||||
|
||||
if (elem.localName() != QSL("author") || elem.namespaceURI() != m_atomNamespace) {
|
||||
continue;
|
||||
}
|
||||
|
||||
QDomNodeList names = elem.elementsByTagNameNS(m_atomNamespace, QSL("name"));
|
||||
|
||||
if (!names.isEmpty()) {
|
||||
const QString name = names.at(0).toElement().text();
|
||||
|
||||
if (!name.isEmpty() && !author_str.contains(name)) {
|
||||
author_str.append(name);
|
||||
}
|
||||
if (auth.parentNode() == m_xml.documentElement()) {
|
||||
return auth.toElement().elementsByTagNameNS(m_atomNamespace, QSL("name")).at(0).toElement().text();
|
||||
}
|
||||
}
|
||||
|
||||
return author_str.join(QSL(", "));
|
||||
}
|
||||
|
||||
Message AtomParser::extractMessage(const QDomElement& msg_element, const QDateTime& current_time) const {
|
||||
Message new_message;
|
||||
QString title = textsFromPath(msg_element, m_atomNamespace, QSL("title"), true).join(QSL(", "));
|
||||
QString summary = rawXmlChild(msg_element.elementsByTagNameNS(m_atomNamespace, QSL("content")).at(0).toElement());
|
||||
|
||||
if (summary.isEmpty()) {
|
||||
summary = rawXmlChild(msg_element.elementsByTagNameNS(m_atomNamespace, QSL("summary")).at(0).toElement());
|
||||
|
||||
if (summary.isEmpty()) {
|
||||
summary = rawXmlChild(msg_element.elementsByTagNameNS(m_mrssNamespace, QSL("description")).at(0).toElement());
|
||||
}
|
||||
}
|
||||
|
||||
// Now we obtained maximum of information for title & description.
|
||||
if (title.isEmpty() && summary.isEmpty()) {
|
||||
// BOTH title and description are empty, skip this message.
|
||||
throw ApplicationException(QSL("Not enough data for the message."));
|
||||
}
|
||||
|
||||
// Title is not empty, description does not matter.
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(title));
|
||||
new_message.m_contents = summary;
|
||||
new_message.m_author = qApp->web()->unescapeHtml(messageAuthor(msg_element));
|
||||
new_message.m_customId = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("id")).at(0).toElement().text();
|
||||
|
||||
QString raw_contents;
|
||||
QTextStream str(&raw_contents);
|
||||
|
||||
msg_element.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream);
|
||||
new_message.m_rawContents = raw_contents;
|
||||
|
||||
QString updated = textsFromPath(msg_element, m_atomNamespace, QSL("updated"), true).join(QSL(", "));
|
||||
|
||||
if (updated.isEmpty()) {
|
||||
updated = textsFromPath(msg_element, m_atomNamespace, QSL("modified"), true).join(QSL(", "));
|
||||
}
|
||||
|
||||
// Deal with creation date.
|
||||
new_message.m_created = TextFactory::parseDateTime(updated);
|
||||
new_message.m_createdFromFeed = !new_message.m_created.isNull();
|
||||
|
||||
if (!new_message.m_createdFromFeed) {
|
||||
// Date was NOT obtained from the feed, set current date as creation date for the message.
|
||||
new_message.m_created = current_time;
|
||||
}
|
||||
|
||||
// Deal with links
|
||||
QDomNodeList elem_links = msg_element.toElement().elementsByTagNameNS(m_atomNamespace, QSL("link"));
|
||||
QString last_link_alternate, last_link_other;
|
||||
|
||||
for (int i = 0; i < elem_links.size(); i++) {
|
||||
QDomElement link = elem_links.at(i).toElement();
|
||||
QString attribute = link.attribute(QSL("rel"));
|
||||
|
||||
if (attribute == QSL("enclosure")) {
|
||||
QString enclosure_type = link.attribute(QSL("type"));
|
||||
|
||||
if (enclosure_type.isEmpty()) {
|
||||
enclosure_type = QSL(DEFAULT_ENCLOSURE_MIME_TYPE);
|
||||
}
|
||||
|
||||
new_message.m_enclosures.append(Enclosure(link.attribute(QSL("href")), enclosure_type));
|
||||
qDebugNN << LOGSEC_CORE
|
||||
<< "Found enclosure"
|
||||
<< QUOTE_W_SPACE(new_message.m_enclosures.last().m_url)
|
||||
<< "for the message.";
|
||||
}
|
||||
else if (attribute.isEmpty() || attribute == QSL("alternate")) {
|
||||
last_link_alternate = link.attribute(QSL("href"));
|
||||
}
|
||||
else {
|
||||
last_link_other = link.attribute(QSL("href"));
|
||||
}
|
||||
}
|
||||
|
||||
// Obtain MRSS enclosures.
|
||||
new_message.m_enclosures.append(mrssGetEnclosures(msg_element));
|
||||
|
||||
if (!last_link_alternate.isEmpty()) {
|
||||
new_message.m_url = last_link_alternate;
|
||||
}
|
||||
else if (!last_link_other.isEmpty()) {
|
||||
new_message.m_url = last_link_other;
|
||||
}
|
||||
else if (!new_message.m_enclosures.isEmpty()) {
|
||||
new_message.m_url = new_message.m_enclosures.first().m_url;
|
||||
}
|
||||
|
||||
return new_message;
|
||||
return {};
|
||||
}
|
||||
|
||||
QString AtomParser::messageAuthor(const QDomElement& msg_element) const {
|
||||
@ -158,3 +56,75 @@ QString AtomParser::atomNamespace() const {
|
||||
// Returns all "entry" elements in the Atom namespace found in m_xml;
// each one represents a single message of the feed.
QDomNodeList AtomParser::messageElements() {
  auto entries = m_xml.elementsByTagNameNS(m_atomNamespace, QSL("entry"));

  return entries;
}
|
||||
|
||||
// Returns the title of the given entry.
//
// Texts of the Atom "title" element are obtained via textsFromPath()
// (called with only_first = true) and joined with ", ".
QString AtomParser::messageTitle(const QDomElement& msg_element) const {
  const auto titles = textsFromPath(msg_element, m_atomNamespace, QSL("title"), true);

  return titles.join(QSL(", "));
}
|
||||
|
||||
// Returns the body of the given entry.
//
// Sources are tried in this order and the first non-empty one wins:
//   1. Atom "content",
//   2. Atom "summary",
//   3. MRSS "description" (returned as-is, even when empty).
QString AtomParser::messageDescription(const QDomElement& msg_element) const {
  const QString content =
    rawXmlChild(msg_element.elementsByTagNameNS(m_atomNamespace, QSL("content")).at(0).toElement());

  if (!content.isEmpty()) {
    return content;
  }

  const QString summary =
    rawXmlChild(msg_element.elementsByTagNameNS(m_atomNamespace, QSL("summary")).at(0).toElement());

  if (!summary.isEmpty()) {
    return summary;
  }

  // Last resort, nothing else is tried after this.
  return rawXmlChild(msg_element.elementsByTagNameNS(m_mrssNamespace, QSL("description")).at(0).toElement());
}
|
||||
|
||||
// Returns the date of the given entry.
//
// The Atom "updated" element is preferred; when its text is blank,
// the "modified" element is used instead. The chosen text is handed
// to TextFactory::parseDateTime().
QDateTime AtomParser::messageDateCreated(const QDomElement& msg_element) const {
  // Joined text of the given Atom tag within the entry.
  const auto date_text = [&](const QString& tag) {
    return textsFromPath(msg_element, m_atomNamespace, tag, true).join(QSL(", "));
  };

  const QString updated = date_text(QSL("updated"));
  const bool has_updated = !updated.simplified().isEmpty();

  return TextFactory::parseDateTime(has_updated ? updated : date_text(QSL("modified")));
}
|
||||
|
||||
// Returns the text of the first Atom "id" element found inside the entry.
QString AtomParser::messageId(const QDomElement& msg_element) const {
  const auto ids = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("id"));
  const QDomElement first_id = ids.at(0).toElement();

  return first_id.text();
}
|
||||
|
||||
// Returns the URL of the given entry, picked from its Atom "link" elements.
//
// The first link whose "rel" attribute is missing/empty or equals
// "alternate" wins immediately. Otherwise the "href" of the last link
// which is not an enclosure is used. A null string is returned when
// no usable link exists.
QString AtomParser::messageUrl(const QDomElement& msg_element) const {
  const auto links = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("link"));
  QString fallback_href;

  for (int idx = 0; idx < links.size(); ++idx) {
    const QDomElement link = links.at(idx).toElement();
    const QString rel = link.attribute(QSL("rel"));

    if (rel == QSL("enclosure")) {
      // Enclosure links never act as the message URL here.
      continue;
    }

    const QString href = link.attribute(QSL("href"));

    if (rel.isEmpty() || rel == QSL("alternate")) {
      return href;
    }

    // Any other relation is only remembered as a fallback.
    fallback_href = href;
  }

  return fallback_href.isEmpty() ? QString() : fallback_href;
}
|
||||
|
||||
// Returns enclosures of the given entry.
//
// Every Atom "link" element with rel="enclosure" yields one Enclosure
// built from its "href" and "type" attributes (the type may be empty).
QList<Enclosure> AtomParser::messageEnclosures(const QDomElement& msg_element) const {
  const auto links = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("link"));
  QList<Enclosure> found;

  for (int idx = 0; idx < links.size(); ++idx) {
    const QDomElement link = links.at(idx).toElement();

    if (link.attribute(QSL("rel")) != QSL("enclosure")) {
      continue;
    }

    found.append(Enclosure(link.attribute(QSL("href")), link.attribute(QSL("type"))));
  }

  return found;
}
|
||||
|
@ -16,11 +16,16 @@ class AtomParser : public FeedParser {
|
||||
|
||||
QString atomNamespace() const;
|
||||
|
||||
private:
|
||||
QDomNodeList messageElements();
|
||||
QString feedAuthor() const;
|
||||
Message extractMessage(const QDomElement& msg_element, const QDateTime& current_time) const;
|
||||
QString messageAuthor(const QDomElement& msg_element) const;
|
||||
protected:
|
||||
virtual QString messageTitle(const QDomElement& msg_element) const;
|
||||
virtual QString messageDescription(const QDomElement& msg_element) const;
|
||||
virtual QDateTime messageDateCreated(const QDomElement& msg_element) const;
|
||||
virtual QString messageId(const QDomElement& msg_element) const;
|
||||
virtual QString messageUrl(const QDomElement& msg_element) const;
|
||||
virtual QList<Enclosure> messageEnclosures(const QDomElement& msg_element) const;
|
||||
virtual QDomNodeList messageElements();
|
||||
virtual QString messageAuthor(const QDomElement& msg_element) const;
|
||||
virtual QString feedAuthor() const;
|
||||
|
||||
private:
|
||||
QString m_atomNamespace;
|
||||
|
@ -20,6 +20,14 @@ FeedParser::FeedParser(QString data) : m_xmlData(std::move(data)), m_mrssNamespa
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the XML of the given message element serialized into a string.
//
// The element is saved with zero indentation and the encoding policy
// is taken from the text stream.
QString FeedParser::messageRawContents(const QDomElement& msg_element) const {
  const int indent_width = 0;
  QString serialized;
  QTextStream out(&serialized);

  msg_element.save(out, indent_width, QDomNode::EncodingPolicy::EncodingFromTextStream);

  return serialized;
}
|
||||
|
||||
QList<Message> FeedParser::messages() {
|
||||
QString feed_author = feedAuthor();
|
||||
QList<Message> messages;
|
||||
@ -29,15 +37,50 @@ QList<Message> FeedParser::messages() {
|
||||
QDomNodeList messages_in_xml = messageElements();
|
||||
|
||||
for (int i = 0; i < messages_in_xml.size(); i++) {
|
||||
QDomNode message_item = messages_in_xml.item(i);
|
||||
QDomElement message_item = messages_in_xml.item(i).toElement();
|
||||
|
||||
try {
|
||||
Message new_message = extractMessage(message_item.toElement(), current_time);
|
||||
Message new_message;
|
||||
|
||||
// Fill available data.
|
||||
new_message.m_title = qApp->web()->unescapeHtml(messageTitle(message_item));
|
||||
new_message.m_contents = messageDescription(message_item);
|
||||
new_message.m_author = qApp->web()->unescapeHtml(messageAuthor(message_item));
|
||||
new_message.m_url = messageUrl(message_item);
|
||||
new_message.m_created = messageDateCreated(message_item);
|
||||
new_message.m_customId = messageId(message_item);
|
||||
new_message.m_rawContents = messageRawContents(message_item);
|
||||
new_message.m_enclosures = messageEnclosures(message_item);
|
||||
new_message.m_enclosures.append(mrssGetEnclosures(message_item));
|
||||
|
||||
// Fixup missing data.
|
||||
//
|
||||
// NOTE: Message must have "title" field, otherwise it is skipped.
|
||||
|
||||
// Author.
|
||||
if (new_message.m_author.isEmpty() && !feed_author.isEmpty()) {
|
||||
new_message.m_author = feed_author;
|
||||
}
|
||||
|
||||
// Created date.
|
||||
new_message.m_createdFromFeed = !new_message.m_created.isNull();
|
||||
|
||||
if (!new_message.m_createdFromFeed) {
|
||||
// Date was NOT obtained from the feed, set current date as creation date for the message.
|
||||
// NOTE: Date is lessened by 1 second for each message to allow for more
|
||||
// stable sorting.
|
||||
new_message.m_created = current_time.addSecs(-1);
|
||||
current_time = new_message.m_created;
|
||||
}
|
||||
|
||||
// Enclosures.
|
||||
for (Enclosure& enc : new_message.m_enclosures) {
|
||||
if (enc.m_mimeType.simplified().isEmpty()) {
|
||||
enc.m_mimeType = QSL(DEFAULT_ENCLOSURE_MIME_TYPE);
|
||||
}
|
||||
}
|
||||
|
||||
// Url.
|
||||
new_message.m_url = new_message.m_url.replace(QRegularExpression(QSL("[\\t\\n]")), QString());
|
||||
|
||||
messages.append(new_message);
|
||||
|
@ -15,14 +15,23 @@ class FeedParser {
|
||||
|
||||
virtual QList<Message> messages();
|
||||
|
||||
protected:
|
||||
virtual QString feedAuthor() const;
|
||||
virtual QDomNodeList messageElements() = 0;
|
||||
virtual QString messageTitle(const QDomElement& msg_element) const = 0;
|
||||
virtual QString messageUrl(const QDomElement& msg_element) const = 0;
|
||||
virtual QString messageDescription(const QDomElement& msg_element) const = 0;
|
||||
virtual QString messageAuthor(const QDomElement& msg_element) const = 0;
|
||||
virtual QDateTime messageDateCreated(const QDomElement& msg_element) const = 0;
|
||||
virtual QString messageId(const QDomElement& msg_element) const = 0;
|
||||
virtual QList<Enclosure> messageEnclosures(const QDomElement& msg_element) const = 0;
|
||||
virtual QString messageRawContents(const QDomElement& msg_element) const;
|
||||
|
||||
protected:
|
||||
QList<Enclosure> mrssGetEnclosures(const QDomElement& msg_element) const;
|
||||
QString mrssTextFromPath(const QDomElement& msg_element, const QString& xml_path) const;
|
||||
QString rawXmlChild(const QDomElement& container) const;
|
||||
QStringList textsFromPath(const QDomElement& element, const QString& namespace_uri, const QString& xml_path, bool only_first) const;
|
||||
virtual QDomNodeList messageElements() = 0;
|
||||
virtual QString feedAuthor() const;
|
||||
virtual Message extractMessage(const QDomElement& msg_element, const QDateTime& current_time) const = 0;
|
||||
|
||||
protected:
|
||||
QString m_xmlData;
|
||||
|
@ -13,77 +13,12 @@
|
||||
RdfParser::RdfParser(const QString& data)
|
||||
: FeedParser(data),
|
||||
m_rdfNamespace(QSL("http://www.w3.org/1999/02/22-rdf-syntax-ns#")),
|
||||
m_rssNamespace(QSL("http://purl.org/rss/1.0/")) {}
|
||||
m_rssNamespace(QSL("http://purl.org/rss/1.0/")),
|
||||
m_rssCoNamespace(QSL("http://purl.org/rss/1.0/modules/content/")),
|
||||
m_dcElNamespace(QSL("http://purl.org/dc/elements/1.1/")) {}
|
||||
|
||||
QDomNodeList RdfParser::messageElements() {
|
||||
QDomDocument xml_file;
|
||||
|
||||
xml_file.setContent(m_xmlData, true);
|
||||
|
||||
// Pull out all messages.
|
||||
return xml_file.elementsByTagName(QSL("item"));
|
||||
}
|
||||
|
||||
Message RdfParser::extractMessage(const QDomElement& msg_element, const QDateTime& current_time) const {
|
||||
Message new_message;
|
||||
|
||||
// Deal with title and description.
|
||||
QString elem_title = msg_element.namedItem(QSL("title")).toElement().text().simplified();
|
||||
QString elem_description = rawXmlChild(msg_element.namedItem(QSL("description")).toElement());
|
||||
|
||||
// Now we obtained maximum of information for title & description.
|
||||
if (elem_title.isEmpty()) {
|
||||
if (elem_description.isEmpty()) {
|
||||
// BOTH title and description are empty, skip this message.
|
||||
throw ApplicationException(QSL("Not enough data for the message."));
|
||||
}
|
||||
else {
|
||||
// Title is empty but description is not.
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_description.simplified()));
|
||||
new_message.m_contents = elem_description;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Title is really not empty, description does not matter.
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_title));
|
||||
new_message.m_contents = elem_description;
|
||||
}
|
||||
|
||||
QString raw_contents;
|
||||
QTextStream str(&raw_contents);
|
||||
|
||||
msg_element.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream);
|
||||
new_message.m_rawContents = raw_contents;
|
||||
|
||||
// Deal with link and author.
|
||||
new_message.m_url = msg_element.namedItem(QSL("link")).toElement().text();
|
||||
new_message.m_author = msg_element.namedItem(QSL("creator")).toElement().text();
|
||||
|
||||
// Deal with creation date.
|
||||
QString elem_updated = msg_element.namedItem(QSL("date")).toElement().text();
|
||||
|
||||
if (elem_updated.isEmpty()) {
|
||||
elem_updated = msg_element.namedItem(QSL("dc:date")).toElement().text();
|
||||
}
|
||||
|
||||
// Deal with creation date.
|
||||
new_message.m_created = TextFactory::parseDateTime(elem_updated);
|
||||
new_message.m_createdFromFeed = !new_message.m_created.isNull();
|
||||
|
||||
if (!new_message.m_createdFromFeed) {
|
||||
// Date was NOT obtained from the feed, set current date as creation date for the message.
|
||||
new_message.m_created = current_time;
|
||||
}
|
||||
|
||||
if (new_message.m_author.isNull()) {
|
||||
new_message.m_author = QL1S("");
|
||||
}
|
||||
|
||||
if (new_message.m_url.isNull()) {
|
||||
new_message.m_url = QL1S("");
|
||||
}
|
||||
|
||||
return new_message;
|
||||
return m_xml.elementsByTagNameNS(m_rssNamespace, QSL("item"));
|
||||
}
|
||||
|
||||
QString RdfParser::rssNamespace() const {
|
||||
@ -93,3 +28,37 @@ QString RdfParser::rssNamespace() const {
|
||||
// Getter for the RDF namespace URI stored in m_rdfNamespace.
QString RdfParser::rdfNamespace() const {
  return this->m_rdfNamespace;
}
|
||||
|
||||
// Returns the text of the first "title" element (RSS 1.0 namespace)
// found inside the given item.
QString RdfParser::messageTitle(const QDomElement& msg_element) const {
  const auto titles = msg_element.elementsByTagNameNS(m_rssNamespace, QSL("title"));

  return titles.at(0).toElement().text();
}
|
||||
|
||||
// Returns the body of the given item.
//
// The "encoded" element from the content module namespace is preferred;
// when its text is blank, the RSS 1.0 "description" element is used.
QString RdfParser::messageDescription(const QDomElement& msg_element) const {
  const QString encoded = msg_element.elementsByTagNameNS(m_rssCoNamespace, QSL("encoded")).at(0).toElement().text();

  if (!encoded.simplified().isEmpty()) {
    return encoded;
  }

  return msg_element.elementsByTagNameNS(m_rssNamespace, QSL("description")).at(0).toElement().text();
}
|
||||
|
||||
// Returns the text of the first "creator" element (Dublin Core elements
// namespace) found inside the given item.
QString RdfParser::messageAuthor(const QDomElement& msg_element) const {
  const auto creators = msg_element.elementsByTagNameNS(m_dcElNamespace, QSL("creator"));

  return creators.at(0).toElement().text();
}
|
||||
|
||||
// Returns the date of the given item, parsed by TextFactory::parseDateTime()
// from the text of its first "date" element (Dublin Core elements namespace).
QDateTime RdfParser::messageDateCreated(const QDomElement& msg_element) const {
  const auto dates = msg_element.elementsByTagNameNS(m_dcElNamespace, QSL("date"));
  const QString date_text = dates.at(0).toElement().text();

  return TextFactory::parseDateTime(date_text);
}
|
||||
|
||||
// Returns the text of the first "identifier" element (Dublin Core elements
// namespace) found inside the given item.
QString RdfParser::messageId(const QDomElement& msg_element) const {
  const auto identifiers = msg_element.elementsByTagNameNS(m_dcElNamespace, QSL("identifier"));

  return identifiers.at(0).toElement().text();
}
|
||||
|
||||
// Returns the text of the first "link" element (RSS 1.0 namespace)
// found inside the given item.
QString RdfParser::messageUrl(const QDomElement& msg_element) const {
  const auto links = msg_element.elementsByTagNameNS(m_rssNamespace, QSL("link"));

  return links.at(0).toElement().text();
}
|
||||
|
||||
// This parser extracts no enclosures of its own from the item;
// the element is ignored and an empty list is always returned.
QList<Enclosure> RdfParser::messageEnclosures(const QDomElement& msg_element) const {
  return QList<Enclosure>();
}
|
||||
|
@ -16,12 +16,21 @@ class RdfParser : public FeedParser {
|
||||
QString rdfNamespace() const;
|
||||
QString rssNamespace() const;
|
||||
|
||||
private:
|
||||
QDomNodeList messageElements();
|
||||
Message extractMessage(const QDomElement& msg_element, const QDateTime& current_time) const;
|
||||
protected:
|
||||
virtual QString messageTitle(const QDomElement& msg_element) const;
|
||||
virtual QString messageDescription(const QDomElement& msg_element) const;
|
||||
virtual QString messageAuthor(const QDomElement& msg_element) const;
|
||||
virtual QDateTime messageDateCreated(const QDomElement& msg_element) const;
|
||||
virtual QString messageId(const QDomElement& msg_element) const;
|
||||
virtual QString messageUrl(const QDomElement& msg_element) const;
|
||||
virtual QList<Enclosure> messageEnclosures(const QDomElement& msg_element) const;
|
||||
virtual QDomNodeList messageElements();
|
||||
|
||||
private:
|
||||
QString m_rdfNamespace;
|
||||
QString m_rssNamespace;
|
||||
QString m_rssCoNamespace;
|
||||
QString m_dcElNamespace;
|
||||
};
|
||||
|
||||
#endif // RDFPARSER_H
|
||||
|
@ -25,102 +25,63 @@ QDomNodeList RssParser::messageElements() {
|
||||
}
|
||||
}
|
||||
|
||||
Message RssParser::extractMessage(const QDomElement& msg_element, const QDateTime& current_time) const {
|
||||
Message new_message;
|
||||
// Returns the text of the "title" child of the given item.
QString RssParser::messageTitle(const QDomElement& msg_element) const {
  const QDomElement title_element = msg_element.namedItem(QSL("title")).toElement();

  return title_element.text();
}
|
||||
|
||||
// Deal with titles & descriptions.
|
||||
QString elem_title = msg_element.namedItem(QSL("title")).toElement().text().simplified();
|
||||
QString elem_description = rawXmlChild(msg_element.elementsByTagName(QSL("encoded")).at(0).toElement());
|
||||
// Returns the body of the given item.
//
// The first "encoded" element is preferred; when it yields nothing,
// the first "description" element is used instead. Both are looked up
// by plain tag name and passed through rawXmlChild().
QString RssParser::messageDescription(const QDomElement& msg_element) const {
  const QString encoded = rawXmlChild(msg_element.elementsByTagName(QSL("encoded")).at(0).toElement());

  if (!encoded.isEmpty()) {
    return encoded;
  }

  return rawXmlChild(msg_element.elementsByTagName(QSL("description")).at(0).toElement());
}
|
||||
|
||||
// Returns the author of the given item: text of its "author" child,
// or text of its "creator" child when "author" is empty.
QString RssParser::messageAuthor(const QDomElement& msg_element) const {
  const QString author = msg_element.namedItem(QSL("author")).toElement().text();

  return author.isEmpty() ? msg_element.namedItem(QSL("creator")).toElement().text() : author;
}
|
||||
|
||||
// Returns the date of the given item.
//
// The "pubDate" child is parsed first; if that gives a null date,
// the "date" child is parsed instead (result may still be null).
QDateTime RssParser::messageDateCreated(const QDomElement& msg_element) const {
  const QDateTime from_pub_date = TextFactory::parseDateTime(msg_element.namedItem(QSL("pubDate")).toElement().text());

  if (!from_pub_date.isNull()) {
    return from_pub_date;
  }

  return TextFactory::parseDateTime(msg_element.namedItem(QSL("date")).toElement().text());
}
|
||||
|
||||
// Returns the text of the "guid" child of the given item.
QString RssParser::messageId(const QDomElement& msg_element) const {
  const QDomElement guid_element = msg_element.namedItem(QSL("guid")).toElement();

  return guid_element.text();
}
|
||||
|
||||
// Returns the URL of the given item: text of its "link" child, or the
// "href" attribute of that same child when the element has no text.
QString RssParser::messageUrl(const QDomElement& msg_element) const {
  const QDomElement link_element = msg_element.namedItem(QSL("link")).toElement();
  const QString link_text = link_element.text();

  // Fall back to the "href" attribute only when there is no text.
  return link_text.isEmpty() ? link_element.attribute(QSL("href")) : link_text;
}
|
||||
|
||||
QList<Enclosure> RssParser::messageEnclosures(const QDomElement& msg_element) const {
|
||||
QString elem_enclosure = msg_element.namedItem(QSL("enclosure")).toElement().attribute(QSL("url"));
|
||||
QString elem_enclosure_type = msg_element.namedItem(QSL("enclosure")).toElement().attribute(QSL("type"));
|
||||
|
||||
new_message.m_customId = msg_element.namedItem(QSL("guid")).toElement().text();
|
||||
new_message.m_url = msg_element.namedItem(QSL("link")).toElement().text();
|
||||
|
||||
if (new_message.m_url.isEmpty() && !new_message.m_enclosures.isEmpty()) {
|
||||
new_message.m_url = new_message.m_enclosures.first().m_url;
|
||||
}
|
||||
|
||||
if (new_message.m_url.isEmpty()) {
|
||||
// Try to get "href" attribute.
|
||||
new_message.m_url = msg_element.namedItem(QSL("link")).toElement().attribute(QSL("href"));
|
||||
}
|
||||
|
||||
if (elem_description.isEmpty()) {
|
||||
elem_description = rawXmlChild(msg_element.elementsByTagName(QSL("description")).at(0).toElement());
|
||||
}
|
||||
|
||||
if (elem_description.isEmpty()) {
|
||||
elem_description = new_message.m_url;
|
||||
}
|
||||
|
||||
// Now we obtained maximum of information for title & description.
|
||||
if (elem_title.isEmpty()) {
|
||||
if (elem_description.isEmpty()) {
|
||||
// BOTH title and description are empty, skip this message.
|
||||
throw ApplicationException(QSL("Not enough data for the message."));
|
||||
}
|
||||
else {
|
||||
// Title is empty but description is not.
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_description.simplified()));
|
||||
new_message.m_contents = elem_description;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Title is really not empty, description does not matter.
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_title));
|
||||
new_message.m_contents = elem_description;
|
||||
}
|
||||
|
||||
if (!elem_enclosure.isEmpty()) {
|
||||
if (elem_enclosure_type.isEmpty()) {
|
||||
elem_enclosure_type = QSL(DEFAULT_ENCLOSURE_MIME_TYPE);
|
||||
}
|
||||
|
||||
new_message.m_enclosures.append(Enclosure(elem_enclosure, elem_enclosure_type));
|
||||
qDebugNN << LOGSEC_CORE
|
||||
<< "Found enclosure"
|
||||
<< QUOTE_W_SPACE(elem_enclosure)
|
||||
<< "for the message.";
|
||||
return { Enclosure(elem_enclosure, elem_enclosure_type) };
|
||||
}
|
||||
else {
|
||||
new_message.m_enclosures.append(mrssGetEnclosures(msg_element));
|
||||
return {};
|
||||
}
|
||||
|
||||
QString raw_contents;
|
||||
QTextStream str(&raw_contents);
|
||||
|
||||
msg_element.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream);
|
||||
new_message.m_rawContents = raw_contents;
|
||||
|
||||
new_message.m_author = msg_element.namedItem(QSL("author")).toElement().text();
|
||||
|
||||
if (new_message.m_author.isEmpty()) {
|
||||
new_message.m_author = msg_element.namedItem(QSL("creator")).toElement().text();
|
||||
}
|
||||
|
||||
// Deal with creation date.
|
||||
new_message.m_created = TextFactory::parseDateTime(msg_element.namedItem(QSL("pubDate")).toElement().text());
|
||||
|
||||
if (new_message.m_created.isNull()) {
|
||||
new_message.m_created = TextFactory::parseDateTime(msg_element.namedItem(QSL("date")).toElement().text());
|
||||
}
|
||||
|
||||
if (!(new_message.m_createdFromFeed = !new_message.m_created.isNull())) {
|
||||
// Date was NOT obtained from the feed,
|
||||
// set current date as creation date for the message.
|
||||
new_message.m_created = current_time;
|
||||
}
|
||||
|
||||
if (new_message.m_author.isNull()) {
|
||||
new_message.m_author = QL1S("");
|
||||
}
|
||||
|
||||
new_message.m_author = qApp->web()->unescapeHtml(new_message.m_author);
|
||||
|
||||
if (new_message.m_url.isNull()) {
|
||||
new_message.m_url = QL1S("");
|
||||
}
|
||||
|
||||
return new_message;
|
||||
}
|
||||
|
@ -13,9 +13,15 @@ class RssParser : public FeedParser {
|
||||
public:
|
||||
explicit RssParser(const QString& data);
|
||||
|
||||
private:
|
||||
QDomNodeList messageElements();
|
||||
Message extractMessage(const QDomElement& msg_element, const QDateTime& current_time) const;
|
||||
protected:
|
||||
virtual QDomNodeList messageElements();
|
||||
virtual QString messageTitle(const QDomElement& msg_element) const;
|
||||
virtual QString messageDescription(const QDomElement& msg_element) const;
|
||||
virtual QString messageAuthor(const QDomElement& msg_element) const;
|
||||
virtual QDateTime messageDateCreated(const QDomElement& msg_element) const;
|
||||
virtual QString messageId(const QDomElement& msg_element) const;
|
||||
virtual QString messageUrl(const QDomElement& msg_element) const;
|
||||
virtual QList<Enclosure> messageEnclosures(const QDomElement& msg_element) const;
|
||||
};
|
||||
|
||||
#endif // RSSPARSER_H
|
||||
|
Loading…
x
Reference in New Issue
Block a user