// For license of this file, see <project-root-folder>/LICENSE.md.
|
|
|
|
#include "services/standard/rssparser.h"
|
|
|
|
#include "exceptions/applicationexception.h"
|
|
#include "miscellaneous/application.h"
|
|
#include "miscellaneous/iofactory.h"
|
|
#include "miscellaneous/textfactory.h"
|
|
#include "network-web/webfactory.h"
|
|
|
|
#include <QDomDocument>
|
|
|
|
// Constructs the parser; the raw feed data is handed to the FeedParser base unchanged.
RssParser::RssParser(const QString& data)
  : FeedParser(data) {
}
|
|
|
|
// Defaulted destructor, defined out of line; this class adds no cleanup of its own here.
RssParser::~RssParser() = default;
|
|
|
|
// Returns all "item" elements found below the "rss" -> "channel" node of m_xml.
// When that channel node cannot be found, an empty node list is returned.
QDomNodeList RssParser::messageElements() {
  const QDomNode channel = m_xml.namedItem(QSL("rss")).namedItem(QSL("channel"));

  if (!channel.isNull()) {
    return channel.toElement().elementsByTagName(QSL("item"));
  }

  return QDomNodeList();
}
|
|
|
|
// Builds a Message from a single feed item element.
//
// Parameters:
//   msg_element  - the item element whose children ("title", "link", "encoded",
//                  "description", "enclosure", "author", "creator", "pubDate",
//                  "date") are read.
//   current_time - used as the creation date when no date could be parsed
//                  from the item.
//
// Returns the populated Message. Author and URL are never null strings
// in the returned object (they may be empty).
//
// Throws ApplicationException when the item has no title and no description
// could be obtained either (not even the URL fallback).
Message RssParser::extractMessage(const QDomElement& msg_element, QDateTime current_time) const {
  Message new_message;

  // Deal with titles & descriptions.
  const QString elem_title = msg_element.namedItem(QSL("title")).toElement().text().simplified();
  QString elem_description = msg_element.namedItem(QSL("encoded")).toElement().text();

  const QDomElement enclosure_elem = msg_element.namedItem(QSL("enclosure")).toElement();
  const QString elem_enclosure = enclosure_elem.attribute(QSL("url"));
  const QString elem_enclosure_type = enclosure_elem.attribute(QSL("type"));

  // Enclosures have to be collected BEFORE the URL is resolved. Previously the
  // enclosure-based URL fallback was evaluated while the enclosure list was
  // still empty, so it could never trigger.
  if (!elem_enclosure.isEmpty()) {
    new_message.m_enclosures.append(Enclosure(elem_enclosure, elem_enclosure_type));
    qDebugNN << LOGSEC_CORE
             << "Found enclosure"
             << QUOTE_W_SPACE(elem_enclosure)
             << "for the message.";
  }
  else {
    // No plain "enclosure" URL, append whatever mrssGetEnclosures() finds for this item.
    new_message.m_enclosures.append(mrssGetEnclosures(msg_element));
  }

  // Resolve URL: text of "link" first, then its "href" attribute.
  const QDomElement link_elem = msg_element.namedItem(QSL("link")).toElement();

  new_message.m_url = link_elem.text();

  if (new_message.m_url.isEmpty()) {
    // Try to get "href" attribute.
    new_message.m_url = link_elem.attribute(QSL("href"));
  }

  // Enclosure URL is the last resort, so that every item which used to get
  // its URL from "link" keeps exactly the same URL as before.
  if (new_message.m_url.isEmpty() && !new_message.m_enclosures.isEmpty()) {
    new_message.m_url = new_message.m_enclosures.first().m_url;
  }

  if (elem_description.isEmpty()) {
    elem_description = msg_element.namedItem(QSL("description")).toElement().text();
  }

  if (elem_description.isEmpty()) {
    // Nothing better is available, use the URL as the description.
    elem_description = new_message.m_url;
  }

  // Now we obtained maximum of information for title & description.
  if (elem_title.isEmpty()) {
    if (elem_description.isEmpty()) {
      // BOTH title and description are empty, skip this message.
      throw ApplicationException(QSL("Not enough data for the message."));
    }

    // Title is empty but description is not, derive the title from the description.
    new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_description.simplified()));
  }
  else {
    // Title is really not empty, description does not matter.
    new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_title));
  }

  new_message.m_contents = elem_description;

  // Author: "author" element first, "creator" as the fallback.
  new_message.m_author = msg_element.namedItem(QSL("author")).toElement().text();

  if (new_message.m_author.isEmpty()) {
    new_message.m_author = msg_element.namedItem(QSL("creator")).toElement().text();
  }

  // Deal with creation date: "pubDate" first, "date" as the fallback.
  new_message.m_created = TextFactory::parseDateTime(msg_element.namedItem(QSL("pubDate")).toElement().text());

  if (new_message.m_created.isNull()) {
    new_message.m_created = TextFactory::parseDateTime(msg_element.namedItem(QSL("date")).toElement().text());
  }

  new_message.m_createdFromFeed = !new_message.m_created.isNull();

  if (!new_message.m_createdFromFeed) {
    // Date was NOT obtained from the feed,
    // set current date as creation date for the message.
    new_message.m_created = current_time;
  }

  // Replace null strings with empty (non-null) ones.
  if (new_message.m_author.isNull()) {
    new_message.m_author = "";
  }

  new_message.m_author = qApp->web()->unescapeHtml(new_message.m_author);

  if (new_message.m_url.isNull()) {
    new_message.m_url = "";
  }

  return new_message;
}
|