Refactored parsers into own files.
This commit is contained in:
parent
cd7f0fc4a2
commit
d7d6037deb
12
rssguard.pro
12
rssguard.pro
@ -209,7 +209,6 @@ HEADERS += src/core/feeddownloader.h \
|
|||||||
src/core/message.h \
|
src/core/message.h \
|
||||||
src/core/messagesmodel.h \
|
src/core/messagesmodel.h \
|
||||||
src/core/messagesproxymodel.h \
|
src/core/messagesproxymodel.h \
|
||||||
src/core/parsingfactory.h \
|
|
||||||
src/definitions/definitions.h \
|
src/definitions/definitions.h \
|
||||||
src/dynamic-shortcuts/dynamicshortcuts.h \
|
src/dynamic-shortcuts/dynamicshortcuts.h \
|
||||||
src/dynamic-shortcuts/dynamicshortcutswidget.h \
|
src/dynamic-shortcuts/dynamicshortcutswidget.h \
|
||||||
@ -319,7 +318,10 @@ HEADERS += src/core/feeddownloader.h \
|
|||||||
src/gui/settings/settingsbrowsermail.h \
|
src/gui/settings/settingsbrowsermail.h \
|
||||||
src/gui/settings/settingsfeedsmessages.h \
|
src/gui/settings/settingsfeedsmessages.h \
|
||||||
src/gui/settings/settingsdownloads.h \
|
src/gui/settings/settingsdownloads.h \
|
||||||
src/miscellaneous/feedreader.h
|
src/miscellaneous/feedreader.h \
|
||||||
|
src/core/atomparser.h \
|
||||||
|
src/core/rssparser.h \
|
||||||
|
src/core/rdfparser.h
|
||||||
|
|
||||||
SOURCES += src/core/feeddownloader.cpp \
|
SOURCES += src/core/feeddownloader.cpp \
|
||||||
src/core/feedsmodel.cpp \
|
src/core/feedsmodel.cpp \
|
||||||
@ -327,7 +329,6 @@ SOURCES += src/core/feeddownloader.cpp \
|
|||||||
src/core/message.cpp \
|
src/core/message.cpp \
|
||||||
src/core/messagesmodel.cpp \
|
src/core/messagesmodel.cpp \
|
||||||
src/core/messagesproxymodel.cpp \
|
src/core/messagesproxymodel.cpp \
|
||||||
src/core/parsingfactory.cpp \
|
|
||||||
src/dynamic-shortcuts/dynamicshortcuts.cpp \
|
src/dynamic-shortcuts/dynamicshortcuts.cpp \
|
||||||
src/dynamic-shortcuts/dynamicshortcutswidget.cpp \
|
src/dynamic-shortcuts/dynamicshortcutswidget.cpp \
|
||||||
src/dynamic-shortcuts/shortcutbutton.cpp \
|
src/dynamic-shortcuts/shortcutbutton.cpp \
|
||||||
@ -434,7 +435,10 @@ SOURCES += src/core/feeddownloader.cpp \
|
|||||||
src/gui/settings/settingsbrowsermail.cpp \
|
src/gui/settings/settingsbrowsermail.cpp \
|
||||||
src/gui/settings/settingsfeedsmessages.cpp \
|
src/gui/settings/settingsfeedsmessages.cpp \
|
||||||
src/gui/settings/settingsdownloads.cpp \
|
src/gui/settings/settingsdownloads.cpp \
|
||||||
src/miscellaneous/feedreader.cpp
|
src/miscellaneous/feedreader.cpp \
|
||||||
|
src/core/atomparser.cpp \
|
||||||
|
src/core/rssparser.cpp \
|
||||||
|
src/core/rdfparser.cpp
|
||||||
|
|
||||||
FORMS += src/gui/toolbareditor.ui \
|
FORMS += src/gui/toolbareditor.ui \
|
||||||
src/network-web/downloaditem.ui \
|
src/network-web/downloaditem.ui \
|
||||||
|
120
src/core/atomparser.cpp
Normal file
120
src/core/atomparser.cpp
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
// This file is part of RSS Guard.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2011-2016 by Martin Rotter <rotter.martinos@gmail.com>
|
||||||
|
//
|
||||||
|
// RSS Guard is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// RSS Guard is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with RSS Guard. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#include "core/atomparser.h"
|
||||||
|
|
||||||
|
#include "miscellaneous/textfactory.h"
|
||||||
|
#include "network-web/webfactory.h"
|
||||||
|
|
||||||
|
#include <QDomDocument>
|
||||||
|
|
||||||
|
|
||||||
|
// Default constructor; the body is intentionally empty.
AtomParser::AtomParser() {}
|
||||||
|
|
||||||
|
// Destructor; the body is intentionally empty.
AtomParser::~AtomParser() {}
|
||||||
|
|
||||||
|
QList<Message> AtomParser::parseXmlData(const QString &data) {
|
||||||
|
QList<Message> messages;
|
||||||
|
QDomDocument xml_file;
|
||||||
|
QDateTime current_time = QDateTime::currentDateTime();
|
||||||
|
const QString atom_ns = QSL("http://www.w3.org/2005/Atom");
|
||||||
|
|
||||||
|
xml_file.setContent(data, true);
|
||||||
|
|
||||||
|
// Pull out all messages.
|
||||||
|
QDomNodeList messages_in_xml = xml_file.elementsByTagName(QSL("entry"));
|
||||||
|
|
||||||
|
for (int i = 0; i < messages_in_xml.size(); i++) {
|
||||||
|
QDomNode message_item = messages_in_xml.item(i);
|
||||||
|
Message new_message;
|
||||||
|
|
||||||
|
// Deal with titles & descriptions.
|
||||||
|
QString elem_title = message_item.namedItem(QSL("title")).toElement().text().simplified();
|
||||||
|
QString elem_summary = message_item.namedItem(QSL("summary")).toElement().text();
|
||||||
|
|
||||||
|
if (elem_summary.isEmpty()) {
|
||||||
|
elem_summary = message_item.namedItem(QSL("content")).toElement().text();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now we obtained maximum of information for title & description.
|
||||||
|
if (elem_title.isEmpty()) {
|
||||||
|
if (elem_summary.isEmpty()) {
|
||||||
|
// BOTH title and description are empty, skip this message.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Title is empty but description is not.
|
||||||
|
new_message.m_title = WebFactory::instance()->stripTags(elem_summary.simplified());
|
||||||
|
new_message.m_contents = elem_summary;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Title is not empty, description does not matter.
|
||||||
|
new_message.m_title = WebFactory::instance()->stripTags(elem_title);
|
||||||
|
new_message.m_contents = elem_summary;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deal with link.
|
||||||
|
QDomNodeList elem_links = message_item.toElement().elementsByTagName(QSL("link"));
|
||||||
|
|
||||||
|
for (int i = 0; i < elem_links.size(); i++) {
|
||||||
|
QDomElement link = elem_links.at(i).toElement();
|
||||||
|
|
||||||
|
if (link.attribute(QSL("rel")) == QSL("enclosure")) {
|
||||||
|
new_message.m_enclosures.append(Enclosure(link.attribute(QSL("href")), link.attribute(QSL("type"))));
|
||||||
|
|
||||||
|
qDebug("Adding enclosure '%s' for the message.", qPrintable(new_message.m_enclosures.last().m_url));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
new_message.m_url = link.attribute(QSL("href"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_message.m_url.isEmpty() && !new_message.m_enclosures.isEmpty()) {
|
||||||
|
new_message.m_url = new_message.m_enclosures.first().m_url;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deal with authors.
|
||||||
|
new_message.m_author = WebFactory::instance()->escapeHtml(message_item.namedItem(QSL("author")).namedItem(QSL("name")).toElement().text());
|
||||||
|
|
||||||
|
// Deal with creation date.
|
||||||
|
new_message.m_created = TextFactory::parseDateTime(message_item.namedItem(QSL("updated")).toElement().text());
|
||||||
|
new_message.m_createdFromFeed = !new_message.m_created.isNull();
|
||||||
|
|
||||||
|
if (!new_message.m_createdFromFeed) {
|
||||||
|
// Date was NOT obtained from the feed, set current date as creation date for the message.
|
||||||
|
new_message.m_created = current_time;
|
||||||
|
}
|
||||||
|
|
||||||
|
// WARNING: There is a difference between "" and QString() in terms of nullptr SQL values!
|
||||||
|
// This is because of difference in QString::isNull() and QString::isEmpty(), the "" is not null
|
||||||
|
// while QString() is.
|
||||||
|
if (new_message.m_author.isNull()) {
|
||||||
|
new_message.m_author = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_message.m_url.isNull()) {
|
||||||
|
new_message.m_url = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
messages.append(new_message);
|
||||||
|
}
|
||||||
|
|
||||||
|
return messages;
|
||||||
|
}
|
31
src/core/parsingfactory.h → src/core/atomparser.h
Executable file → Normal file
31
src/core/parsingfactory.h → src/core/atomparser.h
Executable file → Normal file
@ -15,33 +15,20 @@
|
|||||||
// You should have received a copy of the GNU General Public License
|
// You should have received a copy of the GNU General Public License
|
||||||
// along with RSS Guard. If not, see <http://www.gnu.org/licenses/>.
|
// along with RSS Guard. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
#ifndef PARSINGFACTORY_H
|
#ifndef ATOMPARSER_H
|
||||||
#define PARSINGFACTORY_H
|
#define ATOMPARSER_H
|
||||||
|
|
||||||
#include "core/messagesmodel.h"
|
#include "core/message.h"
|
||||||
|
|
||||||
#include <QList>
|
#include <QList>
|
||||||
|
|
||||||
|
|
||||||
// This class contains methods to
|
class AtomParser {
|
||||||
// parse input Unicode textual data into
|
|
||||||
// another objects.
|
|
||||||
//
|
|
||||||
// NOTE: Each parsed message MUST CONTAIN THESE FIELDS (fields
|
|
||||||
// of Message class:
|
|
||||||
// a) m_created,
|
|
||||||
// b) m_title.
|
|
||||||
class ParsingFactory {
|
|
||||||
private:
|
|
||||||
// Constructors and destructors.
|
|
||||||
explicit ParsingFactory();
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Parses input textual data into Message objects.
|
explicit AtomParser();
|
||||||
// NOTE: Input is correctly encoded in Unicode.
|
virtual ~AtomParser();
|
||||||
static QList<Message> parseAsATOM10(const QString &data);
|
|
||||||
static QList<Message> parseAsRDF(const QString &data);
|
QList<Message> parseXmlData(const QString &data);
|
||||||
static QList<Message> parseAsRSS20(const QString &data);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // PARSINGFACTORY_H
|
#endif // ATOMPARSER_H
|
@ -1,281 +0,0 @@
|
|||||||
// This file is part of RSS Guard.
|
|
||||||
//
|
|
||||||
// Copyright (C) 2011-2016 by Martin Rotter <rotter.martinos@gmail.com>
|
|
||||||
//
|
|
||||||
// RSS Guard is free software: you can redistribute it and/or modify
|
|
||||||
// it under the terms of the GNU General Public License as published by
|
|
||||||
// the Free Software Foundation, either version 3 of the License, or
|
|
||||||
// (at your option) any later version.
|
|
||||||
//
|
|
||||||
// RSS Guard is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
// You should have received a copy of the GNU General Public License
|
|
||||||
// along with RSS Guard. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
#include "core/parsingfactory.h"
|
|
||||||
|
|
||||||
#include "miscellaneous/textfactory.h"
|
|
||||||
#include "network-web/webfactory.h"
|
|
||||||
|
|
||||||
#include <QDomDocument>
|
|
||||||
#include <QDomElement>
|
|
||||||
|
|
||||||
|
|
||||||
ParsingFactory::ParsingFactory() {
|
|
||||||
}
|
|
||||||
|
|
||||||
QList<Message> ParsingFactory::parseAsATOM10(const QString &data) {
|
|
||||||
QList<Message> messages;
|
|
||||||
QDomDocument xml_file;
|
|
||||||
QDateTime current_time = QDateTime::currentDateTime();
|
|
||||||
|
|
||||||
xml_file.setContent(data, true);
|
|
||||||
|
|
||||||
// Pull out all messages.
|
|
||||||
QDomNodeList messages_in_xml = xml_file.elementsByTagName(QSL("entry"));
|
|
||||||
|
|
||||||
for (int i = 0; i < messages_in_xml.size(); i++) {
|
|
||||||
QDomNode message_item = messages_in_xml.item(i);
|
|
||||||
Message new_message;
|
|
||||||
|
|
||||||
// Deal with titles & descriptions.
|
|
||||||
QString elem_title = message_item.namedItem(QSL("title")).toElement().text().simplified();
|
|
||||||
QString elem_summary = message_item.namedItem(QSL("summary")).toElement().text();
|
|
||||||
|
|
||||||
if (elem_summary.isEmpty()) {
|
|
||||||
elem_summary = message_item.namedItem(QSL("content")).toElement().text();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now we obtained maximum of information for title & description.
|
|
||||||
if (elem_title.isEmpty()) {
|
|
||||||
if (elem_summary.isEmpty()) {
|
|
||||||
// BOTH title and description are empty, skip this message.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Title is empty but description is not.
|
|
||||||
new_message.m_title = WebFactory::instance()->stripTags(elem_summary.simplified());
|
|
||||||
new_message.m_contents = elem_summary;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Title is not empty, description does not matter.
|
|
||||||
new_message.m_title = WebFactory::instance()->stripTags(elem_title);
|
|
||||||
new_message.m_contents = elem_summary;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Deal with link.
|
|
||||||
QDomNodeList elem_links = message_item.toElement().elementsByTagName(QSL("link"));
|
|
||||||
|
|
||||||
for (int i = 0; i < elem_links.size(); i++) {
|
|
||||||
QDomElement link = elem_links.at(i).toElement();
|
|
||||||
|
|
||||||
if (link.attribute(QSL("rel")) == QSL("enclosure")) {
|
|
||||||
new_message.m_enclosures.append(Enclosure(link.attribute(QSL("href")), link.attribute(QSL("type"))));
|
|
||||||
|
|
||||||
qDebug("Adding enclosure '%s' for the message.", qPrintable(new_message.m_enclosures.last().m_url));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
new_message.m_url = link.attribute(QSL("href"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new_message.m_url.isEmpty() && !new_message.m_enclosures.isEmpty()) {
|
|
||||||
new_message.m_url = new_message.m_enclosures.first().m_url;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Deal with authors.
|
|
||||||
new_message.m_author = WebFactory::instance()->escapeHtml(message_item.namedItem(QSL("author")).namedItem(QSL("name")).toElement().text());
|
|
||||||
|
|
||||||
// Deal with creation date.
|
|
||||||
new_message.m_created = TextFactory::parseDateTime(message_item.namedItem(QSL("updated")).toElement().text());
|
|
||||||
new_message.m_createdFromFeed = !new_message.m_created.isNull();
|
|
||||||
|
|
||||||
if (!new_message.m_createdFromFeed) {
|
|
||||||
// Date was NOT obtained from the feed, set current date as creation date for the message.
|
|
||||||
new_message.m_created = current_time;
|
|
||||||
}
|
|
||||||
|
|
||||||
// WARNING: There is a difference between "" and QString() in terms of nullptr SQL values!
|
|
||||||
// This is because of difference in QString::isNull() and QString::isEmpty(), the "" is not null
|
|
||||||
// while QString() is.
|
|
||||||
if (new_message.m_author.isNull()) {
|
|
||||||
new_message.m_author = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new_message.m_url.isNull()) {
|
|
||||||
new_message.m_url = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
messages.append(new_message);
|
|
||||||
}
|
|
||||||
|
|
||||||
return messages;
|
|
||||||
}
|
|
||||||
|
|
||||||
QList<Message> ParsingFactory::parseAsRDF(const QString &data) {
|
|
||||||
QList<Message> messages;
|
|
||||||
QDomDocument xml_file;
|
|
||||||
QDateTime current_time = QDateTime::currentDateTime();
|
|
||||||
|
|
||||||
xml_file.setContent(data, true);
|
|
||||||
|
|
||||||
// Pull out all messages.
|
|
||||||
QDomNodeList messages_in_xml = xml_file.elementsByTagName(QSL("item"));
|
|
||||||
|
|
||||||
for (int i = 0; i < messages_in_xml.size(); i++) {
|
|
||||||
QDomNode message_item = messages_in_xml.item(i);
|
|
||||||
Message new_message;
|
|
||||||
|
|
||||||
// Deal with title and description.
|
|
||||||
QString elem_title = message_item.namedItem(QSL("title")).toElement().text().simplified();
|
|
||||||
QString elem_description = message_item.namedItem(QSL("description")).toElement().text();
|
|
||||||
|
|
||||||
// Now we obtained maximum of information for title & description.
|
|
||||||
if (elem_title.isEmpty()) {
|
|
||||||
if (elem_description.isEmpty()) {
|
|
||||||
// BOTH title and description are empty, skip this message.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Title is empty but description is not.
|
|
||||||
new_message.m_title = WebFactory::instance()->escapeHtml(WebFactory::instance()->stripTags(elem_description.simplified()));
|
|
||||||
new_message.m_contents = elem_description;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Title is really not empty, description does not matter.
|
|
||||||
new_message.m_title = WebFactory::instance()->escapeHtml(WebFactory::instance()->stripTags(elem_title));
|
|
||||||
new_message.m_contents = elem_description;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Deal with link and author.
|
|
||||||
new_message.m_url = message_item.namedItem(QSL("link")).toElement().text();
|
|
||||||
new_message.m_author = message_item.namedItem(QSL("creator")).toElement().text();
|
|
||||||
|
|
||||||
// Deal with creation date.
|
|
||||||
QString elem_updated = message_item.namedItem(QSL("date")).toElement().text();
|
|
||||||
|
|
||||||
if (elem_updated.isEmpty()) {
|
|
||||||
elem_updated = message_item.namedItem(QSL("dc:date")).toElement().text();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Deal with creation date.
|
|
||||||
new_message.m_created = TextFactory::parseDateTime(elem_updated);
|
|
||||||
new_message.m_createdFromFeed = !new_message.m_created.isNull();
|
|
||||||
|
|
||||||
if (!new_message.m_createdFromFeed) {
|
|
||||||
// Date was NOT obtained from the feed, set current date as creation date for the message.
|
|
||||||
new_message.m_created = current_time;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new_message.m_author.isNull()) {
|
|
||||||
new_message.m_author = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new_message.m_url.isNull()) {
|
|
||||||
new_message.m_url = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
messages.append(new_message);
|
|
||||||
}
|
|
||||||
|
|
||||||
return messages;
|
|
||||||
}
|
|
||||||
|
|
||||||
QList<Message> ParsingFactory::parseAsRSS20(const QString &data) {
|
|
||||||
QList<Message> messages;
|
|
||||||
QDomDocument xml_file;
|
|
||||||
QDateTime current_time = QDateTime::currentDateTime();
|
|
||||||
|
|
||||||
xml_file.setContent(data, true);
|
|
||||||
|
|
||||||
// Pull out all messages.
|
|
||||||
QDomNodeList messages_in_xml = xml_file.elementsByTagName(QSL("item"));
|
|
||||||
|
|
||||||
for (int i = 0; i < messages_in_xml.size(); i++) {
|
|
||||||
QDomNode message_item = messages_in_xml.item(i);
|
|
||||||
Message new_message;
|
|
||||||
|
|
||||||
// Deal with titles & descriptions.
|
|
||||||
QString elem_title = message_item.namedItem(QSL("title")).toElement().text().simplified();
|
|
||||||
QString elem_description = message_item.namedItem(QSL("encoded")).toElement().text();
|
|
||||||
QString elem_enclosure = message_item.namedItem(QSL("enclosure")).toElement().attribute(QSL("url"));
|
|
||||||
QString elem_enclosure_type = message_item.namedItem(QSL("enclosure")).toElement().attribute(QSL("type"));
|
|
||||||
|
|
||||||
if (elem_description.isEmpty()) {
|
|
||||||
elem_description = message_item.namedItem(QSL("description")).toElement().text();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now we obtained maximum of information for title & description.
|
|
||||||
if (elem_title.isEmpty()) {
|
|
||||||
if (elem_description.isEmpty()) {
|
|
||||||
// BOTH title and description are empty, skip this message.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Title is empty but description is not.
|
|
||||||
new_message.m_title = WebFactory::instance()->stripTags(elem_description.simplified());
|
|
||||||
new_message.m_contents = elem_description;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Title is really not empty, description does not matter.
|
|
||||||
new_message.m_title = WebFactory::instance()->stripTags(elem_title);
|
|
||||||
new_message.m_contents = elem_description;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!elem_enclosure.isEmpty()) {
|
|
||||||
new_message.m_enclosures.append(Enclosure(elem_enclosure, elem_enclosure_type));
|
|
||||||
|
|
||||||
qDebug("Adding enclosure '%s' for the message.", qPrintable(elem_enclosure));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Deal with link and author.
|
|
||||||
new_message.m_url = message_item.namedItem(QSL("link")).toElement().text();
|
|
||||||
|
|
||||||
if (new_message.m_url.isEmpty() && !new_message.m_enclosures.isEmpty()) {
|
|
||||||
new_message.m_url = new_message.m_enclosures.first().m_url;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new_message.m_url.isEmpty()) {
|
|
||||||
// Try to get "href" attribute.
|
|
||||||
new_message.m_url = message_item.namedItem(QSL("link")).toElement().attribute(QSL("href"));
|
|
||||||
}
|
|
||||||
|
|
||||||
new_message.m_author = message_item.namedItem(QSL("author")).toElement().text();
|
|
||||||
|
|
||||||
if (new_message.m_author.isEmpty()) {
|
|
||||||
new_message.m_author = message_item.namedItem(QSL("creator")).toElement().text();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Deal with creation date.
|
|
||||||
new_message.m_created = TextFactory::parseDateTime(message_item.namedItem(QSL("pubDate")).toElement().text());
|
|
||||||
|
|
||||||
if (new_message.m_created.isNull()) {
|
|
||||||
new_message.m_created = TextFactory::parseDateTime(message_item.namedItem(QSL("date")).toElement().text());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!(new_message.m_createdFromFeed = !new_message.m_created.isNull())) {
|
|
||||||
// Date was NOT obtained from the feed,
|
|
||||||
// set current date as creation date for the message.
|
|
||||||
new_message.m_created = current_time;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new_message.m_author.isNull()) {
|
|
||||||
new_message.m_author = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new_message.m_url.isNull()) {
|
|
||||||
new_message.m_url = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
messages.append(new_message);
|
|
||||||
}
|
|
||||||
|
|
||||||
return messages;
|
|
||||||
}
|
|
101
src/core/rdfparser.cpp
Normal file
101
src/core/rdfparser.cpp
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
// This file is part of RSS Guard.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2011-2016 by Martin Rotter <rotter.martinos@gmail.com>
|
||||||
|
//
|
||||||
|
// RSS Guard is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// RSS Guard is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with RSS Guard. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#include "core/rdfparser.h"
|
||||||
|
|
||||||
|
#include "miscellaneous/textfactory.h"
|
||||||
|
#include "network-web/webfactory.h"
|
||||||
|
|
||||||
|
#include <QDomDocument>
|
||||||
|
|
||||||
|
|
||||||
|
// Default constructor; the body is intentionally empty.
RdfParser::RdfParser() {}
|
||||||
|
|
||||||
|
// Destructor; the body is intentionally empty.
RdfParser::~RdfParser() {}
|
||||||
|
|
||||||
|
QList<Message> RdfParser::parseXmlData(const QString &data) {
|
||||||
|
QList<Message> messages;
|
||||||
|
QDomDocument xml_file;
|
||||||
|
QDateTime current_time = QDateTime::currentDateTime();
|
||||||
|
|
||||||
|
xml_file.setContent(data, true);
|
||||||
|
|
||||||
|
// Pull out all messages.
|
||||||
|
QDomNodeList messages_in_xml = xml_file.elementsByTagName(QSL("item"));
|
||||||
|
|
||||||
|
for (int i = 0; i < messages_in_xml.size(); i++) {
|
||||||
|
QDomNode message_item = messages_in_xml.item(i);
|
||||||
|
Message new_message;
|
||||||
|
|
||||||
|
// Deal with title and description.
|
||||||
|
QString elem_title = message_item.namedItem(QSL("title")).toElement().text().simplified();
|
||||||
|
QString elem_description = message_item.namedItem(QSL("description")).toElement().text();
|
||||||
|
|
||||||
|
// Now we obtained maximum of information for title & description.
|
||||||
|
if (elem_title.isEmpty()) {
|
||||||
|
if (elem_description.isEmpty()) {
|
||||||
|
// BOTH title and description are empty, skip this message.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Title is empty but description is not.
|
||||||
|
new_message.m_title = WebFactory::instance()->escapeHtml(WebFactory::instance()->stripTags(elem_description.simplified()));
|
||||||
|
new_message.m_contents = elem_description;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Title is really not empty, description does not matter.
|
||||||
|
new_message.m_title = WebFactory::instance()->escapeHtml(WebFactory::instance()->stripTags(elem_title));
|
||||||
|
new_message.m_contents = elem_description;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Deal with link and author.
|
||||||
|
new_message.m_url = message_item.namedItem(QSL("link")).toElement().text();
|
||||||
|
new_message.m_author = message_item.namedItem(QSL("creator")).toElement().text();
|
||||||
|
|
||||||
|
// Deal with creation date.
|
||||||
|
QString elem_updated = message_item.namedItem(QSL("date")).toElement().text();
|
||||||
|
|
||||||
|
if (elem_updated.isEmpty()) {
|
||||||
|
elem_updated = message_item.namedItem(QSL("dc:date")).toElement().text();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deal with creation date.
|
||||||
|
new_message.m_created = TextFactory::parseDateTime(elem_updated);
|
||||||
|
new_message.m_createdFromFeed = !new_message.m_created.isNull();
|
||||||
|
|
||||||
|
if (!new_message.m_createdFromFeed) {
|
||||||
|
// Date was NOT obtained from the feed, set current date as creation date for the message.
|
||||||
|
new_message.m_created = current_time;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_message.m_author.isNull()) {
|
||||||
|
new_message.m_author = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_message.m_url.isNull()) {
|
||||||
|
new_message.m_url = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
messages.append(new_message);
|
||||||
|
}
|
||||||
|
|
||||||
|
return messages;
|
||||||
|
}
|
34
src/core/rdfparser.h
Normal file
34
src/core/rdfparser.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
// This file is part of RSS Guard.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2011-2016 by Martin Rotter <rotter.martinos@gmail.com>
|
||||||
|
//
|
||||||
|
// RSS Guard is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// RSS Guard is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with RSS Guard. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#ifndef RDFPARSER_H
|
||||||
|
#define RDFPARSER_H
|
||||||
|
|
||||||
|
#include "core/message.h"
|
||||||
|
|
||||||
|
#include <QList>
|
||||||
|
|
||||||
|
|
||||||
|
class RdfParser {
|
||||||
|
public:
|
||||||
|
explicit RdfParser();
|
||||||
|
virtual ~RdfParser();
|
||||||
|
|
||||||
|
QList<Message> parseXmlData(const QString &data);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // RDFPARSER_H
|
123
src/core/rssparser.cpp
Normal file
123
src/core/rssparser.cpp
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
// This file is part of RSS Guard.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2011-2016 by Martin Rotter <rotter.martinos@gmail.com>
|
||||||
|
//
|
||||||
|
// RSS Guard is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// RSS Guard is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with RSS Guard. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#include "core/rssparser.h"
|
||||||
|
|
||||||
|
#include "miscellaneous/textfactory.h"
|
||||||
|
#include "network-web/webfactory.h"
|
||||||
|
|
||||||
|
#include <QDomDocument>
|
||||||
|
|
||||||
|
|
||||||
|
// Default constructor; the body is intentionally empty.
RssParser::RssParser() {}
|
||||||
|
|
||||||
|
// Destructor; the body is intentionally empty.
RssParser::~RssParser() {}
|
||||||
|
|
||||||
|
QList<Message> RssParser::parseXmlData(const QString &data) {
|
||||||
|
QList<Message> messages;
|
||||||
|
QDomDocument xml_file;
|
||||||
|
QDateTime current_time = QDateTime::currentDateTime();
|
||||||
|
|
||||||
|
xml_file.setContent(data, true);
|
||||||
|
|
||||||
|
// Pull out all messages.
|
||||||
|
QDomNodeList messages_in_xml = xml_file.elementsByTagName(QSL("item"));
|
||||||
|
|
||||||
|
for (int i = 0; i < messages_in_xml.size(); i++) {
|
||||||
|
QDomNode message_item = messages_in_xml.item(i);
|
||||||
|
Message new_message;
|
||||||
|
|
||||||
|
// Deal with titles & descriptions.
|
||||||
|
QString elem_title = message_item.namedItem(QSL("title")).toElement().text().simplified();
|
||||||
|
QString elem_description = message_item.namedItem(QSL("encoded")).toElement().text();
|
||||||
|
QString elem_enclosure = message_item.namedItem(QSL("enclosure")).toElement().attribute(QSL("url"));
|
||||||
|
QString elem_enclosure_type = message_item.namedItem(QSL("enclosure")).toElement().attribute(QSL("type"));
|
||||||
|
|
||||||
|
if (elem_description.isEmpty()) {
|
||||||
|
elem_description = message_item.namedItem(QSL("description")).toElement().text();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now we obtained maximum of information for title & description.
|
||||||
|
if (elem_title.isEmpty()) {
|
||||||
|
if (elem_description.isEmpty()) {
|
||||||
|
// BOTH title and description are empty, skip this message.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Title is empty but description is not.
|
||||||
|
new_message.m_title = WebFactory::instance()->stripTags(elem_description.simplified());
|
||||||
|
new_message.m_contents = elem_description;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Title is really not empty, description does not matter.
|
||||||
|
new_message.m_title = WebFactory::instance()->stripTags(elem_title);
|
||||||
|
new_message.m_contents = elem_description;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!elem_enclosure.isEmpty()) {
|
||||||
|
new_message.m_enclosures.append(Enclosure(elem_enclosure, elem_enclosure_type));
|
||||||
|
|
||||||
|
qDebug("Adding enclosure '%s' for the message.", qPrintable(elem_enclosure));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deal with link and author.
|
||||||
|
new_message.m_url = message_item.namedItem(QSL("link")).toElement().text();
|
||||||
|
|
||||||
|
if (new_message.m_url.isEmpty() && !new_message.m_enclosures.isEmpty()) {
|
||||||
|
new_message.m_url = new_message.m_enclosures.first().m_url;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_message.m_url.isEmpty()) {
|
||||||
|
// Try to get "href" attribute.
|
||||||
|
new_message.m_url = message_item.namedItem(QSL("link")).toElement().attribute(QSL("href"));
|
||||||
|
}
|
||||||
|
|
||||||
|
new_message.m_author = message_item.namedItem(QSL("author")).toElement().text();
|
||||||
|
|
||||||
|
if (new_message.m_author.isEmpty()) {
|
||||||
|
new_message.m_author = message_item.namedItem(QSL("creator")).toElement().text();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deal with creation date.
|
||||||
|
new_message.m_created = TextFactory::parseDateTime(message_item.namedItem(QSL("pubDate")).toElement().text());
|
||||||
|
|
||||||
|
if (new_message.m_created.isNull()) {
|
||||||
|
new_message.m_created = TextFactory::parseDateTime(message_item.namedItem(QSL("date")).toElement().text());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(new_message.m_createdFromFeed = !new_message.m_created.isNull())) {
|
||||||
|
// Date was NOT obtained from the feed,
|
||||||
|
// set current date as creation date for the message.
|
||||||
|
new_message.m_created = current_time;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_message.m_author.isNull()) {
|
||||||
|
new_message.m_author = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_message.m_url.isNull()) {
|
||||||
|
new_message.m_url = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
messages.append(new_message);
|
||||||
|
}
|
||||||
|
|
||||||
|
return messages;
|
||||||
|
}
|
34
src/core/rssparser.h
Normal file
34
src/core/rssparser.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
// This file is part of RSS Guard.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2011-2016 by Martin Rotter <rotter.martinos@gmail.com>
|
||||||
|
//
|
||||||
|
// RSS Guard is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// RSS Guard is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with RSS Guard. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#ifndef RSSPARSER_H
|
||||||
|
#define RSSPARSER_H
|
||||||
|
|
||||||
|
#include "core/message.h"
|
||||||
|
|
||||||
|
#include <QList>
|
||||||
|
|
||||||
|
|
||||||
|
// Parser that turns the XML text of a feed into a list of Message objects.
// StandardFeed::obtainNewMessages() calls it for the Rss0X and Rss2X feed types.
// The declaration below exposes no data members.
class RssParser {
|
||||||
|
public:
|
||||||
|
// Default constructor; takes no arguments.
explicit RssParser();
|
||||||
|
// Declared virtual, so an object destroyed through a RssParser pointer
// runs the most-derived destructor if the class is ever subclassed.
virtual ~RssParser();
|
||||||
|
|
||||||
|
// Parses the feed XML passed in 'data' and returns the messages extracted from it.
// NOTE(review): the implementation lives outside this header; how malformed
// or empty input is handled should be confirmed there.
QList<Message> parseXmlData(const QString &data);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // RSSPARSER_H
|
@ -18,7 +18,9 @@
|
|||||||
#include "services/standard/standardfeed.h"
|
#include "services/standard/standardfeed.h"
|
||||||
|
|
||||||
#include "definitions/definitions.h"
|
#include "definitions/definitions.h"
|
||||||
#include "core/parsingfactory.h"
|
#include "core/rssparser.h"
|
||||||
|
#include "core/rdfparser.h"
|
||||||
|
#include "core/atomparser.h"
|
||||||
#include "core/feedsmodel.h"
|
#include "core/feedsmodel.h"
|
||||||
#include "miscellaneous/databasequeries.h"
|
#include "miscellaneous/databasequeries.h"
|
||||||
#include "miscellaneous/textfactory.h"
|
#include "miscellaneous/textfactory.h"
|
||||||
@ -444,15 +446,15 @@ QList<Message> StandardFeed::obtainNewMessages(bool *error_during_obtaining) {
|
|||||||
switch (type()) {
|
switch (type()) {
|
||||||
case StandardFeed::Rss0X:
|
case StandardFeed::Rss0X:
|
||||||
case StandardFeed::Rss2X:
|
case StandardFeed::Rss2X:
|
||||||
messages = ParsingFactory::parseAsRSS20(formatted_feed_contents);
|
messages = RssParser().parseXmlData(formatted_feed_contents);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case StandardFeed::Rdf:
|
case StandardFeed::Rdf:
|
||||||
messages = ParsingFactory::parseAsRDF(formatted_feed_contents);
|
messages = RdfParser().parseXmlData(formatted_feed_contents);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case StandardFeed::Atom10:
|
case StandardFeed::Atom10:
|
||||||
messages = ParsingFactory::parseAsATOM10(formatted_feed_contents);
|
messages = AtomParser().parseXmlData(formatted_feed_contents);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user