From 617040c275fb4785b22ae6120b754afd42b53796 Mon Sep 17 00:00:00 2001 From: Martin Rotter Date: Sun, 6 Feb 2022 19:39:25 +0100 Subject: [PATCH] unify json parser --- .../services/standard/parsers/atomparser.cpp | 28 +- .../services/standard/parsers/atomparser.h | 16 +- .../services/standard/parsers/feedparser.cpp | 244 +++++++++++++----- .../services/standard/parsers/feedparser.h | 48 ++-- .../services/standard/parsers/jsonparser.cpp | 127 +++++---- .../services/standard/parsers/jsonparser.h | 19 +- .../services/standard/parsers/rdfparser.cpp | 16 +- .../services/standard/parsers/rdfparser.h | 16 +- .../services/standard/parsers/rssparser.cpp | 20 +- .../services/standard/parsers/rssparser.h | 16 +- 10 files changed, 351 insertions(+), 199 deletions(-) diff --git a/src/librssguard/services/standard/parsers/atomparser.cpp b/src/librssguard/services/standard/parsers/atomparser.cpp index 2283c17c4..cf33568c0 100644 --- a/src/librssguard/services/standard/parsers/atomparser.cpp +++ b/src/librssguard/services/standard/parsers/atomparser.cpp @@ -34,7 +34,7 @@ QString AtomParser::feedAuthor() const { return {}; } -QString AtomParser::messageAuthor(const QDomElement& msg_element) const { +QString AtomParser::xmlMessageAuthor(const QDomElement& msg_element) const { QDomNodeList authors = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("author")); QStringList author_str; @@ -53,43 +53,43 @@ QString AtomParser::atomNamespace() const { return m_atomNamespace; } -QDomNodeList AtomParser::messageElements() { +QDomNodeList AtomParser::xmlMessageElements() { return m_xml.elementsByTagNameNS(m_atomNamespace, QSL("entry")); } -QString AtomParser::messageTitle(const QDomElement& msg_element) const { - return textsFromPath(msg_element, m_atomNamespace, QSL("title"), true).join(QSL(", ")); +QString AtomParser::xmlMessageTitle(const QDomElement& msg_element) const { + return xmlTextsFromPath(msg_element, m_atomNamespace, QSL("title"), true).join(QSL(", ")); } -QString AtomParser::messageDescription(const QDomElement& msg_element) const { - QString summary = rawXmlChild(msg_element.elementsByTagNameNS(m_atomNamespace, QSL("content")).at(0).toElement()); +QString AtomParser::xmlMessageDescription(const QDomElement& msg_element) const { + QString summary = xmlRawChild(msg_element.elementsByTagNameNS(m_atomNamespace, QSL("content")).at(0).toElement()); if (summary.isEmpty()) { - summary = rawXmlChild(msg_element.elementsByTagNameNS(m_atomNamespace, QSL("summary")).at(0).toElement()); + summary = xmlRawChild(msg_element.elementsByTagNameNS(m_atomNamespace, QSL("summary")).at(0).toElement()); if (summary.isEmpty()) { - summary = rawXmlChild(msg_element.elementsByTagNameNS(m_mrssNamespace, QSL("description")).at(0).toElement()); + summary = xmlRawChild(msg_element.elementsByTagNameNS(m_mrssNamespace, QSL("description")).at(0).toElement()); } } return summary; } -QDateTime AtomParser::messageDateCreated(const QDomElement& msg_element) const { - QString updated = textsFromPath(msg_element, m_atomNamespace, QSL("updated"), true).join(QSL(", ")); +QDateTime AtomParser::xmlMessageDateCreated(const QDomElement& msg_element) const { + QString updated = xmlTextsFromPath(msg_element, m_atomNamespace, QSL("updated"), true).join(QSL(", ")); if (updated.simplified().isEmpty()) { - updated = textsFromPath(msg_element, m_atomNamespace, QSL("modified"), true).join(QSL(", ")); + updated = xmlTextsFromPath(msg_element, m_atomNamespace, QSL("modified"), true).join(QSL(", ")); } return TextFactory::parseDateTime(updated); } -QString AtomParser::messageId(const QDomElement& msg_element) const { +QString AtomParser::xmlMessageId(const QDomElement& msg_element) const { return msg_element.elementsByTagNameNS(m_atomNamespace, QSL("id")).at(0).toElement().text(); } -QString AtomParser::messageUrl(const QDomElement& msg_element) const { +QString AtomParser::xmlMessageUrl(const QDomElement& msg_element) const { QDomNodeList elem_links = msg_element.toElement().elementsByTagNameNS(m_atomNamespace, QSL("link")); QString last_link_other; @@ -113,7 +113,7 @@ QString AtomParser::messageUrl(const QDomElement& msg_element) const { } } -QList AtomParser::messageEnclosures(const QDomElement& msg_element) const { +QList AtomParser::xmlMessageEnclosures(const QDomElement& msg_element) const { QList enclosures; QDomNodeList elem_links = msg_element.toElement().elementsByTagNameNS(m_atomNamespace, QSL("link")); diff --git a/src/librssguard/services/standard/parsers/atomparser.h b/src/librssguard/services/standard/parsers/atomparser.h index f27b0b4dc..861218480 100644 --- a/src/librssguard/services/standard/parsers/atomparser.h +++ b/src/librssguard/services/standard/parsers/atomparser.h @@ -17,14 +17,14 @@ class AtomParser : public FeedParser { QString atomNamespace() const; protected: - virtual QString messageTitle(const QDomElement& msg_element) const; - virtual QString messageDescription(const QDomElement& msg_element) const; - virtual QDateTime messageDateCreated(const QDomElement& msg_element) const; - virtual QString messageId(const QDomElement& msg_element) const; - virtual QString messageUrl(const QDomElement& msg_element) const; - virtual QList messageEnclosures(const QDomElement& msg_element) const; - virtual QDomNodeList messageElements(); - virtual QString messageAuthor(const QDomElement& msg_element) const; + virtual QString xmlMessageTitle(const QDomElement& msg_element) const; + virtual QString xmlMessageDescription(const QDomElement& msg_element) const; + virtual QDateTime xmlMessageDateCreated(const QDomElement& msg_element) const; + virtual QString xmlMessageId(const QDomElement& msg_element) const; + virtual QString xmlMessageUrl(const QDomElement& msg_element) const; + virtual QList xmlMessageEnclosures(const QDomElement& msg_element) const; + virtual QDomNodeList xmlMessageElements(); + virtual QString xmlMessageAuthor(const QDomElement& msg_element) const; virtual QString feedAuthor() const; private: diff --git a/src/librssguard/services/standard/parsers/feedparser.cpp b/src/librssguard/services/standard/parsers/feedparser.cpp index 9aa9b640b..6bc4138e5 100644 --- a/src/librssguard/services/standard/parsers/feedparser.cpp +++ b/src/librssguard/services/standard/parsers/feedparser.cpp @@ -3,6 +3,7 @@ #include "services/standard/parsers/feedparser.h" #include "exceptions/applicationexception.h" +#include "exceptions/feedfetchexception.h" #include "miscellaneous/application.h" #include "network-web/webfactory.h" #include "services/standard/definitions.h" @@ -12,15 +13,30 @@ #include -FeedParser::FeedParser(QString data) : m_xmlData(std::move(data)), m_mrssNamespace(QSL("http://search.yahoo.com/mrss/")) { - QString error; +FeedParser::FeedParser(QString data, bool is_xml) : m_isXml(is_xml), m_data(std::move(data)), + m_mrssNamespace(QSL("http://search.yahoo.com/mrss/")) { - if (!m_xml.setContent(m_xmlData, true, &error)) { - throw ApplicationException(QObject::tr("XML problem: %1").arg(error)); + if (m_isXml) { + // XML. + QString error; + + if (!m_xml.setContent(m_data, true, &error)) { + throw FeedFetchException(Feed::Status::ParsingError, QObject::tr("XML problem: %1").arg(error)); + } + } + else { + // JSON. + QJsonParseError err; + + m_json = QJsonDocument::fromJson(m_data.toUtf8(), &err); + + if (m_json.isNull() && err.error != QJsonParseError::ParseError::NoError) { + throw FeedFetchException(Feed::Status::ParsingError, QObject::tr("JSON problem: %1").arg(err.errorString())); + } } } -QString FeedParser::messageRawContents(const QDomElement& msg_element) const { +QString FeedParser::xmlMessageRawContents(const QDomElement& msg_element) const { QString raw_contents; QTextStream str(&raw_contents); @@ -28,74 +44,154 @@ QString FeedParser::messageRawContents(const QDomElement& msg_element) const { return raw_contents; } +QJsonArray FeedParser::jsonMessageElements() { + return {}; +} + +QString FeedParser::jsonMessageTitle(const QJsonObject& msg_element) const { + return {}; +} + +QString FeedParser::jsonMessageUrl(const QJsonObject& msg_element) const { + return {}; +} + +QString FeedParser::jsonMessageDescription(const QJsonObject& msg_element) const { + return {}; +} + +QString FeedParser::jsonMessageAuthor(const QJsonObject& msg_element) const { + return {}; +} + +QDateTime FeedParser::jsonMessageDateCreated(const QJsonObject& msg_element) const { + return {}; +} + +QString FeedParser::jsonMessageId(const QJsonObject& msg_element) const { + return {}; +} + +QList FeedParser::jsonMessageEnclosures(const QJsonObject& msg_element) const { + return {}; +} + +QString FeedParser::jsonMessageRawContents(const QJsonObject& msg_element) const { + return {}; +} + QList FeedParser::messages() { QString feed_author = feedAuthor(); QList messages; QDateTime current_time = QDateTime::currentDateTime(); // Pull out all messages. - QDomNodeList messages_in_xml = messageElements(); + if (m_isXml) { + QDomNodeList messages_in_xml = xmlMessageElements(); - for (int i = 0; i < messages_in_xml.size(); i++) { - QDomElement message_item = messages_in_xml.item(i).toElement(); + for (int i = 0; i < messages_in_xml.size(); i++) { + QDomElement message_item = messages_in_xml.item(i).toElement(); - try { - Message new_message; + try { + Message new_message; - // Fill available data. - new_message.m_title = qApp->web()->unescapeHtml(messageTitle(message_item)); - new_message.m_contents = messageDescription(message_item); - new_message.m_author = qApp->web()->unescapeHtml(messageAuthor(message_item)); - new_message.m_url = messageUrl(message_item); - new_message.m_created = messageDateCreated(message_item); - new_message.m_customId = messageId(message_item); - new_message.m_rawContents = messageRawContents(message_item); - new_message.m_enclosures = messageEnclosures(message_item); - new_message.m_enclosures.append(mrssGetEnclosures(message_item)); + // Fill available data. + new_message.m_title = qApp->web()->unescapeHtml(xmlMessageTitle(message_item)); + new_message.m_contents = xmlMessageDescription(message_item); + new_message.m_author = qApp->web()->unescapeHtml(xmlMessageAuthor(message_item)); + new_message.m_url = xmlMessageUrl(message_item); + new_message.m_created = xmlMessageDateCreated(message_item); + new_message.m_customId = xmlMessageId(message_item); + new_message.m_rawContents = xmlMessageRawContents(message_item); + new_message.m_enclosures = xmlMessageEnclosures(message_item); + new_message.m_enclosures.append(xmlMrssGetEnclosures(message_item)); - // Fixup missing data. - // - // NOTE: Message must have "title" field, otherwise it is skipped. - - // Author. - if (new_message.m_author.isEmpty() && !feed_author.isEmpty()) { - new_message.m_author = feed_author; + messages.append(new_message); } - - // Created date. - new_message.m_createdFromFeed = !new_message.m_created.isNull(); - - if (!new_message.m_createdFromFeed) { - // Date was NOT obtained from the feed, set current date as creation date for the message. - // NOTE: Date is lessened by 1 second for each message to allow for more - // stable sorting. - new_message.m_created = current_time.addSecs(-1); - current_time = new_message.m_created; + catch (const ApplicationException& ex) { + qDebugNN << LOGSEC_CORE + << "Problem when extracting XML message: " + << ex.message(); } - - // Enclosures. - for (Enclosure& enc : new_message.m_enclosures) { - if (enc.m_mimeType.simplified().isEmpty()) { - enc.m_mimeType = QSL(DEFAULT_ENCLOSURE_MIME_TYPE); - } - } - - // Url. - new_message.m_url = new_message.m_url.replace(QRegularExpression(QSL("[\\t\\n]")), QString()); - - messages.append(new_message); } - catch (const ApplicationException& ex) { - qDebugNN << LOGSEC_CORE - << "Problem when extracting message: " - << ex.message(); + } + else { + QJsonArray messages_in_json = jsonMessageElements(); + + for (int i = 0; i < messages_in_json.size(); i++) { + QJsonObject message_item = messages_in_json.at(i).toObject(); + + try { + Message new_message; + + // Fill available data. + new_message.m_title = qApp->web()->unescapeHtml(jsonMessageTitle(message_item)); + new_message.m_contents = jsonMessageDescription(message_item); + new_message.m_author = qApp->web()->unescapeHtml(jsonMessageAuthor(message_item)); + new_message.m_url = jsonMessageUrl(message_item); + new_message.m_created = jsonMessageDateCreated(message_item); + new_message.m_customId = jsonMessageId(message_item); + new_message.m_rawContents = jsonMessageRawContents(message_item); + new_message.m_enclosures = jsonMessageEnclosures(message_item); + + messages.append(new_message); + } + catch (const ApplicationException& ex) { + qDebugNN << LOGSEC_CORE + << "Problem when extracting JSON message: " + << ex.message(); + } } } + // Fixup missing data. + // + // NOTE: Message must have "title" field, otherwise it is skipped. + for (int i = 0; i < messages.size(); i++) { + Message& new_message = messages[i]; + + // Title. + if (new_message.m_title.simplified().isEmpty()) { + if (new_message.m_url.simplified().isEmpty()) { + messages.removeAt(i--); + continue; + } + else { + new_message.m_title = new_message.m_url; + } + } + + // Author. + if (new_message.m_author.isEmpty() && !feed_author.isEmpty()) { + new_message.m_author = feed_author; + } + + // Created date. + new_message.m_createdFromFeed = !new_message.m_created.isNull(); + + if (!new_message.m_createdFromFeed) { + // Date was NOT obtained from the feed, set current date as creation date for the message. + // NOTE: Date is lessened by 1 second for each message to allow for more + // stable sorting. + new_message.m_created = current_time.addSecs(-1); + current_time = new_message.m_created; + } + + // Enclosures. + for (Enclosure& enc : new_message.m_enclosures) { + if (enc.m_mimeType.simplified().isEmpty()) { + enc.m_mimeType = QSL(DEFAULT_ENCLOSURE_MIME_TYPE); + } + } + + // Url. + new_message.m_url = new_message.m_url.replace(QRegularExpression(QSL("[\\t\\n]")), QString()); + } + return messages; } -QList FeedParser::mrssGetEnclosures(const QDomElement& msg_element) const { +QList FeedParser::xmlMrssGetEnclosures(const QDomElement& msg_element) const { QList enclosures; auto content_list = msg_element.elementsByTagNameNS(m_mrssNamespace, QSL("content")); @@ -127,13 +223,13 @@ QList FeedParser::mrssGetEnclosures(const QDomElement& msg_element) c return enclosures; } -QString FeedParser::mrssTextFromPath(const QDomElement& msg_element, const QString& xml_path) const { +QString FeedParser::xmlMrssTextFromPath(const QDomElement& msg_element, const QString& xml_path) const { QString text = msg_element.elementsByTagNameNS(m_mrssNamespace, xml_path).at(0).toElement().text(); return text; } -QString FeedParser::rawXmlChild(const QDomElement& container) const { +QString FeedParser::xmlRawChild(const QDomElement& container) const { QString raw; auto children = container.childNodes(); @@ -153,8 +249,8 @@ QString FeedParser::rawXmlChild(const QDomElement& container) const { return raw; } -QStringList FeedParser::textsFromPath(const QDomElement& element, const QString& namespace_uri, - const QString& xml_path, bool only_first) const { +QStringList FeedParser::xmlTextsFromPath(const QDomElement& element, const QString& namespace_uri, + const QString& xml_path, bool only_first) const { QStringList paths = xml_path.split('/'); QStringList result; QList current_elements; @@ -196,3 +292,35 @@ QStringList FeedParser::textsFromPath(const QDomElement& element, const QString& QString FeedParser::feedAuthor() const { return QL1S(""); } + +QDomNodeList FeedParser::xmlMessageElements() { + return {}; +} + +QString FeedParser::xmlMessageTitle(const QDomElement& msg_element) const { + return {}; +} + +QString FeedParser::xmlMessageUrl(const QDomElement& msg_element) const { + return {}; +} + +QString FeedParser::xmlMessageDescription(const QDomElement& msg_element) const { + return {}; +} + +QString FeedParser::xmlMessageAuthor(const QDomElement& msg_element) const { + return {}; +} + +QDateTime FeedParser::xmlMessageDateCreated(const QDomElement& msg_element) const { + return {}; +} + +QString FeedParser::xmlMessageId(const QDomElement& msg_element) const { + return {}; +} + +QList FeedParser::xmlMessageEnclosures(const QDomElement& msg_element) const { + return {}; +} diff --git a/src/librssguard/services/standard/parsers/feedparser.h b/src/librssguard/services/standard/parsers/feedparser.h index a42364e1a..5a4aaae66 100644 --- a/src/librssguard/services/standard/parsers/feedparser.h +++ b/src/librssguard/services/standard/parsers/feedparser.h @@ -4,6 +4,9 @@ #define FEEDPARSER_H #include +#include +#include +#include #include #include "core/message.h" @@ -11,31 +14,46 @@ // Base class for all XML-based feed parsers. class FeedParser { public: - explicit FeedParser(QString data); + explicit FeedParser(QString data, bool is_xml = true); virtual QList messages(); protected: virtual QString feedAuthor() const; - virtual QDomNodeList messageElements() = 0; - virtual QString messageTitle(const QDomElement& msg_element) const = 0; - virtual QString messageUrl(const QDomElement& msg_element) const = 0; - virtual QString messageDescription(const QDomElement& msg_element) const = 0; - virtual QString messageAuthor(const QDomElement& msg_element) const = 0; - virtual QDateTime messageDateCreated(const QDomElement& msg_element) const = 0; - virtual QString messageId(const QDomElement& msg_element) const = 0; - virtual QList messageEnclosures(const QDomElement& msg_element) const = 0; - virtual QString messageRawContents(const QDomElement& msg_element) const; + + // XML. + virtual QDomNodeList xmlMessageElements(); + virtual QString xmlMessageTitle(const QDomElement& msg_element) const; + virtual QString xmlMessageUrl(const QDomElement& msg_element) const; + virtual QString xmlMessageDescription(const QDomElement& msg_element) const; + virtual QString xmlMessageAuthor(const QDomElement& msg_element) const; + virtual QDateTime xmlMessageDateCreated(const QDomElement& msg_element) const; + virtual QString xmlMessageId(const QDomElement& msg_element) const; + virtual QList xmlMessageEnclosures(const QDomElement& msg_element) const; + virtual QString xmlMessageRawContents(const QDomElement& msg_element) const; + + // JSON. + virtual QJsonArray jsonMessageElements(); + virtual QString jsonMessageTitle(const QJsonObject& msg_element) const; + virtual QString jsonMessageUrl(const QJsonObject& msg_element) const; + virtual QString jsonMessageDescription(const QJsonObject& msg_element) const; + virtual QString jsonMessageAuthor(const QJsonObject& msg_element) const; + virtual QDateTime jsonMessageDateCreated(const QJsonObject& msg_element) const; + virtual QString jsonMessageId(const QJsonObject& msg_element) const; + virtual QList jsonMessageEnclosures(const QJsonObject& msg_element) const; + virtual QString jsonMessageRawContents(const QJsonObject& msg_element) const; protected: - QList mrssGetEnclosures(const QDomElement& msg_element) const; - QString mrssTextFromPath(const QDomElement& msg_element, const QString& xml_path) const; - QString rawXmlChild(const QDomElement& container) const; - QStringList textsFromPath(const QDomElement& element, const QString& namespace_uri, const QString& xml_path, bool only_first) const; + QList xmlMrssGetEnclosures(const QDomElement& msg_element) const; + QString xmlMrssTextFromPath(const QDomElement& msg_element, const QString& xml_path) const; + QString xmlRawChild(const QDomElement& container) const; + QStringList xmlTextsFromPath(const QDomElement& element, const QString& namespace_uri, const QString& xml_path, bool only_first) const; protected: - QString m_xmlData; + bool m_isXml; + QString m_data; QDomDocument m_xml; + QJsonDocument m_json; QString m_mrssNamespace; }; diff --git a/src/librssguard/services/standard/parsers/jsonparser.cpp b/src/librssguard/services/standard/parsers/jsonparser.cpp index b83b085ff..bd80e296c 100644 --- a/src/librssguard/services/standard/parsers/jsonparser.cpp +++ b/src/librssguard/services/standard/parsers/jsonparser.cpp @@ -9,74 +9,71 @@ #include #include -JsonParser::JsonParser(const QString& data) : m_jsonData(data) {} +JsonParser::JsonParser(const QString& data) : FeedParser(data, false) {} -QList JsonParser::messages() const { - QList msgs; - QJsonParseError json_err; - QJsonDocument json = QJsonDocument::fromJson(m_jsonData.toUtf8(), &json_err); - - if (json.isNull() && !json_err.errorString().isEmpty()) { - throw FeedFetchException(Feed::Status::ParsingError, - QObject::tr("JSON error '%1'").arg(json_err.errorString())); - } - - QString global_author = json.object()[QSL("author")].toObject()[QSL("name")].toString(); +QString JsonParser::feedAuthor() const { + QString global_author = m_json.object()[QSL("author")].toObject()[QSL("name")].toString(); if (global_author.isEmpty()) { - global_author = json.object()[QSL("authors")].toArray().at(0).toObject()[QSL("name")].toString(); + global_author = m_json.object()[QSL("authors")].toArray().at(0).toObject()[QSL("name")].toString(); } - auto json_items = json.object()[QSL("items")].toArray(); - - for (const QJsonValue& msg_val : qAsConst(json_items)) { - QJsonObject msg_obj = msg_val.toObject(); - Message msg; - - msg.m_customId = msg_obj[QSL("id")].toString(); - msg.m_title = msg_obj[QSL("title")].toString(); - msg.m_url = msg_obj[QSL("url")].toString(); - msg.m_contents = msg_obj.contains(QSL("content_html")) - ? msg_obj[QSL("content_html")].toString() - : msg_obj[QSL("content_text")].toString(); - msg.m_rawContents = QJsonDocument(msg_obj).toJson(QJsonDocument::JsonFormat::Compact); - - msg.m_created = TextFactory::parseDateTime(msg_obj.contains(QSL("date_modified")) - ? msg_obj[QSL("date_modified")].toString() - : msg_obj[QSL("date_published")].toString()); - - if (!msg.m_created.isValid()) { - msg.m_created = QDateTime::currentDateTime(); - msg.m_createdFromFeed = false; - } - else { - msg.m_createdFromFeed = true; - } - - if (msg_obj.contains(QSL("author"))) { - msg.m_author = msg_obj[QSL("author")].toObject()[QSL("name")].toString(); - } - else if (msg_obj.contains(QSL("authors"))) { - msg.m_author = msg_obj[QSL("authors")].toArray().at(0).toObject()[QSL("name")].toString(); - } - else if (!global_author.isEmpty()) { - msg.m_author = global_author; - } - - auto json_att = msg_obj[QSL("attachments")].toArray(); - - for (const QJsonValue& att : qAsConst(json_att)) { - QJsonObject att_obj = att.toObject(); - - msg.m_enclosures.append(Enclosure(att_obj[QSL("url")].toString(), att_obj[QSL("mime_type")].toString())); - } - - if (msg.m_title.isEmpty() && !msg.m_url.isEmpty()) { - msg.m_title = msg.m_url; - } - - msgs.append(msg); - } - - return msgs; + return global_author; +} + +QJsonArray JsonParser::jsonMessageElements() { + return m_json.object()[QSL("items")].toArray(); +} + +QString JsonParser::jsonMessageTitle(const QJsonObject& msg_element) const { + return msg_element[QSL("title")].toString(); +} + +QString JsonParser::jsonMessageUrl(const QJsonObject& msg_element) const { + return msg_element[QSL("url")].toString(); +} + +QString JsonParser::jsonMessageDescription(const QJsonObject& msg_element) const { + return msg_element.contains(QSL("content_html")) + ? msg_element[QSL("content_html")].toString() + : msg_element[QSL("content_text")].toString(); +} + +QString JsonParser::jsonMessageAuthor(const QJsonObject& msg_element) const { + if (msg_element.contains(QSL("author"))) { + return msg_element[QSL("author")].toObject()[QSL("name")].toString(); + } + else if (msg_element.contains(QSL("authors"))) { + return msg_element[QSL("authors")].toArray().at(0).toObject()[QSL("name")].toString(); + } + else { + return {}; + } +} + +QDateTime JsonParser::jsonMessageDateCreated(const QJsonObject& msg_element) const { + return TextFactory::parseDateTime(msg_element.contains(QSL("date_modified")) + ? msg_element[QSL("date_modified")].toString() + : msg_element[QSL("date_published")].toString()); +} + +QString JsonParser::jsonMessageId(const QJsonObject& msg_element) const { + return msg_element[QSL("id")].toString(); +} + +QList JsonParser::jsonMessageEnclosures(const QJsonObject& msg_element) const { + auto json_att = msg_element[QSL("attachments")].toArray(); + QList enc; + + for (const QJsonValue& att : qAsConst(json_att)) { + QJsonObject att_obj = att.toObject(); + + enc.append(Enclosure(att_obj[QSL("url")].toString(), att_obj[QSL("mime_type")].toString())); + } + + return enc; +} + +QString JsonParser::jsonMessageRawContents(const QJsonObject& msg_element) const { + return QJsonDocument(msg_element).toJson(QJsonDocument::JsonFormat::Compact); } diff --git a/src/librssguard/services/standard/parsers/jsonparser.h b/src/librssguard/services/standard/parsers/jsonparser.h index 5dd922fcf..42b6dd921 100644 --- a/src/librssguard/services/standard/parsers/jsonparser.h +++ b/src/librssguard/services/standard/parsers/jsonparser.h @@ -3,16 +3,25 @@ #ifndef JSONPARSER_H #define JSONPARSER_H +#include "services/standard/parsers/feedparser.h" + #include "core/message.h" -class JsonParser { +class JsonParser : public FeedParser { public: explicit JsonParser(const QString& data); - QList messages() const; - - private: - QString m_jsonData; + protected: + virtual QString feedAuthor() const; + virtual QJsonArray jsonMessageElements(); + virtual QString jsonMessageTitle(const QJsonObject& msg_element) const; + virtual QString jsonMessageUrl(const QJsonObject& msg_element) const; + virtual QString jsonMessageDescription(const QJsonObject& msg_element) const; + virtual QString jsonMessageAuthor(const QJsonObject& msg_element) const; + virtual QDateTime jsonMessageDateCreated(const QJsonObject& msg_element) const; + virtual QString jsonMessageId(const QJsonObject& msg_element) const; + virtual QList jsonMessageEnclosures(const QJsonObject& msg_element) const; + virtual QString jsonMessageRawContents(const QJsonObject& msg_element) const; }; #endif // JSONPARSER_H diff --git a/src/librssguard/services/standard/parsers/rdfparser.cpp b/src/librssguard/services/standard/parsers/rdfparser.cpp index cd40c2552..5838338b1 100644 --- a/src/librssguard/services/standard/parsers/rdfparser.cpp +++ b/src/librssguard/services/standard/parsers/rdfparser.cpp @@ -17,7 +17,7 @@ RdfParser::RdfParser(const QString& data) m_rssCoNamespace(QSL("http://purl.org/rss/1.0/modules/content/")), m_dcElNamespace(QSL("http://purl.org/dc/elements/1.1/")) {} -QDomNodeList RdfParser::messageElements() { +QDomNodeList RdfParser::xmlMessageElements() { return m_xml.elementsByTagNameNS(m_rssNamespace, QSL("item")); } @@ -29,11 +29,11 @@ QString RdfParser::rdfNamespace() const { return m_rdfNamespace; } -QString RdfParser::messageTitle(const QDomElement& msg_element) const { +QString RdfParser::xmlMessageTitle(const QDomElement& msg_element) const { return msg_element.elementsByTagNameNS(m_rssNamespace, QSL("title")).at(0).toElement().text(); } -QString RdfParser::messageDescription(const QDomElement& msg_element) const { +QString RdfParser::xmlMessageDescription(const QDomElement& msg_element) const { QString description = msg_element.elementsByTagNameNS(m_rssCoNamespace, QSL("encoded")).at(0).toElement().text(); if (description.simplified().isEmpty()) { @@ -43,22 +43,22 @@ QString RdfParser::messageDescription(const QDomElement& msg_element) const { return description; } -QString RdfParser::messageAuthor(const QDomElement& msg_element) const { +QString RdfParser::xmlMessageAuthor(const QDomElement& msg_element) const { return msg_element.elementsByTagNameNS(m_dcElNamespace, QSL("creator")).at(0).toElement().text(); } -QDateTime RdfParser::messageDateCreated(const QDomElement& msg_element) const { +QDateTime RdfParser::xmlMessageDateCreated(const QDomElement& msg_element) const { return TextFactory::parseDateTime(msg_element.elementsByTagNameNS(m_dcElNamespace, QSL("date")).at(0).toElement().text()); } -QString RdfParser::messageId(const QDomElement& msg_element) const { +QString RdfParser::xmlMessageId(const QDomElement& msg_element) const { return msg_element.elementsByTagNameNS(m_dcElNamespace, QSL("identifier")).at(0).toElement().text(); } -QString RdfParser::messageUrl(const QDomElement& msg_element) const { +QString RdfParser::xmlMessageUrl(const QDomElement& msg_element) const { return msg_element.elementsByTagNameNS(m_rssNamespace, QSL("link")).at(0).toElement().text(); } -QList RdfParser::messageEnclosures(const QDomElement& msg_element) const { +QList RdfParser::xmlMessageEnclosures(const QDomElement& msg_element) const { return {}; } diff --git a/src/librssguard/services/standard/parsers/rdfparser.h b/src/librssguard/services/standard/parsers/rdfparser.h index f35d4bcec..3b67d7ad9 100644 --- a/src/librssguard/services/standard/parsers/rdfparser.h +++ b/src/librssguard/services/standard/parsers/rdfparser.h @@ -17,14 +17,14 @@ class RdfParser : public FeedParser { QString rssNamespace() const; protected: - virtual QString messageTitle(const QDomElement& msg_element) const; - virtual QString messageDescription(const QDomElement& msg_element) const; - virtual QString messageAuthor(const QDomElement& msg_element) const; - virtual QDateTime messageDateCreated(const QDomElement& msg_element) const; - virtual QString messageId(const QDomElement& msg_element) const; - virtual QString messageUrl(const QDomElement& msg_element) const; - virtual QList messageEnclosures(const QDomElement& msg_element) const; - virtual QDomNodeList messageElements(); + virtual QString xmlMessageTitle(const QDomElement& msg_element) const; + virtual QString xmlMessageDescription(const QDomElement& msg_element) const; + virtual QString xmlMessageAuthor(const QDomElement& msg_element) const; + virtual QDateTime xmlMessageDateCreated(const QDomElement& msg_element) const; + virtual QString xmlMessageId(const QDomElement& msg_element) const; + virtual QString xmlMessageUrl(const QDomElement& msg_element) const; + virtual QList xmlMessageEnclosures(const QDomElement& msg_element) const; + virtual QDomNodeList xmlMessageElements(); private: QString m_rdfNamespace; diff --git a/src/librssguard/services/standard/parsers/rssparser.cpp b/src/librssguard/services/standard/parsers/rssparser.cpp index be4d1bbcc..d3b4621e5 100644 --- a/src/librssguard/services/standard/parsers/rssparser.cpp +++ b/src/librssguard/services/standard/parsers/rssparser.cpp @@ -14,7 +14,7 @@ RssParser::RssParser(const QString& data) : FeedParser(data) {} -QDomNodeList RssParser::messageElements() { +QDomNodeList RssParser::xmlMessageElements() { QDomNode channel_elem = m_xml.namedItem(QSL("rss")).namedItem(QSL("channel")); if (channel_elem.isNull()) { @@ -25,21 +25,21 @@ QDomNodeList RssParser::messageElements() { } } -QString RssParser::messageTitle(const QDomElement& msg_element) const { +QString RssParser::xmlMessageTitle(const QDomElement& msg_element) const { return msg_element.namedItem(QSL("title")).toElement().text(); } -QString RssParser::messageDescription(const QDomElement& msg_element) const { - QString description = rawXmlChild(msg_element.elementsByTagName(QSL("encoded")).at(0).toElement()); +QString RssParser::xmlMessageDescription(const QDomElement& msg_element) const { + QString description = xmlRawChild(msg_element.elementsByTagName(QSL("encoded")).at(0).toElement()); if (description.isEmpty()) { - description = rawXmlChild(msg_element.elementsByTagName(QSL("description")).at(0).toElement()); + description = xmlRawChild(msg_element.elementsByTagName(QSL("description")).at(0).toElement()); } return description; } -QString RssParser::messageAuthor(const QDomElement& msg_element) const { +QString RssParser::xmlMessageAuthor(const QDomElement& msg_element) const { QString author = msg_element.namedItem(QSL("author")).toElement().text(); if (author.isEmpty()) { @@ -49,7 +49,7 @@ QString RssParser::messageAuthor(const QDomElement& msg_element) const { return author; } -QDateTime RssParser::messageDateCreated(const QDomElement& msg_element) const { +QDateTime RssParser::xmlMessageDateCreated(const QDomElement& msg_element) const { QDateTime date_created = TextFactory::parseDateTime(msg_element.namedItem(QSL("pubDate")).toElement().text()); if (date_created.isNull()) { @@ -59,11 +59,11 @@ QDateTime RssParser::messageDateCreated(const QDomElement& msg_element) const { return date_created; } -QString RssParser::messageId(const QDomElement& msg_element) const { +QString RssParser::xmlMessageId(const QDomElement& msg_element) const { return msg_element.namedItem(QSL("guid")).toElement().text(); } -QString RssParser::messageUrl(const QDomElement& msg_element) const { +QString RssParser::xmlMessageUrl(const QDomElement& msg_element) const { QString url = msg_element.namedItem(QSL("link")).toElement().text(); if (url.isEmpty()) { @@ -74,7 +74,7 @@ QString RssParser::messageUrl(const QDomElement& msg_element) const { return url; } -QList RssParser::messageEnclosures(const QDomElement& msg_element) const { +QList RssParser::xmlMessageEnclosures(const QDomElement& msg_element) const { QString elem_enclosure = msg_element.namedItem(QSL("enclosure")).toElement().attribute(QSL("url")); QString elem_enclosure_type = msg_element.namedItem(QSL("enclosure")).toElement().attribute(QSL("type")); diff --git a/src/librssguard/services/standard/parsers/rssparser.h b/src/librssguard/services/standard/parsers/rssparser.h index 325720832..3475fd521 100644 --- a/src/librssguard/services/standard/parsers/rssparser.h +++ b/src/librssguard/services/standard/parsers/rssparser.h @@ -14,14 +14,14 @@ class RssParser : public FeedParser { explicit RssParser(const QString& data); protected: - virtual QDomNodeList messageElements(); - virtual QString messageTitle(const QDomElement& msg_element) const; - virtual QString messageDescription(const QDomElement& msg_element) const; - virtual QString messageAuthor(const QDomElement& msg_element) const; - virtual QDateTime messageDateCreated(const QDomElement& msg_element) const; - virtual QString messageId(const QDomElement& msg_element) const; - virtual QString messageUrl(const QDomElement& msg_element) const; - virtual QList messageEnclosures(const QDomElement& msg_element) const; + virtual QDomNodeList xmlMessageElements(); + virtual QString xmlMessageTitle(const QDomElement& msg_element) const; + virtual QString xmlMessageDescription(const QDomElement& msg_element) const; + virtual QString xmlMessageAuthor(const QDomElement& msg_element) const; + virtual QDateTime xmlMessageDateCreated(const QDomElement& msg_element) const; + virtual QString xmlMessageId(const QDomElement& msg_element) const; + virtual QString xmlMessageUrl(const QDomElement& msg_element) const; + virtual QList xmlMessageEnclosures(const QDomElement& msg_element) const; }; #endif // RSSPARSER_H