From 2c3fa36b9a4fb55d25faf52675ea3a5bed2af709 Mon Sep 17 00:00:00 2001 From: Martin Rotter Date: Wed, 10 Mar 2021 10:10:33 +0100 Subject: [PATCH] Fixed #377 --- resources/docs/Message-filters.md | 43 +++++++++++++++++-- src/librssguard/core/message.cpp | 2 +- src/librssguard/core/message.h | 1 + src/librssguard/core/messageobject.cpp | 8 ++++ src/librssguard/core/messageobject.h | 4 ++ .../services/feedly/feedlynetwork.cpp | 1 + .../services/gmail/gmailnetworkfactory.cpp | 3 +- .../services/greader/greadernetwork.cpp | 1 + .../inoreader/inoreadernetworkfactory.cpp | 3 +- .../owncloud/owncloudnetworkfactory.cpp | 1 + .../services/standard/atomparser.cpp | 9 ++++ .../services/standard/jsonparser.cpp | 1 + .../services/standard/rdfparser.cpp | 9 ++++ .../services/standard/rssparser.cpp | 10 +++++ .../services/tt-rss/ttrssnetworkfactory.cpp | 1 + .../services/tt-rss/ttrssserviceroot.cpp | 6 +-- 16 files changed, 94 insertions(+), 9 deletions(-) diff --git a/resources/docs/Message-filters.md b/resources/docs/Message-filters.md index 6170f26f5..20a50fb01 100755 --- a/resources/docs/Message-filters.md +++ b/resources/docs/Message-filters.md @@ -50,14 +50,15 @@ Here is the reference of methods and properties of some types available in your | `String url` | URL of the message. | | `String author` | Author of the message. | | `String contents` | Contents of the message. | +| `String rawContents` | Raw contents of the message exactly as obtained from the remote service/feed. Expect raw `XML` or `JSON` element data here. Note that this attribute holds a value only if `alreadyStoredInDb` returns `false`; in other words, it is not persistently stored in RSS Guard's DB. | | `Number score` | Arbitrary number in range <0.0, 100.0>. You can use this number to sort messages in a custom fashion as this attribute also has its own column in messages list. | | `Date created` | Date/time of the message. | | `Boolean isRead` | Is message read? 
| | `Boolean isImportant` | Is message important? | -| `Boolean isDeleted` | Is message placed in recycle bin? Available in RSS Guard 3.8.4+. | +| `Boolean isDeleted` | Is message placed in recycle bin? | | `Boolean isDuplicateWithAttribute(DuplicationAttributeCheck)` | Allows you to test if this particular message is already stored in RSS Guard's DB. | -| `Boolean assignLabel(String)` | Assigns label to this message. The passed `String` value is the `customId` property of `Label` type. See its API reference for relevant info. Available in RSS Guard 3.8.1+. | -| `Boolean deassignLabel(String)` | Removes label from this message. The passed `String` value is the `customId` property of `Label` type. See its API reference for relevant info. Available in RSS Guard 3.8.1+. | +| `Boolean assignLabel(String)` | Assigns label to this message. The passed `String` value is the `customId` property of `Label` type. See its API reference for relevant info. | +| `Boolean deassignLabel(String)` | Removes label from this message. The passed `String` value is the `customId` property of `Label` type. See its API reference for relevant info. | | `Boolean alreadyStoredInDb` | `READ-ONLY` Returns true if this message is already stored in DB. This function is the way to check if the filter is being run automatically for newly downloaded messages or manually for already existing messages. ### `Label` class @@ -107,6 +108,42 @@ function filterMessage() { } ``` +Dump RAW data of each message to RSS Guard's [debug output](Documentation.md#generating-debug-log-file). +```js +function filterMessage() { + console.log(msg.rawContents); + return MessageObject.Accept; +} +``` +The above script produces this kind of debug output when running for Tiny Tiny RSS. +``` +... +... +time=" 34.360" type="debug" -> feed-downloader: Hooking message took 4 microseconds. 
+time=" 34.361" type="debug" -> {"always_display_attachments":false,"attachments":[],"author":"Aleš Kapica","comments_count":0,"comments_link":"","content":"

\nNaposledy jsem psal o čuňačení v MediaWiki asi před půl rokem, kdy jsem chtěl upozornit na to, že jsem přepracoval svoji původní šablonu Images tak, aby bylo možné používat výřezy z obrázků a stránek generovaných z DjVu a PDF dokumentů. Blogpost nebyl nijak extra hodnocen, takže mě vcelku nepřekvapuje, jak se do hlavní vývojové větve MediaWiki dostávají čím dál větší prasečiny.\n

","feed_id":"5903","feed_title":"abclinuxu - blogy","flavor_image":"","flavor_stream":"","guid":"{\"ver\":2,\"uid\":\"52\",\"hash\":\"SHA1:5b49e4d8f612984889ba25e7834e80604c795ff8\"}","id":6958843,"is_updated":false,"labels":[],"lang":"","link":"http://www.abclinuxu.cz/blog/kenyho_stesky/2021/1/cunacime-v-mediawiki-responzivni-obsah-ii","marked":false,"note":null,"published":false,"score":0,"tags":[""],"title":"Čuňačíme v MediaWiki - responzivní obsah II.","unread":true,"updated":1610044674} +time=" 34.361" type="debug" -> feed-downloader: Running filter script, it took 348 microseconds. +time=" 34.361" type="debug" -> feed-downloader: Hooking message took 4 microseconds. +time=" 34.361" type="debug" -> {"always_display_attachments":false,"attachments":[],"author":"kol-ouch","comments_count":0,"comments_link":"","content":"Ahoj, 1. 6. se blíží, tak začínám řešit co s bambilionem fotek na google photos. \n

\nZa sebe můžu říct, že gp mi vyhovují - ne snad úplně tím, že jsou zadarmo, ale hlavně způsobem práce s fotkami, možnostmi vyhledávání v nich podle obsahu, vykopírování textu z nich, provázaností s mapami, recenzemi, možnostmi sdílení, automatickým seskupováním a podobně.","feed_id":"5903","feed_title":"abclinuxu - blogy","flavor_image":"","flavor_stream":"","guid":"{\"ver\":2,\"uid\":\"52\",\"hash\":\"SHA1:1277107408b159882b95ca7151a0ec0160a3971a\"}","id":6939327,"is_updated":false,"labels":[],"lang":"","link":"http://www.abclinuxu.cz/blog/Co_to_je/2021/1/kam-s-fotkama","marked":false,"note":null,"published":false,"score":0,"tags":[""],"title":"Kam s fotkama?","unread":true,"updated":1609750800} +... +... +``` + +For an RSS 2.0 message, the result might look like this. +``` +... +... +time=" 3.568" type="debug" -> feed-downloader: Hooking message took 6 microseconds. +time=" 3.568" type="debug" -> +<![CDATA[Man Utd's Cavani 'not comfortable' in England, says father]]> + +https://www.bbc.co.uk/sport/football/56341983 +https://www.bbc.co.uk/sport/football/56341983 +Tue, 09 Mar 2021 23:46:03 GMT + + +time=" 3.568" type="debug" -> feed-downloader: Running filter script, it took 416 microseconds. +``` + Write details of available labels and assign the first label to the message. 
```js function filterMessage() { diff --git a/src/librssguard/core/message.cpp b/src/librssguard/core/message.cpp index 880517be7..66da7a477 100644 --- a/src/librssguard/core/message.cpp +++ b/src/librssguard/core/message.cpp @@ -63,7 +63,7 @@ QString Enclosures::encodeEnclosuresToString(const QList& enclosures) } Message::Message() { - m_title = m_url = m_author = m_contents = m_feedId = m_customId = m_customHash = ""; + m_title = m_url = m_author = m_contents = m_rawContents = m_feedId = m_customId = m_customHash = ""; m_enclosures = QList(); m_accountId = m_id = 0; m_score = 0.0; diff --git a/src/librssguard/core/message.h b/src/librssguard/core/message.h index 1a9a55674..dcd15ba4e 100644 --- a/src/librssguard/core/message.h +++ b/src/librssguard/core/message.h @@ -45,6 +45,7 @@ class RSSGUARD_DLLSPEC Message { QString m_url; QString m_author; QString m_contents; + QString m_rawContents; QDateTime m_created; QString m_feedId; int m_accountId; diff --git a/src/librssguard/core/messageobject.cpp b/src/librssguard/core/messageobject.cpp index 4c6f7e45d..f999c9fb2 100755 --- a/src/librssguard/core/messageobject.cpp +++ b/src/librssguard/core/messageobject.cpp @@ -151,6 +151,14 @@ void MessageObject::setContents(const QString& contents) { m_message->m_contents = contents; } +QString MessageObject::rawContents() const { + return m_message->m_rawContents; +} + +void MessageObject::setRawContents(const QString& raw_contents) { + m_message->m_rawContents = raw_contents; +} + QDateTime MessageObject::created() const { return m_message->m_created; } diff --git a/src/librssguard/core/messageobject.h b/src/librssguard/core/messageobject.h index 26ec8e3e0..7cf18c6a4 100755 --- a/src/librssguard/core/messageobject.h +++ b/src/librssguard/core/messageobject.h @@ -18,6 +18,7 @@ class MessageObject : public QObject { Q_PROPERTY(QString url READ url WRITE setUrl) Q_PROPERTY(QString author READ author WRITE setAuthor) Q_PROPERTY(QString contents READ contents WRITE setContents) + 
Q_PROPERTY(QString rawContents READ rawContents WRITE setRawContents) Q_PROPERTY(QDateTime created READ created WRITE setCreated) Q_PROPERTY(double score READ score WRITE setScore) Q_PROPERTY(bool isRead READ isRead WRITE setIsRead) @@ -101,6 +102,9 @@ class MessageObject : public QObject { QString contents() const; void setContents(const QString& contents); + QString rawContents() const; + void setRawContents(const QString& raw_contents); + QDateTime created() const; void setCreated(const QDateTime& created); diff --git a/src/librssguard/services/feedly/feedlynetwork.cpp b/src/librssguard/services/feedly/feedlynetwork.cpp index c619068aa..e5f34ff74 100755 --- a/src/librssguard/services/feedly/feedlynetwork.cpp +++ b/src/librssguard/services/feedly/feedlynetwork.cpp @@ -238,6 +238,7 @@ QList FeedlyNetwork::decodeStreamContents(const QByteArray& stream_cont message.m_title = entry_obj["title"].toString(); message.m_author = entry_obj["author"].toString(); message.m_contents = entry_obj["content"].toObject()["content"].toString(); + message.m_rawContents = QJsonDocument(entry_obj).toJson(QJsonDocument::JsonFormat::Compact); if (message.m_contents.isEmpty()) { message.m_contents = entry_obj["summary"].toObject()["content"].toString(); diff --git a/src/librssguard/services/gmail/gmailnetworkfactory.cpp b/src/librssguard/services/gmail/gmailnetworkfactory.cpp index 962aba7dc..add256255 100755 --- a/src/librssguard/services/gmail/gmailnetworkfactory.cpp +++ b/src/librssguard/services/gmail/gmailnetworkfactory.cpp @@ -2,12 +2,12 @@ #include "services/gmail/gmailnetworkfactory.h" +#include "database/databasequeries.h" #include "definitions/definitions.h" #include "exceptions/applicationexception.h" #include "gui/dialogs/formmain.h" #include "gui/tabwidget.h" #include "miscellaneous/application.h" -#include "database/databasequeries.h" #include "miscellaneous/textfactory.h" #include "network-web/networkfactory.h" #include "network-web/oauth2service.h" @@ -397,6 +397,7 @@ 
bool GmailNetworkFactory::fillFullMessage(Message& msg, const QJsonObject& json, } msg.m_isRead = true; + msg.m_rawContents = QJsonDocument(json).toJson(QJsonDocument::JsonFormat::Compact); // Assign correct main labels/states. for (const QVariant& label : json["labelIds"].toArray().toVariantList()) { diff --git a/src/librssguard/services/greader/greadernetwork.cpp b/src/librssguard/services/greader/greadernetwork.cpp index c288d2087..40659d153 100755 --- a/src/librssguard/services/greader/greadernetwork.cpp +++ b/src/librssguard/services/greader/greadernetwork.cpp @@ -552,6 +552,7 @@ QList GreaderNetwork::decodeStreamContents(ServiceRoot* root, } message.m_contents = message_obj["summary"].toObject()["content"].toString(); + message.m_rawContents = QJsonDocument(message_obj).toJson(QJsonDocument::JsonFormat::Compact); message.m_feedId = stream_id; messages.append(message); diff --git a/src/librssguard/services/inoreader/inoreadernetworkfactory.cpp b/src/librssguard/services/inoreader/inoreadernetworkfactory.cpp index b2ef61e44..c479384bd 100755 --- a/src/librssguard/services/inoreader/inoreadernetworkfactory.cpp +++ b/src/librssguard/services/inoreader/inoreadernetworkfactory.cpp @@ -3,11 +3,11 @@ #include "services/inoreader/inoreadernetworkfactory.h" #include "3rd-party/boolinq/boolinq.h" +#include "database/databasequeries.h" #include "definitions/definitions.h" #include "gui/dialogs/formmain.h" #include "gui/tabwidget.h" #include "miscellaneous/application.h" -#include "database/databasequeries.h" #include "network-web/networkfactory.h" #include "network-web/oauth2service.h" #include "network-web/silentnetworkaccessmanager.h" @@ -364,6 +364,7 @@ QList InoreaderNetworkFactory::decodeMessages(ServiceRoot* root, const } message.m_contents = message_obj["summary"].toObject()["content"].toString(); + message.m_rawContents = QJsonDocument(message_obj).toJson(QJsonDocument::JsonFormat::Compact); message.m_feedId = stream_id; messages.append(message); diff --git 
a/src/librssguard/services/owncloud/owncloudnetworkfactory.cpp b/src/librssguard/services/owncloud/owncloudnetworkfactory.cpp index a15630fdd..4c314a1ac 100755 --- a/src/librssguard/services/owncloud/owncloudnetworkfactory.cpp +++ b/src/librssguard/services/owncloud/owncloudnetworkfactory.cpp @@ -582,6 +582,7 @@ QListOwnCloudGetMessagesResponse::messages() const { msg.m_createdFromFeed = true; msg.m_customId = message_map["id"].toVariant().toString(); msg.m_customHash = message_map["guidHash"].toString(); + msg.m_rawContents = QJsonDocument(message_map).toJson(QJsonDocument::JsonFormat::Compact); QString enclosure_link = message_map["enclosureLink"].toString(); diff --git a/src/librssguard/services/standard/atomparser.cpp b/src/librssguard/services/standard/atomparser.cpp index 425653847..4f4c9fb1a 100755 --- a/src/librssguard/services/standard/atomparser.cpp +++ b/src/librssguard/services/standard/atomparser.cpp @@ -5,6 +5,7 @@ #include "miscellaneous/application.h" #include "miscellaneous/textfactory.h" #include "network-web/webfactory.h" +#include "services/standard/definitions.h" #include "exceptions/applicationexception.h" @@ -64,6 +65,14 @@ Message AtomParser::extractMessage(const QDomElement& msg_element, QDateTime cur new_message.m_contents = summary; new_message.m_author = qApp->web()->unescapeHtml(messageAuthor(msg_element)); + QString raw_contents; + QTextStream str(&raw_contents); + + str.setCodec(DEFAULT_FEED_ENCODING); + + msg_element.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream); + new_message.m_rawContents = raw_contents; + QString updated = textsFromPath(msg_element, m_atomNamespace, QSL("updated"), true).join(QSL(", ")); if (updated.isEmpty()) { diff --git a/src/librssguard/services/standard/jsonparser.cpp b/src/librssguard/services/standard/jsonparser.cpp index 2ad6a5cbf..231b117c1 100644 --- a/src/librssguard/services/standard/jsonparser.cpp +++ b/src/librssguard/services/standard/jsonparser.cpp @@ -26,6 +26,7 @@ QList 
JsonParser::messages() const { msg.m_title = msg_obj["title"].toString(); msg.m_url = msg_obj["url"].toString(); msg.m_contents = msg_obj.contains("content_html") ? msg_obj["content_html"].toString() : msg_obj["content_text"].toString(); + msg.m_rawContents = QJsonDocument(msg_obj).toJson(QJsonDocument::JsonFormat::Compact); msg.m_created = TextFactory::parseDateTime(msg_obj.contains("date_modified") ? msg_obj["date_modified"].toString() diff --git a/src/librssguard/services/standard/rdfparser.cpp b/src/librssguard/services/standard/rdfparser.cpp index ed926caab..b0adeb37b 100644 --- a/src/librssguard/services/standard/rdfparser.cpp +++ b/src/librssguard/services/standard/rdfparser.cpp @@ -5,6 +5,7 @@ #include "miscellaneous/application.h" #include "miscellaneous/textfactory.h" #include "network-web/webfactory.h" +#include "services/standard/definitions.h" #include @@ -48,6 +49,14 @@ QList RdfParser::parseXmlData(const QString& data) { new_message.m_contents = elem_description; } + QString raw_contents; + QTextStream str(&raw_contents); + + str.setCodec(DEFAULT_FEED_ENCODING); + + message_item.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream); + new_message.m_rawContents = raw_contents; + // Deal with link and author. 
new_message.m_url = message_item.namedItem(QSL("link")).toElement().text(); new_message.m_author = message_item.namedItem(QSL("creator")).toElement().text(); diff --git a/src/librssguard/services/standard/rssparser.cpp b/src/librssguard/services/standard/rssparser.cpp index dc981184e..d31bacb83 100644 --- a/src/librssguard/services/standard/rssparser.cpp +++ b/src/librssguard/services/standard/rssparser.cpp @@ -7,8 +7,10 @@ #include "miscellaneous/iofactory.h" #include "miscellaneous/textfactory.h" #include "network-web/webfactory.h" +#include "services/standard/definitions.h" #include +#include RssParser::RssParser(const QString& data) : FeedParser(data) {} @@ -82,6 +84,14 @@ Message RssParser::extractMessage(const QDomElement& msg_element, QDateTime curr new_message.m_enclosures.append(mrssGetEnclosures(msg_element)); } + QString raw_contents; + QTextStream str(&raw_contents); + + str.setCodec(DEFAULT_FEED_ENCODING); + + msg_element.save(str, 0, QDomNode::EncodingPolicy::EncodingFromTextStream); + new_message.m_rawContents = raw_contents; + new_message.m_author = msg_element.namedItem(QSL("author")).toElement().text(); if (new_message.m_author.isEmpty()) { diff --git a/src/librssguard/services/tt-rss/ttrssnetworkfactory.cpp b/src/librssguard/services/tt-rss/ttrssnetworkfactory.cpp index a29fd3483..5141c79d3 100755 --- a/src/librssguard/services/tt-rss/ttrssnetworkfactory.cpp +++ b/src/librssguard/services/tt-rss/ttrssnetworkfactory.cpp @@ -767,6 +767,7 @@ QList TtRssGetHeadlinesResponse::messages(ServiceRoot* root) const { message.m_isRead = !mapped["unread"].toBool(); message.m_isImportant = mapped["marked"].toBool(); message.m_contents = mapped["content"].toString(); + message.m_rawContents = QJsonDocument(mapped).toJson(QJsonDocument::JsonFormat::Compact); for (const QJsonValue& lbl_val : mapped["labels"].toArray()) { QString lbl_custom_id = QString::number(lbl_val.toArray().at(0).toInt()); diff --git a/src/librssguard/services/tt-rss/ttrssserviceroot.cpp 
b/src/librssguard/services/tt-rss/ttrssserviceroot.cpp index b93dca382..c74d0e7e0 100644 --- a/src/librssguard/services/tt-rss/ttrssserviceroot.cpp +++ b/src/librssguard/services/tt-rss/ttrssserviceroot.cpp @@ -2,8 +2,8 @@ #include "services/tt-rss/ttrssserviceroot.h" -#include "miscellaneous/application.h" #include "database/databasequeries.h" +#include "miscellaneous/application.h" #include "miscellaneous/iconfactory.h" #include "miscellaneous/mutex.h" #include "miscellaneous/settings.h" @@ -220,7 +220,7 @@ QList TtRssServiceRoot::obtainNewMessages(const QList& feeds, bo int skip = 0; do { - TtRssGetHeadlinesResponse headlines = network()->getHeadlines(customId().toInt(), limit, skip, + TtRssGetHeadlinesResponse headlines = network()->getHeadlines(feed->customNumericId(), limit, skip, true, true, false, network()->downloadOnlyUnreadMessages(), networkProxy()); @@ -232,7 +232,7 @@ QList TtRssServiceRoot::obtainNewMessages(const QList& feeds, bo continue; } else { - QList new_messages = headlines.messages(getParentServiceRoot()); + QList new_messages = headlines.messages(this); messages << new_messages; newly_added_messages = new_messages.size();