From 6e51580dcf0bb9df45404ef6906a8f2391b9f0d0 Mon Sep 17 00:00:00 2001 From: Martin Rotter Date: Thu, 25 Jun 2020 15:14:04 +0200 Subject: [PATCH] Greatly enhanced feed filter engine capabilities. Will have to make a configuration GUI now. --- resources/sql.qrc | 2 + src/librssguard/core/feeddownloader.cpp | 35 +++++--- src/librssguard/core/message.cpp | 79 ++++++++++++++++++- src/librssguard/core/message.h | 34 +++++++- src/librssguard/core/messagefilter.cpp | 50 ++++++++++-- src/librssguard/definitions/definitions.h | 2 +- .../miscellaneous/databasequeries.cpp | 27 ++++++- .../gmail/network/gmailnetworkfactory.cpp | 3 +- 8 files changed, 202 insertions(+), 30 deletions(-) diff --git a/resources/sql.qrc b/resources/sql.qrc index 756e5d000..1d36007d5 100755 --- a/resources/sql.qrc +++ b/resources/sql.qrc @@ -14,6 +14,7 @@ sql/db_update_mysql_11_12.sql sql/db_update_mysql_12_13.sql sql/db_update_mysql_13_14.sql + sql/db_update_mysql_14_15.sql sql/db_init_sqlite.sql sql/db_update_sqlite_1_2.sql @@ -29,5 +30,6 @@ sql/db_update_sqlite_11_12.sql sql/db_update_sqlite_12_13.sql sql/db_update_sqlite_13_14.sql + sql/db_update_sqlite_14_15.sql \ No newline at end of file diff --git a/src/librssguard/core/feeddownloader.cpp b/src/librssguard/core/feeddownloader.cpp index 2614e0046..769baba9b 100644 --- a/src/librssguard/core/feeddownloader.cpp +++ b/src/librssguard/core/feeddownloader.cpp @@ -4,6 +4,7 @@ #include "core/messagefilter.h" #include "definitions/definitions.h" +#include "miscellaneous/application.h" #include "services/abstract/cacheforserviceroot.h" #include "services/abstract/feed.h" @@ -79,11 +80,12 @@ void FeedDownloader::updateOneFeed(Feed* feed) { << QThread::currentThreadId() << "\'."; bool error_during_obtaining = false; + QElapsedTimer tmr; tmr.start(); QList msgs = feed->obtainNewMessages(&error_during_obtaining); qDebug().nospace() << "Downloaded " << msgs.size() << " messages for feed ID " << feed->customId() << " URL: " << feed->url() << " title: " << feed->title() << " in thread: \'" - << QThread::currentThreadId() << "\'."; + << QThread::currentThreadId() << "\'. Operation took " << tmr.nsecsElapsed() / 1000 << " microseconds."; // Now, sanitize messages (tweak encoding etc.). for (auto& msg : msgs) { @@ -102,43 +104,52 @@ void FeedDownloader::updateOneFeed(Feed* feed) { } if (!feed->messageFilters().isEmpty()) { + tmr.restart(); + + bool is_main_thread = QThread::currentThread() == qApp->thread(); + QSqlDatabase database = is_main_thread ? + qApp->database()->connection(metaObject()->className()) : + qApp->database()->connection(QSL("feed_upd")); + // Perform per-message filtering. QJSEngine filter_engine; // Create JavaScript communication wrapper for the message. - MessageObject msg_obj; + MessageObject msg_obj(&database, feed->customId(), feed->getParentServiceRoot()->accountId()); // Register the wrapper. auto js_object = filter_engine.newQObject(&msg_obj); filter_engine.globalObject().setProperty("msg", js_object); + qDebug().nospace() << "Setting up JS evaluation took " << tmr.nsecsElapsed() / 1000 << " microseconds."; + for (int i = 0; i < msgs.size(); i++) { + tmr.restart(); + // Attach live message object to wrapper. msg_obj.setMessage(&msgs[i]); + qDebug().nospace() << "Hooking message took " << tmr.nsecsElapsed() / 1000 << " microseconds."; auto feed_filters = feed->messageFilters(); - for (int i = 0; i < feed_filters.size(); i++) { - QPointer filter = feed_filters.at(i); + for (int j = 0; j < feed_filters.size(); j++) { + QPointer filter = feed_filters.at(j); if (filter.isNull()) { qWarning("Message filter was probably deleted, removing its pointer from list of filters."); - feed_filters.removeAt(i--); + feed_filters.removeAt(j--); continue; } MessageFilter* msg_filter = filter.data(); - // Call the filtering logic, given function must return integer value from - // FilteringAction enumeration. - // - // 1. All Qt properties of MessageObject class are accessible. - // For example msg.title.includes("A") returns true if message's title includes "A" etc. - // 2. Some Qt properties of MessageObject are writable, so you can alter your message! - // For example msg.isImportant = true. + tmr.restart(); + FilteringAction decision = msg_filter->filterMessage(&filter_engine); + qDebug().nospace() << "Running filter script, it took " << tmr.nsecsElapsed() / 1000 << " microseconds."; + switch (decision) { case FilteringAction::Accept: diff --git a/src/librssguard/core/message.cpp b/src/librssguard/core/message.cpp index b372c3345..045da982e 100644 --- a/src/librssguard/core/message.cpp +++ b/src/librssguard/core/message.cpp @@ -4,6 +4,9 @@ #include "miscellaneous/textfactory.h" +#include +#include +#include #include Enclosure::Enclosure(QString url, QString mime) : m_url(std::move(url)), m_mimeType(std::move(mime)) {} @@ -129,15 +132,77 @@ uint qHash(const Message& key) { return (uint(key.m_accountId) * 10000) + uint(key.m_id); } -MessageObject::MessageObject(QObject* parent) : QObject(parent), m_message(nullptr) {} +MessageObject::MessageObject(QSqlDatabase* db, const QString& feed_custom_id, int account_id, QObject* parent) + : QObject(parent), m_db(db), m_feedCustomId(feed_custom_id), m_accountId(account_id), m_message(nullptr) {} void MessageObject::setMessage(Message* message) { m_message = message; } bool MessageObject::isDuplicateWithAttribute(int attribute_check) const { - // TODO: Check database according to duplication attribute_check. - return int(attribute_check) == 4; + if (attribute_check <= 0) { + qCritical("Bad DuplicationAttributeCheck value '%d' was passed from JS filter script.", attribute_check); + return true; + } + + // Check database according to duplication attribute_check. + DuplicationAttributeCheck attrs = static_cast(attribute_check); + QSqlQuery q(*m_db); + QStringList where_clauses; + QList> bind_values; + + // Now we construct the query according to parameter. + if ((attrs& DuplicationAttributeCheck::SameTitle) == DuplicationAttributeCheck::SameTitle) { + where_clauses.append(QSL("title = :title")); + bind_values.append({":title", title()}); + } + + if ((attrs& DuplicationAttributeCheck::SameUrl) == DuplicationAttributeCheck::SameUrl) { + where_clauses.append(QSL("url = :url")); + bind_values.append({":url", url()}); + } + + if ((attrs& DuplicationAttributeCheck::SameAuthor) == DuplicationAttributeCheck::SameAuthor) { + where_clauses.append(QSL("author = :author")); + bind_values.append({":author", author()}); + } + + if ((attrs& DuplicationAttributeCheck::SameDateCreated) == DuplicationAttributeCheck::SameDateCreated) { + where_clauses.append(QSL("date_created = :date_created")); + bind_values.append({":date_created", created().toMSecsSinceEpoch()}); + } + + where_clauses.append(QSL("account_id = :account_id")); + bind_values.append({":account_id", accountId()}); + + if ((attrs& DuplicationAttributeCheck::AllFeedsSameAccount) != DuplicationAttributeCheck::AllFeedsSameAccount) { + // Limit to current feed. + where_clauses.append(QSL("feed = :feed")); + bind_values.append({":feed", feedCustomId()}); + } + + QString full_query = QSL("SELECT COUNT(*) FROM Messages WHERE ") + where_clauses.join(QSL(" AND ")) + QSL(";"); + + q.setForwardOnly(true); + q.prepare(full_query); + + for (const auto& bind : bind_values) { + q.bindValue(bind.first, bind.second); + } + + if (q.exec() && q.next()) { + if (q.record().value(0).toInt() > 0) { + // Whoops, we have the "same" message in database. + qDebug("Message '%s' was identified as duplicate by filter script.", qPrintable(title())); + return true; + } + } + else if (q.lastError().isValid()) { + qWarning("Error when checking for duplicate messages via filtering system, error: '%s'.", + qPrintable(q.lastError().text())); + } + + return false; } QString MessageObject::title() const { @@ -195,3 +260,11 @@ bool MessageObject::isImportant() const { void MessageObject::setIsImportant(bool is_important) { m_message->m_isImportant = is_important; } + +QString MessageObject::feedCustomId() const { + return m_feedCustomId; +} + +int MessageObject::accountId() const { + return m_accountId; +} diff --git a/src/librssguard/core/message.h b/src/librssguard/core/message.h index 3098b619c..512ce152f 100644 --- a/src/librssguard/core/message.h +++ b/src/librssguard/core/message.h @@ -10,6 +10,8 @@ #include #include +class QSqlDatabase; + // Represents single enclosure. struct Enclosure { public: @@ -79,15 +81,35 @@ enum class FilteringAction { }; enum class DuplicationAttributeCheck { + // Message with same title in DB. SameTitle = 1, + + // Message with same URL in DB. SameUrl = 2, + + // Message with same author in DB. SameAuthor = 4, - SameFeed = 8, - SameDateCreated = 16 + + // Messages with same creation date in DB. + SameDateCreated = 8, + + // Compare with all messages from the account not only with messages from same feed. + AllFeedsSameAccount = 16 }; +inline DuplicationAttributeCheck operator|(DuplicationAttributeCheck lhs, DuplicationAttributeCheck rhs) { + return static_cast(int(lhs) | int(rhs)); +} + +inline DuplicationAttributeCheck operator&(DuplicationAttributeCheck lhs, DuplicationAttributeCheck rhs) { + return static_cast(int(lhs) & int(rhs)); +} + class MessageObject : public QObject { Q_OBJECT + + Q_PROPERTY(QString feedCustomId READ feedCustomId) + Q_PROPERTY(int accountId READ accountId) Q_PROPERTY(QString title READ title WRITE setTitle) Q_PROPERTY(QString url READ url WRITE setUrl) Q_PROPERTY(QString author READ author WRITE setAuthor) @@ -97,7 +119,7 @@ class MessageObject : public QObject { Q_PROPERTY(bool isImportant READ isImportant WRITE setIsImportant) public: - explicit MessageObject(QObject* parent = nullptr); + explicit MessageObject(QSqlDatabase* db, const QString& feed_custom_id, int account_id, QObject* parent = nullptr); void setMessage(Message* message); @@ -107,6 +129,9 @@ class MessageObject : public QObject { Q_INVOKABLE bool isDuplicateWithAttribute(int attribute_check) const; // Generic Message's properties bindings. + QString feedCustomId() const; + int accountId() const; + QString title() const; void setTitle(const QString& title); @@ -129,6 +154,9 @@ class MessageObject : public QObject { void setIsImportant(bool is_important); private: + QSqlDatabase* m_db; + QString m_feedCustomId; + int m_accountId; Message* m_message; }; diff --git a/src/librssguard/core/messagefilter.cpp b/src/librssguard/core/messagefilter.cpp index 6142795b4..966e43f08 100755 --- a/src/librssguard/core/messagefilter.cpp +++ b/src/librssguard/core/messagefilter.cpp @@ -9,18 +9,52 @@ MessageFilter::MessageFilter(int id, QObject* parent) : QObject(parent), m_id(id) {} FilteringAction MessageFilter::filterMessage(QJSEngine* engine) { + // NOTE: Filter is represented by JavaScript code, each filter must define + // function with "filterMessage()" prototype. There is a global "msg" object + // representing "message" available. + // + // All standard classes/functions as specified by ECMA-262 are available. + // + // MessageObject "msg" global object has some writable properties such as "title" or "author", + // see core/message.h file for more info. + // + // Note that function "filterMessage() must return integer values corresponding + // to enumeration "FilteringAction" (see file core/message.h). + // Also, there is a method MessageObject.isDuplicateWithAttribute(int) which is callable + // with "msg" variable and this method checks if given message already exists in + // RSS Guard's database. Method is parameterized and the parameter is integer representation + // of DuplicationAttributeCheck enumeration (see file core/message.h). + // + // Example filtering script might look like this: + /* - * "(function() { " + function helper() { + if (msg.title.includes("A")) { + msg.isImportant = true; + } - //"return msg.isDuplicateWithAttribute(4) ? 1 : 2; " - "msg.isImportant = true;" - "return 1;" - "})"*/ + return 1; + } + + function filterMessage() { + return helper(); + } + */ QJSValue filter_func = engine->evaluate(m_script); - auto filter_output = filter_func.call().toInt(); - FilteringAction decision = FilteringAction(filter_output); - return decision; + if (filter_func.isError()) { + qCritical("Error when evaluating script from filter '%d'. Error is: '%s'", id(), qPrintable(filter_func.toString())); + return FilteringAction::Accept; + } + + auto filter_output = engine->evaluate(QSL("filterMessage()")); + + if (filter_output.isError()) { + qCritical("Error when calling filtering function '%d'. Error is: '%s'", id(), qPrintable(filter_output.toString())); + return FilteringAction::Accept; + } + + return FilteringAction(filter_output.toInt()); } int MessageFilter::id() const { diff --git a/src/librssguard/definitions/definitions.h b/src/librssguard/definitions/definitions.h index 47d86c556..e05c8a557 100755 --- a/src/librssguard/definitions/definitions.h +++ b/src/librssguard/definitions/definitions.h @@ -115,7 +115,7 @@ #define APP_DB_SQLITE_FILE "database.db" // Keep this in sync with schema versions declared in SQL initialization code. -#define APP_DB_SCHEMA_VERSION "14" +#define APP_DB_SCHEMA_VERSION "15" #define APP_DB_UPDATE_FILE_PATTERN "db_update_%1_%2_%3.sql" #define APP_DB_COMMENT_SPLIT "-- !\n" #define APP_DB_NAME_PLACEHOLDER "##" diff --git a/src/librssguard/miscellaneous/databasequeries.cpp b/src/librssguard/miscellaneous/databasequeries.cpp index 60dd38950..8eb6f081e 100755 --- a/src/librssguard/miscellaneous/databasequeries.cpp +++ b/src/librssguard/miscellaneous/databasequeries.cpp @@ -1428,9 +1428,32 @@ QList DatabaseQueries::getMessageFilters(const QSqlDatabase& db, } QMultiMap DatabaseQueries::messageFiltersInFeeds(const QSqlDatabase& db, int account_id, bool* ok) { - // TODO: return list of relations + QSqlQuery q(db); + QMultiMap filters_in_feeds; - return {}; + q.prepare("SELECT filter, feed_custom_id FROM MessageFiltersInFeeds WHERE account_id = :account_id;"); + + q.bindValue(QSL(":account_id"), account_id); + q.setForwardOnly(true); + + if (q.exec()) { + while (q.next()) { + auto rec = q.record(); + + filters_in_feeds.insert(rec.value(1).toString(), rec.value(0).toInt()); + } + + if (ok != nullptr) { + *ok = true; + } + } + else { + if (ok != nullptr) { + *ok = false; + } + } + + return filters_in_feeds; } QList DatabaseQueries::getStandardAccounts(const QSqlDatabase& db, bool* ok) { diff --git a/src/librssguard/services/gmail/network/gmailnetworkfactory.cpp b/src/librssguard/services/gmail/network/gmailnetworkfactory.cpp index 9fb008440..bbaef5d24 100644 --- a/src/librssguard/services/gmail/network/gmailnetworkfactory.cpp +++ b/src/librssguard/services/gmail/network/gmailnetworkfactory.cpp @@ -433,7 +433,8 @@ bool GmailNetworkFactory::obtainAndDecodeFullMessages(const QList& lite } } -QList GmailNetworkFactory::decodeLiteMessages(const QString& messages_json_data, const QString& stream_id, +QList GmailNetworkFactory::decodeLiteMessages(const QString& messages_json_data, + const QString& stream_id, QString& next_page_token) { QList messages; QJsonObject top_object = QJsonDocument::fromJson(messages_json_data.toUtf8()).object();