Greatly enhanced feed filter engine capabilities. Will have to make a configuration GUI now.

This commit is contained in:
Martin Rotter 2020-06-25 15:14:04 +02:00
parent 5b81d97bda
commit 6e51580dcf
8 changed files with 202 additions and 30 deletions

View File

@ -14,6 +14,7 @@
<file>sql/db_update_mysql_11_12.sql</file>
<file>sql/db_update_mysql_12_13.sql</file>
<file>sql/db_update_mysql_13_14.sql</file>
<file>sql/db_update_mysql_14_15.sql</file>
<file>sql/db_init_sqlite.sql</file>
<file>sql/db_update_sqlite_1_2.sql</file>
@ -29,5 +30,6 @@
<file>sql/db_update_sqlite_11_12.sql</file>
<file>sql/db_update_sqlite_12_13.sql</file>
<file>sql/db_update_sqlite_13_14.sql</file>
<file>sql/db_update_sqlite_14_15.sql</file>
</qresource>
</RCC>

View File

@ -4,6 +4,7 @@
#include "core/messagefilter.h"
#include "definitions/definitions.h"
#include "miscellaneous/application.h"
#include "services/abstract/cacheforserviceroot.h"
#include "services/abstract/feed.h"
@ -79,11 +80,12 @@ void FeedDownloader::updateOneFeed(Feed* feed) {
<< QThread::currentThreadId() << "\'.";
bool error_during_obtaining = false;
QElapsedTimer tmr; tmr.start();
QList<Message> msgs = feed->obtainNewMessages(&error_during_obtaining);
qDebug().nospace() << "Downloaded " << msgs.size() << " messages for feed ID "
<< feed->customId() << " URL: " << feed->url() << " title: " << feed->title() << " in thread: \'"
<< QThread::currentThreadId() << "\'.";
<< QThread::currentThreadId() << "\'. Operation took " << tmr.nsecsElapsed() / 1000 << " microseconds.";
// Now, sanitize messages (tweak encoding etc.).
for (auto& msg : msgs) {
@ -102,43 +104,52 @@ void FeedDownloader::updateOneFeed(Feed* feed) {
}
if (!feed->messageFilters().isEmpty()) {
tmr.restart();
bool is_main_thread = QThread::currentThread() == qApp->thread();
QSqlDatabase database = is_main_thread ?
qApp->database()->connection(metaObject()->className()) :
qApp->database()->connection(QSL("feed_upd"));
// Perform per-message filtering.
QJSEngine filter_engine;
// Create JavaScript communication wrapper for the message.
MessageObject msg_obj;
MessageObject msg_obj(&database, feed->customId(), feed->getParentServiceRoot()->accountId());
// Register the wrapper.
auto js_object = filter_engine.newQObject(&msg_obj);
filter_engine.globalObject().setProperty("msg", js_object);
qDebug().nospace() << "Setting up JS evaluation took " << tmr.nsecsElapsed() / 1000 << " microseconds.";
for (int i = 0; i < msgs.size(); i++) {
tmr.restart();
// Attach live message object to wrapper.
msg_obj.setMessage(&msgs[i]);
qDebug().nospace() << "Hooking message took " << tmr.nsecsElapsed() / 1000 << " microseconds.";
auto feed_filters = feed->messageFilters();
for (int i = 0; i < feed_filters.size(); i++) {
QPointer<MessageFilter> filter = feed_filters.at(i);
for (int j = 0; j < feed_filters.size(); j++) {
QPointer<MessageFilter> filter = feed_filters.at(j);
if (filter.isNull()) {
qWarning("Message filter was probably deleted, removing its pointer from list of filters.");
feed_filters.removeAt(i--);
feed_filters.removeAt(j--);
continue;
}
MessageFilter* msg_filter = filter.data();
// Call the filtering logic, given function must return integer value from
// FilteringAction enumeration.
//
// 1. All Qt properties of MessageObject class are accessible.
// For example msg.title.includes("A") returns true if message's title includes "A" etc.
// 2. Some Qt properties of MessageObject are writable, so you can alter your message!
// For example msg.isImportant = true.
tmr.restart();
FilteringAction decision = msg_filter->filterMessage(&filter_engine);
qDebug().nospace() << "Running filter script, it took " << tmr.nsecsElapsed() / 1000 << " microseconds.";
switch (decision) {
case FilteringAction::Accept:

View File

@ -4,6 +4,9 @@
#include "miscellaneous/textfactory.h"
#include <QSqlDatabase>
#include <QSqlError>
#include <QSqlQuery>
#include <QVariant>
// Builds an enclosure from its URL and MIME type. Both strings are taken by value
// and moved into the members.
Enclosure::Enclosure(QString url, QString mime) : m_url(std::move(url)), m_mimeType(std::move(mime)) {}
@ -129,15 +132,77 @@ uint qHash(const Message& key) {
return (uint(key.m_accountId) * 10000) + uint(key.m_id);
}
// Legacy constructor without any database context.
//
// All members are explicitly initialized so that a wrapper created this way never
// carries an indeterminate database pointer or account ID.
// NOTE(review): 0 is only a placeholder account ID here - confirm whether the project
// has a dedicated "no account" value.
MessageObject::MessageObject(QObject* parent)
  : QObject(parent), m_db(nullptr), m_feedCustomId(), m_accountId(0), m_message(nullptr) {}

// Creates the wrapper with everything needed for duplicate detection.
//
// db             - database connection used by isDuplicateWithAttribute(); only the pointer is stored.
// feed_custom_id - custom ID of the feed whose messages are being filtered.
// account_id     - ID of the account the feed belongs to.
// parent         - optional QObject parent.
//
// No message is attached yet, setMessage() provides it.
MessageObject::MessageObject(QSqlDatabase* db, const QString& feed_custom_id, int account_id, QObject* parent)
  : QObject(parent), m_db(db), m_feedCustomId(feed_custom_id), m_accountId(account_id), m_message(nullptr) {}
// Attaches the message this wrapper exposes. Only the pointer is stored, no copy is made.
// NOTE(review): the wrapper dereferences this pointer later on, so the message presumably
// has to outlive its use through the wrapper - confirm against callers.
void MessageObject::setMessage(Message* message) {
m_message = message;
}
bool MessageObject::isDuplicateWithAttribute(int attribute_check) const {
// TODO: Check database according to duplication attribute_check.
return int(attribute_check) == 4;
if (attribute_check <= 0) {
qCritical("Bad DuplicationAttributeCheck value '%d' was passed from JS filter script.", attribute_check);
return true;
}
// Check database according to duplication attribute_check.
DuplicationAttributeCheck attrs = static_cast<DuplicationAttributeCheck>(attribute_check);
QSqlQuery q(*m_db);
QStringList where_clauses;
QList<QPair<QString, QVariant>> bind_values;
// Now we construct the query according to parameter.
if ((attrs& DuplicationAttributeCheck::SameTitle) == DuplicationAttributeCheck::SameTitle) {
where_clauses.append(QSL("title = :title"));
bind_values.append({":title", title()});
}
if ((attrs& DuplicationAttributeCheck::SameUrl) == DuplicationAttributeCheck::SameUrl) {
where_clauses.append(QSL("url = :url"));
bind_values.append({":url", url()});
}
if ((attrs& DuplicationAttributeCheck::SameAuthor) == DuplicationAttributeCheck::SameAuthor) {
where_clauses.append(QSL("author = :author"));
bind_values.append({":author", author()});
}
if ((attrs& DuplicationAttributeCheck::SameDateCreated) == DuplicationAttributeCheck::SameDateCreated) {
where_clauses.append(QSL("date_created = :date_created"));
bind_values.append({":date_created", created().toMSecsSinceEpoch()});
}
where_clauses.append(QSL("account_id = :account_id"));
bind_values.append({":account_id", accountId()});
if ((attrs& DuplicationAttributeCheck::AllFeedsSameAccount) != DuplicationAttributeCheck::AllFeedsSameAccount) {
// Limit to current feed.
where_clauses.append(QSL("feed = :feed"));
bind_values.append({":feed", feedCustomId()});
}
QString full_query = QSL("SELECT COUNT(*) FROM Messages WHERE ") + where_clauses.join(QSL(" AND ")) + QSL(";");
q.setForwardOnly(true);
q.prepare(full_query);
for (const auto& bind : bind_values) {
q.bindValue(bind.first, bind.second);
}
if (q.exec() && q.next()) {
if (q.record().value(0).toInt() > 0) {
// Whoops, we have the "same" message in database.
qDebug("Message '%s' was identified as duplicate by filter script.", qPrintable(title()));
return true;
}
}
else if (q.lastError().isValid()) {
qWarning("Error when checking for duplicate messages via filtering system, error: '%s'.",
qPrintable(q.lastError().text()));
}
return false;
}
QString MessageObject::title() const {
@ -195,3 +260,11 @@ bool MessageObject::isImportant() const {
// Writes the importance flag straight into the attached message.
// NOTE(review): m_message is dereferenced without a null check, so setMessage()
// presumably has to be called beforehand - confirm.
void MessageObject::setIsImportant(bool is_important) {
m_message->m_isImportant = is_important;
}
// Returns the feed custom ID this wrapper was constructed with.
QString MessageObject::feedCustomId() const {
return m_feedCustomId;
}
// Returns the account ID this wrapper was constructed with.
int MessageObject::accountId() const {
return m_accountId;
}

View File

@ -10,6 +10,8 @@
#include <QSqlRecord>
#include <QStringList>
class QSqlDatabase;
// Represents single enclosure.
struct Enclosure {
public:
@ -79,15 +81,35 @@ enum class FilteringAction {
};
// Bit flags selecting which message attributes are compared when looking for
// duplicates in the database. Values can be combined with operator| and tested
// with operator&.
enum class DuplicationAttributeCheck {
  // Message with same title in DB.
  SameTitle = 1,

  // Message with same URL in DB.
  SameUrl = 2,

  // Message with same author in DB.
  SameAuthor = 4,

  // Messages with same creation date in DB.
  SameDateCreated = 8,

  // Compare with all messages from the account not only with messages from same feed.
  AllFeedsSameAccount = 16
};

// Combines two flag sets, the result contains every flag present in either operand.
inline DuplicationAttributeCheck operator|(DuplicationAttributeCheck lhs, DuplicationAttributeCheck rhs) {
  return static_cast<DuplicationAttributeCheck>(static_cast<int>(lhs) | static_cast<int>(rhs));
}

// Intersects two flag sets, the result contains only flags present in both operands.
inline DuplicationAttributeCheck operator&(DuplicationAttributeCheck lhs, DuplicationAttributeCheck rhs) {
  return static_cast<DuplicationAttributeCheck>(static_cast<int>(lhs) & static_cast<int>(rhs));
}
class MessageObject : public QObject {
Q_OBJECT
Q_PROPERTY(QString feedCustomId READ feedCustomId)
Q_PROPERTY(int accountId READ accountId)
Q_PROPERTY(QString title READ title WRITE setTitle)
Q_PROPERTY(QString url READ url WRITE setUrl)
Q_PROPERTY(QString author READ author WRITE setAuthor)
@ -97,7 +119,7 @@ class MessageObject : public QObject {
Q_PROPERTY(bool isImportant READ isImportant WRITE setIsImportant)
public:
explicit MessageObject(QObject* parent = nullptr);
explicit MessageObject(QSqlDatabase* db, const QString& feed_custom_id, int account_id, QObject* parent = nullptr);
void setMessage(Message* message);
@ -107,6 +129,9 @@ class MessageObject : public QObject {
Q_INVOKABLE bool isDuplicateWithAttribute(int attribute_check) const;
// Generic Message's properties bindings.
QString feedCustomId() const;
int accountId() const;
QString title() const;
void setTitle(const QString& title);
@ -129,6 +154,9 @@ class MessageObject : public QObject {
void setIsImportant(bool is_important);
private:
QSqlDatabase* m_db;
QString m_feedCustomId;
int m_accountId;
Message* m_message;
};

View File

@ -9,18 +9,52 @@
// Creates a filter with the given ID and optional QObject parent.
MessageFilter::MessageFilter(int id, QObject* parent) : QObject(parent), m_id(id) {}
// Runs this filter's script against the message currently exposed to the engine.
//
// engine - JavaScript engine in which the script is evaluated.
//
// Returns the value produced by the script's "filterMessage()" function converted to
// FilteringAction. If evaluating the script or calling the function raises an error,
// the error is logged and FilteringAction::Accept is returned.
FilteringAction MessageFilter::filterMessage(QJSEngine* engine) {
  // NOTE: Filter is represented by JavaScript code, each filter must define
  // function with "filterMessage()" prototype. There is a global "msg" object
  // representing "message" available.
  //
  // All standard classes/functions as specified by ECMA-262 are available.
  //
  // MessageObject "msg" global object has some writable properties such as "title" or "author",
  // see core/message.h file for more info.
  //
  // Note that function "filterMessage()" must return integer values corresponding
  // to enumeration "FilteringAction" (see file core/message.h).
  // Also, there is a method MessageObject.isDuplicateWithAttribute(int) which is callable
  // with "msg" variable and this method checks if given message already exists in
  // RSS Guard's database. Method is parameterized and the parameter is integer representation
  // of DuplicationAttributeCheck enumeration (see file core/message.h).
  //
  // Example filtering script might look like this:

  /*
      function helper() {
        if (msg.title.includes("A")) {
          msg.isImportant = true;
        }

        return 1;
      }

      function filterMessage() {
        return helper();
      }
  */

  // First evaluate the script itself so that its functions get defined in the engine.
  const QJSValue script_result = engine->evaluate(m_script);

  if (script_result.isError()) {
    qCritical("Error when evaluating script from filter '%d'. Error is: '%s'", id(), qPrintable(script_result.toString()));
    return FilteringAction::Accept;
  }

  // Then call the entry point the script is required to define.
  const QJSValue filter_output = engine->evaluate(QSL("filterMessage()"));

  if (filter_output.isError()) {
    qCritical("Error when calling filtering function '%d'. Error is: '%s'", id(), qPrintable(filter_output.toString()));
    return FilteringAction::Accept;
  }

  return FilteringAction(filter_output.toInt());
}
int MessageFilter::id() const {

View File

@ -115,7 +115,7 @@
#define APP_DB_SQLITE_FILE "database.db"
// Keep this in sync with schema versions declared in SQL initialization code.
// Defined exactly once: version 15 matches the db_update_*_14_15.sql update scripts.
#define APP_DB_SCHEMA_VERSION "15"
#define APP_DB_UPDATE_FILE_PATTERN "db_update_%1_%2_%3.sql"
#define APP_DB_COMMENT_SPLIT "-- !\n"
#define APP_DB_NAME_PLACEHOLDER "##"

View File

@ -1428,9 +1428,32 @@ QList<MessageFilter*> DatabaseQueries::getMessageFilters(const QSqlDatabase& db,
}
// Loads filter-to-feed assignments of one account from table "MessageFiltersInFeeds".
//
// db         - database connection to run the query on.
// account_id - ID of the account whose assignments are loaded.
// ok         - optional output flag, set to true when the query executed and to false otherwise.
//
// Returns a multi-map keyed by "feed_custom_id" whose values come from the "filter" column
// (presumably filter IDs - confirm against the schema). The map is empty when the query fails.
QMultiMap<QString, int> DatabaseQueries::messageFiltersInFeeds(const QSqlDatabase& db, int account_id, bool* ok) {
  QSqlQuery q(db);
  QMultiMap<QString, int> filters_in_feeds;

  q.setForwardOnly(true);
  q.prepare(QSL("SELECT filter, feed_custom_id FROM MessageFiltersInFeeds WHERE account_id = :account_id;"));
  q.bindValue(QSL(":account_id"), account_id);

  const bool succeeded = q.exec();

  if (succeeded) {
    while (q.next()) {
      // Column 0 is "filter", column 1 is "feed_custom_id".
      filters_in_feeds.insert(q.value(1).toString(), q.value(0).toInt());
    }
  }

  if (ok != nullptr) {
    *ok = succeeded;
  }

  return filters_in_feeds;
}
QList<ServiceRoot*> DatabaseQueries::getStandardAccounts(const QSqlDatabase& db, bool* ok) {

View File

@ -433,7 +433,8 @@ bool GmailNetworkFactory::obtainAndDecodeFullMessages(const QList<Message>& lite
}
}
QList<Message> GmailNetworkFactory::decodeLiteMessages(const QString& messages_json_data, const QString& stream_id,
QList<Message> GmailNetworkFactory::decodeLiteMessages(const QString& messages_json_data,
const QString& stream_id,
QString& next_page_token) {
QList<Message> messages;
QJsonObject top_object = QJsonDocument::fromJson(messages_json_data.toUtf8()).object();