From a785da367ed5fac21033c94b67c258ded1c5bb7f Mon Sep 17 00:00:00 2001 From: Martin Rotter Date: Fri, 30 Jul 2021 14:14:46 +0200 Subject: [PATCH] adaptive sync algorithm --- src/librssguard/core/feeddownloader.cpp | 6 +- src/librssguard/core/message.cpp | 17 +- src/librssguard/core/message.h | 4 +- src/librssguard/database/databasequeries.cpp | 14 -- src/librssguard/network-web/webfactory.cpp | 4 +- .../services/greader/definitions.h | 1 + .../services/greader/greadernetwork.cpp | 151 ++++++++++++------ .../services/greader/greadernetwork.h | 3 +- 8 files changed, 128 insertions(+), 72 deletions(-) diff --git a/src/librssguard/core/feeddownloader.cpp b/src/librssguard/core/feeddownloader.cpp index 44e4c9092..3c593313c 100644 --- a/src/librssguard/core/feeddownloader.cpp +++ b/src/librssguard/core/feeddownloader.cpp @@ -192,7 +192,7 @@ void FeedDownloader::updateOneFeed(Feed* feed, // Now, sanitize messages (tweak encoding etc.). for (auto& msg : msgs) { msg.m_accountId = acc_id; - msg.sanitize(); + msg.sanitize(feed); } if (!feed->messageFilters().isEmpty()) { @@ -349,8 +349,12 @@ void FeedDownloader::updateOneFeed(Feed* feed, << feed->customId() << "' URL: '" << feed->source() << "' title: '" << feed->title() << "' in thread: '" << QThread::currentThreadId() << "'."; + tmr.restart(); auto updated_messages = feed->updateMessages(msgs, false); + qDebugNN << LOGSEC_FEEDDOWNLOADER + << "Updating messages in DB took " << tmr.nsecsElapsed() / 1000 << " microseconds."; + feed->setStatus(updated_messages.first > 0 || updated_messages.second > 0 ? Feed::Status::NewMessages : Feed::Status::Normal); diff --git a/src/librssguard/core/message.cpp b/src/librssguard/core/message.cpp index 566d06cb1..3957aa250 100644 --- a/src/librssguard/core/message.cpp +++ b/src/librssguard/core/message.cpp @@ -4,6 +4,7 @@ #include "3rd-party/boolinq/boolinq.h" #include "miscellaneous/textfactory.h" +#include "services/abstract/feed.h" #include "services/abstract/label.h" #include @@ -72,7 +73,7 @@ Message::Message() { m_assignedLabels = QList(); } -void Message::sanitize() { +void Message::sanitize(const Feed* feed) { // Sanitize title. m_title = m_title @@ -84,6 +85,20 @@ void Message::sanitize() { // Remove all newlines and leading white space. .remove(QRegularExpression(QSL("([\\n\\r])|(^\\s)"))); + + // Check if messages contain relative URLs and if they do, then replace them. + if (m_url.startsWith(QL1S("//"))) { + m_url = QString(URI_SCHEME_HTTPS) + m_url.mid(2); + } + else if (QUrl(m_url).isRelative()) { + QUrl base(feed->source()); + + if (base.isValid()) { + base = QUrl(base.scheme() + QSL("://") + base.host()); + + m_url = base.resolved(m_url).toString(); + } + } } Message Message::fromSqlRecord(const QSqlRecord& record, bool* result) { diff --git a/src/librssguard/core/message.h b/src/librssguard/core/message.h index 42d2d664a..5bd5f00f5 100644 --- a/src/librssguard/core/message.h +++ b/src/librssguard/core/message.h @@ -29,12 +29,14 @@ class RSSGUARD_DLLSPEC Enclosures { static QString encodeEnclosuresToString(const QList& enclosures); }; +class Feed; + // Represents single message. class RSSGUARD_DLLSPEC Message { public: explicit Message(); - void sanitize(); + void sanitize(const Feed* feed); // Creates Message from given record, which contains // row from query SELECT * FROM Messages WHERE ....; diff --git a/src/librssguard/database/databasequeries.cpp b/src/librssguard/database/databasequeries.cpp index b3016e4fa..5bde0afb5 100755 --- a/src/librssguard/database/databasequeries.cpp +++ b/src/librssguard/database/databasequeries.cpp @@ -1094,20 +1094,6 @@ QPair DatabaseQueries::updateMessages(QSqlDatabase db, } for (Message message : messages) { - // Check if messages contain relative URLs and if they do, then replace them. - if (message.m_url.startsWith(QL1S("//"))) { - message.m_url = QString(URI_SCHEME_HTTPS) + message.m_url.mid(2); - } - else if (QUrl(message.m_url).isRelative()) { - QUrl base(feed->source()); - - if (base.isValid()) { - base = QUrl(base.scheme() + QSL("://") + base.host()); - - message.m_url = base.resolved(message.m_url).toString(); - } - } - int id_existing_message = -1; qint64 date_existing_message = 0; bool is_read_existing_message = false; diff --git a/src/librssguard/network-web/webfactory.cpp b/src/librssguard/network-web/webfactory.cpp index a9aa45f99..c23d4c747 100644 --- a/src/librssguard/network-web/webfactory.cpp +++ b/src/librssguard/network-web/webfactory.cpp @@ -195,9 +195,11 @@ QString WebFactory::unescapeHtml(const QString& html) { pos++; } - qDebugNN << LOGSEC_CORE + /* + qDebugNN << LOGSEC_CORE << "Unescaped string" << QUOTE_W_SPACE(html) << "to" << QUOTE_W_SPACE_DOT(output); + */ return output; } diff --git a/src/librssguard/services/greader/definitions.h b/src/librssguard/services/greader/definitions.h index bb069c79b..b3fadfe13 100755 --- a/src/librssguard/services/greader/definitions.h +++ b/src/librssguard/services/greader/definitions.h @@ -33,6 +33,7 @@ // Misc. #define GREADER_API_EDIT_TAG_BATCH 200 +#define GREADER_GLOBAL_UPDATE_THRES 0.5 // The Old Reader. #define TOR_SPONSORED_STREAM_ID "tor/sponsored" diff --git a/src/librssguard/services/greader/greadernetwork.cpp b/src/librssguard/services/greader/greadernetwork.cpp index 624f30bd8..035e7c606 100755 --- a/src/librssguard/services/greader/greadernetwork.cpp +++ b/src/librssguard/services/greader/greadernetwork.cpp @@ -20,7 +20,7 @@ GreaderNetwork::GreaderNetwork(QObject* parent) : QObject(parent), m_service(GreaderServiceRoot::Service::FreshRss), m_username(QString()), m_password(QString()), m_baseUrl(QString()), m_batchSize(GREADER_DEFAULT_BATCH_SIZE), m_downloadOnlyUnreadMessages(false), - m_prefetchedStarredMessages({}) { + m_prefetchedMessages({}), m_performGlobalFetching(false) { clearCredentials(); } @@ -126,7 +126,15 @@ void GreaderNetwork::prepareFeedFetching(GreaderServiceRoot* root, const QHash>& stated_msgs, const QHash& tagged_msgs, const QNetworkProxy& proxy) { - m_prefetchedStarredMessages.clear(); + m_prefetchedMessages.clear(); + + double perc_of_fetching = (feeds.size() * 1.0) / root->getSubTreeFeeds().size(); + + m_performGlobalFetching = perc_of_fetching > GREADER_GLOBAL_UPDATE_THRES; + + qDebugNN << LOGSEC_GREADER + << "Percentage of feeds for fetching:" + << QUOTE_W_SPACE_DOT(perc_of_fetching); auto remote_starred_ids_list = itemIds(GREADER_API_FULL_STATE_IMPORTANT, false, proxy); @@ -135,8 +143,8 @@ void GreaderNetwork::prepareFeedFetching(GreaderServiceRoot* root, } QSet remote_starred_ids(remote_starred_ids_list.begin(), remote_starred_ids_list.end()); - QList> all_states = stated_msgs.values(); QSet local_starred_ids; + QList> all_states = stated_msgs.values(); for (auto& lst : all_states) { auto s = lst.value(ServiceRoot::BagOfMessages::Starred); @@ -144,12 +152,50 @@ void GreaderNetwork::prepareFeedFetching(GreaderServiceRoot* root, local_starred_ids.unite(QSet(s.begin(), s.end())); } - QSet starred_to_download((remote_starred_ids - local_starred_ids).unite(local_starred_ids - remote_starred_ids)); - QList starred_to_download_list(starred_to_download.begin(), starred_to_download.end()); + auto starred_to_download((remote_starred_ids - local_starred_ids).unite(local_starred_ids - remote_starred_ids)); + auto to_download = starred_to_download; + + if (m_performGlobalFetching) { + qWarningNN << LOGSEC_GREADER << "Performing global contents fetching."; + + auto remote_all_ids_list = itemIds(GREADER_API_FULL_STATE_READING_LIST, false, proxy); + auto remote_unread_ids_list = itemIds(GREADER_API_FULL_STATE_READING_LIST, true, proxy); + + for (int i = 0; i < remote_all_ids_list.size(); i++) { + remote_all_ids_list.replace(i, convertShortStreamIdToLongStreamId(remote_all_ids_list.at(i))); + } + + for (int i = 0; i < remote_unread_ids_list.size(); i++) { + remote_unread_ids_list.replace(i, convertShortStreamIdToLongStreamId(remote_unread_ids_list.at(i))); + } + + QSet remote_all_ids(remote_all_ids_list.begin(), remote_all_ids_list.end()); + QSet remote_unread_ids(remote_unread_ids_list.begin(), remote_unread_ids_list.end()); + QSet remote_read_ids = remote_all_ids - remote_unread_ids; + QSet local_unread_ids; + QSet local_read_ids; + + for (auto& lst : all_states) { + auto u = lst.value(ServiceRoot::BagOfMessages::Unread); + auto r = lst.value(ServiceRoot::BagOfMessages::Read); + + local_unread_ids.unite(QSet(u.begin(), u.end())); + local_read_ids.unite(QSet(r.begin(), r.end())); + } + + auto not_downloaded = remote_all_ids - local_read_ids - local_unread_ids; + auto moved_unread = local_unread_ids.intersect(remote_read_ids); + auto moved_read = local_read_ids.intersect(remote_unread_ids); + + to_download += not_downloaded + moved_read + moved_unread; + } + else { + qWarningNN << LOGSEC_GREADER << "Performing feed-based contents fetching."; + } Feed::Status error; - m_prefetchedStarredMessages = itemContents(root, starred_to_download_list, error, proxy); + m_prefetchedMessages = itemContents(root, QList(to_download.begin(), to_download.end()), error, proxy); } QList GreaderNetwork::getMessagesIntelligently(ServiceRoot* root, @@ -158,62 +204,61 @@ QList GreaderNetwork::getMessagesIntelligently(ServiceRoot* root, const QHash& tagged_messages, Feed::Status& error, const QNetworkProxy& proxy) { - // 1. Get unread IDs for a feed. - // 2. Get read IDs for a feed. - // 3. Download messages/contents for missing or changed IDs. - // 4. Add prefetched starred msgs. - auto remote_all_ids_list = itemIds(stream_id, false, proxy); - auto remote_unread_ids_list = itemIds(stream_id, true, proxy); + QList msgs; - // Convert item IDs to long form. - for (int i = 0; i < remote_all_ids_list.size(); i++) { - remote_all_ids_list.replace(i, convertShortStreamIdToLongStreamId(remote_all_ids_list.at(i))); + if (!m_performGlobalFetching) { + // 1. Get unread IDs for a feed. + // 2. Get read IDs for a feed. + // 3. Download messages/contents for missing or changed IDs. + // 4. Add prefetched starred msgs. + auto remote_all_ids_list = itemIds(stream_id, false, proxy); + auto remote_unread_ids_list = itemIds(stream_id, true, proxy); + + // Convert item IDs to long form. + for (int i = 0; i < remote_all_ids_list.size(); i++) { + remote_all_ids_list.replace(i, convertShortStreamIdToLongStreamId(remote_all_ids_list.at(i))); + } + + for (int i = 0; i < remote_unread_ids_list.size(); i++) { + remote_unread_ids_list.replace(i, convertShortStreamIdToLongStreamId(remote_unread_ids_list.at(i))); + } + + QSet remote_all_ids(remote_all_ids_list.begin(), remote_all_ids_list.end()); + + // 1. + auto local_unread_ids_list = stated_messages.value(ServiceRoot::BagOfMessages::Unread); + QSet remote_unread_ids(remote_unread_ids_list.begin(), remote_unread_ids_list.end()); + QSet local_unread_ids(local_unread_ids_list.begin(), + local_unread_ids_list.end()); + + // 2. + auto local_read_ids_list = stated_messages.value(ServiceRoot::BagOfMessages::Read); + QSet remote_read_ids = remote_all_ids - remote_unread_ids; + QSet local_read_ids(local_read_ids_list.begin(), + local_read_ids_list.end()); + + // 3. + auto not_downloaded = remote_all_ids - local_read_ids - local_unread_ids; + auto moved_unread = local_unread_ids.intersect(remote_read_ids); + auto moved_read = local_read_ids.intersect(remote_unread_ids); + auto to_download = not_downloaded + moved_read + moved_unread; + QList to_download_list(to_download.begin(), to_download.end()); + + if (!to_download_list.isEmpty()) { + msgs = itemContents(root, to_download_list, error, proxy); + } } - for (int i = 0; i < remote_unread_ids_list.size(); i++) { - remote_unread_ids_list.replace(i, convertShortStreamIdToLongStreamId(remote_unread_ids_list.at(i))); - } - - QSet remote_all_ids(remote_all_ids_list.begin(), remote_all_ids_list.end()); - - //remote_all_ids += QSet(remote_starred_ids_list.begin(), remote_starred_ids_list.end()); - - // 1. - auto local_unread_ids_list = stated_messages.value(ServiceRoot::BagOfMessages::Unread); - QSet remote_unread_ids(remote_unread_ids_list.begin(), remote_unread_ids_list.end()); - QSet local_unread_ids(local_unread_ids_list.begin(), - local_unread_ids_list.end()); - - // 2. - auto local_read_ids_list = stated_messages.value(ServiceRoot::BagOfMessages::Read); - QSet remote_read_ids = remote_all_ids - remote_unread_ids; - QSet local_read_ids(local_read_ids_list.begin(), - local_read_ids_list.end()); - - // 3. - auto not_downloaded = remote_all_ids - local_read_ids - local_unread_ids; - auto moved_unread = local_unread_ids.intersect(remote_read_ids); - auto moved_read = local_read_ids.intersect(remote_unread_ids); - auto to_download = not_downloaded + moved_read + moved_unread; - - if (to_download.isEmpty()) { - return {}; - } - - QList to_download_list(to_download.begin(), to_download.end()); - auto msgs = itemContents(root, to_download_list, error, proxy); - - // Filter out (starred) messages from other feeds. - // TODO: Cache them instead? - for (int i = 0; i < m_prefetchedStarredMessages.size(); i++) { - auto prefetched_msg = m_prefetchedStarredMessages.at(i); + // Add prefetched messages. + for (int i = 0; i < m_prefetchedMessages.size(); i++) { + auto prefetched_msg = m_prefetchedMessages.at(i); if (prefetched_msg.m_feedId == stream_id && !boolinq::from(msgs).any([&prefetched_msg](const Message& ms) { return ms.m_customId == prefetched_msg.m_customId; })) { msgs.append(prefetched_msg); - m_prefetchedStarredMessages.removeAt(i--); + m_prefetchedMessages.removeAt(i--); } } diff --git a/src/librssguard/services/greader/greadernetwork.h b/src/librssguard/services/greader/greadernetwork.h index bdb686bcb..d25132dfa 100755 --- a/src/librssguard/services/greader/greadernetwork.h +++ b/src/librssguard/services/greader/greadernetwork.h @@ -115,7 +115,8 @@ class GreaderNetwork : public QObject { QString m_authSid; QString m_authAuth; QString m_authToken; - QList m_prefetchedStarredMessages; + QList m_prefetchedMessages; + bool m_performGlobalFetching; }; #endif // GREADERNETWORK_H