refactor(greader): incrementally fetch the unread items by difference set (#570)

* refactor(greader): incrementally fetch the unread items by difference set
* refactor(greader): incrementally fetch the unread items by difference set
2024-01-30 19:49:47 +08:00 · 2024-01-30 19:49:47 +08:00 · c08753ef57
commit c08753ef57
parent ca9b27a472
3 changed files with 124 additions and 96 deletions
--- a/app/src/main/java/me/ash/reader/domain/repository/ArticleDao.kt
+++ b/app/src/main/java/me/ash/reader/domain/repository/ArticleDao.kt
@ -18,6 +18,32 @@ import java.util.Date
@Dao
 interface ArticleDao {

+    @Query(
+        """
+        UPDATE article SET isStarred = :isStarred 
+        WHERE accountId = :accountId
+        AND id in (:ids)
+        """
+    )
+    fun markAsStarredByIdSet(
+        accountId: Int,
+        ids: Set<String>,
+        isStarred: Boolean,
+    ): Int
+
+    @Query(
+        """
+        UPDATE article SET isUnread = :isUnread 
+        WHERE accountId = :accountId
+        AND id in (:ids)
+        """
+    )
+    fun markAsReadByIdSet(
+        accountId: Int,
+        ids: Set<String>,
+        isUnread: Boolean,
+    ): Int
+
    @Transaction
    @RewriteQueriesToDropUnusedColumns
    @Query(
--- a/app/src/main/java/me/ash/reader/domain/service/GoogleReaderRssService.kt
+++ b/app/src/main/java/me/ash/reader/domain/service/GoogleReaderRssService.kt
@ -14,7 +14,6 @@ import me.ash.reader.R
 import me.ash.reader.domain.model.account.Account
 import me.ash.reader.domain.model.account.security.GoogleReaderSecurityKey
 import me.ash.reader.domain.model.article.Article
-import me.ash.reader.domain.model.article.ArticleMeta
 import me.ash.reader.domain.model.feed.Feed
 import me.ash.reader.domain.model.group.Group
 import me.ash.reader.domain.repository.AccountDao
@ -185,53 +184,20 @@ class GoogleReaderRssService @Inject constructor(
    }

    /**
-     * Google Reader API synchronous processing with object's ID to ensure idempotence
-     * and handle foreign key relationships such as read status, starred status, etc.
+     * This is a reference to Reeder's synchronization logic,
+     * which syncs well across multiple devices.
     *
-     * 1. /reader/api/0/tag/list
-     *     - Full list of categories/folders and tags/labels - and for InnoReader compatibility,
-     *     including the number of unread items in each tags/labels.
+     * 1. Fetch tags (not supported yet)
+     * 2. Fetch folder and subscription list
+     * 3. Fetch all unread item id list
+     * 4. Fetch all starred item id list
+     * 5. Fetch unread contents of items with differences
+     * 6. Fetch starred contents of items with differences
+     * 7. Fetch read contents of items with differences
+     * 8. Remove orphaned groups and feeds, after synchronizing the starred/un-starred
     *
-     * 2. /reader/api/0/subscription/list
-     *     - Full list of subscriptions/feeds, including their category/folder.
-     *     - This is where you get a distinction between categories/folders and tags/labels.
-     *
-     * 3. /reader/api/0/stream/contents/user/-/state/com.google/reading-list
-     * (with some filters in parameter to exclude read items with xt,
-     * and get only the new ones with ot, cf. log below)
-     *     - List of new unread items and their content
-     *     - The response contains among other things the read/unread state,
-     *     the starred/not-starred state, and the tags/labels for each entry.
-     *     - Since this request is very expensive for the client, the network, and the server,
-     *     it is important to use the filters appropriately.
-     *     - If there is no new item since the last synchronisation, the response should be empty,
-     *     and therefore efficient.
-     *
-     * 4. /reader/api/0/stream/items/ids
-     * (with a filter in parameter to exclude read items with xt)
-     *     - Longer list of unread items IDs
-     *     - This allows updating the read/unread status of the local cache of articles - assuming
-     *     the ones not in the list are read.
-     *
-     * 5. /reader/api/0/stream/contents/user/-/state/com.google/starred
-     * (with some filters in parameter to exclude read items with xt,
-     * and get only the new ones with ot)
-     *     - List of new unread starred items and their content
-     *     - If there is no new unread starred item since the last synchronisation,
-     *     the response should be empty, and therefore efficient
-     *     - This is a bit redundant with request 3 and 6,
-     *     but with the advantage of being able to retrieve a larger amount of unread starred items.
-     *
-     * 6. /reader/api/0/stream/contents/user/-/state/com.google/starred
-     * (with some other filters, which includes read starred items)
-     *     - List of starred items (also read ones) and their content.
-     *
-     * 7. /reader/api/0/stream/items/ids
-     * (with a filter to get only starred ones)
-     *     - Longer list of starred items IDs
-     *     - This allows updating the starred/non-starred status of
-     *     the local cache of articles - assuming the ones not in the list are not starred
-     *     - Similar than request 4 but for the starred status.
+     * The following link contains other great synchronization logic,
+     * but it was not adopted due to the solidified domain model of this application.
     *
     * @link https://github.com/FreshRSS/FreshRSS/issues/2566#issuecomment-541317776
     * @link https://github.com/bazqux/bazqux-api?tab=readme-ov-file
@ -243,19 +209,15 @@ class GoogleReaderRssService @Inject constructor(

            try {
                val preTime = System.currentTimeMillis()
+                val preDate = Date(preTime)
                val accountId = context.currentAccountId
                val account = accountDao.queryById(accountId)!!
                val googleReaderAPI = getGoogleReaderAPI()
                val groupIds = mutableSetOf<String>()
                val feedIds = mutableSetOf<String>()
-                val lastUpdateAt = Calendar.getInstance().apply {
-                    if (account.updateAt != null) {
-                        time = account.updateAt!!
-                        add(Calendar.HOUR, -1)
-                    } else {
-                        time = Date(preTime)
-                        add(Calendar.MONTH, -1)
-                    }
+                val lastMonthAt = Calendar.getInstance().apply {
+                    time = preDate
+                    add(Calendar.MONTH, -1)
                }.time.time / 1000

                // 1. Fetch tags (not supported yet)
@ -304,50 +266,90 @@ class GoogleReaderRssService @Inject constructor(
                    feedDao.update(*it.toTypedArray())
                }

-                // 2. Fetch latest unread item contents since last sync
-                var unreadIds = fetchItemIdsAndContinue {
-                    googleReaderAPI.getUnreadItemIds(since = lastUpdateAt, continuationId = it)
-                }
-                fetchItemsContents(
-                    itemIds = unreadIds,
-                    googleReaderAPI = googleReaderAPI,
-                    accountId = accountId,
-                    feedIds = feedIds,
-                    unreadIds = unreadIds,
-                    starredIds = listOf())
+                val localAllItems = articleDao.queryMetadataAll(accountId)
+                val localUnreadIds =
+                    localAllItems.filter { it.isUnread }.map { it.id.dollarLast() }.toSet()
+                val localStarredIds =
+                    localAllItems.filter { it.isStarred }.map { it.id.dollarLast() }.toSet()

-                // 3. Fetch all starred item contents
+                // 3. Fetch all unread item id list
+                val unreadIds = fetchItemIdsAndContinue {
+                    googleReaderAPI.getUnreadItemIds(continuationId = it)
+                }.toSet()
+                Log.i("RLog", "sync unreadIds size: ${unreadIds.size}")
+                val toBeUnread = unreadIds - localUnreadIds
+                Log.i("RLog", "sync toBeUnread size: ${toBeUnread.size}")
+                if (toBeUnread.isNotEmpty()) {
+                    toBeUnread.chunked(999).forEach {
+                        articleDao.markAsReadByIdSet(
+                            accountId = accountId,
+                            ids = it.toSet(),
+                            isUnread = true,
+                        )
+                    }
+                }
+
+                // 4. Fetch all starred item id list
                val starredIds = fetchItemIdsAndContinue {
                    googleReaderAPI.getStarredItemIds(continuationId = it)
+                }.toSet()
+                Log.i("RLog", "sync starredIds size: ${starredIds.size}")
+                val toBeStarred = starredIds - localStarredIds
+                Log.i("RLog", "sync toBeStarred size: ${toBeStarred.size}")
+                if (toBeStarred.isNotEmpty()) {
+                    toBeStarred.chunked(999).forEach {
+                        articleDao.markAsStarredByIdSet(
+                            accountId = accountId,
+                            ids = it.toSet(),
+                            isStarred = true,
+                        )
+                    }
                }
+
+                // 5. Fetch unread contents of items with differences
                fetchItemsContents(
-                    itemIds = starredIds,
+                    itemIds = toBeUnread,
+                    googleReaderAPI = googleReaderAPI,
+                    accountId = accountId,
+                    feedIds = feedIds,
+                    unreadIds = toBeUnread,
+                    starredIds = starredIds,
+                    preDate = preDate,
+                )
+
+                // 6. Fetch starred contents of items with differences
+                fetchItemsContents(
+                    itemIds = toBeStarred,
                    googleReaderAPI = googleReaderAPI,
                    accountId = accountId,
                    feedIds = feedIds,
                    unreadIds = unreadIds,
-                    starredIds = starredIds
+                    starredIds = toBeStarred,
+                    preDate = preDate,
                )

-                // 4. Mark/unmarked items read/starred (/tagged)
-                // Fetch all unread item id list
-                unreadIds = fetchItemIdsAndContinue {
-                    googleReaderAPI.getUnreadItemIds(continuationId = it)
-                }
-                val articlesMeta = articleDao.queryMetadataAll(accountId)
-                for (meta: ArticleMeta in articlesMeta) {
-                    val articleId = meta.id.dollarLast()
-                    val shouldBeRead = !unreadIds.contains(articleId)
-                    val shouldBeUnStarred = !starredIds.contains(articleId)
-                    if (meta.isUnread && shouldBeRead) {
-                        articleDao.markAsReadByArticleId(accountId, meta.id, true)
-                    }
-                    if (meta.isStarred && shouldBeUnStarred) {
-                        articleDao.markAsStarredByArticleId(accountId, meta.id, false)
-                    }
+                // 7. Fetch read contents of items with differences
+                val readIds = fetchItemIdsAndContinue {
+                    googleReaderAPI.getReadItemIds(since = lastMonthAt, continuationId = it)
+                }.toSet()
+                Log.i("RLog", "sync readIds size: ${readIds.size}")
+                val localReadIds = articleDao.queryMetadataAll(accountId).filter { !it.isUnread }
+                    .map { it.id.dollarLast() }.toSet()
+                val toBeRead = readIds - unreadIds - localReadIds
+                Log.i("RLog", "sync toBeRead size: ${toBeRead.size}")
+                if (toBeRead.isNotEmpty()) {
+                    fetchItemsContents(
+                        itemIds = toBeRead,
+                        googleReaderAPI = googleReaderAPI,
+                        accountId = accountId,
+                        feedIds = feedIds,
+                        unreadIds = setOf(),
+                        starredIds = starredIds,
+                        preDate = preDate,
+                    )
                }

-                // 5. Remove orphaned groups and feeds, after synchronizing the starred/un-starred
+                // 8. Remove orphaned groups and feeds, after synchronizing the starred/un-starred
                groupDao.queryAll(accountId)
                    .filter { it.id !in groupIds }
                    .forEach { super.deleteGroup(it, true) }
@ -355,10 +357,9 @@ class GoogleReaderRssService @Inject constructor(
                    .filter { it.id !in feedIds }
                    .forEach { super.deleteFeed(it, true) }

-                // 6. Record the time of this synchronization
                Log.i("RLog", "onCompletion: ${System.currentTimeMillis() - preTime}")
                accountDao.update(account.apply {
-                    updateAt = Date(preTime)
+                    updateAt = Date()
                })
                ListenableWorker.Result.success(SyncWorker.setIsSyncing(false))
            } catch (e: Exception) {
@ -381,20 +382,21 @@ class GoogleReaderRssService @Inject constructor(
    }

    private suspend fun fetchItemsContents(
-        itemIds: List<String>?,
+        itemIds: Set<String>,
        googleReaderAPI: GoogleReaderAPI,
        accountId: Int,
        feedIds: MutableSet<String>,
-        unreadIds: List<String>?,
-        starredIds: List<String?>?,
+        unreadIds: Set<String>,
+        starredIds: Set<String>,
+        preDate: Date,
    ) {
-        itemIds?.chunked(100)?.forEach { chunkedIds ->
+        itemIds.chunked(100).forEach { chunkedIds ->
            articleDao.insert(
                *googleReaderAPI.getItemsContents(chunkedIds).items?.map {
                    val articleId = it.id!!.ofItemStreamIdToId()
                    Article(
                        id = accountId.spacerDollar(articleId),
-                        date = it.published?.run { Date(this * 1000) } ?: Date(),
+                        date = it.published?.run { Date(this * 1000) } ?: preDate,
                        title = it.title.decodeHTML() ?: context.getString(R.string.empty),
                        author = it.author,
                        rawDescription = it.summary?.content ?: "",
@ -406,9 +408,9 @@ class GoogleReaderRssService @Inject constructor(
                        feedId = accountId.spacerDollar(it.origin?.streamId?.ofFeedStreamIdToId()
                            ?: feedIds.first()),
                        accountId = accountId,
-                        isUnread = unreadIds?.contains(articleId) ?: true,
-                        isStarred = starredIds?.contains(articleId) ?: false,
-                        updateAt = it.crawlTimeMsec?.run { Date(this.toLong()) } ?: Date(),
+                        isUnread = unreadIds.contains(articleId),
+                        isStarred = starredIds.contains(articleId),
+                        updateAt = it.crawlTimeMsec?.run { Date(this.toLong()) } ?: preDate,
                    )
                }?.toTypedArray() ?: emptyArray()
            )
--- a/app/src/main/java/me/ash/reader/infrastructure/di/OkHttpClientModule.kt
+++ b/app/src/main/java/me/ash/reader/infrastructure/di/OkHttpClientModule.kt
@ -122,4 +122,4 @@ object UserAgentInterceptor : Interceptor {
    }
 }

-const val USER_AGENT_STRING = "ReadYou / ${BuildConfig.VERSION_NAME}(${BuildConfig.VERSION_CODE})"
+const val USER_AGENT_STRING = "ReadYou/${BuildConfig.VERSION_NAME}(${BuildConfig.VERSION_CODE})"