refactor(greader): incrementally fetch the unread items by difference set (#570)

* refactor(greader): incrementally fetch the unread items by difference set

* refactor(greader): incrementally fetch the unread items by difference set
This commit is contained in:
Ash 2024-01-30 19:49:47 +08:00 committed by GitHub
parent ca9b27a472
commit c08753ef57
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 124 additions and 96 deletions

View File

@ -18,6 +18,32 @@ import java.util.Date
@Dao
interface ArticleDao {
/**
 * Bulk-writes the `isStarred` flag on the articles of one account that are selected by id.
 *
 * Only rows whose `accountId` equals [accountId] AND whose `id` is contained in [ids] are updated.
 *
 * NOTE(review): [ids] is compared against the `article.id` column as-is. Elsewhere in this
 * change article ids appear to be written in an account-prefixed form
 * (`accountId.spacerDollar(articleId)`) - confirm callers pass ids in the stored form.
 *
 * NOTE(review): the statement binds `:isStarred` and `:accountId` in addition to the ids;
 * presumably each id becomes its own bound variable, so `ids.size + 2` should stay within
 * SQLite's bound-variable limit (999 on older SQLite builds) - verify the caller's chunk size.
 *
 * @param accountId account whose articles may be updated.
 * @param ids ids to match against `article.id`.
 * @param isStarred value stored in `isStarred` for every matching row.
 * @return the `Int` Room reports for the UPDATE (per Room's documentation, the number of rows updated).
 */
@Query(
"""
UPDATE article SET isStarred = :isStarred
WHERE accountId = :accountId
AND id in (:ids)
"""
)
fun markAsStarredByIdSet(
accountId: Int,
ids: Set<String>,
isStarred: Boolean,
): Int
/**
 * Bulk-writes the `isUnread` flag on the articles of one account that are selected by id.
 *
 * Despite the "markAsRead" name, the value stored is [isUnread]: pass `true` to mark the
 * matching articles as unread and `false` to mark them as read.
 *
 * Only rows whose `accountId` equals [accountId] AND whose `id` is contained in [ids] are updated.
 *
 * NOTE(review): [ids] is compared against the `article.id` column as-is. Elsewhere in this
 * change article ids appear to be written in an account-prefixed form
 * (`accountId.spacerDollar(articleId)`) - confirm callers pass ids in the stored form.
 *
 * NOTE(review): the statement binds `:isUnread` and `:accountId` in addition to the ids;
 * presumably each id becomes its own bound variable, so `ids.size + 2` should stay within
 * SQLite's bound-variable limit (999 on older SQLite builds) - verify the caller's chunk size.
 *
 * @param accountId account whose articles may be updated.
 * @param ids ids to match against `article.id`.
 * @param isUnread value stored in `isUnread` for every matching row.
 * @return the `Int` Room reports for the UPDATE (per Room's documentation, the number of rows updated).
 */
@Query(
"""
UPDATE article SET isUnread = :isUnread
WHERE accountId = :accountId
AND id in (:ids)
"""
)
fun markAsReadByIdSet(
accountId: Int,
ids: Set<String>,
isUnread: Boolean,
): Int
@Transaction
@RewriteQueriesToDropUnusedColumns
@Query(

View File

@ -14,7 +14,6 @@ import me.ash.reader.R
import me.ash.reader.domain.model.account.Account
import me.ash.reader.domain.model.account.security.GoogleReaderSecurityKey
import me.ash.reader.domain.model.article.Article
import me.ash.reader.domain.model.article.ArticleMeta
import me.ash.reader.domain.model.feed.Feed
import me.ash.reader.domain.model.group.Group
import me.ash.reader.domain.repository.AccountDao
@ -185,53 +184,20 @@ class GoogleReaderRssService @Inject constructor(
}
/**
* Google Reader API synchronous processing with object's ID to ensure idempotence
* and handle foreign key relationships such as read status, starred status, etc.
* This is a reference to Reeder's synchronization logic,
* which syncs well across multiple devices.
*
* 1. /reader/api/0/tag/list
* - Full list of categories/folders and tags/labels - and for InnoReader compatibility,
* including the number of unread items in each tags/labels.
* 1. Fetch tags (not supported yet)
* 2. Fetch folder and subscription list
* 3. Fetch all unread item id list
* 4. Fetch all starred item id list
* 5. Fetch unread contents of items with differences
* 6. Fetch starred contents of items with differences
* 7. Fetch read contents of items with differences
* 8. Remove orphaned groups and feeds, after synchronizing the starred/un-starred
*
* 2. /reader/api/0/subscription/list
* - Full list of subscriptions/feeds, including their category/folder.
* - This is where you get a distinction between categories/folders and tags/labels.
*
* 3. /reader/api/0/stream/contents/user/-/state/com.google/reading-list
* (with some filters in parameter to exclude read items with xt,
* and get only the new ones with ot, cf. log below)
* - List of new unread items and their content
* - The response contains among other things the read/unread state,
* the starred/not-starred state, and the tags/labels for each entry.
* - Since this request is very expensive for the client, the network, and the server,
* it is important to use the filters appropriately.
* - If there is no new item since the last synchronisation, the response should be empty,
* and therefore efficient.
*
* 4. /reader/api/0/stream/items/ids
* (with a filter in parameter to exclude read items with xt)
* - Longer list of unread items IDs
* - This allows updating the read/unread status of the local cache of articles - assuming
* the ones not in the list are read.
*
* 5. /reader/api/0/stream/contents/user/-/state/com.google/starred
* (with some filters in parameter to exclude read items with xt,
* and get only the new ones with ot)
* - List of new unread starred items and their content
* - If there is no new unread starred item since the last synchronisation,
* the response should be empty, and therefore efficient
* - This is a bit redundant with request 3 and 6,
* but with the advantage of being able to retrieve a larger amount of unread starred items.
*
* 6. /reader/api/0/stream/contents/user/-/state/com.google/starred
* (with some other filters, which includes read starred items)
* - List of starred items (also read ones) and their content.
*
* 7. /reader/api/0/stream/items/ids
* (with a filter to get only starred ones)
* - Longer list of starred items IDs
* - This allows updating the starred/non-starred status of
* the local cache of articles - assuming the ones not in the list are not starred
* - Similar than request 4 but for the starred status.
* The following link contains other great synchronization logic,
* but it was not adopted due to the solidified domain model of this application.
*
* @link https://github.com/FreshRSS/FreshRSS/issues/2566#issuecomment-541317776
* @link https://github.com/bazqux/bazqux-api?tab=readme-ov-file
@ -243,19 +209,15 @@ class GoogleReaderRssService @Inject constructor(
try {
val preTime = System.currentTimeMillis()
val preDate = Date(preTime)
val accountId = context.currentAccountId
val account = accountDao.queryById(accountId)!!
val googleReaderAPI = getGoogleReaderAPI()
val groupIds = mutableSetOf<String>()
val feedIds = mutableSetOf<String>()
val lastUpdateAt = Calendar.getInstance().apply {
if (account.updateAt != null) {
time = account.updateAt!!
add(Calendar.HOUR, -1)
} else {
time = Date(preTime)
add(Calendar.MONTH, -1)
}
val lastMonthAt = Calendar.getInstance().apply {
time = preDate
add(Calendar.MONTH, -1)
}.time.time / 1000
// 1. Fetch tags (not supported yet)
@ -304,50 +266,90 @@ class GoogleReaderRssService @Inject constructor(
feedDao.update(*it.toTypedArray())
}
// 2. Fetch latest unread item contents since last sync
var unreadIds = fetchItemIdsAndContinue {
googleReaderAPI.getUnreadItemIds(since = lastUpdateAt, continuationId = it)
}
fetchItemsContents(
itemIds = unreadIds,
googleReaderAPI = googleReaderAPI,
accountId = accountId,
feedIds = feedIds,
unreadIds = unreadIds,
starredIds = listOf())
val localAllItems = articleDao.queryMetadataAll(accountId)
val localUnreadIds =
localAllItems.filter { it.isUnread }.map { it.id.dollarLast() }.toSet()
val localStarredIds =
localAllItems.filter { it.isStarred }.map { it.id.dollarLast() }.toSet()
// 3. Fetch all starred item contents
// 3. Fetch all unread item id list
val unreadIds = fetchItemIdsAndContinue {
googleReaderAPI.getUnreadItemIds(continuationId = it)
}.toSet()
Log.i("RLog", "sync unreadIds size: ${unreadIds.size}")
val toBeUnread = unreadIds - localUnreadIds
Log.i("RLog", "sync toBeUnread size: ${toBeUnread.size}")
if (toBeUnread.isNotEmpty()) {
toBeUnread.chunked(999).forEach {
articleDao.markAsReadByIdSet(
accountId = accountId,
ids = it.toSet(),
isUnread = true,
)
}
}
// 4. Fetch all starred item id list
val starredIds = fetchItemIdsAndContinue {
googleReaderAPI.getStarredItemIds(continuationId = it)
}.toSet()
Log.i("RLog", "sync starredIds size: ${starredIds.size}")
val toBeStarred = starredIds - localStarredIds
Log.i("RLog", "sync toBeStarred size: ${toBeStarred.size}")
if (toBeStarred.isNotEmpty()) {
toBeStarred.chunked(999).forEach {
articleDao.markAsStarredByIdSet(
accountId = accountId,
ids = it.toSet(),
isStarred = true,
)
}
}
// 5. Fetch unread contents of items with differences
fetchItemsContents(
itemIds = starredIds,
itemIds = toBeUnread,
googleReaderAPI = googleReaderAPI,
accountId = accountId,
feedIds = feedIds,
unreadIds = toBeUnread,
starredIds = starredIds,
preDate = preDate,
)
// 6. Fetch starred contents of items with differences
fetchItemsContents(
itemIds = toBeStarred,
googleReaderAPI = googleReaderAPI,
accountId = accountId,
feedIds = feedIds,
unreadIds = unreadIds,
starredIds = starredIds
starredIds = toBeStarred,
preDate = preDate,
)
// 4. Mark/unmarked items read/starred (/tagged)
// Fetch all unread item id list
unreadIds = fetchItemIdsAndContinue {
googleReaderAPI.getUnreadItemIds(continuationId = it)
}
val articlesMeta = articleDao.queryMetadataAll(accountId)
for (meta: ArticleMeta in articlesMeta) {
val articleId = meta.id.dollarLast()
val shouldBeRead = !unreadIds.contains(articleId)
val shouldBeUnStarred = !starredIds.contains(articleId)
if (meta.isUnread && shouldBeRead) {
articleDao.markAsReadByArticleId(accountId, meta.id, true)
}
if (meta.isStarred && shouldBeUnStarred) {
articleDao.markAsStarredByArticleId(accountId, meta.id, false)
}
// 7. Fetch read contents of items with differences
val readIds = fetchItemIdsAndContinue {
googleReaderAPI.getReadItemIds(since = lastMonthAt, continuationId = it)
}.toSet()
Log.i("RLog", "sync readIds size: ${readIds.size}")
val localReadIds = articleDao.queryMetadataAll(accountId).filter { !it.isUnread }
.map { it.id.dollarLast() }.toSet()
val toBeRead = readIds - unreadIds - localReadIds
Log.i("RLog", "sync toBeRead size: ${toBeRead.size}")
if (toBeRead.isNotEmpty()) {
fetchItemsContents(
itemIds = toBeRead,
googleReaderAPI = googleReaderAPI,
accountId = accountId,
feedIds = feedIds,
unreadIds = setOf(),
starredIds = starredIds,
preDate = preDate,
)
}
// 5. Remove orphaned groups and feeds, after synchronizing the starred/un-starred
// 8. Remove orphaned groups and feeds, after synchronizing the starred/un-starred
groupDao.queryAll(accountId)
.filter { it.id !in groupIds }
.forEach { super.deleteGroup(it, true) }
@ -355,10 +357,9 @@ class GoogleReaderRssService @Inject constructor(
.filter { it.id !in feedIds }
.forEach { super.deleteFeed(it, true) }
// 6. Record the time of this synchronization
Log.i("RLog", "onCompletion: ${System.currentTimeMillis() - preTime}")
accountDao.update(account.apply {
updateAt = Date(preTime)
updateAt = Date()
})
ListenableWorker.Result.success(SyncWorker.setIsSyncing(false))
} catch (e: Exception) {
@ -381,20 +382,21 @@ class GoogleReaderRssService @Inject constructor(
}
private suspend fun fetchItemsContents(
itemIds: List<String>?,
itemIds: Set<String>,
googleReaderAPI: GoogleReaderAPI,
accountId: Int,
feedIds: MutableSet<String>,
unreadIds: List<String>?,
starredIds: List<String?>?,
unreadIds: Set<String>,
starredIds: Set<String>,
preDate: Date,
) {
itemIds?.chunked(100)?.forEach { chunkedIds ->
itemIds.chunked(100).forEach { chunkedIds ->
articleDao.insert(
*googleReaderAPI.getItemsContents(chunkedIds).items?.map {
val articleId = it.id!!.ofItemStreamIdToId()
Article(
id = accountId.spacerDollar(articleId),
date = it.published?.run { Date(this * 1000) } ?: Date(),
date = it.published?.run { Date(this * 1000) } ?: preDate,
title = it.title.decodeHTML() ?: context.getString(R.string.empty),
author = it.author,
rawDescription = it.summary?.content ?: "",
@ -406,9 +408,9 @@ class GoogleReaderRssService @Inject constructor(
feedId = accountId.spacerDollar(it.origin?.streamId?.ofFeedStreamIdToId()
?: feedIds.first()),
accountId = accountId,
isUnread = unreadIds?.contains(articleId) ?: true,
isStarred = starredIds?.contains(articleId) ?: false,
updateAt = it.crawlTimeMsec?.run { Date(this.toLong()) } ?: Date(),
isUnread = unreadIds.contains(articleId),
isStarred = starredIds.contains(articleId),
updateAt = it.crawlTimeMsec?.run { Date(this.toLong()) } ?: preDate,
)
}?.toTypedArray() ?: emptyArray()
)

View File

@ -122,4 +122,4 @@ object UserAgentInterceptor : Interceptor {
}
}
const val USER_AGENT_STRING = "ReadYou / ${BuildConfig.VERSION_NAME}(${BuildConfig.VERSION_CODE})"
const val USER_AGENT_STRING = "ReadYou/${BuildConfig.VERSION_NAME}(${BuildConfig.VERSION_CODE})"