Guess RSS type by using konsume-xml instead of reading content by hand

This commit is contained in:
Shinokuni 2021-08-15 16:28:17 +02:00
parent 1533f67de8
commit 655ab2bc83
3 changed files with 41 additions and 4 deletions

View File

@ -2,6 +2,7 @@ package com.readrops.api.localfeed
import android.accounts.NetworkErrorException
import androidx.annotation.WorkerThread
import com.gitlab.mvysny.konsumexml.konsumeXml
import com.readrops.api.localfeed.json.JSONFeedAdapter
import com.readrops.api.localfeed.json.JSONItemsAdapter
import com.readrops.api.utils.ApiUtils
@ -49,10 +50,17 @@ class LocalRSSDataSource(private val httpClient: OkHttpClient): KoinComponent {
var type = LocalRSSHelper.getRSSType(contentType)
val bodyArray = response.peekBody(Long.MAX_VALUE).bytes()
val konsumer = ByteArrayInputStream(bodyArray).konsumeXml()
// if we can't guess type based on content-type header, we use the content
if (type == LocalRSSHelper.RSSType.UNKNOWN)
type = LocalRSSHelper.getRSSContentType(ByteArrayInputStream(bodyArray))
if (type == LocalRSSHelper.RSSType.UNKNOWN) {
val rootKonsumer = konsumer.nextElement(LocalRSSHelper.RSS_ROOT_NAMES)
if (rootKonsumer != null) {
type = LocalRSSHelper.guessRSSType(rootKonsumer)
konsumer.close()
}
}
// if we can't guess type even with the content, we are unable to go further
if (type == LocalRSSHelper.RSSType.UNKNOWN) throw UnknownFormatException("Unable to guess $url RSS type")
@ -85,8 +93,15 @@ class LocalRSSDataSource(private val httpClient: OkHttpClient): KoinComponent {
var type = LocalRSSHelper.getRSSType(contentType)
if (type == LocalRSSHelper.RSSType.UNKNOWN)
type = LocalRSSHelper.getRSSContentType(response.body?.byteStream()!!) // stream is closed in helper method
if (type == LocalRSSHelper.RSSType.UNKNOWN) {
val konsumer = response.body!!.byteStream().konsumeXml().apply {
val rootKonsumer = nextElement(LocalRSSHelper.RSS_ROOT_NAMES)
rootKonsumer?.let { type = LocalRSSHelper.guessRSSType(rootKonsumer) }
}
konsumer.close()
}
type != LocalRSSHelper.RSSType.UNKNOWN
} else false

View File

@ -1,5 +1,8 @@
package com.readrops.api.localfeed
import com.gitlab.mvysny.konsumexml.Konsumer
import com.gitlab.mvysny.konsumexml.Names
import com.readrops.api.utils.extensions.checkRoot
import java.io.InputStream
object LocalRSSHelper {
@ -14,6 +17,11 @@ object LocalRSSHelper {
private const val RSS_2_REGEX = "rss.*version=\"2.0\""
private const val ATOM_REGEX = "<feed.* xmlns=\"http://www.w3.org/2005/Atom\""
// Local names of the root elements used to recognise each supported feed format.
// RSS 1.0 documents are rooted at <rdf:RDF>; XML names are case-sensitive, so the
// local name has to be spelled in upper case to match real feeds.
const val RSS_1_ROOT_NAME = "RDF"
const val RSS_2_ROOT_NAME = "rss"
const val ATOM_ROOT_NAME = "feed"

// All root names accepted when probing a document for its feed type.
val RSS_ROOT_NAMES = Names.of(RSS_1_ROOT_NAME, RSS_2_ROOT_NAME, ATOM_ROOT_NAME)
/**
* Guess RSS type based on content-type header
*/
@ -56,6 +64,13 @@ object LocalRSSHelper {
return if (type != null) getRSSType(type) != RSSType.UNKNOWN else false
}
/**
 * Guess RSS type based on the given [konsumer].
 *
 * The konsumer is tested against each known root name in turn through
 * [checkRoot], in the order RSS 1, RSS 2, Atom; the first match wins.
 *
 * @param konsumer konsumer to test against the known root names
 * @return the matching [RSSType], or [RSSType.UNKNOWN] when no root name matches
 */
fun guessRSSType(konsumer: Konsumer): RSSType {
    if (konsumer.checkRoot(RSS_1_ROOT_NAME)) return RSSType.RSS_1
    if (konsumer.checkRoot(RSS_2_ROOT_NAME)) return RSSType.RSS_2
    if (konsumer.checkRoot(ATOM_ROOT_NAME)) return RSSType.ATOM

    return RSSType.UNKNOWN
}
enum class RSSType {
RSS_1,
RSS_2,

View File

@ -18,4 +18,11 @@ fun Konsumer.nullableText(): String? {
/**
 * Reads the text returned by [Konsumer.textRecursively] and normalises it.
 *
 * The text is trimmed before the emptiness check, so content made only of
 * whitespace yields `null` rather than an empty string.
 *
 * @return the trimmed text, or `null` when nothing but whitespace was found
 */
fun Konsumer.nullableTextRecursively(): String? =
    textRecursively()
        .trim()
        .takeIf { it.isNotEmpty() }
/**
 * Boolean wrapper around [Konsumer.checkCurrent].
 *
 * @param name element name handed to [Konsumer.checkCurrent]
 * @return `true` when [Konsumer.checkCurrent] completes normally, `false` when
 * it throws any [Exception]
 */
fun Konsumer.checkRoot(name: String): Boolean {
    return try {
        checkCurrent(name)
        true
    } catch (ignored: Exception) {
        // A failed check is reported through the return value instead of being propagated.
        false
    }
}