Add adapter for RSS item parsing

This commit is contained in:
Shinokuni 2020-09-12 19:34:03 +02:00
parent 4bf56b8f7f
commit 20e814f36d
11 changed files with 317 additions and 1 deletions

View File

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Test fixture: a single RSS 2.0 item that carries BOTH an <enclosure> image
(https://image1.jpg) and a <media:content> image (https://image2.jpg).
NOTE(review): presumably the asset loaded by enclosureTest
(rss_items_enclosure.xml), which expects the enclosure URL to win - confirm.
-->
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/"
version="2.0">
<channel>
<item>
<title>title</title>
<link>link</link>
<dc:creator><![CDATA[creator]]></dc:creator>
<dc:date>2020-08-05T14:03:48Z</dc:date>
<category><![CDATA[Category 1]]></category>
<category><![CDATA[Category 2]]></category>
<category><![CDATA[Category 3]]></category>
<category><![CDATA[Category 4]]></category>
<category><![CDATA[Category 5]]></category>
<category><![CDATA[Category 6]]></category>
<guid isPermaLink="false">guid</guid>
<description><![CDATA[description]]></description>
<content:encoded><![CDATA[content:encoded]]></content:encoded>
<enclosure length="0" type="image/jpg" url="https://image1.jpg" />
<media:content medium="image" url="https://image2.jpg"/>
</item>
</channel>
</rss>

View File

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Test fixture: a single RSS 2.0 item whose only image source is a
<media:content medium="image"> element (https://image2.jpg); there is no
<enclosure>.
NOTE(review): presumably the asset loaded by mediaContentTest
(rss_items_media_content.xml) - confirm.
-->
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/"
version="2.0">
<channel>
<item>
<title>title</title>
<link>link</link>
<dc:creator><![CDATA[creator]]></dc:creator>
<dc:date>2020-08-05T14:03:48Z</dc:date>
<category><![CDATA[Category 1]]></category>
<category><![CDATA[Category 2]]></category>
<category><![CDATA[Category 3]]></category>
<category><![CDATA[Category 4]]></category>
<category><![CDATA[Category 5]]></category>
<category><![CDATA[Category 6]]></category>
<guid isPermaLink="false">guid</guid>
<description><![CDATA[description]]></description>
<content:encoded><![CDATA[content:encoded]]></content:encoded>
<media:content medium="image" url="https://image2.jpg" />
</item>
</channel>
</rss>

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Test fixture: a single RSS 2.0 item with a title and a link but NO date
element (neither <pubDate> nor <dc:date>).
NOTE(review): presumably the asset loaded by noDateTest
(rss_items_no_date.xml) - confirm.
-->
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
<channel>
<item>
<title>title</title>
<link>link</link>
<dc:creator><![CDATA[creator]]></dc:creator>
<category><![CDATA[Category 1]]></category>
<category><![CDATA[Category 2]]></category>
<category><![CDATA[Category 3]]></category>
<category><![CDATA[Category 4]]></category>
<category><![CDATA[Category 5]]></category>
<category><![CDATA[Category 6]]></category>
<guid isPermaLink="false">guid</guid>
<description><![CDATA[description]]></description>
<content:encoded><![CDATA[content:encoded]]></content:encoded>
</item>
</channel>
</rss>

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Test fixture: a single RSS 2.0 item with a title and a date but NO <link>
element.
NOTE(review): presumably the asset loaded by noLinkTest
(rss_items_no_link.xml) - confirm.
-->
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
<channel>
<item>
<title>title</title>
<dc:creator><![CDATA[creator]]></dc:creator>
<dc:date>2020-08-05T14:03:48Z</dc:date>
<category><![CDATA[Category 1]]></category>
<category><![CDATA[Category 2]]></category>
<category><![CDATA[Category 3]]></category>
<category><![CDATA[Category 4]]></category>
<category><![CDATA[Category 5]]></category>
<category><![CDATA[Category 6]]></category>
<guid isPermaLink="false">guid</guid>
<description><![CDATA[description]]></description>
<content:encoded><![CDATA[content:encoded]]></content:encoded>
</item>
</channel>
</rss>

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Test fixture: a single RSS 2.0 item with a link and a date but NO <title>
element.
NOTE(review): presumably the asset loaded by noTitleTest
(rss_items_no_title.xml) - confirm.
-->
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
<channel>
<item>
<link>link</link>
<dc:creator><![CDATA[creator]]></dc:creator>
<dc:date>2020-08-05T14:03:48Z</dc:date>
<category><![CDATA[Category 1]]></category>
<category><![CDATA[Category 2]]></category>
<category><![CDATA[Category 3]]></category>
<category><![CDATA[Category 4]]></category>
<category><![CDATA[Category 5]]></category>
<category><![CDATA[Category 6]]></category>
<guid isPermaLink="false">guid</guid>
<description><![CDATA[description]]></description>
<content:encoded><![CDATA[content:encoded]]></content:encoded>
</item>
</channel>
</rss>

View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Test fixture: a single RSS 2.0 item that uses namespaced alternatives to the
core RSS elements: <dc:creator> for the author, <dc:date> for the date and
<content:encoded> for the content.
The item contains two <guid> elements with the same text, one plain and one
with isPermaLink="false".
NOTE(review): presumably the asset loaded by otherNamespacesTest
(rss_items_other_namespaces.xml) - confirm; also confirm the duplicated
<guid> is intentional.
-->
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
<channel>
<item>
<title>title</title>
<link>link</link>
<guid>guid</guid>
<dc:creator><![CDATA[creator]]></dc:creator>
<dc:date>2020-08-05T14:03:48Z</dc:date>
<category><![CDATA[Category 1]]></category>
<category><![CDATA[Category 2]]></category>
<category><![CDATA[Category 3]]></category>
<category><![CDATA[Category 4]]></category>
<category><![CDATA[Category 5]]></category>
<category><![CDATA[Category 6]]></category>
<guid isPermaLink="false">guid</guid>
<description><![CDATA[description]]></description>
<content:encoded><![CDATA[content:encoded]]></content:encoded>
</item>
</channel>
</rss>

View File

@ -9,6 +9,7 @@
<link>https://www.bbc.com/news/world-africa-53887947</link> <link>https://www.bbc.com/news/world-africa-53887947</link>
<pubDate>Tue, 25 Aug 2020 17:15:49 +0000</pubDate> <pubDate>Tue, 25 Aug 2020 17:15:49 +0000</pubDate>
<comments>https://news.ycombinator.com/item?id=24273602</comments> <comments>https://news.ycombinator.com/item?id=24273602</comments>
<author>Author 1</author>
<description><![CDATA[<a href="https://news.ycombinator.com/item?id=24273602">Comments</a>]]></description> <description><![CDATA[<a href="https://news.ycombinator.com/item?id=24273602">Comments</a>]]></description>
</item> </item>
<item> <item>

View File

@ -0,0 +1,82 @@
package com.readrops.api.localfeed.rss
import android.content.Context
import androidx.test.ext.junit.runners.AndroidJUnit4
import androidx.test.platform.app.InstrumentationRegistry
import com.readrops.api.utils.DateUtils
import com.readrops.api.utils.ParseException
import junit.framework.TestCase.assertEquals
import junit.framework.TestCase.assertNotNull
import org.junit.Assert
import org.junit.Test
import org.junit.runner.RunWith
/**
 * Instrumented tests for [RSSItemsAdapter].
 *
 * Each test opens an XML fixture from the instrumentation context's assets,
 * runs it through [RSSItemsAdapter.fromXml] and checks either the parsed
 * [com.readrops.db.entities.Item] fields or that a [ParseException] is raised.
 *
 * All `assertEquals` calls use the JUnit `(expected, actual)` order so that a
 * failing assertion reports the values the right way round.
 */
@RunWith(AndroidJUnit4::class)
class RSSItemsAdapterTest {

    private val context: Context = InstrumentationRegistry.getInstrumentation().context

    private val adapter = RSSItemsAdapter()

    /** A regular feed: checks the item count and every core field of the first item. */
    @Test
    fun normalCasesTest() {
        val stream = context.resources.assets.open("localfeed/rss_feed.xml")

        val items = adapter.fromXml(stream)
        assertEquals(7, items.size)

        val item = items[0]
        assertEquals("Africa declared free of wild polio", item.title)
        assertEquals("https://www.bbc.com/news/world-africa-53887947", item.link)
        assertEquals(DateUtils.stringToLocalDateTime("Tue, 25 Aug 2020 17:15:49 +0000"), item.pubDate)
        assertEquals("Author 1", item.author)
        assertNotNull(item.description)
        assertEquals("https://www.bbc.com/news/world-africa-53887947", item.guid)
    }

    /** `dc:creator`, `dc:date` and `content:encoded` must feed author, pubDate and content. */
    @Test
    fun otherNamespacesTest() {
        val stream = context.resources.assets.open("localfeed/rss/rss_items_other_namespaces.xml")
        val item = adapter.fromXml(stream)[0]

        assertEquals("guid", item.guid)
        assertEquals("creator", item.author)
        assertEquals(DateUtils.stringToLocalDateTime("2020-08-05T14:03:48Z"), item.pubDate)
        assertEquals("content:encoded", item.content)
    }

    // In the three tests below, the first argument of assertThrows is the message JUnit
    // reports when NO ParseException is thrown; it is not compared with the exception text.

    /** An item without a title must be rejected. */
    @Test
    fun noTitleTest() {
        val stream = context.resources.assets.open("localfeed/rss/rss_items_no_title.xml")

        Assert.assertThrows("Item title can't be null", ParseException::class.java) { adapter.fromXml(stream) }
    }

    /** An item without a link must be rejected. */
    @Test
    fun noLinkTest() {
        val stream = context.resources.assets.open("localfeed/rss/rss_items_no_link.xml")

        Assert.assertThrows("Item link can't be null", ParseException::class.java) { adapter.fromXml(stream) }
    }

    /** An item without a date must be rejected. */
    @Test
    fun noDateTest() {
        val stream = context.resources.assets.open("localfeed/rss/rss_items_no_date.xml")

        Assert.assertThrows("Item date can't be null", ParseException::class.java) { adapter.fromXml(stream) }
    }

    /** The image link is expected to be the enclosure URL. */
    @Test
    fun enclosureTest() {
        val stream = context.resources.assets.open("localfeed/rss/rss_items_enclosure.xml")
        val item = adapter.fromXml(stream)[0]

        assertEquals("https://image1.jpg", item.imageLink)
    }

    /** The image link is expected to be the `media:content` URL. */
    @Test
    fun mediaContentTest() {
        val stream = context.resources.assets.open("localfeed/rss/rss_items_media_content.xml")
        val item = adapter.fromXml(stream)[0]

        assertEquals("https://image2.jpg", item.imageLink)
    }
}

View File

@ -97,6 +97,13 @@ class LocalRSSDataSource(private val httpClient: OkHttpClient) {
} }
private fun parseItems(inputStream: InputStream, type: LocalRSSHelper.RSSType): List<Item> { private fun parseItems(inputStream: InputStream, type: LocalRSSHelper.RSSType): List<Item> {
return listOf() return if (type != LocalRSSHelper.RSSType.JSONFEED) {
val adapter = XmlAdapter.xmlItemsAdapterFactory(type)
//adapter.fromXml(inputStream)
listOf()
} else {
listOf()
}
} }
} }

View File

@ -1,7 +1,9 @@
package com.readrops.api.localfeed package com.readrops.api.localfeed
import com.readrops.api.localfeed.rss.RSSFeedAdapter import com.readrops.api.localfeed.rss.RSSFeedAdapter
import com.readrops.api.localfeed.rss.RSSItemsAdapter
import com.readrops.db.entities.Feed import com.readrops.db.entities.Feed
import com.readrops.db.entities.Item
import java.io.InputStream import java.io.InputStream
interface XmlAdapter<T> { interface XmlAdapter<T> {
@ -15,6 +17,13 @@ interface XmlAdapter<T> {
else -> throw Exception("Unknown RSS type : $type") else -> throw Exception("Unknown RSS type : $type")
} }
} }
fun xmlItemsAdapterFactory(type: LocalRSSHelper.RSSType): XmlAdapter<List<Item>> {
return when (type) {
LocalRSSHelper.RSSType.RSS_2 -> RSSItemsAdapter()
else -> throw Exception("Unknown RSS type : $type")
}
}
} }
} }

View File

@ -0,0 +1,82 @@
package com.readrops.api.localfeed.rss
import com.gitlab.mvysny.konsumexml.*
import com.readrops.api.localfeed.XmlAdapter
import com.readrops.api.utils.*
import com.readrops.db.entities.Item
import java.io.InputStream
/**
 * [XmlAdapter] that reads the `<item>` elements found under `rss/channel`
 * and converts each of them into an [Item].
 *
 * For every item, `title`, `link` and a date (`pubDate` or `dc:date`) are
 * mandatory; `guid` falls back to the link when absent, and the image link is
 * taken from the first accepted `<enclosure>`, or else from the first accepted
 * `<media:content>`.
 */
class RSSItemsAdapter : XmlAdapter<List<Item>> {

    /**
     * Parses [inputStream] and returns the items in document order.
     *
     * The underlying parser is closed whether parsing succeeds or fails.
     *
     * @param inputStream XML document to read
     * @return the parsed items
     * @throws ParseException if the parser reports a [KonsumerException], or if an item
     * lacks a title, a link or a date
     */
    override fun fromXml(inputStream: InputStream): List<Item> {
        val konsume = inputStream.konsumeXml()
        val items = mutableListOf<Item>()

        return try {
            konsume.child("rss") {
                child("channel") {
                    allChildrenAutoIgnore("item") {
                        val enclosures = arrayListOf<String>()
                        val mediaContents = arrayListOf<String>()

                        val item = Item().apply {
                            allChildrenAutoIgnore(names) {
                                when (tagName) {
                                    "title" -> title = nonNullText()
                                    "link" -> link = nonNullText()
                                    "author" -> author = nullableText()
                                    "dc:creator" -> author = nullableText()
                                    "pubDate" -> pubDate = DateUtils.stringToLocalDateTime(nonNullText())
                                    "dc:date" -> pubDate = DateUtils.stringToLocalDateTime(nonNullText())
                                    "guid" -> guid = nullableText()
                                    "description" -> description = nullableText()
                                    "content:encoded" -> content = nullableText()
                                    "enclosure" -> parseEnclosure(this, enclosures)
                                    "media:content" -> parseMediaContent(this, mediaContents)
                                    // `names` selects elements by local name only, so an element
                                    // with an unexpected prefix (e.g. media:title, media:description)
                                    // ends up here: drop its content instead of leaving it unread.
                                    else -> skipContents()
                                }
                            }
                        }

                        validateItem(item)
                        if (item.guid == null) item.guid = item.link

                        // Enclosures take precedence over media:content; imageLink stays
                        // untouched when neither provided an image.
                        (enclosures.firstOrNull() ?: mediaContents.firstOrNull())?.let { item.imageLink = it }

                        items += item
                    }
                }
            }

            items
        } catch (e: KonsumerException) {
            throw ParseException(e.message)
        } finally {
            // Release the parser on every path, including validation failures.
            konsume.close()
        }
    }

    /**
     * Adds the `url` attribute of an `<enclosure>` to [enclosures] when its `type`
     * attribute is present and accepted by [LibUtils.isMimeImage].
     */
    private fun parseEnclosure(konsume: Konsumer, enclosures: MutableList<String>) {
        val type = konsume.attributes.getValueOpt("type")

        if (type != null && LibUtils.isMimeImage(type)) {
            enclosures += konsume.attributes["url"]
        }
    }

    /**
     * Adds the `url` attribute of a `<media:content>` to [mediaContents] when its
     * `medium` attribute is present and accepted by [LibUtils.isMimeImage], then
     * skips whatever the element contains.
     */
    private fun parseMediaContent(konsume: Konsumer, mediaContents: MutableList<String>) {
        // Attributes are read before the content is skipped.
        val medium = konsume.attributes.getValueOpt("medium")

        if (medium != null && LibUtils.isMimeImage(medium)) {
            mediaContents += konsume.attributes["url"]
        }

        // media:content may wrap sub-elements (media:title, media:thumbnail, ...).
        konsume.skipContents()
    }

    /**
     * Checks the mandatory fields of [item].
     *
     * @throws ParseException naming the first missing field among title, link and date
     */
    private fun validateItem(item: Item) {
        when {
            item.title == null -> throw ParseException("Item title can't be null")
            item.link == null -> throw ParseException("Item link can't be null")
            item.pubDate == null -> throw ParseException("Item date can't be null")
        }
    }

    companion object {
        /** Local names (namespace prefix excluded) of the item children handed to the parser block. */
        val names = Names.of("title", "link", "author", "creator", "pubDate", "date",
                "guid", "description", "encoded", "enclosure", "content")
    }
}