mirror of https://github.com/readrops/Readrops.git
Add adapter for RSS item parsing
This commit is contained in:
parent
4bf56b8f7f
commit
20e814f36d
|
@ -0,0 +1,25 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/"
|
||||||
|
version="2.0">
|
||||||
|
<channel>
|
||||||
|
<item>
|
||||||
|
<title>title</title>
|
||||||
|
<link>link</link>
|
||||||
|
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||||
|
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||||
|
<category><![CDATA[Category 1]]></category>
|
||||||
|
<category><![CDATA[Category 2]]></category>
|
||||||
|
<category><![CDATA[Category 3]]></category>
|
||||||
|
<category><![CDATA[Category 4]]></category>
|
||||||
|
<category><![CDATA[Category 5]]></category>
|
||||||
|
<category><![CDATA[Category 6]]></category>
|
||||||
|
<guid isPermaLink="false">guid</guid>
|
||||||
|
|
||||||
|
<description><![CDATA[description]]></description>
|
||||||
|
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||||
|
<enclosure length="0" type="image/jpg" url="https://image1.jpg" />
|
||||||
|
<media:content medium="image" url="https://image2.jpg"/>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
|
@ -0,0 +1,24 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/"
|
||||||
|
version="2.0">
|
||||||
|
<channel>
|
||||||
|
<item>
|
||||||
|
<title>title</title>
|
||||||
|
<link>link</link>
|
||||||
|
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||||
|
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||||
|
<category><![CDATA[Category 1]]></category>
|
||||||
|
<category><![CDATA[Category 2]]></category>
|
||||||
|
<category><![CDATA[Category 3]]></category>
|
||||||
|
<category><![CDATA[Category 4]]></category>
|
||||||
|
<category><![CDATA[Category 5]]></category>
|
||||||
|
<category><![CDATA[Category 6]]></category>
|
||||||
|
<guid isPermaLink="false">guid</guid>
|
||||||
|
|
||||||
|
<description><![CDATA[description]]></description>
|
||||||
|
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||||
|
<media:content medium="image" url="https://image2.jpg" />
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
|
@ -0,0 +1,21 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||||
|
<channel>
|
||||||
|
<item>
|
||||||
|
<title>title</title>
|
||||||
|
<link>link</link>
|
||||||
|
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||||
|
<category><![CDATA[Category 1]]></category>
|
||||||
|
<category><![CDATA[Category 2]]></category>
|
||||||
|
<category><![CDATA[Category 3]]></category>
|
||||||
|
<category><![CDATA[Category 4]]></category>
|
||||||
|
<category><![CDATA[Category 5]]></category>
|
||||||
|
<category><![CDATA[Category 6]]></category>
|
||||||
|
<guid isPermaLink="false">guid</guid>
|
||||||
|
|
||||||
|
<description><![CDATA[description]]></description>
|
||||||
|
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
|
@ -0,0 +1,21 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||||
|
<channel>
|
||||||
|
<item>
|
||||||
|
<title>title</title>
|
||||||
|
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||||
|
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||||
|
<category><![CDATA[Category 1]]></category>
|
||||||
|
<category><![CDATA[Category 2]]></category>
|
||||||
|
<category><![CDATA[Category 3]]></category>
|
||||||
|
<category><![CDATA[Category 4]]></category>
|
||||||
|
<category><![CDATA[Category 5]]></category>
|
||||||
|
<category><![CDATA[Category 6]]></category>
|
||||||
|
<guid isPermaLink="false">guid</guid>
|
||||||
|
|
||||||
|
<description><![CDATA[description]]></description>
|
||||||
|
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
|
@ -0,0 +1,21 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||||
|
<channel>
|
||||||
|
<item>
|
||||||
|
<link>link</link>
|
||||||
|
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||||
|
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||||
|
<category><![CDATA[Category 1]]></category>
|
||||||
|
<category><![CDATA[Category 2]]></category>
|
||||||
|
<category><![CDATA[Category 3]]></category>
|
||||||
|
<category><![CDATA[Category 4]]></category>
|
||||||
|
<category><![CDATA[Category 5]]></category>
|
||||||
|
<category><![CDATA[Category 6]]></category>
|
||||||
|
<guid isPermaLink="false">guid</guid>
|
||||||
|
|
||||||
|
<description><![CDATA[description]]></description>
|
||||||
|
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
|
@ -0,0 +1,23 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||||
|
<channel>
|
||||||
|
<item>
|
||||||
|
<title>title</title>
|
||||||
|
<link>link</link>
|
||||||
|
<guid>guid</guid>
|
||||||
|
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||||
|
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||||
|
<category><![CDATA[Category 1]]></category>
|
||||||
|
<category><![CDATA[Category 2]]></category>
|
||||||
|
<category><![CDATA[Category 3]]></category>
|
||||||
|
<category><![CDATA[Category 4]]></category>
|
||||||
|
<category><![CDATA[Category 5]]></category>
|
||||||
|
<category><![CDATA[Category 6]]></category>
|
||||||
|
<guid isPermaLink="false">guid</guid>
|
||||||
|
|
||||||
|
<description><![CDATA[description]]></description>
|
||||||
|
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
|
@ -9,6 +9,7 @@
|
||||||
<link>https://www.bbc.com/news/world-africa-53887947</link>
|
<link>https://www.bbc.com/news/world-africa-53887947</link>
|
||||||
<pubDate>Tue, 25 Aug 2020 17:15:49 +0000</pubDate>
|
<pubDate>Tue, 25 Aug 2020 17:15:49 +0000</pubDate>
|
||||||
<comments>https://news.ycombinator.com/item?id=24273602</comments>
|
<comments>https://news.ycombinator.com/item?id=24273602</comments>
|
||||||
|
<author>Author 1</author>
|
||||||
<description><![CDATA[<a href="https://news.ycombinator.com/item?id=24273602">Comments</a>]]></description>
|
<description><![CDATA[<a href="https://news.ycombinator.com/item?id=24273602">Comments</a>]]></description>
|
||||||
</item>
|
</item>
|
||||||
<item>
|
<item>
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
package com.readrops.api.localfeed.rss
|
||||||
|
|
||||||
|
import android.content.Context
|
||||||
|
import androidx.test.ext.junit.runners.AndroidJUnit4
|
||||||
|
import androidx.test.platform.app.InstrumentationRegistry
|
||||||
|
import com.readrops.api.utils.DateUtils
|
||||||
|
import com.readrops.api.utils.ParseException
|
||||||
|
import junit.framework.TestCase.assertEquals
|
||||||
|
import junit.framework.TestCase.assertNotNull
|
||||||
|
import org.junit.Assert
|
||||||
|
import org.junit.Test
|
||||||
|
import org.junit.runner.RunWith
|
||||||
|
|
||||||
|
/**
 * Instrumented tests for [RSSItemsAdapter], driven by XML fixtures read from the test assets.
 */
@RunWith(AndroidJUnit4::class)
class RSSItemsAdapterTest {

    private val context: Context = InstrumentationRegistry.getInstrumentation().context

    private val adapter = RSSItemsAdapter()

    @Test
    fun normalCasesTest() {
        val items = parseAsset("localfeed/rss_feed.xml")
        assertEquals(7, items.size)

        val item = items[0]

        assertEquals("Africa declared free of wild polio", item.title)
        assertEquals("https://www.bbc.com/news/world-africa-53887947", item.link)
        assertEquals(DateUtils.stringToLocalDateTime("Tue, 25 Aug 2020 17:15:49 +0000"), item.pubDate)
        assertEquals("Author 1", item.author)
        assertNotNull(item.description)
        assertEquals("https://www.bbc.com/news/world-africa-53887947", item.guid)
    }

    @Test
    fun otherNamespacesTest() {
        val item = parseAsset("localfeed/rss/rss_items_other_namespaces.xml")[0]

        assertEquals("guid", item.guid)
        assertEquals("creator", item.author)
        assertEquals(DateUtils.stringToLocalDateTime("2020-08-05T14:03:48Z"), item.pubDate)
        assertEquals("content:encoded", item.content)
    }

    @Test
    fun noTitleTest() {
        assertParseFailure("localfeed/rss/rss_items_no_title.xml", "Item title can't be null")
    }

    @Test
    fun noLinkTest() {
        assertParseFailure("localfeed/rss/rss_items_no_link.xml", "Item link can't be null")
    }

    @Test
    fun noDateTest() {
        assertParseFailure("localfeed/rss/rss_items_no_date.xml", "Item date can't be null")
    }

    @Test
    fun enclosureTest() {
        val item = parseAsset("localfeed/rss/rss_items_enclosure.xml")[0]

        assertEquals("https://image1.jpg", item.imageLink)
    }

    @Test
    fun mediaContentTest() {
        val item = parseAsset("localfeed/rss/rss_items_media_content.xml")[0]

        assertEquals("https://image2.jpg", item.imageLink)
    }

    /**
     * Opens the asset at [path], runs it through the adapter and closes the stream afterwards,
     * whether parsing succeeded or threw.
     */
    private fun parseAsset(path: String) =
            context.resources.assets.open(path).use { adapter.fromXml(it) }

    /**
     * Asserts that parsing the asset at [path] throws a [ParseException] carrying [expectedMessage].
     *
     * The String overload of `assertThrows` only sets the assertion-failure message, so the
     * thrown exception's own message is compared explicitly here.
     */
    private fun assertParseFailure(path: String, expectedMessage: String) {
        val exception = Assert.assertThrows(ParseException::class.java) { parseAsset(path) }
        assertEquals(expectedMessage, exception.message)
    }
}
|
|
@ -97,6 +97,13 @@ class LocalRSSDataSource(private val httpClient: OkHttpClient) {
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun parseItems(inputStream: InputStream, type: LocalRSSHelper.RSSType): List<Item> {
|
private fun parseItems(inputStream: InputStream, type: LocalRSSHelper.RSSType): List<Item> {
|
||||||
return listOf()
|
return if (type != LocalRSSHelper.RSSType.JSONFEED) {
|
||||||
|
val adapter = XmlAdapter.xmlItemsAdapterFactory(type)
|
||||||
|
|
||||||
|
//adapter.fromXml(inputStream)
|
||||||
|
listOf()
|
||||||
|
} else {
|
||||||
|
listOf()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,7 +1,9 @@
|
||||||
package com.readrops.api.localfeed
|
package com.readrops.api.localfeed
|
||||||
|
|
||||||
import com.readrops.api.localfeed.rss.RSSFeedAdapter
|
import com.readrops.api.localfeed.rss.RSSFeedAdapter
|
||||||
|
import com.readrops.api.localfeed.rss.RSSItemsAdapter
|
||||||
import com.readrops.db.entities.Feed
|
import com.readrops.db.entities.Feed
|
||||||
|
import com.readrops.db.entities.Item
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
interface XmlAdapter<T> {
|
interface XmlAdapter<T> {
|
||||||
|
@ -15,6 +17,13 @@ interface XmlAdapter<T> {
|
||||||
else -> throw Exception("Unknown RSS type : $type")
|
else -> throw Exception("Unknown RSS type : $type")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the items adapter that matches the given feed [type].
 *
 * Only [LocalRSSHelper.RSSType.RSS_2] is handled; any other type throws an [Exception].
 */
fun xmlItemsAdapterFactory(type: LocalRSSHelper.RSSType): XmlAdapter<List<Item>> =
        when (type) {
            LocalRSSHelper.RSSType.RSS_2 -> RSSItemsAdapter()
            else -> throw Exception("Unknown RSS type : $type")
        }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
package com.readrops.api.localfeed.rss
|
||||||
|
|
||||||
|
import com.gitlab.mvysny.konsumexml.*
|
||||||
|
import com.readrops.api.localfeed.XmlAdapter
|
||||||
|
import com.readrops.api.utils.*
|
||||||
|
import com.readrops.db.entities.Item
|
||||||
|
import java.io.InputStream
|
||||||
|
|
||||||
|
/**
 * [XmlAdapter] that reads the `item` elements found under `rss/channel` and turns each one
 * into an [Item].
 */
class RSSItemsAdapter : XmlAdapter<List<Item>> {

    /**
     * Parses every `rss/channel/item` element of [inputStream].
     *
     * The underlying konsumer is closed on both the success and the failure path.
     *
     * @return the parsed items, in the order they were read
     * @throws ParseException if the XML can't be consumed, or if an item has no title,
     * link or date
     */
    override fun fromXml(inputStream: InputStream): List<Item> {
        val konsume = inputStream.konsumeXml()
        val items = mutableListOf<Item>()

        return try {
            konsume.child("rss") {
                child("channel") {
                    allChildrenAutoIgnore("item") {
                        items += parseItem(this)
                    }
                }
            }

            items
        } catch (e: KonsumerException) {
            throw ParseException(e.message)
        } finally {
            // Previously only reached on success, which leaked the konsumer on any parse failure.
            konsume.close()
        }
    }

    /**
     * Builds one [Item] from the `item` element [konsume] is currently positioned on,
     * validates it, then applies the guid and image link fallbacks.
     */
    private fun parseItem(konsume: Konsumer): Item {
        val enclosures = arrayListOf<String>()
        val mediaContents = arrayListOf<String>()
        val item = Item()

        konsume.allChildrenAutoIgnore(names) {
            when (tagName) {
                "title" -> item.title = nonNullText()
                "link" -> item.link = nonNullText()
                "author" -> item.author = nullableText()
                "dc:creator" -> item.author = nullableText()
                "pubDate" -> item.pubDate = DateUtils.stringToLocalDateTime(nonNullText())
                "dc:date" -> item.pubDate = DateUtils.stringToLocalDateTime(nonNullText())
                "guid" -> item.guid = nullableText()
                "description" -> item.description = nullableText()
                "content:encoded" -> item.content = nullableText()
                "enclosure" -> parseEnclosure(this, enclosures)
                "media:content" -> parseMediaContent(this, mediaContents)
            }
        }

        validateItem(item)

        // The link is guaranteed non-null after validation, so it can stand in for a missing guid.
        if (item.guid == null) item.guid = item.link

        // Enclosures take precedence over media:content; imageLink is left untouched if both are empty.
        (enclosures.firstOrNull() ?: mediaContents.firstOrNull())?.let { item.imageLink = it }

        return item
    }

    /**
     * Adds the `url` attribute of an `enclosure` element to [enclosures] when its `type`
     * attribute is present and accepted by [LibUtils.isMimeImage].
     */
    private fun parseEnclosure(konsume: Konsumer, enclosures: MutableList<String>) {
        val type = konsume.attributes.getValueOpt("type")

        if (type != null && LibUtils.isMimeImage(type))
            enclosures += konsume.attributes["url"]
    }

    /**
     * Adds the `url` attribute of a `media:content` element to [mediaContents] when its
     * `medium` attribute is present and accepted by [LibUtils.isMimeImage].
     */
    private fun parseMediaContent(konsume: Konsumer, mediaContents: MutableList<String>) {
        val medium = konsume.attributes.getValueOpt("medium")

        if (medium != null && LibUtils.isMimeImage(medium))
            mediaContents += konsume.attributes["url"]
    }

    /**
     * Checks the mandatory fields of [item].
     *
     * @throws ParseException if the title, link or date is null (checked in that order)
     */
    private fun validateItem(item: Item) {
        when {
            item.title == null -> throw ParseException("Item title can't be null")
            item.link == null -> throw ParseException("Item link can't be null")
            item.pubDate == null -> throw ParseException("Item date can't be null")
        }
    }

    companion object {
        // Element names consumed inside an item; NOTE(review): these appear to be local names
        // (no namespace prefix) while the dispatch above uses prefixed tag names — confirm
        // against the konsume-xml Names matching rules.
        val names = Names.of("title", "link", "author", "creator", "pubDate", "date",
                "guid", "description", "encoded", "enclosure", "content")
    }
}
|
Loading…
Reference in New Issue