mirror of https://github.com/readrops/Readrops.git
Add adapter for RSS item parsing
This commit is contained in:
parent
4bf56b8f7f
commit
20e814f36d
|
@ -0,0 +1,25 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/"
|
||||
version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<title>title</title>
|
||||
<link>link</link>
|
||||
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||
<category><![CDATA[Category 1]]></category>
|
||||
<category><![CDATA[Category 2]]></category>
|
||||
<category><![CDATA[Category 3]]></category>
|
||||
<category><![CDATA[Category 4]]></category>
|
||||
<category><![CDATA[Category 5]]></category>
|
||||
<category><![CDATA[Category 6]]></category>
|
||||
<guid isPermaLink="false">guid</guid>
|
||||
|
||||
<description><![CDATA[description]]></description>
|
||||
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||
<enclosure length="0" type="image/jpg" url="https://image1.jpg" />
|
||||
<media:content medium="image" url="https://image2.jpg"/>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -0,0 +1,24 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/"
|
||||
version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<title>title</title>
|
||||
<link>link</link>
|
||||
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||
<category><![CDATA[Category 1]]></category>
|
||||
<category><![CDATA[Category 2]]></category>
|
||||
<category><![CDATA[Category 3]]></category>
|
||||
<category><![CDATA[Category 4]]></category>
|
||||
<category><![CDATA[Category 5]]></category>
|
||||
<category><![CDATA[Category 6]]></category>
|
||||
<guid isPermaLink="false">guid</guid>
|
||||
|
||||
<description><![CDATA[description]]></description>
|
||||
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||
<media:content medium="image" url="https://image2.jpg" />
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -0,0 +1,21 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<title>title</title>
|
||||
<link>link</link>
|
||||
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||
<category><![CDATA[Category 1]]></category>
|
||||
<category><![CDATA[Category 2]]></category>
|
||||
<category><![CDATA[Category 3]]></category>
|
||||
<category><![CDATA[Category 4]]></category>
|
||||
<category><![CDATA[Category 5]]></category>
|
||||
<category><![CDATA[Category 6]]></category>
|
||||
<guid isPermaLink="false">guid</guid>
|
||||
|
||||
<description><![CDATA[description]]></description>
|
||||
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -0,0 +1,21 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<title>title</title>
|
||||
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||
<category><![CDATA[Category 1]]></category>
|
||||
<category><![CDATA[Category 2]]></category>
|
||||
<category><![CDATA[Category 3]]></category>
|
||||
<category><![CDATA[Category 4]]></category>
|
||||
<category><![CDATA[Category 5]]></category>
|
||||
<category><![CDATA[Category 6]]></category>
|
||||
<guid isPermaLink="false">guid</guid>
|
||||
|
||||
<description><![CDATA[description]]></description>
|
||||
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -0,0 +1,21 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<link>link</link>
|
||||
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||
<category><![CDATA[Category 1]]></category>
|
||||
<category><![CDATA[Category 2]]></category>
|
||||
<category><![CDATA[Category 3]]></category>
|
||||
<category><![CDATA[Category 4]]></category>
|
||||
<category><![CDATA[Category 5]]></category>
|
||||
<category><![CDATA[Category 6]]></category>
|
||||
<guid isPermaLink="false">guid</guid>
|
||||
|
||||
<description><![CDATA[description]]></description>
|
||||
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -0,0 +1,23 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<title>title</title>
|
||||
<link>link</link>
|
||||
<guid>guid</guid>
|
||||
<dc:creator><![CDATA[creator]]></dc:creator>
|
||||
<dc:date>2020-08-05T14:03:48Z</dc:date>
|
||||
<category><![CDATA[Category 1]]></category>
|
||||
<category><![CDATA[Category 2]]></category>
|
||||
<category><![CDATA[Category 3]]></category>
|
||||
<category><![CDATA[Category 4]]></category>
|
||||
<category><![CDATA[Category 5]]></category>
|
||||
<category><![CDATA[Category 6]]></category>
|
||||
<guid isPermaLink="false">guid</guid>
|
||||
|
||||
<description><![CDATA[description]]></description>
|
||||
<content:encoded><![CDATA[content:encoded]]></content:encoded>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -9,6 +9,7 @@
|
|||
<link>https://www.bbc.com/news/world-africa-53887947</link>
|
||||
<pubDate>Tue, 25 Aug 2020 17:15:49 +0000</pubDate>
|
||||
<comments>https://news.ycombinator.com/item?id=24273602</comments>
|
||||
<author>Author 1</author>
|
||||
<description><![CDATA[<a href="https://news.ycombinator.com/item?id=24273602">Comments</a>]]></description>
|
||||
</item>
|
||||
<item>
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
package com.readrops.api.localfeed.rss
|
||||
|
||||
import android.content.Context
|
||||
import androidx.test.ext.junit.runners.AndroidJUnit4
|
||||
import androidx.test.platform.app.InstrumentationRegistry
|
||||
import com.readrops.api.utils.DateUtils
|
||||
import com.readrops.api.utils.ParseException
|
||||
import junit.framework.TestCase.assertEquals
|
||||
import junit.framework.TestCase.assertNotNull
|
||||
import org.junit.Assert
|
||||
import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
|
||||
/**
 * Instrumented tests for [RSSItemsAdapter], run against the XML fixtures stored in the test assets.
 */
@RunWith(AndroidJUnit4::class)
class RSSItemsAdapterTest {

    private val context: Context = InstrumentationRegistry.getInstrumentation().context

    private val adapter = RSSItemsAdapter()

    @Test
    fun normalCasesTest() {
        val items = parseAsset("localfeed/rss_feed.xml")
        assertEquals(7, items.size)

        val item = items[0]

        assertEquals("Africa declared free of wild polio", item.title)
        assertEquals("https://www.bbc.com/news/world-africa-53887947", item.link)
        assertEquals(DateUtils.stringToLocalDateTime("Tue, 25 Aug 2020 17:15:49 +0000"), item.pubDate)
        assertEquals("Author 1", item.author)
        assertNotNull(item.description)
        assertEquals("https://www.bbc.com/news/world-africa-53887947", item.guid)
    }

    @Test
    fun otherNamespacesTest() {
        val item = parseAsset("localfeed/rss/rss_items_other_namespaces.xml")[0]

        assertEquals("guid", item.guid)
        assertEquals("creator", item.author)
        assertEquals(DateUtils.stringToLocalDateTime("2020-08-05T14:03:48Z"), item.pubDate)
        assertEquals("content:encoded", item.content)
    }

    @Test
    fun noTitleTest() {
        assertParseFailure("localfeed/rss/rss_items_no_title.xml", "Item title can't be null")
    }

    @Test
    fun noLinkTest() {
        assertParseFailure("localfeed/rss/rss_items_no_link.xml", "Item link can't be null")
    }

    @Test
    fun noDateTest() {
        assertParseFailure("localfeed/rss/rss_items_no_date.xml", "Item date can't be null")
    }

    @Test
    fun enclosureTest() {
        val item = parseAsset("localfeed/rss/rss_items_enclosure.xml")[0]

        assertEquals("https://image1.jpg", item.imageLink)
    }

    @Test
    fun mediaContentTest() {
        val item = parseAsset("localfeed/rss/rss_items_media_content.xml")[0]

        assertEquals("https://image2.jpg", item.imageLink)
    }

    /**
     * Opens the asset at [path], runs it through the adapter and closes the stream afterwards,
     * whether parsing succeeded or threw.
     */
    private fun parseAsset(path: String) =
        context.resources.assets.open(path).use { stream -> adapter.fromXml(stream) }

    /**
     * Asserts that parsing the asset at [path] fails with a [ParseException] carrying [expectedMessage].
     *
     * The message is checked on the thrown exception itself: the String overload of
     * `Assert.assertThrows` only sets the text reported when nothing is thrown, so it cannot
     * distinguish one validation failure from another.
     */
    private fun assertParseFailure(path: String, expectedMessage: String) {
        val exception = Assert.assertThrows(ParseException::class.java) { parseAsset(path) }

        assertEquals(expectedMessage, exception.message)
    }
}
|
|
@ -97,6 +97,13 @@ class LocalRSSDataSource(private val httpClient: OkHttpClient) {
|
|||
}
|
||||
|
||||
/**
 * Parses the items contained in [inputStream] for a feed of the given [type].
 *
 * Item parsing is not wired in yet: both branches still return an empty list. For every
 * non-JSON type the matching items adapter is already resolved through
 * [XmlAdapter.xmlItemsAdapterFactory], which throws for a type it has no adapter for.
 *
 * The previous unconditional `return listOf()` placed before this expression has been removed;
 * it made everything below it unreachable.
 */
private fun parseItems(inputStream: InputStream, type: LocalRSSHelper.RSSType): List<Item> {
    return if (type != LocalRSSHelper.RSSType.JSONFEED) {
        val adapter = XmlAdapter.xmlItemsAdapterFactory(type)

        // TODO enable once the items adapters are ready to be used by the data source
        //adapter.fromXml(inputStream)
        listOf()
    } else {
        // JSON Feed items are not handled here yet
        listOf()
    }
}
|
||||
}
|
|
@ -1,7 +1,9 @@
|
|||
package com.readrops.api.localfeed
|
||||
|
||||
import com.readrops.api.localfeed.rss.RSSFeedAdapter
|
||||
import com.readrops.api.localfeed.rss.RSSItemsAdapter
|
||||
import com.readrops.db.entities.Feed
|
||||
import com.readrops.db.entities.Item
|
||||
import java.io.InputStream
|
||||
|
||||
interface XmlAdapter<T> {
|
||||
|
@ -15,6 +17,13 @@ interface XmlAdapter<T> {
|
|||
else -> throw Exception("Unknown RSS type : $type")
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the adapter able to parse the items of a feed of the given [type].
 *
 * Only [LocalRSSHelper.RSSType.RSS_2] is mapped to an adapter here; any other value makes the
 * function throw an [Exception] naming the unsupported type.
 */
fun xmlItemsAdapterFactory(type: LocalRSSHelper.RSSType): XmlAdapter<List<Item>> =
    if (type == LocalRSSHelper.RSSType.RSS_2) {
        RSSItemsAdapter()
    } else {
        throw Exception("Unknown RSS type : $type")
    }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
package com.readrops.api.localfeed.rss
|
||||
|
||||
import com.gitlab.mvysny.konsumexml.*
|
||||
import com.readrops.api.localfeed.XmlAdapter
|
||||
import com.readrops.api.utils.*
|
||||
import com.readrops.db.entities.Item
|
||||
import java.io.InputStream
|
||||
|
||||
/**
 * [XmlAdapter] that reads the `item` elements found under `rss` > `channel` and turns each of
 * them into an [Item].
 */
class RSSItemsAdapter : XmlAdapter<List<Item>> {

    /**
     * Parses every `item` element of the RSS document read from [inputStream].
     *
     * The konsumer created on [inputStream] is closed on every exit path, including when parsing
     * or validation fails.
     *
     * @return the parsed items, in document order
     * @throws ParseException when the XML reader reports a [KonsumerException], or when an item
     * has no title, link or date
     */
    override fun fromXml(inputStream: InputStream): List<Item> {
        val konsume = inputStream.konsumeXml()
        val items = mutableListOf<Item>()

        return try {
            // use {} replaces the former trailing close() call, which was skipped on any exception
            konsume.use { root ->
                root.child("rss") {
                    child("channel") {
                        allChildrenAutoIgnore("item") {
                            items += parseItem(this)
                        }
                    }
                }
            }

            items
        } catch (e: KonsumerException) {
            throw ParseException(e.message)
        }
    }

    /**
     * Builds one [Item] from the `item` element [konsume] is positioned on.
     *
     * When no guid was read, the link is used as guid. The image link is the first accepted
     * enclosure url, or else the first accepted media:content url; it is left untouched when
     * neither exists.
     */
    private fun parseItem(konsume: Konsumer): Item {
        val enclosures = arrayListOf<String>()
        val mediaContents = arrayListOf<String>()

        val item = Item().apply {
            // children are selected by local name (see names) and dispatched on their tag name
            konsume.allChildrenAutoIgnore(names) {
                when (tagName) {
                    "title" -> title = nonNullText()
                    "link" -> link = nonNullText()
                    "author", "dc:creator" -> author = nullableText()
                    "pubDate", "dc:date" -> pubDate = DateUtils.stringToLocalDateTime(nonNullText())
                    "guid" -> guid = nullableText()
                    "description" -> description = nullableText()
                    "content:encoded" -> content = nullableText()
                    "enclosure" -> parseEnclosure(this, enclosures)
                    "media:content" -> parseMediaContent(this, mediaContents)
                }
            }
        }

        validateItem(item)
        if (item.guid == null) item.guid = item.link

        // enclosures take precedence over media:content entries
        (enclosures.firstOrNull() ?: mediaContents.firstOrNull())?.let { item.imageLink = it }

        return item
    }

    /**
     * Adds the `url` attribute of an `enclosure` element to [enclosures] when the element has a
     * `type` attribute accepted by [LibUtils.isMimeImage].
     */
    private fun parseEnclosure(konsume: Konsumer, enclosures: MutableList<String>) {
        val type = konsume.attributes.getValueOpt("type")

        if (type != null && LibUtils.isMimeImage(type)) {
            enclosures += konsume.attributes["url"]
        }
    }

    /**
     * Adds the `url` attribute of a `media:content` element to [mediaContents] when the element
     * has a `medium` attribute accepted by [LibUtils.isMimeImage].
     */
    private fun parseMediaContent(konsume: Konsumer, mediaContents: MutableList<String>) {
        val medium = konsume.attributes.getValueOpt("medium")

        if (medium != null && LibUtils.isMimeImage(medium)) {
            mediaContents += konsume.attributes["url"]
        }
    }

    /**
     * Checks the mandatory fields of [item], in the order title, link, date.
     *
     * @throws ParseException for the first of these fields found to be null
     */
    private fun validateItem(item: Item) {
        when {
            item.title == null -> throw ParseException("Item title can't be null")
            item.link == null -> throw ParseException("Item link can't be null")
            item.pubDate == null -> throw ParseException("Item date can't be null")
        }
    }

    companion object {
        /** Names of the `item` children that are read; any other child is ignored. */
        val names = Names.of("title", "link", "author", "creator", "pubDate", "date",
                "guid", "description", "encoded", "enclosure", "content")
    }
}
|
Loading…
Reference in New Issue