Add xml content type detection for RSS1

This commit is contained in:
Shinokuni 2020-09-26 13:32:30 +02:00
parent a4d6139848
commit bcedd025ba
2 changed files with 19 additions and 8 deletions

View File

@ -1,7 +1,6 @@
package com.readrops.api.localfeed
import java.io.InputStream
import java.util.regex.Pattern
object LocalRSSHelper {
@ -11,8 +10,8 @@ object LocalRSSHelper {
// Content type strings identifying JSON-based feeds.
private const val JSONFEED_CONTENT_TYPE = "application/feed+json"
private const val JSON_CONTENT_TYPE = "application/json"
// Patterns searched for (not fully matched) in the first lines of a feed document to guess
// its format. `.` does not cross line breaks by default, so each pattern relies on the tested
// text being a single line.
// RSS 1.0: an <rdf:RDF root element followed by the RDF syntax namespace declaration.
private const val RSS_1_REGEX = "<rdf:RDF.*xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\""
// RSS 2.0: "rss" followed by a version="2.0" attribute.
private const val RSS_2_REGEX = "rss.*version=\"2.0\""
// Atom: a <feed element followed by the Atom default namespace declaration.
private const val ATOM_REGEX = "<feed.* xmlns=\"http://www.w3.org/2005/Atom\""
/**
@ -34,16 +33,17 @@ object LocalRSSHelper {
fun getRSSContentType(content: InputStream): RSSType {
val stringBuffer = StringBuffer()
val reader = content.bufferedReader()
var type = RSSType.UNKNOWN
// we get the first 10 lines which should be sufficient to get the type,
// otherwise iterating over the whole file could be too slow
for (i in 0..9) stringBuffer.append(reader.readLine())
if (Pattern.compile(RSS_2_REGEX).matcher(stringBuffer.toString()).find()) {
type = RSSType.RSS_2
} else if (Pattern.compile(ATOM_REGEX).matcher(stringBuffer.toString()).find()) {
type = RSSType.ATOM
val string = stringBuffer.toString()
val type = when {
RSS_1_REGEX.toRegex().containsMatchIn(string) -> RSSType.RSS_1
RSS_2_REGEX.toRegex().containsMatchIn(string) -> RSSType.RSS_2
ATOM_REGEX.toRegex().containsMatchIn(string) -> RSSType.ATOM
else -> RSSType.UNKNOWN
}
reader.close()

View File

@ -30,9 +30,20 @@ class LocalRSSHelperTest {
LocalRSSHelper.RSSType.UNKNOWN)
}
@Test
fun rss1ContentTest() {
    // Head of an RSS 1.0 (RDF) document: two stylesheet processing instructions, then the
    // <rdf:RDF root element with the rdf namespace declared on the line after the tag.
    // The raw string starts right after the opening quotes and its lines stay unindented,
    // so trimIndent() only drops the trailing blank line.
    val rss1Head = """<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss1full.xsl"?>
<?xml-stylesheet type="text/css" media="screen" href="http://rss.slashdot.org/~d/styles/itemcontent.css"?>
<rdf:RDF xmlns:admin="http://webns.net/mvcb/" xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/"
""".trimIndent()

    val detectedType = LocalRSSHelper.getRSSContentType(ByteArrayInputStream(rss1Head.toByteArray()))

    // Argument order kept as in the neighbouring tests of this file.
    assertEquals(detectedType, LocalRSSHelper.RSSType.RSS_1)
}
@Test
fun rssContentTest() {
fun rss2ContentTest() {
assertEquals(LocalRSSHelper.getRSSContentType(ByteArrayInputStream(
"""<rss
xmlns:content="http://purl.org/rss/1.0/modules/content/"