Add xml content type detection for RSS1

This commit is contained in:
Shinokuni 2020-09-26 13:32:30 +02:00
parent a4d6139848
commit bcedd025ba
2 changed files with 19 additions and 8 deletions

View File

@@ -1,7 +1,6 @@
package com.readrops.api.localfeed package com.readrops.api.localfeed
import java.io.InputStream import java.io.InputStream
import java.util.regex.Pattern
object LocalRSSHelper { object LocalRSSHelper {
@@ -11,8 +10,8 @@ object LocalRSSHelper {
private const val JSONFEED_CONTENT_TYPE = "application/feed+json" private const val JSONFEED_CONTENT_TYPE = "application/feed+json"
private const val JSON_CONTENT_TYPE = "application/json" private const val JSON_CONTENT_TYPE = "application/json"
private const val RSS_1_REGEX = "<rdf:RDF.*xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\""
private const val RSS_2_REGEX = "rss.*version=\"2.0\"" private const val RSS_2_REGEX = "rss.*version=\"2.0\""
private const val ATOM_REGEX = "<feed.* xmlns=\"http://www.w3.org/2005/Atom\"" private const val ATOM_REGEX = "<feed.* xmlns=\"http://www.w3.org/2005/Atom\""
/** /**
@@ -34,16 +33,17 @@ object LocalRSSHelper {
fun getRSSContentType(content: InputStream): RSSType { fun getRSSContentType(content: InputStream): RSSType {
val stringBuffer = StringBuffer() val stringBuffer = StringBuffer()
val reader = content.bufferedReader() val reader = content.bufferedReader()
var type = RSSType.UNKNOWN
// we get the first 10 lines which should be sufficient to get the type, // we get the first 10 lines which should be sufficient to get the type,
// otherwise iterating over the whole file could be too slow // otherwise iterating over the whole file could be too slow
for (i in 0..9) stringBuffer.append(reader.readLine()) for (i in 0..9) stringBuffer.append(reader.readLine())
if (Pattern.compile(RSS_2_REGEX).matcher(stringBuffer.toString()).find()) { val string = stringBuffer.toString()
type = RSSType.RSS_2 val type = when {
} else if (Pattern.compile(ATOM_REGEX).matcher(stringBuffer.toString()).find()) { RSS_1_REGEX.toRegex().containsMatchIn(string) -> RSSType.RSS_1
type = RSSType.ATOM RSS_2_REGEX.toRegex().containsMatchIn(string) -> RSSType.RSS_2
ATOM_REGEX.toRegex().containsMatchIn(string) -> RSSType.ATOM
else -> RSSType.UNKNOWN
} }
reader.close() reader.close()

View File

@@ -30,9 +30,20 @@ class LocalRSSHelperTest {
LocalRSSHelper.RSSType.UNKNOWN) LocalRSSHelper.RSSType.UNKNOWN)
} }
@Test
fun rss1ContentTest() {
assertEquals(LocalRSSHelper.getRSSContentType(ByteArrayInputStream(
"""<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss1full.xsl"?>
<?xml-stylesheet type="text/css" media="screen" href="http://rss.slashdot.org/~d/styles/itemcontent.css"?>
<rdf:RDF xmlns:admin="http://webns.net/mvcb/" xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/"
""".trimIndent().toByteArray()
)), LocalRSSHelper.RSSType.RSS_1)
}
@Test @Test
fun rssContentTest() { fun rss2ContentTest() {
assertEquals(LocalRSSHelper.getRSSContentType(ByteArrayInputStream( assertEquals(LocalRSSHelper.getRSSContentType(ByteArrayInputStream(
"""<rss """<rss
xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:content="http://purl.org/rss/1.0/modules/content/"