From 6459957168f68915fc807531c2d70784fac08197 Mon Sep 17 00:00:00 2001 From: Shinokuni Date: Mon, 18 Nov 2024 17:57:07 +0100 Subject: [PATCH] Improve icons quality for local account feeds --- .../readrops/api/localfeed/LocalRSSHelper.kt | 4 +- .../java/com/readrops/api/utils/HtmlParser.kt | 124 +-- .../com/readrops/api/utils/HtmlParserTest.kt | 53 +- api/src/test/resources/utils/file.html | 731 ++++-------------- .../app/repositories/LocalRSSRepository.kt | 2 +- 5 files changed, 229 insertions(+), 685 deletions(-) diff --git a/api/src/main/java/com/readrops/api/localfeed/LocalRSSHelper.kt b/api/src/main/java/com/readrops/api/localfeed/LocalRSSHelper.kt index 40bb5f6a..86aa3571 100644 --- a/api/src/main/java/com/readrops/api/localfeed/LocalRSSHelper.kt +++ b/api/src/main/java/com/readrops/api/localfeed/LocalRSSHelper.kt @@ -3,7 +3,6 @@ package com.readrops.api.localfeed import com.gitlab.mvysny.konsumexml.Konsumer import com.gitlab.mvysny.konsumexml.Names import com.readrops.api.utils.extensions.checkRoot -import java.io.InputStream object LocalRSSHelper { @@ -26,12 +25,11 @@ object LocalRSSHelper { RSS_1_CONTENT_TYPE -> RSSType.RSS_1 RSS_2_CONTENT_TYPE -> RSSType.RSS_2 ATOM_CONTENT_TYPE -> RSSType.ATOM - JSON_CONTENT_TYPE, JSONFEED_CONTENT_TYPE -> RSSType.JSONFEED + JSONFEED_CONTENT_TYPE -> RSSType.JSONFEED else -> RSSType.UNKNOWN } } - @JvmStatic fun isRSSType(type: String?): Boolean = if (type != null) getRSSType(type) != RSSType.UNKNOWN else false diff --git a/api/src/main/java/com/readrops/api/utils/HtmlParser.kt b/api/src/main/java/com/readrops/api/utils/HtmlParser.kt index 02005881..b7f25b17 100644 --- a/api/src/main/java/com/readrops/api/utils/HtmlParser.kt +++ b/api/src/main/java/com/readrops/api/utils/HtmlParser.kt @@ -2,10 +2,13 @@ package com.readrops.api.utils import android.nfc.FormatException import com.readrops.api.localfeed.LocalRSSHelper +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.withContext import okhttp3.OkHttpClient import okhttp3.Request import org.jsoup.Jsoup import org.jsoup.nodes.Document +import org.jsoup.nodes.Element data class ParsingResult( val url: String, @@ -14,75 +17,96 @@ data class ParsingResult( object HtmlParser { + @Throws(FormatException::class) suspend fun getFaviconLink(url: String, client: OkHttpClient): String? { val document = getHTMLHeadFromUrl(url, client) - val elements = document.select("link") - for (element in elements) { - if (element.attributes()["rel"].lowercase().contains("icon")) { - return element.absUrl("href") - } - } + val links = document.select("link") + .filter { element -> element.attributes()["rel"].contains("icon") } + .sortedWith(compareByDescending { + it.attributes()["rel"] == "apple-touch-icon" + }.thenByDescending { element -> + val sizes = element.attr("sizes") - return null + if (sizes.isNotEmpty()) { + try { + sizes.filter { it.isDigit() } + .toInt() + } catch (e: Exception) { + 0 + } + } else { + 0 + } + }) + + return links.firstOrNull() + ?.absUrl("href") } + @Throws(FormatException::class) suspend fun getFeedLink(url: String, client: OkHttpClient): List { - val results = mutableListOf() - val document = getHTMLHeadFromUrl(url, client) - val elements = document.select("link") - for (element in elements) { - val type = element.attributes()["type"] - - if (LocalRSSHelper.isRSSType(type)) { - results += ParsingResult( - url = element.absUrl("href"), - label = element.attributes()["title"] + return document.select("link") + .filter { element -> + val type = element.attributes()["type"] + LocalRSSHelper.isRSSType(type) + }.map { + ParsingResult( + url = it.absUrl("href"), + label = it.attributes()["title"] ) } - } - - return results } - private fun getHTMLHeadFromUrl(url: String, client: OkHttpClient): Document { - client.newCall(Request.Builder().url(url).build()).execute().use { response -> - if (response.header(ApiUtils.CONTENT_TYPE_HEADER)!!.contains(ApiUtils.HTML_CONTENT_TYPE) - ) { - val body = response.body!!.source() + private suspend fun getHTMLHeadFromUrl(url: String, client: OkHttpClient): Document = + withContext(Dispatchers.IO) { + client.newCall( + Request.Builder() + .url(url) + .build() + ).execute() + .use { response -> + if (response.header(ApiUtils.CONTENT_TYPE_HEADER)!! + .contains(ApiUtils.HTML_CONTENT_TYPE) + ) { + val body = response.body!!.source() - val stringBuilder = StringBuilder() - var collectionStarted = false + val stringBuilder = StringBuilder() + var collectionStarted = false - while (!body.exhausted()) { - val currentLine = body.readUtf8LineStrict() + while (!body.exhausted()) { + val currentLine = body.readUtf8LineStrict() - when { - currentLine.contains("") -> { - stringBuilder.append(currentLine) - collectionStarted = true + when { + currentLine.contains("") -> { + stringBuilder.append(currentLine) + collectionStarted = true + } + + currentLine.contains("") -> { + stringBuilder.append(currentLine) + break + } + + collectionStarted -> { + stringBuilder.append(currentLine) + } + } } - currentLine.contains("") -> { - stringBuilder.append(currentLine) - break - } - collectionStarted -> { - stringBuilder.append(currentLine) + + if (!stringBuilder.contains("") || !stringBuilder.contains("")) { + body.close() + throw FormatException("Failed to get HTML head from $url") } + + body.close() + Jsoup.parse(stringBuilder.toString(), url) + } else { + response.close() + throw FormatException("Response from $url is not a html file") } } - - if (!stringBuilder.contains("") || !stringBuilder.contains("")) - throw FormatException("Failed to get HTML head") - - body.close() - return Jsoup.parse(stringBuilder.toString(), url) - } else { - throw FormatException("The response is not a html file") - } } - } - } \ No newline at end of file diff --git a/api/src/test/java/com/readrops/api/utils/HtmlParserTest.kt b/api/src/test/java/com/readrops/api/utils/HtmlParserTest.kt index 0bf410c8..f7cd571e 100644 --- a/api/src/test/java/com/readrops/api/utils/HtmlParserTest.kt +++ b/api/src/test/java/com/readrops/api/utils/HtmlParserTest.kt @@ -2,19 +2,21 @@ package com.readrops.api.utils import android.nfc.FormatException import com.readrops.api.TestUtils -import kotlinx.coroutines.runBlocking +import kotlinx.coroutines.test.runTest import okhttp3.OkHttpClient import okhttp3.mockwebserver.MockResponse import okhttp3.mockwebserver.MockWebServer import okio.Buffer +import org.junit.After +import org.junit.Before import org.junit.Rule import org.junit.Test import org.koin.dsl.module import org.koin.test.KoinTest import org.koin.test.KoinTestRule +import org.koin.test.get import java.net.HttpURLConnection import java.util.concurrent.TimeUnit -import kotlin.test.assertEquals import kotlin.test.assertNull import kotlin.test.assertTrue @@ -34,18 +36,18 @@ class HtmlParserTest : KoinTest { }) } - @Test + @Before fun before() { mockServer.start() } - @Test + @After fun after() { mockServer.shutdown() } @Test - fun getFeedLinkTest() { + fun getFeedLinkTest() = runTest { val stream = TestUtils.loadResource("utils/file.html") mockServer.enqueue( @@ -54,19 +56,14 @@ class HtmlParserTest : KoinTest { .setBody(Buffer().readFrom(stream)) ) - runBlocking { - val result = - HtmlParser.getFeedLink(mockServer.url("/rss").toString(), koinTestRule.koin.get()) + val links = HtmlParser.getFeedLink(mockServer.url("/rss").toString(), get()) - assertTrue { result.size == 1 } - assertTrue { result.first().url.endsWith("/rss") } - assertEquals("RSS", result.first().label) - - } + assertTrue { links.size == 2 } + assertTrue { links.all { it.label!!.contains("The Mozilla Blog") } } } @Test(expected = FormatException::class) - fun getFeedLinkWithoutHeadTest() { + fun getFeedLinkWithoutHeadTest() = runTest { val stream = TestUtils.loadResource("utils/file_without_head.html") mockServer.enqueue( @@ -75,21 +72,21 @@ class HtmlParserTest : KoinTest { .setBody(Buffer().readFrom(stream)) ) - runBlocking { HtmlParser.getFeedLink(mockServer.url("/rss").toString(), koinTestRule.koin.get()) } + HtmlParser.getFeedLink(mockServer.url("/rss").toString(), get()) } @Test(expected = FormatException::class) - fun getFeedLinkNoHtmlFileTest() { + fun getFeedLinkNoHtmlFileTest() = runTest { mockServer.enqueue( MockResponse().setResponseCode(HttpURLConnection.HTTP_OK) - .addHeader(ApiUtils.CONTENT_TYPE_HEADER, "application/rss+xml")) + .addHeader(ApiUtils.CONTENT_TYPE_HEADER, "application/rss+xml") + ) - - runBlocking { HtmlParser.getFeedLink(mockServer.url("/rss").toString(), koinTestRule.koin.get()) } + HtmlParser.getFeedLink(mockServer.url("/rss").toString(), get()) } @Test - fun getFaviconLinkTest() { + fun getFaviconLinkTest() = runTest { val stream = TestUtils.loadResource("utils/file.html") mockServer.enqueue( @@ -98,15 +95,12 @@ class HtmlParserTest : KoinTest { .setBody(Buffer().readFrom(stream)) ) - runBlocking { - val result = HtmlParser.getFaviconLink(mockServer.url("/rss").toString(), koinTestRule.koin.get()) - - assertTrue { result!!.contains("favicon.ico") } - } + val link = HtmlParser.getFaviconLink(mockServer.url("/rss").toString(), get()) + assertTrue { link!!.contains("apple-touch-icon") } } @Test - fun getFaviconLinkWithoutHeadTest() { + fun getFaviconLinkWithoutHeadTest() = runTest { val stream = TestUtils.loadResource("utils/file_without_icon.html") mockServer.enqueue( @@ -115,10 +109,7 @@ class HtmlParserTest : KoinTest { .setBody(Buffer().readFrom(stream)) ) - runBlocking { - val result = HtmlParser.getFaviconLink(mockServer.url("/rss").toString(), koinTestRule.koin.get()) - - assertNull(result) - } + val link = HtmlParser.getFaviconLink(mockServer.url("/rss").toString(), get()) + assertNull(link) } } \ No newline at end of file diff --git a/api/src/test/resources/utils/file.html b/api/src/test/resources/utils/file.html index d55ef435..9e061ad9 100644 --- a/api/src/test/resources/utils/file.html +++ b/api/src/test/resources/utils/file.html @@ -1,601 +1,132 @@ - - - - - - - - Hacker News - - -
- - - - - - - - - - - -
- - - - - - -
Hacker News - new | past | comments | ask | show | jobs | submit - - login - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1.A Brief History of Computers (lesswrong.com)
- 31 points by zdw 1 hour ago | hide | 3 comments -
2.Consumer Software Is Expected to Be Next Fast-Growing Segment (1994) (csmonitor.com)
- 9 points by 1970-01-01 1 hour ago | hide | 1 comment -
3.MSX-DOS (wikipedia.org)
- 82 points by pavlov 6 hours ago | hide | 26 comments -
4.New Yorkers Got Broken Promises. Developers Got 20M Sq. Ft (nytimes.com)
- 12 points by asnyder 20 minutes ago | hide | 1 comment -
5.Apple's interactive television box: Hacking the set top box System 7.1 in ROM (oldvcr.blogspot.com)
- 160 points by todsacerdoti 10 hours ago | hide | 20 comments -
6.Putting the “You” in CPU (cpu.land)
- 187 points by uneekname 10 hours ago | hide | 73 comments -
7.Botulinum toxin: Bioweapon and magic drug (nih.gov)
- 12 points by redbell 2 hours ago | hide | 10 comments -
8.Octos – HTML live wallpaper engine (github.com/underpig1)
- 85 points by underpig1 6 hours ago | hide | 23 comments -
9.More than you've ever wanted to know about errors in Rust (shuttle.rs)
- 13 points by asymmetric 2 hours ago | hide | 3 comments -
10.Embrace Complexity; Tighten Your Feedback Loops (ferd.ca)
- 27 points by lutzh 4 hours ago | hide | 1 comment -
11.AWS networking concepts in a diagram (miparnisariblog.wordpress.com)
- 171 points by mparnisari 10 hours ago | hide | 66 comments -
12.Plane – Open-source Jira alternative (plane.so)
- 240 points by prhrb 7 hours ago | hide | 93 comments -
13.Neurotechnology: Current Developments and Ethical Issues (frontiersin.org)
- 28 points by Quinzel 3 hours ago | hide | 15 comments -
14.What we talk about when we talk about System Design (maheshba.bitbucket.io)
- 166 points by scv119 11 hours ago | hide | 22 comments -
15.ElKaWe – Electrocaloric heat pumps (fraunhofer.de)
- 140 points by danans 10 hours ago | hide | 73 comments -
16.Over-grazing and desertification in the Syrian steppe root causes of war (2015) (theecologist.org)
- 64 points by joveian 6 hours ago | hide | 43 comments -
17.Redmine – open-source project management (redmine.org)
- 34 points by synergy20 2 hours ago | hide | 24 comments -
18.Google tries internet air-gap for some staff PCs (theregister.com)
- 67 points by beardyw 9 hours ago | hide | 73 comments -
19.I thought I wanted to be a professor, then I served on a hiring committee (2021) (science.org)
- 104 points by ykonstant 4 hours ago | hide | 72 comments -
20.Internet search tips (gwern.net)
- 161 points by herbertl 12 hours ago | hide | 58 comments -
21.Bayesian methods to provide probablistic solution for the Drake equation (2019) (sciencedirect.com)
- 22 points by benbreen 4 hours ago | hide | 18 comments -
22.Biotumen: Bitumen Reinvented (biofabrik.com)
- 40 points by patall 7 hours ago | hide | 11 comments -
23.Why even let users set their own passwords? (devever.net)
- 103 points by hlandau 2 hours ago | hide | 121 comments -
24.Confronting failure as a core life skill (buildinghealthier.substack.com)
- 168 points by blh75 15 hours ago | hide | 75 comments -
25.Hokusai’s Illustrated Warrior Vanguard of Japan and China (1836) (publicdomainreview.org)
- 19 points by tintinnabula 2 hours ago | hide | discuss -
26.Bun v0.7.0 (bun.sh)
- 163 points by sshroot 9 hours ago | hide | 107 comments -
27.Simpson Fan Grows Tomacco (2003) (simpsonsarchive.com)
- 81 points by pipeline_peak 6 hours ago | hide | 55 comments -
28.Discovery: Metals can heal themselves (sandia.gov)
- 77 points by bobvanluijt 13 hours ago | hide | 24 comments -
29.Pressure and vacuum marination does not work (2016) (genuineideas.com)
- 87 points by OJFord 13 hours ago | hide | 57 comments -
30.Scientists: Fishing boats compete with whales and penguins for Antarctic krill (mongabay.com)
- 5 points by PaulHoule 1 hour ago | hide | discuss -
-
- - - - - -
-
-
- Guidelines | FAQ | Lists | API | Security | Legal | Apply to YC | Contact

-
Search:
-
-
-
- - - + + + + + + + + + + + + + + + Home - The Mozilla Blog + + + + + + + + + + + + + + + + + + + + + + + + + + + The Mozilla Blog + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/app/src/main/java/com/readrops/app/repositories/LocalRSSRepository.kt b/app/src/main/java/com/readrops/app/repositories/LocalRSSRepository.kt index fc3b6ba5..27fa99b6 100644 --- a/app/src/main/java/com/readrops/app/repositories/LocalRSSRepository.kt +++ b/app/src/main/java/com/readrops/app/repositories/LocalRSSRepository.kt @@ -129,7 +129,7 @@ class LocalRSSRepository( feedUrl?.let { color = FeedColors.getFeedColor(it) } } } catch (e: Exception) { - Log.d("LocalRSSRepository", "insertFeed: ${e.message}") + Log.e("LocalRSSRepository", "insertFeed: ${e.message}") } id = database.feedDao().insert(this).toInt()