Add best-guess end detection to URL parsing

This commit is contained in:
Adam Brown 2022-10-28 17:35:30 +01:00
parent 45962157f0
commit f694ffe786
1 changed file with 24 additions and 17 deletions

View File

@ -4,6 +4,8 @@ import app.dapk.st.matrix.common.RichText
import app.dapk.st.matrix.common.RichText.Part.* import app.dapk.st.matrix.common.RichText.Part.*
import app.dapk.st.matrix.common.UserId import app.dapk.st.matrix.common.UserId
private const val INVALID_TRAILING_CHARS = ",.:;?"
class RichMessageParser { class RichMessageParser {
fun parse(source: String): RichText { fun parse(source: String): RichText {
@ -30,7 +32,6 @@ class RichMessageParser {
buffer.add(Normal(input.substring(openIndex, foundIndex))) buffer.add(Normal(input.substring(openIndex, foundIndex)))
} }
buffer.add(Person(UserId(tagName), tagName)) buffer.add(Person(UserId(tagName), tagName))
println(tagName)
openIndex = foundIndex + wholeTag.length openIndex = foundIndex + wholeTag.length
lastStartIndex = openIndex lastStartIndex = openIndex
continue continue
@ -48,9 +49,6 @@ class RichMessageParser {
val exitTag = "</$tagName>" val exitTag = "</$tagName>"
val exitIndex = input.indexOf(exitTag, startIndex = closeIndex) val exitIndex = input.indexOf(exitTag, startIndex = closeIndex)
println("$exitTag : $exitIndex")
if (exitIndex == -1) { if (exitIndex == -1) {
openIndex++ openIndex++
} else { } else {
@ -101,24 +99,24 @@ class RichMessageParser {
buffer.add(Normal(input.substring(lastStartIndex, urlIndex))) buffer.add(Normal(input.substring(lastStartIndex, urlIndex)))
} }
val substring1 = input.substring(urlIndex) val originalUrl = input.substring(urlIndex)
val urlEndIndex = substring1.indexOfFirst { it == '\n' || it == ' ' } val urlEndIndex = originalUrl.indexOfFirst { it == '\n' || it == ' ' }
val urlContinuesUntilEnd = urlEndIndex == -1
when { when {
urlEndIndex == -1 -> { urlContinuesUntilEnd -> {
val last = substring1.last() val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
val url = substring1.removeSuffix(".").removeSuffix(",") buffer.add(Link(url = cleanedUrl, label = cleanedUrl))
buffer.add(Link(url = url, label = url)) if (cleanedUrl != originalUrl) {
if (last == '.' || last == ',') { buffer.add(Normal(originalUrl.last().toString()))
buffer.add(Normal(last.toString()))
} }
break break
} }
else -> { else -> {
val substring = input.substring(urlIndex, urlEndIndex) val originalUrl = input.substring(urlIndex, urlEndIndex)
val url = substring.removeSuffix(".").removeSuffix(",") val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
buffer.add(Link(url = url, label = url)) buffer.add(Link(url = cleanedUrl, label = cleanedUrl))
openIndex = if (substring.endsWith('.') || substring.endsWith(',')) urlEndIndex - 1 else urlEndIndex openIndex = if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
lastStartIndex = openIndex lastStartIndex = openIndex
continue continue
} }
@ -143,4 +141,13 @@ class RichMessageParser {
private fun String.removeHtmlEntities() = this.replace("&quot;", "\"").replace("&#39;", "'") private fun String.removeHtmlEntities() = this.replace("&quot;", "\"").replace("&#39;", "'")
private fun String.dropTextFallback() = this.lines().dropWhile { it.startsWith("> ") || it.isEmpty() }.joinToString("") private fun String.dropTextFallback() = this.lines().dropWhile { it.startsWith("> ") || it.isEmpty() }.joinToString("\n")
/**
 * Removes a single trailing character from the receiver when that character is
 * listed in [INVALID_TRAILING_CHARS].
 *
 * At most one character is dropped, so the result is either the receiver itself
 * or the receiver without its last character. An empty receiver is returned
 * unchanged.
 *
 * @return the receiver without its final character when that character is in
 * [INVALID_TRAILING_CHARS], otherwise the receiver as-is.
 */
private fun String.bestGuessStripTrailingUrlChar(): String {
    // lastOrNull() instead of last(): last() throws NoSuchElementException on an empty string.
    val last = this.lastOrNull() ?: return this
    // Deliberately strips only one character; the result differs from the receiver by at most one char.
    return if (last in INVALID_TRAILING_CHARS) dropLast(1) else this
}