Optimise plain-text appending: merge consecutive text into a single Normal part to avoid creating extra instances

This commit is contained in:
Adam Brown 2022-10-28 18:05:37 +01:00
parent f694ffe786
commit 89af610f58
2 changed files with 162 additions and 118 deletions

View File

@ -5,6 +5,8 @@ import app.dapk.st.matrix.common.RichText.Part.*
import app.dapk.st.matrix.common.UserId import app.dapk.st.matrix.common.UserId
private const val INVALID_TRAILING_CHARS = ",.:;?" private const val INVALID_TRAILING_CHARS = ",.:;?"
private const val TAG_OPEN = '<'
private const val TAG_CLOSE = '>'
class RichMessageParser { class RichMessageParser {
@ -12,15 +14,14 @@ class RichMessageParser {
val input = source val input = source
.removeHtmlEntities() .removeHtmlEntities()
.dropTextFallback() .dropTextFallback()
return kotlin.runCatching { val builder = PartBuilder()
val buffer = mutableSetOf<RichText.Part>()
var openIndex = 0 var openIndex = 0
var closeIndex = 0 var closeIndex = 0
var lastStartIndex = 0 var lastStartIndex = 0
while (openIndex != -1) { while (openIndex != -1) {
val foundIndex = input.indexOf('<', startIndex = openIndex) val foundIndex = input.indexOf(TAG_OPEN, startIndex = openIndex)
if (foundIndex != -1) { if (foundIndex != -1) {
closeIndex = input.indexOf('>', startIndex = foundIndex) closeIndex = input.indexOf(TAG_CLOSE, startIndex = foundIndex)
if (closeIndex == -1) { if (closeIndex == -1) {
openIndex++ openIndex++
} else { } else {
@ -29,9 +30,9 @@ class RichMessageParser {
if (tagName.startsWith('@')) { if (tagName.startsWith('@')) {
if (openIndex != foundIndex) { if (openIndex != foundIndex) {
buffer.add(Normal(input.substring(openIndex, foundIndex))) builder.appendText(input.substring(openIndex, foundIndex))
} }
buffer.add(Person(UserId(tagName), tagName)) builder.appendPerson(UserId(tagName), tagName)
openIndex = foundIndex + wholeTag.length openIndex = foundIndex + wholeTag.length
lastStartIndex = openIndex lastStartIndex = openIndex
continue continue
@ -39,9 +40,9 @@ class RichMessageParser {
if (tagName == "br") { if (tagName == "br") {
if (openIndex != foundIndex) { if (openIndex != foundIndex) {
buffer.add(Normal(input.substring(openIndex, foundIndex))) builder.appendText(input.substring(openIndex, foundIndex))
} }
buffer.add(Normal("\n")) builder.appendText("\n")
openIndex = foundIndex + wholeTag.length openIndex = foundIndex + wholeTag.length
lastStartIndex = openIndex lastStartIndex = openIndex
continue continue
@ -61,7 +62,7 @@ class RichMessageParser {
} }
if (openIndex != foundIndex) { if (openIndex != foundIndex) {
buffer.add(Normal(input.substring(openIndex, foundIndex))) builder.appendText(input.substring(openIndex, foundIndex))
} }
val tagContent = input.substring(closeIndex + 1, exitIndex) val tagContent = input.substring(closeIndex + 1, exitIndex)
openIndex = exitIndex + exitTag.length openIndex = exitIndex + exitTag.length
@ -72,22 +73,22 @@ class RichMessageParser {
val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">") val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">")
if (findHrefUrl.startsWith("https://matrix.to/#/@")) { if (findHrefUrl.startsWith("https://matrix.to/#/@")) {
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\"")) val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
buffer.add(Person(userId, "@${tagContent.removePrefix("@")}")) builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
if (input.getOrNull(openIndex) == ':') { if (input.getOrNull(openIndex) == ':') {
openIndex++ openIndex++
lastStartIndex = openIndex lastStartIndex = openIndex
} }
} else { } else {
buffer.add(Link(url = findHrefUrl, label = tagContent)) builder.appendLink(findHrefUrl, label = tagContent)
} }
} }
"b" -> buffer.add(Bold(tagContent)) "b" -> builder.appendBold(tagContent)
"strong" -> buffer.add(Bold(tagContent)) "strong" -> builder.appendBold(tagContent)
"i" -> buffer.add(Italic(tagContent)) "i" -> builder.appendItalic(tagContent)
"em" -> buffer.add(Italic(tagContent)) "em" -> builder.appendItalic(tagContent)
else -> buffer.add(Normal(tagContent)) else -> builder.appendText(tagContent)
} }
} }
} }
@ -96,7 +97,7 @@ class RichMessageParser {
val urlIndex = input.indexOf("http", startIndex = openIndex) val urlIndex = input.indexOf("http", startIndex = openIndex)
if (urlIndex != -1) { if (urlIndex != -1) {
if (lastStartIndex != urlIndex) { if (lastStartIndex != urlIndex) {
buffer.add(Normal(input.substring(lastStartIndex, urlIndex))) builder.appendText(input.substring(lastStartIndex, urlIndex))
} }
val originalUrl = input.substring(urlIndex) val originalUrl = input.substring(urlIndex)
@ -105,9 +106,9 @@ class RichMessageParser {
when { when {
urlContinuesUntilEnd -> { urlContinuesUntilEnd -> {
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
buffer.add(Link(url = cleanedUrl, label = cleanedUrl)) builder.appendLink(url = cleanedUrl, label = null)
if (cleanedUrl != originalUrl) { if (cleanedUrl != originalUrl) {
buffer.add(Normal(originalUrl.last().toString())) builder.appendText(originalUrl.last().toString())
} }
break break
} }
@ -115,7 +116,7 @@ class RichMessageParser {
else -> { else -> {
val originalUrl = input.substring(urlIndex, urlEndIndex) val originalUrl = input.substring(urlIndex, urlEndIndex)
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
buffer.add(Link(url = cleanedUrl, label = cleanedUrl)) builder.appendLink(url = cleanedUrl, label = null)
openIndex = if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1 openIndex = if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
lastStartIndex = openIndex lastStartIndex = openIndex
continue continue
@ -125,23 +126,20 @@ class RichMessageParser {
// exit // exit
if (lastStartIndex < input.length) { if (lastStartIndex < input.length) {
buffer.add(Normal(input.substring(lastStartIndex))) builder.appendText(input.substring(lastStartIndex))
} }
break break
} }
} }
RichText(buffer) return RichText(builder.build())
}.onFailure {
it.printStackTrace()
println(input)
}.getOrThrow()
} }
} }
private fun String.removeHtmlEntities() = this.replace("&quot;", "\"").replace("&#39;", "'") private fun String.removeHtmlEntities() = this.replace("&quot;", "\"").replace("&#39;", "'")
private fun String.dropTextFallback() = this.lines().dropWhile { it.startsWith("> ") || it.isEmpty() }.joinToString("\n") private fun String.dropTextFallback() = this.lines()
.dropWhile { it.startsWith("> ") || it.isEmpty() }
.joinToString(separator = "\n")
private fun String.bestGuessStripTrailingUrlChar(): String { private fun String.bestGuessStripTrailingUrlChar(): String {
val last = this.last() val last = this.last()
@ -151,3 +149,49 @@ private fun String.bestGuessStripTrailingUrlChar(): String {
this this
} }
} }
/**
 * Collects the [RichText.Part]s produced while parsing a message.
 *
 * Plain text is accumulated in an internal buffer so that consecutive [appendText] calls
 * end up as a single [Normal] part. The buffer is flushed into the part collection
 * whenever a non-text part is appended, and once more when [build] is called.
 *
 * Leading whitespace is trimmed from text, italic and bold values for as long as nothing
 * has been collected yet (no parts and no buffered text).
 */
private class PartBuilder {

    // Pending plain text. The instance is only appended to and cleared, never replaced.
    private val normalBuffer = StringBuilder()

    // NOTE(review): this is a set, so a part that is equal to one already collected is not
    // added a second time — confirm Part equality semantics make that the intended result.
    private val parts = mutableSetOf<RichText.Part>()

    /** Buffers [value] as plain text; it becomes part of the next flushed [Normal]. */
    fun appendText(value: String) {
        normalBuffer.append(value.cleanFirstTextLine())
    }

    /** Flushes any buffered plain text, then adds [value] as an [Italic] part. */
    fun appendItalic(value: String) {
        flushNormalBuffer()
        parts.add(Italic(value.cleanFirstTextLine()))
    }

    /** Flushes any buffered plain text, then adds [value] as a [Bold] part. */
    fun appendBold(value: String) {
        flushNormalBuffer()
        parts.add(Bold(value.cleanFirstTextLine()))
    }

    // Trims leading whitespace only while nothing has been collected yet.
    private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this

    /** Flushes any buffered plain text, then adds a [Person] part for [userId] and [displayName]. */
    fun appendPerson(userId: UserId, displayName: String) {
        flushNormalBuffer()
        parts.add(Person(userId, displayName))
    }

    /**
     * Flushes any buffered plain text, then adds a [Link] part.
     *
     * @param url the link target
     * @param label the text to show for the link; when null, [url] is used as the label
     */
    fun appendLink(url: String, label: String?) {
        flushNormalBuffer()
        parts.add(Link(url, label ?: url))
    }

    /** Flushes any remaining buffered plain text and returns the collected parts. */
    fun build(): Set<RichText.Part> {
        flushNormalBuffer()
        return parts
    }

    // Moves buffered text into a single Normal part; does nothing when the buffer is empty,
    // so no empty Normal parts are created.
    private fun flushNormalBuffer() {
        if (normalBuffer.isNotEmpty()) {
            parts.add(Normal(normalBuffer.toString()))
            normalBuffer.clear()
        }
    }
}

View File

@ -20,7 +20,7 @@ class RichMessageParserTest {
@Test @Test
fun `skips p tags`() = runParserTest( fun `skips p tags`() = runParserTest(
input = "Hello world! <p>foo bar</p> after paragraph", input = "Hello world! <p>foo bar</p> after paragraph",
expected = RichText(setOf(Normal("Hello world! "), Normal("foo bar"), Normal(" after paragraph"))) expected = RichText(setOf(Normal("Hello world! foo bar after paragraph")))
) )
@Test @Test
@ -66,7 +66,7 @@ class RichMessageParserTest {
@Test @Test
fun `replaces br tags`() = runParserTest( fun `replaces br tags`() = runParserTest(
input = "Hello world!<br />next line<br />another line", input = "Hello world!<br />next line<br />another line",
expected = RichText(setOf(Normal("Hello world!"), Normal("\n"), Normal("next line"), Normal("\n"), Normal("another line"))) expected = RichText(setOf(Normal("Hello world!\nnext line\nanother line")))
) )
@Test @Test