optimise plain text appending to avoid creating extra instances

This commit is contained in:
Adam Brown 2022-10-28 18:05:37 +01:00
parent f694ffe786
commit 89af610f58
2 changed files with 162 additions and 118 deletions

View File

@ -5,6 +5,8 @@ import app.dapk.st.matrix.common.RichText.Part.*
import app.dapk.st.matrix.common.UserId
// NOTE(review): going by the name, punctuation that is not accepted as the final character of a URL;
// presumably read by bestGuessStripTrailingUrlChar — confirm, its body is not fully visible here.
private const val INVALID_TRAILING_CHARS = ",.:;?"
// Character the parser searches for to find where a tag starts.
private const val TAG_OPEN = '<'
// Character the parser searches for to find where a tag ends.
private const val TAG_CLOSE = '>'
class RichMessageParser {
@ -12,136 +14,132 @@ class RichMessageParser {
val input = source
.removeHtmlEntities()
.dropTextFallback()
return kotlin.runCatching {
val buffer = mutableSetOf<RichText.Part>()
var openIndex = 0
var closeIndex = 0
var lastStartIndex = 0
while (openIndex != -1) {
val foundIndex = input.indexOf('<', startIndex = openIndex)
if (foundIndex != -1) {
closeIndex = input.indexOf('>', startIndex = foundIndex)
if (closeIndex == -1) {
val builder = PartBuilder()
var openIndex = 0
var closeIndex = 0
var lastStartIndex = 0
while (openIndex != -1) {
val foundIndex = input.indexOf(TAG_OPEN, startIndex = openIndex)
if (foundIndex != -1) {
closeIndex = input.indexOf(TAG_CLOSE, startIndex = foundIndex)
if (closeIndex == -1) {
openIndex++
} else {
val wholeTag = input.substring(foundIndex, closeIndex + 1)
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
if (tagName.startsWith('@')) {
if (openIndex != foundIndex) {
builder.appendText(input.substring(openIndex, foundIndex))
}
builder.appendPerson(UserId(tagName), tagName)
openIndex = foundIndex + wholeTag.length
lastStartIndex = openIndex
continue
}
if (tagName == "br") {
if (openIndex != foundIndex) {
builder.appendText(input.substring(openIndex, foundIndex))
}
builder.appendText("\n")
openIndex = foundIndex + wholeTag.length
lastStartIndex = openIndex
continue
}
val exitTag = "</$tagName>"
val exitIndex = input.indexOf(exitTag, startIndex = closeIndex)
if (exitIndex == -1) {
openIndex++
} else {
val wholeTag = input.substring(foundIndex, closeIndex + 1)
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
if (tagName.startsWith('@')) {
if (openIndex != foundIndex) {
buffer.add(Normal(input.substring(openIndex, foundIndex)))
}
buffer.add(Person(UserId(tagName), tagName))
openIndex = foundIndex + wholeTag.length
lastStartIndex = openIndex
continue
}
if (tagName == "br") {
if (openIndex != foundIndex) {
buffer.add(Normal(input.substring(openIndex, foundIndex)))
}
buffer.add(Normal("\n"))
openIndex = foundIndex + wholeTag.length
lastStartIndex = openIndex
continue
}
val exitTag = "</$tagName>"
val exitIndex = input.indexOf(exitTag, startIndex = closeIndex)
if (exitIndex == -1) {
openIndex++
} else {
when (tagName) {
"mx-reply" -> {
openIndex = exitIndex + exitTag.length
lastStartIndex = openIndex
continue
}
}
if (openIndex != foundIndex) {
buffer.add(Normal(input.substring(openIndex, foundIndex)))
}
val tagContent = input.substring(closeIndex + 1, exitIndex)
openIndex = exitIndex + exitTag.length
lastStartIndex = openIndex
when (tagName) {
"a" -> {
val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">")
if (findHrefUrl.startsWith("https://matrix.to/#/@")) {
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
buffer.add(Person(userId, "@${tagContent.removePrefix("@")}"))
if (input.getOrNull(openIndex) == ':') {
openIndex++
lastStartIndex = openIndex
}
} else {
buffer.add(Link(url = findHrefUrl, label = tagContent))
}
}
"b" -> buffer.add(Bold(tagContent))
"strong" -> buffer.add(Bold(tagContent))
"i" -> buffer.add(Italic(tagContent))
"em" -> buffer.add(Italic(tagContent))
else -> buffer.add(Normal(tagContent))
}
}
}
} else {
// check for urls
val urlIndex = input.indexOf("http", startIndex = openIndex)
if (urlIndex != -1) {
if (lastStartIndex != urlIndex) {
buffer.add(Normal(input.substring(lastStartIndex, urlIndex)))
}
val originalUrl = input.substring(urlIndex)
val urlEndIndex = originalUrl.indexOfFirst { it == '\n' || it == ' ' }
val urlContinuesUntilEnd = urlEndIndex == -1
when {
urlContinuesUntilEnd -> {
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
buffer.add(Link(url = cleanedUrl, label = cleanedUrl))
if (cleanedUrl != originalUrl) {
buffer.add(Normal(originalUrl.last().toString()))
}
break
}
else -> {
val originalUrl = input.substring(urlIndex, urlEndIndex)
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
buffer.add(Link(url = cleanedUrl, label = cleanedUrl))
openIndex = if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
when (tagName) {
"mx-reply" -> {
openIndex = exitIndex + exitTag.length
lastStartIndex = openIndex
continue
}
}
}
// exit
if (lastStartIndex < input.length) {
buffer.add(Normal(input.substring(lastStartIndex)))
if (openIndex != foundIndex) {
builder.appendText(input.substring(openIndex, foundIndex))
}
val tagContent = input.substring(closeIndex + 1, exitIndex)
openIndex = exitIndex + exitTag.length
lastStartIndex = openIndex
when (tagName) {
"a" -> {
val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">")
if (findHrefUrl.startsWith("https://matrix.to/#/@")) {
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
if (input.getOrNull(openIndex) == ':') {
openIndex++
lastStartIndex = openIndex
}
} else {
builder.appendLink(findHrefUrl, label = tagContent)
}
}
"b" -> builder.appendBold(tagContent)
"strong" -> builder.appendBold(tagContent)
"i" -> builder.appendItalic(tagContent)
"em" -> builder.appendItalic(tagContent)
else -> builder.appendText(tagContent)
}
}
break
}
}
RichText(buffer)
}.onFailure {
it.printStackTrace()
println(input)
}.getOrThrow()
}
} else {
// check for urls
val urlIndex = input.indexOf("http", startIndex = openIndex)
if (urlIndex != -1) {
if (lastStartIndex != urlIndex) {
builder.appendText(input.substring(lastStartIndex, urlIndex))
}
val originalUrl = input.substring(urlIndex)
val urlEndIndex = originalUrl.indexOfFirst { it == '\n' || it == ' ' }
val urlContinuesUntilEnd = urlEndIndex == -1
when {
urlContinuesUntilEnd -> {
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
builder.appendLink(url = cleanedUrl, label = null)
if (cleanedUrl != originalUrl) {
builder.appendText(originalUrl.last().toString())
}
break
}
else -> {
val originalUrl = input.substring(urlIndex, urlEndIndex)
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
builder.appendLink(url = cleanedUrl, label = null)
openIndex = if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
lastStartIndex = openIndex
continue
}
}
}
// exit
if (lastStartIndex < input.length) {
builder.appendText(input.substring(lastStartIndex))
}
break
}
}
return RichText(builder.build())
}
}
/**
 * Decodes the two HTML entities handled here: `&quot;` becomes `"` and `&#39;` becomes `'`.
 * The double-quote replacement is applied first, then the apostrophe one; any other text,
 * including other entities, is returned unchanged.
 */
private fun String.removeHtmlEntities(): String {
    val withDoubleQuotes = replace("&quot;", "\"")
    return withDoubleQuotes.replace("&#39;", "'")
}
/**
 * Strips the leading quoted fallback from a message body.
 *
 * Lines are discarded from the top while they are empty or start with "> ". The first line that is
 * neither stops the skipping; it and every following line (even ones starting with "> ") are kept
 * and re-joined with "\n".
 */
private fun String.dropTextFallback(): String = this.lines()
    .dropWhile { it.startsWith("> ") || it.isEmpty() }
    .joinToString(separator = "\n")
private fun String.bestGuessStripTrailingUrlChar(): String {
val last = this.last()
@ -151,3 +149,49 @@ private fun String.bestGuessStripTrailingUrlChar(): String {
this
}
}
/**
 * Accumulates the [RichText.Part]s of a message while it is being parsed.
 *
 * Plain text is gathered in a [StringBuilder] and only converted into a single [Normal] part when a
 * non-text part is appended or when [build] is called, so consecutive [appendText] calls end up in
 * one [Normal] rather than one per call.
 *
 * NOTE(review): parts are kept in a set, so if the Part types use value equality a part equal to an
 * earlier one (e.g. the same bold word twice) would be dropped — confirm this is intended.
 */
private class PartBuilder {

    // Never reassigned: only appended to and cleared, hence val.
    private val normalBuffer = StringBuilder()
    private val parts = mutableSetOf<RichText.Part>()

    /** Queues [value] as plain text; it is emitted later as part of a merged [Normal]. */
    fun appendText(value: String) {
        normalBuffer.append(value.cleanFirstTextLine())
    }

    /** Emits any pending plain text, then adds [value] as an [Italic] part. */
    fun appendItalic(value: String) {
        flushNormalBuffer()
        parts.add(Italic(value.cleanFirstTextLine()))
    }

    /** Emits any pending plain text, then adds [value] as a [Bold] part. */
    fun appendBold(value: String) {
        flushNormalBuffer()
        parts.add(Bold(value.cleanFirstTextLine()))
    }

    // Leading whitespace is trimmed only while nothing has been collected yet
    // (no parts and no pending text); afterwards the text is passed through as-is.
    private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this

    /** Emits any pending plain text, then adds a [Person] part for [userId] shown as [displayName]. */
    fun appendPerson(userId: UserId, displayName: String) {
        flushNormalBuffer()
        parts.add(Person(userId, displayName))
    }

    /** Emits any pending plain text, then adds a [Link]; a null [label] falls back to [url]. */
    fun appendLink(url: String, label: String?) {
        flushNormalBuffer()
        parts.add(Link(url, label ?: url))
    }

    /** Emits any pending plain text and returns the collected parts. */
    fun build(): Set<RichText.Part> {
        flushNormalBuffer()
        return parts
    }

    // Turns the pending plain text (if any) into one Normal part and resets the buffer.
    private fun flushNormalBuffer() {
        if (normalBuffer.isNotEmpty()) {
            parts.add(Normal(normalBuffer.toString()))
            normalBuffer.clear()
        }
    }
}

View File

@ -20,7 +20,7 @@ class RichMessageParserTest {
@Test
fun `skips p tags`() = runParserTest(
    input = "Hello world! <p>foo bar</p> after paragraph",
    // The text before, inside and after the <p> tag is expected as one merged Normal part.
    expected = RichText(setOf(Normal("Hello world! foo bar after paragraph")))
)
@Test
@ -66,7 +66,7 @@ class RichMessageParserTest {
@Test
fun `replaces br tags`() = runParserTest(
    input = "Hello world!<br />next line<br />another line",
    // Each <br /> is expected as a "\n" inside a single merged Normal part.
    expected = RichText(setOf(Normal("Hello world!\nnext line\nanother line")))
)
@Test