From 89af610f58df766c8074d8d9ba3f3874b1ea59be Mon Sep 17 00:00:00 2001 From: Adam Brown Date: Fri, 28 Oct 2022 18:05:37 +0100 Subject: [PATCH] optimise plain text appending to avoid creating extra instances --- .../sync/internal/sync/RichMessageParser.kt | 276 ++++++++++-------- .../internal/sync/RichMessageParserTest.kt | 4 +- 2 files changed, 162 insertions(+), 118 deletions(-) diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParser.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParser.kt index a230e59..692d280 100644 --- a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParser.kt +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParser.kt @@ -5,6 +5,8 @@ import app.dapk.st.matrix.common.RichText.Part.* import app.dapk.st.matrix.common.UserId private const val INVALID_TRAILING_CHARS = ",.:;?" +private const val TAG_OPEN = '<' +private const val TAG_CLOSE = '>' class RichMessageParser { @@ -12,136 +14,132 @@ class RichMessageParser { val input = source .removeHtmlEntities() .dropTextFallback() - return kotlin.runCatching { - val buffer = mutableSetOf() - var openIndex = 0 - var closeIndex = 0 - var lastStartIndex = 0 - while (openIndex != -1) { - val foundIndex = input.indexOf('<', startIndex = openIndex) - if (foundIndex != -1) { - closeIndex = input.indexOf('>', startIndex = foundIndex) - if (closeIndex == -1) { + val builder = PartBuilder() + var openIndex = 0 + var closeIndex = 0 + var lastStartIndex = 0 + while (openIndex != -1) { + val foundIndex = input.indexOf(TAG_OPEN, startIndex = openIndex) + if (foundIndex != -1) { + closeIndex = input.indexOf(TAG_CLOSE, startIndex = foundIndex) + if (closeIndex == -1) { + openIndex++ + } else { + val wholeTag = input.substring(foundIndex, closeIndex + 1) + val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' }) + + if (tagName.startsWith('@')) { + if (openIndex != foundIndex) { + builder.appendText(input.substring(openIndex, foundIndex)) + } + builder.appendPerson(UserId(tagName), tagName) + openIndex = foundIndex + wholeTag.length + lastStartIndex = openIndex + continue + } + + if (tagName == "br") { + if (openIndex != foundIndex) { + builder.appendText(input.substring(openIndex, foundIndex)) + } + builder.appendText("\n") + openIndex = foundIndex + wholeTag.length + lastStartIndex = openIndex + continue + } + + val exitTag = "" + val exitIndex = input.indexOf(exitTag, startIndex = closeIndex) + if (exitIndex == -1) { openIndex++ } else { - val wholeTag = input.substring(foundIndex, closeIndex + 1) - val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' }) - - if (tagName.startsWith('@')) { - if (openIndex != foundIndex) { - buffer.add(Normal(input.substring(openIndex, foundIndex))) - } - buffer.add(Person(UserId(tagName), tagName)) - openIndex = foundIndex + wholeTag.length - lastStartIndex = openIndex - continue - } - - if (tagName == "br") { - if (openIndex != foundIndex) { - buffer.add(Normal(input.substring(openIndex, foundIndex))) - } - buffer.add(Normal("\n")) - openIndex = foundIndex + wholeTag.length - lastStartIndex = openIndex - continue - } - - val exitTag = "" - val exitIndex = input.indexOf(exitTag, startIndex = closeIndex) - if (exitIndex == -1) { - openIndex++ - } else { - when (tagName) { - "mx-reply" -> { - openIndex = exitIndex + exitTag.length - lastStartIndex = openIndex - continue - } - } - - if (openIndex != foundIndex) { - buffer.add(Normal(input.substring(openIndex, foundIndex))) - } - val tagContent = input.substring(closeIndex + 1, exitIndex) - openIndex = exitIndex + exitTag.length - lastStartIndex = openIndex - - when (tagName) { - "a" -> { - val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">") - if (findHrefUrl.startsWith("https://matrix.to/#/@")) { - val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\"")) - buffer.add(Person(userId, "@${tagContent.removePrefix("@")}")) - if (input.getOrNull(openIndex) == ':') { - openIndex++ - lastStartIndex = openIndex - } - } else { - buffer.add(Link(url = findHrefUrl, label = tagContent)) - } - } - - "b" -> buffer.add(Bold(tagContent)) - "strong" -> buffer.add(Bold(tagContent)) - "i" -> buffer.add(Italic(tagContent)) - "em" -> buffer.add(Italic(tagContent)) - - else -> buffer.add(Normal(tagContent)) - } - } - } - } else { - // check for urls - val urlIndex = input.indexOf("http", startIndex = openIndex) - if (urlIndex != -1) { - if (lastStartIndex != urlIndex) { - buffer.add(Normal(input.substring(lastStartIndex, urlIndex))) - } - - val originalUrl = input.substring(urlIndex) - val urlEndIndex = originalUrl.indexOfFirst { it == '\n' || it == ' ' } - val urlContinuesUntilEnd = urlEndIndex == -1 - when { - urlContinuesUntilEnd -> { - val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() - buffer.add(Link(url = cleanedUrl, label = cleanedUrl)) - if (cleanedUrl != originalUrl) { - buffer.add(Normal(originalUrl.last().toString())) - } - break - } - - else -> { - val originalUrl = input.substring(urlIndex, urlEndIndex) - val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() - buffer.add(Link(url = cleanedUrl, label = cleanedUrl)) - openIndex = if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1 + when (tagName) { + "mx-reply" -> { + openIndex = exitIndex + exitTag.length lastStartIndex = openIndex continue } } - } - // exit - if (lastStartIndex < input.length) { - buffer.add(Normal(input.substring(lastStartIndex))) + if (openIndex != foundIndex) { + builder.appendText(input.substring(openIndex, foundIndex)) + } + val tagContent = input.substring(closeIndex + 1, exitIndex) + openIndex = exitIndex + exitTag.length + lastStartIndex = openIndex + + when (tagName) { + "a" -> { + val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">") + if (findHrefUrl.startsWith("https://matrix.to/#/@")) { + val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\"")) + builder.appendPerson(userId, "@${tagContent.removePrefix("@")}") + if (input.getOrNull(openIndex) == ':') { + openIndex++ + lastStartIndex = openIndex + } + } else { + builder.appendLink(findHrefUrl, label = tagContent) + } + } + + "b" -> builder.appendBold(tagContent) + "strong" -> builder.appendBold(tagContent) + "i" -> builder.appendItalic(tagContent) + "em" -> builder.appendItalic(tagContent) + + else -> builder.appendText(tagContent) + } } - break } - } - RichText(buffer) - }.onFailure { - it.printStackTrace() - println(input) - }.getOrThrow() - } + } else { + // check for urls + val urlIndex = input.indexOf("http", startIndex = openIndex) + if (urlIndex != -1) { + if (lastStartIndex != urlIndex) { + builder.appendText(input.substring(lastStartIndex, urlIndex)) + } + val originalUrl = input.substring(urlIndex) + val urlEndIndex = originalUrl.indexOfFirst { it == '\n' || it == ' ' } + val urlContinuesUntilEnd = urlEndIndex == -1 + when { + urlContinuesUntilEnd -> { + val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() + builder.appendLink(url = cleanedUrl, label = null) + if (cleanedUrl != originalUrl) { + builder.appendText(originalUrl.last().toString()) + } + break + } + + else -> { + val originalUrl = input.substring(urlIndex, urlEndIndex) + val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() + builder.appendLink(url = cleanedUrl, label = null) + openIndex = if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1 + lastStartIndex = openIndex + continue + } + } + } + + // exit + if (lastStartIndex < input.length) { + builder.appendText(input.substring(lastStartIndex)) + } + break + } + } + return RichText(builder.build()) + } } private fun String.removeHtmlEntities() = this.replace(""", "\"").replace("'", "'") -private fun String.dropTextFallback() = this.lines().dropWhile { it.startsWith("> ") || it.isEmpty() }.joinToString("\n") +private fun String.dropTextFallback() = this.lines() + .dropWhile { it.startsWith("> ") || it.isEmpty() } + .joinToString(separator = "\n") private fun String.bestGuessStripTrailingUrlChar(): String { val last = this.last() @@ -150,4 +148,50 @@ private fun String.bestGuessStripTrailingUrlChar(): String { } else { this } +} + +private class PartBuilder { + + private var normalBuffer = StringBuilder() + + private val parts = mutableSetOf() + + fun appendText(value: String) { + normalBuffer.append(value.cleanFirstTextLine()) + } + + fun appendItalic(value: String) { + flushNormalBuffer() + parts.add(Italic(value.cleanFirstTextLine())) + } + + fun appendBold(value: String) { + flushNormalBuffer() + parts.add(Bold(value.cleanFirstTextLine())) + } + + private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this + + fun appendPerson(userId: UserId, displayName: String) { + flushNormalBuffer() + parts.add(Person(userId, displayName)) + } + + fun appendLink(url: String, label: String?) { + flushNormalBuffer() + parts.add(Link(url, label ?: url)) + } + + fun build(): Set { + flushNormalBuffer() + return parts + } + + private fun flushNormalBuffer() { + if (normalBuffer.isNotEmpty()) { + parts.add(Normal(normalBuffer.toString())) + normalBuffer.clear() + } + } + } \ No newline at end of file diff --git a/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt b/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt index 22f122d..ac80b1f 100644 --- a/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt +++ b/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt @@ -20,7 +20,7 @@ class RichMessageParserTest { @Test fun `skips p tags`() = runParserTest( input = "Hello world!

foo bar

after paragraph", - expected = RichText(setOf(Normal("Hello world! "), Normal("foo bar"), Normal(" after paragraph"))) + expected = RichText(setOf(Normal("Hello world! foo bar after paragraph"))) ) @Test @@ -66,7 +66,7 @@ class RichMessageParserTest { @Test fun `replaces br tags`() = runParserTest( input = "Hello world!
next line
another line", - expected = RichText(setOf(Normal("Hello world!"), Normal("\n"), Normal("next line"), Normal("\n"), Normal("another line"))) + expected = RichText(setOf(Normal("Hello world!\nnext line\nanother line"))) ) @Test