diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/HtmlParser.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/HtmlParser.kt index 3190147..4c3f7b3 100644 --- a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/HtmlParser.kt +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/HtmlParser.kt @@ -5,16 +5,19 @@ import app.dapk.st.matrix.common.UserId private const val TAG_OPEN = '<' private const val TAG_CLOSE = '>' private const val NO_RESULT_FOUND = -1 -private const val LI_VALUE_CAPTURE = "value=\"" +private val SKIPPED_TAGS = setOf("mx-reply") internal class HtmlParser { + fun test(startingFrom: Int, input: String): Int { + return input.indexOf(TAG_OPEN, startingFrom) + } + fun parseHtmlTags(input: String, searchIndex: Int, builder: PartBuilder, nestingLevel: Int = 0): SearchIndex = input.findTag( fromIndex = searchIndex, onInvalidTag = { builder.appendText(input[it].toString()) }, onTag = { tagOpen, tagClose -> - val wholeTag = input.substring(tagOpen, tagClose + 1) - val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' }) + val (wholeTag, tagName) = parseTag(input, tagOpen, tagClose) when { tagName.startsWith('@') -> { @@ -29,31 +32,40 @@ internal class HtmlParser { tagClose.next() } - else -> { - val exitTag = "" - val exitIndex = input.indexOf(exitTag, startIndex = tagClose) - val exitTagCloseIndex = exitIndex + exitTag.length - if (exitIndex == END_SEARCH) { - builder.appendText(input[searchIndex].toString()) - searchIndex.next() - } else { - when (tagName) { - "mx-reply" -> { - exitTagCloseIndex - } - - else -> { - appendTextBeforeTag(searchIndex, tagOpen, builder, input) - val tagContent = input.substring(tagClose + 1, exitIndex) - handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex, nestingLevel) - } - } - } - } + else -> parseTagWithContent(tagName, input, tagClose, builder, searchIndex, tagOpen, wholeTag, nestingLevel) } } ) + private fun parseTagWithContent( + tagName: String, + input: String, + tagClose: Int, + builder: PartBuilder, + searchIndex: Int, + tagOpen: Int, + wholeTag: String, + nestingLevel: Int + ): Int { + val exitTag = "" + val exitIndex = input.indexOf(exitTag, startIndex = tagClose) + val exitTagCloseIndex = exitIndex + exitTag.length + return when { + exitIndex == NO_RESULT_FOUND -> { + builder.appendText(input[searchIndex].toString()) + searchIndex.next() + } + + SKIPPED_TAGS.contains(tagName) -> exitTagCloseIndex + + else -> { + appendTextBeforeTag(searchIndex, tagOpen, builder, input) + val tagContent = input.substring(tagClose + 1, exitIndex) + handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex, nestingLevel) + } + } + } + private fun handleTagWithContent( input: String, tagName: String, @@ -64,14 +76,23 @@ internal class HtmlParser { nestingLevel: Int, ) = when (tagName) { "a" -> { - val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">") - if (findHrefUrl.startsWith("https://matrix.to/#/@")) { - val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\"")) - builder.appendPerson(userId, "@${tagContent.removePrefix("@")}") - ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex) - } else { - builder.appendLink(findHrefUrl, label = tagContent) - exitTagCloseIndex + val findHrefUrl = wholeTag.findTagAttribute("href") + when { + findHrefUrl == null -> { + builder.appendText(tagContent) + exitTagCloseIndex + } + + findHrefUrl.startsWith("https://matrix.to/#/@") -> { + val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\"")) + builder.appendPerson(userId, "@${tagContent.removePrefix("@")}") + ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex) + } + + else -> { + builder.appendLink(findHrefUrl, label = tagContent) + exitTagCloseIndex + } } } @@ -83,7 +104,7 @@ internal class HtmlParser { "p" -> { if (tagContent.isNotEmpty() && nestingLevel < 2) { var lastIndex = 0 - iterateIndex(start = 0) { searchIndex -> + iterateSearchIndex { searchIndex -> lastIndex = searchIndex parseHtmlTags(tagContent, searchIndex, builder, nestingLevel = nestingLevel + 1) } @@ -147,23 +168,13 @@ internal class HtmlParser { } private fun parseList(parentTag: String, parentContent: String, builder: PartBuilder) { - var index = 1 - iterateIndex(start = 0) { nextIndex -> + var listIndex = 1 + iterateSearchIndex { nextIndex -> singleTagParser(parentContent, "li", nextIndex, builder) { wholeTag, tagContent -> val content = when (parentTag) { "ol" -> { - index = wholeTag.indexOf(LI_VALUE_CAPTURE).let { - if (it == -1) { - index - } else { - val start = it + LI_VALUE_CAPTURE.length - wholeTag.substring(start).substringBefore('\"').toInt() - } - } - - "$index. $tagContent".also { - index++ - } + listIndex = wholeTag.findTagAttribute("value")?.toInt() ?: listIndex + "$listIndex. $tagContent".also { listIndex++ } } else -> "- $tagContent" @@ -179,8 +190,7 @@ internal class HtmlParser { fromIndex = searchIndex, onInvalidTag = { builder.appendText(content[it].toString()) }, onTag = { tagOpen, tagClose -> - val wholeTag = content.substring(tagOpen, tagClose + 1) - val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' }) + val (wholeTag, tagName) = parseTag(content, tagOpen, tagClose) if (tagName == wantedTagName) { val exitTag = "" @@ -201,16 +211,21 @@ internal class HtmlParser { ) } - fun test(startingFrom: Int, intput: String): Int { - return intput.indexOf(TAG_OPEN, startingFrom) + private fun parseTag(input: String, tagOpen: Int, tagClose: Int): Pair { + val wholeTag = input.substring(tagOpen, tagClose + 1) + val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' }) + return wholeTag to tagName } +} - private fun iterateIndex(start: SearchIndex, action: (SearchIndex) -> SearchIndex): SearchIndex { - var nextIndex = start - while (nextIndex != END_SEARCH) { - nextIndex = action(nextIndex) +private fun String.findTagAttribute(name: String): String? { + val attribute = "$name=" + return this.indexOf(attribute).let { + if (it == NO_RESULT_FOUND) { + null + } else { + val start = it + attribute.length + this.substring(start).substringAfter('\"').substringBefore('\"') } - return nextIndex } - -} \ No newline at end of file +} diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichMessageParser.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichMessageParser.kt index ac537da..cb26b5b 100644 --- a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichMessageParser.kt +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichMessageParser.kt @@ -14,9 +14,11 @@ class RichMessageParser { val input = source .removeHtmlEntities() .dropTextFallback() - val builder = PartBuilder() - var nextIndex = 0 - while (nextIndex != END_SEARCH) { + return RichText(collectRichText(input).build()) + } + + private fun collectRichText(input: String) = PartBuilder().also { builder -> + iterateSearchIndex { nextIndex -> val htmlStart = htmlParser.test(nextIndex, input) val urlStart = urlParser.test(nextIndex, input) @@ -37,9 +39,8 @@ class RichMessageParser { if (hasReachedEnd && hasUnprocessedText(firstResult, secondResult, input)) { builder.appendText(input.substring(nextIndex)) } - nextIndex = if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult) + if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult) } - return RichText(builder.build()) } private fun hasUnprocessedText(htmlResult: Int, urlResult: Int, input: String) = htmlResult < input.length && urlResult < input.length @@ -60,3 +61,11 @@ private fun String.removeHtmlEntities() = this.replace(""", "\"").replace(" private fun String.dropTextFallback() = this.lines() .dropWhile { it.startsWith("> ") || it.isEmpty() } .joinToString(separator = "\n") + +internal fun iterateSearchIndex(action: (SearchIndex) -> SearchIndex): SearchIndex { + var nextIndex = 0 + while (nextIndex != END_SEARCH) { + nextIndex = action(nextIndex) + } + return nextIndex +} \ No newline at end of file diff --git a/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt b/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt index 0b19262..2c432c0 100644 --- a/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt +++ b/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt @@ -50,7 +50,7 @@ class RichMessageParserTest { @Test fun `replaces matrixdotto with person`() = runParserTest( - input = """Hello a-name: world""", expected = RichText(setOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(" world"))) )