From c9a29b0b25ba9ab9f0fd426c1c549f50ac795cdd Mon Sep 17 00:00:00 2001 From: Adam Brown Date: Sat, 5 Nov 2022 09:30:02 +0000 Subject: [PATCH] add support for more types of html tags and nesting --- .../AccumulatingRichTextContentParser.kt | 85 ++++++ .../sync/internal/sync/message/HtmlParser.kt | 248 ------------------ .../sync/internal/sync/message/ParserScope.kt | 13 - .../sync/message/RichMessageParser.kt | 71 ----- .../sync/message/RichTextMessageParser.kt | 34 +++ ...{PartBuilder.kt => RichTextPartBuilder.kt} | 34 +-- .../sync/message/html/HtmlProcessor.kt | 27 ++ .../sync/message/html/ListAccumulator.kt | 23 ++ .../message/html/RichTextHtmlTagParser.kt | 95 +++++++ .../internal/sync/message/html/TagCaptor.kt | 78 ++++++ .../sync/message/{ => url}/UrlParser.kt | 30 +-- ...erTest.kt => RichTextMessageParserTest.kt} | 100 ++++--- 12 files changed, 417 insertions(+), 421 deletions(-) create mode 100644 matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/AccumulatingRichTextContentParser.kt delete mode 100644 matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/HtmlParser.kt delete mode 100644 matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/ParserScope.kt delete mode 100644 matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichMessageParser.kt create mode 100644 matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichTextMessageParser.kt rename matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/{PartBuilder.kt => RichTextPartBuilder.kt} (67%) create mode 100644 matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/HtmlProcessor.kt create mode 100644 matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/ListAccumulator.kt create mode 100644 matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/RichTextHtmlTagParser.kt create mode 100644 matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/TagCaptor.kt rename matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/{ => url}/UrlParser.kt (59%) rename matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/{RichMessageParserTest.kt => RichTextMessageParserTest.kt} (91%) diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/AccumulatingRichTextContentParser.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/AccumulatingRichTextContentParser.kt new file mode 100644 index 0000000..17afd8f --- /dev/null +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/AccumulatingRichTextContentParser.kt @@ -0,0 +1,85 @@ +package app.dapk.st.matrix.sync.internal.sync.message + +import app.dapk.st.matrix.sync.internal.sync.message.html.HtmlProcessor +import app.dapk.st.matrix.sync.internal.sync.message.url.UrlParser + +private const val MAX_NESTING_LIMIT = 20 + +class AccumulatingRichTextContentParser : AccumulatingContentParser { + + private val urlParser = UrlParser() + private val tagProcessor = HtmlProcessor() + + override fun parse(input: String, accumulator: ContentAccumulator, nestingLevel: Int): ContentAccumulator { + if (nestingLevel >= MAX_NESTING_LIMIT) { + accumulator.appendText(input) + } else { + iterate { index -> + process( + input, + index, + processTag = { + prependTextBeforeCapture(input, index, it, accumulator) + tagProcessor.process(input, it, accumulator, nestingLevel, nestedParser = this) + }, + processUrl = { + prependTextBeforeCapture(input, index, it, accumulator) + urlParser.parseUrl(input, it, accumulator) + } + ).also { + if (it == -1) { + appendRemainingText(index, input, accumulator) + } + } + } + } + return accumulator + } + + private inline fun iterate(action: (Int) -> Int) { + var result = 0 + while (result != -1) { + result = action(result) + } + } + + private fun process(input: String, searchIndex: Int, processTag: (Int) -> Int, processUrl: (Int) -> Int): Int { + val tagOpen = input.indexOf('<', startIndex = searchIndex) + val httpOpen = input.indexOf("http", startIndex = searchIndex) + return selectProcessor( + tagOpen, + httpOpen, + processTag = { processTag(tagOpen) }, + processUrl = { processUrl(httpOpen) } + ) + } + + private inline fun selectProcessor(tagOpen: Int, httpOpen: Int, processTag: () -> Int, processUrl: () -> Int) = when { + tagOpen == -1 && httpOpen == -1 -> -1 + tagOpen != -1 && httpOpen == -1 -> processTag() + tagOpen == -1 && httpOpen != -1 -> processUrl() + tagOpen == httpOpen -> { + // favour tags as urls can existing within tags + processTag() + } + + else -> { + when (tagOpen < httpOpen) { + true -> processTag() + false -> processUrl() + } + } + } + + private fun prependTextBeforeCapture(input: String, index: Int, captureIndex: Int, accumulator: ContentAccumulator) { + if (index < captureIndex) { + accumulator.appendText(input.substring(index, captureIndex)) + } + } + + private fun appendRemainingText(index: Int, input: String, accumulator: ContentAccumulator) { + if (index < input.length) { + accumulator.appendText(input.substring(index, input.length)) + } + } +} diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/HtmlParser.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/HtmlParser.kt deleted file mode 100644 index e43ab24..0000000 --- a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/HtmlParser.kt +++ /dev/null @@ -1,248 +0,0 @@ -package app.dapk.st.matrix.sync.internal.sync.message - -import app.dapk.st.matrix.common.UserId - -private const val TAG_OPEN = '<' -private const val TAG_CLOSE = '>' -private const val NO_RESULT_FOUND = -1 -private val SKIPPED_TAGS = setOf("mx-reply") - -internal class HtmlParser { - - fun test(startingFrom: Int, input: String) = input.indexOf(TAG_OPEN, startingFrom) - - fun parseHtmlTags(input: String, searchIndex: Int, builder: PartBuilder, nestingLevel: Int = 0): SearchIndex = input.findTag( - fromIndex = searchIndex, - onInvalidTag = { builder.appendText(input[it].toString()) }, - onTag = { tagOpen, tagClose -> - val (wholeTag, tagName) = parseTag(input, tagOpen, tagClose) - - when { - tagName.startsWith('@') -> { - appendTextBeforeTag(searchIndex, tagOpen, builder, input) - builder.appendPerson(UserId(tagName), tagName) - tagClose.next() - } - - tagName == "br" -> { - appendTextBeforeTag(searchIndex, tagOpen, builder, input) - builder.appendNewline() - tagClose.next() - } - - else -> parseTagWithContent(input, tagName, tagClose, searchIndex, tagOpen, wholeTag, builder, nestingLevel) - } - } - ) - - private fun parseTagWithContent( - input: String, - tagName: String, - tagClose: Int, - searchIndex: Int, - tagOpen: Int, - wholeTag: String, - builder: PartBuilder, - nestingLevel: Int - ): Int { - val exitTag = "" - val exitIndex = input.indexOf(exitTag, startIndex = tagClose) - val exitTagCloseIndex = exitIndex + exitTag.length - return when { - exitIndex == NO_RESULT_FOUND -> { - builder.appendText(input[searchIndex].toString()) - searchIndex.next() - } - - SKIPPED_TAGS.contains(tagName) -> exitTagCloseIndex - - else -> { - appendTextBeforeTag(searchIndex, tagOpen, builder, input) - val tagContent = input.substring(tagClose + 1, exitIndex) - handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex, nestingLevel) - } - } - } - - private fun handleTagWithContent( - input: String, - tagName: String, - wholeTag: String, - builder: PartBuilder, - tagContent: String, - exitTagCloseIndex: Int, - nestingLevel: Int, - ) = when (tagName) { - "a" -> { - val findHrefUrl = wholeTag.findTagAttribute("href") - when { - findHrefUrl == null -> { - builder.appendText(tagContent) - exitTagCloseIndex - } - - findHrefUrl.startsWith("https://matrix.to/#/@") -> { - val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\"")) - builder.appendPerson(userId, "@${tagContent.removePrefix("@")}") - ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex) - } - - else -> { - builder.appendLink(findHrefUrl, label = tagContent) - exitTagCloseIndex - } - } - } - - "b", "strong" -> { - builder.appendBold(tagContent) - exitTagCloseIndex - } - - "blockquote" -> { - if (tagContent.isNotEmpty() && nestingLevel < 3) { - var lastIndex = 0 - val trimmedTagContent = tagContent.trim() - builder.appendText("> ") - iterateSearchIndex { searchIndex -> - lastIndex = searchIndex - parseHtmlTags(trimmedTagContent, searchIndex, builder, nestingLevel = nestingLevel + 1) - } - - if (lastIndex < trimmedTagContent.length) { - builder.appendText(trimmedTagContent.substring(lastIndex)) - } - } - - builder.appendNewline() - exitTagCloseIndex - } - - "p" -> { - if (tagContent.isNotEmpty() && nestingLevel < 2) { - var lastIndex = 0 - iterateSearchIndex { searchIndex -> - lastIndex = searchIndex - parseHtmlTags(tagContent, searchIndex, builder, nestingLevel = nestingLevel + 1) - } - - if (lastIndex < tagContent.length) { - builder.appendText(tagContent.substring(lastIndex)) - } - } - - builder.appendNewline() - exitTagCloseIndex - } - - "ul", "ol" -> { - parseList(tagName, tagContent, builder) - exitTagCloseIndex - } - - "h1", "h2", "h3", "h4", "h5" -> { - builder.appendBold(tagContent.trim()) - builder.appendNewline() - exitTagCloseIndex - } - - "i", "em" -> { - builder.appendItalic(tagContent) - exitTagCloseIndex - } - - else -> { - builder.appendText(tagContent) - exitTagCloseIndex - } - } - - private fun ignoreMatrixColonMentionSuffix(input: String, exitTagCloseIndex: Int) = if (input.getOrNull(exitTagCloseIndex) == ':') { - exitTagCloseIndex.next() - } else { - exitTagCloseIndex - } - - private fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String) { - if (searchIndex != tagOpen) { - builder.appendText(input.substring(searchIndex, tagOpen)) - } - } - - private fun String.findTag(fromIndex: Int, onInvalidTag: (Int) -> Unit, onTag: (Int, Int) -> Int): Int { - return when (val foundIndex = this.indexOf(TAG_OPEN, startIndex = fromIndex)) { - NO_RESULT_FOUND -> END_SEARCH - - else -> when (val closeIndex = indexOf(TAG_CLOSE, startIndex = foundIndex)) { - NO_RESULT_FOUND -> { - onInvalidTag(fromIndex) - fromIndex + 1 - } - - else -> onTag(foundIndex, closeIndex) - } - } - } - - private fun parseList(parentTag: String, parentContent: String, builder: PartBuilder) { - var listIndex = 1 - iterateSearchIndex { nextIndex -> - singleTagParser(parentContent, "li", nextIndex, builder) { wholeTag, tagContent -> - val content = when (parentTag) { - "ol" -> { - listIndex = wholeTag.findTagAttribute("value")?.toInt() ?: listIndex - "$listIndex. $tagContent".also { listIndex++ } - } - - else -> "- $tagContent" - } - builder.appendText(content) - builder.appendNewline() - } - } - } - - private fun singleTagParser(content: String, wantedTagName: String, searchIndex: Int, builder: PartBuilder, onTag: (String, String) -> Unit): SearchIndex { - return content.findTag( - fromIndex = searchIndex, - onInvalidTag = { builder.appendText(content[it].toString()) }, - onTag = { tagOpen, tagClose -> - val (wholeTag, tagName) = parseTag(content, tagOpen, tagClose) - - if (tagName == wantedTagName) { - val exitTag = "" - val exitIndex = content.indexOf(exitTag, startIndex = tagClose) - val exitTagCloseIndex = exitIndex + exitTag.length - if (exitIndex == END_SEARCH) { - builder.appendText(content[searchIndex].toString()) - searchIndex.next() - } else { - val tagContent = content.substring(tagClose + 1, exitIndex) - onTag(wholeTag, tagContent) - exitTagCloseIndex - } - } else { - END_SEARCH - } - } - ) - } - - private fun parseTag(input: String, tagOpen: Int, tagClose: Int): Pair { - val wholeTag = input.substring(tagOpen, tagClose + 1) - val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' }) - return wholeTag to tagName - } -} - -private fun String.findTagAttribute(name: String): String? { - val attribute = "$name=" - return this.indexOf(attribute).let { - if (it == NO_RESULT_FOUND) { - null - } else { - val start = it + attribute.length - this.substring(start).substringAfter('\"').substringBefore('\"') - } - } -} diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/ParserScope.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/ParserScope.kt deleted file mode 100644 index 8674f47..0000000 --- a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/ParserScope.kt +++ /dev/null @@ -1,13 +0,0 @@ -package app.dapk.st.matrix.sync.internal.sync.message - -internal typealias SearchIndex = Int - -internal fun Int.next() = this + 1 - - -internal interface ParserScope { - fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String) - - fun SearchIndex.next(): SearchIndex - -} \ No newline at end of file diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichMessageParser.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichMessageParser.kt deleted file mode 100644 index 97960b2..0000000 --- a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichMessageParser.kt +++ /dev/null @@ -1,71 +0,0 @@ -package app.dapk.st.matrix.sync.internal.sync.message - -import app.dapk.st.matrix.common.RichText -import kotlin.math.max - -internal const val END_SEARCH = -1 - -class RichMessageParser { - - private val htmlParser = HtmlParser() - private val urlParser = UrlParser() - - fun parse(source: String): RichText { - val input = source - .removeHtmlEntities() - .dropTextFallback() - return RichText(collectRichText(input).build()) - } - - private fun collectRichText(input: String) = PartBuilder().also { builder -> - iterateSearchIndex { nextIndex -> - val htmlStart = htmlParser.test(nextIndex, input) - val urlStart = urlParser.test(nextIndex, input) - - val firstResult = if (htmlStart < urlStart) { - htmlParser.parseHtmlTags(input, nextIndex, builder) - } else { - urlParser.parseUrl(input, nextIndex, builder) - } - - val secondStartIndex = findUrlStartIndex(firstResult, nextIndex) - val secondResult = if (htmlStart < urlStart) { - urlParser.parseUrl(input, secondStartIndex, builder) - } else { - htmlParser.parseHtmlTags(input, secondStartIndex, builder) - } - - val hasReachedEnd = hasReachedEnd(firstResult, secondResult, input) - if (hasReachedEnd && hasUnprocessedText(firstResult, secondResult, input)) { - builder.appendText(input.substring(nextIndex)) - } - if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult) - } - } - - private fun hasUnprocessedText(htmlResult: Int, urlResult: Int, input: String) = htmlResult < input.length && urlResult < input.length - - private fun findUrlStartIndex(htmlResult: Int, searchIndex: Int) = when { - htmlResult == END_SEARCH && searchIndex == 0 -> 0 - htmlResult == END_SEARCH -> searchIndex - else -> htmlResult - } - - private fun hasReachedEnd(htmlResult: SearchIndex, urlResult: Int, input: String) = - (htmlResult == END_SEARCH && urlResult == END_SEARCH) || (htmlResult >= input.length || urlResult >= input.length) - -} - -private fun String.removeHtmlEntities() = this.replace(""", "\"").replace("'", "'").replace("'", "'").replace("&", "&") - -private fun String.dropTextFallback() = this.lines() - .dropWhile { it.startsWith("> ") || it.isEmpty() } - .joinToString(separator = "\n") - -internal fun iterateSearchIndex(action: (SearchIndex) -> SearchIndex): SearchIndex { - var nextIndex = 0 - while (nextIndex != END_SEARCH) { - nextIndex = action(nextIndex) - } - return nextIndex -} \ No newline at end of file diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichTextMessageParser.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichTextMessageParser.kt new file mode 100644 index 0000000..45719e3 --- /dev/null +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichTextMessageParser.kt @@ -0,0 +1,34 @@ +package app.dapk.st.matrix.sync.internal.sync.message + +import app.dapk.st.matrix.common.RichText + +fun interface NestedParser { + fun parse(content: String, accumulator: ContentAccumulator) +} + +fun interface TagParser { + fun parse(tagName: String, attributes: Map, content: String, accumulator: ContentAccumulator, parser: NestedParser) +} + +fun interface AccumulatingContentParser { + fun parse(input: String, accumulator: ContentAccumulator, nestingLevel: Int): ContentAccumulator +} + +class RichMessageParser( + private val accumulatingParser: AccumulatingContentParser = AccumulatingRichTextContentParser() +) { + + fun parse(source: String): RichText { + val input = source + .removeHtmlEntities() + .dropTextFallback() + return RichText(accumulatingParser.parse(input, RichTextPartBuilder(), nestingLevel = 0).build()) + } + +} + +private fun String.removeHtmlEntities() = this.replace(""", "\"").replace("'", "'").replace("'", "'").replace("&", "&") + +private fun String.dropTextFallback() = this.lines() + .dropWhile { it.startsWith("> ") || it.isEmpty() } + .joinToString(separator = "\n") diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/PartBuilder.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichTextPartBuilder.kt similarity index 67% rename from matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/PartBuilder.kt rename to matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichTextPartBuilder.kt index e71a548..c7499e5 100644 --- a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/PartBuilder.kt +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/RichTextPartBuilder.kt @@ -3,41 +3,50 @@ package app.dapk.st.matrix.sync.internal.sync.message import app.dapk.st.matrix.common.RichText import app.dapk.st.matrix.common.UserId -internal class PartBuilder { +interface ContentAccumulator { + fun appendText(value: String) + fun appendItalic(value: String) + fun appendBold(value: String) + fun appendPerson(userId: UserId, displayName: String) + fun appendLink(url: String, label: String?) + fun build(): List +} + +class RichTextPartBuilder : ContentAccumulator { private var normalBuffer = StringBuilder() private val parts = mutableListOf() - fun appendText(value: String) { + override fun appendText(value: String) { normalBuffer.append(value.cleanFirstTextLine()) } - fun appendItalic(value: String) { + override fun appendItalic(value: String) { flushNormalBuffer() parts.add(RichText.Part.Italic(value.cleanFirstTextLine())) } - fun appendBold(value: String) { + override fun appendBold(value: String) { flushNormalBuffer() parts.add(RichText.Part.Bold(value.cleanFirstTextLine())) } private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this - fun appendPerson(userId: UserId, displayName: String) { + override fun appendPerson(userId: UserId, displayName: String) { flushNormalBuffer() parts.add(RichText.Part.Person(userId, displayName)) } - fun appendLink(url: String, label: String?) { + override fun appendLink(url: String, label: String?) { flushNormalBuffer() parts.add(RichText.Part.Link(url, label ?: url)) } - fun build(): List { + override fun build(): List { flushNormalBuffer() - return when(parts.isEmpty()) { + return when (parts.isEmpty()) { true -> parts else -> { val last = parts.last() @@ -59,16 +68,9 @@ internal class PartBuilder { normalBuffer.clear() } } - } -internal fun PartBuilder.appendTextBeforeTag(previousIndex: Int, tagOpenIndex: Int, input: String) { - if (previousIndex != tagOpenIndex) { - this.appendText(input.substring(previousIndex, tagOpenIndex)) - } -} - -internal fun PartBuilder.appendNewline() { +internal fun ContentAccumulator.appendNewline() { this.appendText("\n") } diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/HtmlProcessor.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/HtmlProcessor.kt new file mode 100644 index 0000000..98856d1 --- /dev/null +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/HtmlProcessor.kt @@ -0,0 +1,27 @@ +package app.dapk.st.matrix.sync.internal.sync.message.html + +import app.dapk.st.matrix.sync.internal.sync.message.AccumulatingContentParser +import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator + +class HtmlProcessor { + + private val tagCaptor = TagCaptor() + private val htmlTagParser = RichTextHtmlTagParser() + + fun process(input: String, tagOpen: Int, partBuilder: ContentAccumulator, nestingLevel: Int, nestedParser: AccumulatingContentParser): Int { + val afterTagCaptureIndex = tagCaptor.tagCapture(input, tagOpen) { tagName, attributes, tagContent -> + htmlTagParser.parse(tagName, attributes, tagContent, partBuilder) { nestedContent, accumulator -> + nestedParser.parse(nestedContent, accumulator, nestingLevel + 1) + } + } + return when (afterTagCaptureIndex) { + -1 -> { + partBuilder.appendText(input[tagOpen].toString()) + tagOpen + 1 + } + + else -> afterTagCaptureIndex + } + } + +} \ No newline at end of file diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/ListAccumulator.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/ListAccumulator.kt new file mode 100644 index 0000000..4d6dbc1 --- /dev/null +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/ListAccumulator.kt @@ -0,0 +1,23 @@ +package app.dapk.st.matrix.sync.internal.sync.message.html + +import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator + +internal interface ListAccumulator { + fun appendLinePrefix(index: Int?) +} + +internal class OrderedListAccumulator(delegate: ContentAccumulator) : ContentAccumulator by delegate, ListAccumulator { + + private var currentIndex = 1 + + override fun appendLinePrefix(index: Int?) { + currentIndex = index ?: currentIndex + appendText("$currentIndex. ") + currentIndex++ + } +} + +internal class UnorderedListAccumulator(delegate: ContentAccumulator) : ContentAccumulator by delegate, ListAccumulator { + override fun appendLinePrefix(index: Int?) = appendText("- ") +} + diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/RichTextHtmlTagParser.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/RichTextHtmlTagParser.kt new file mode 100644 index 0000000..a3e3366 --- /dev/null +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/RichTextHtmlTagParser.kt @@ -0,0 +1,95 @@ +package app.dapk.st.matrix.sync.internal.sync.message.html + +import app.dapk.st.matrix.common.UserId +import app.dapk.st.matrix.sync.internal.sync.message.* + +class RichTextHtmlTagParser : TagParser { + + override fun parse( + tagName: String, + attributes: Map, + content: String, + accumulator: ContentAccumulator, + parser: NestedParser + ) { + when { + tagName.startsWith('@') -> { + accumulator.appendPerson(UserId(tagName), tagName) + } + + else -> when (tagName) { + "br" -> { + accumulator.appendNewline() + } + + "a" -> { + attributes["href"]?.let { url -> + when { + url.startsWith("https://matrix.to/#/@") -> { + val userId = UserId(url.substringAfter("https://matrix.to/#/").substringBeforeLast("\"")) + accumulator.appendPerson(userId, "@${content.removePrefix("@")}") + } + + else -> accumulator.appendLink(url, content) + + } + } ?: accumulator.appendText(content) + } + + "p" -> { + parser.parse(content.trim(), accumulator) + accumulator.appendNewline() + } + + "blockquote" -> { + accumulator.appendText("> ") + parser.parse(content.trim(), accumulator) + } + + "strong", "b" -> { + accumulator.appendBold(content) + } + + "em", "i" -> { + accumulator.appendItalic(content) + } + + "h1", "h2", "h3", "h4", "h5" -> { + accumulator.appendBold(content) + accumulator.appendNewline() + } + + "ul", "ol" -> { + when (tagName) { + "ol" -> parser.parse(content, OrderedListAccumulator(accumulator)) + "ul" -> parser.parse(content, UnorderedListAccumulator(accumulator)) + } + } + + "li" -> { + (accumulator as ListAccumulator).appendLinePrefix(attributes["value"]?.toInt()) + + val nestedList = when { + content.contains("
    ") -> "
      " + content.contains("
        ") -> "
          " + else -> null + } + + if (nestedList == null) { + parser.parse(content.trim(), accumulator) + accumulator.appendNewline() + } else { + val firstItemInNested = content.substringBefore(nestedList) + parser.parse(firstItemInNested.trim(), accumulator) + accumulator.appendNewline() + parser.parse(content.substring(content.indexOf(nestedList)).trim(), accumulator) + } + } + + else -> { + // skip tag + } + } + } + } +} \ No newline at end of file diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/TagCaptor.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/TagCaptor.kt new file mode 100644 index 0000000..37b756a --- /dev/null +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/html/TagCaptor.kt @@ -0,0 +1,78 @@ +package app.dapk.st.matrix.sync.internal.sync.message.html + +class TagCaptor { + + fun tagCapture(input: String, startIndex: Int, tagFactory: (String, Map, String) -> Unit): Int { + return when (val closeIndex = input.indexOf('>', startIndex = startIndex)) { + -1 -> -1 + else -> { + val fullTag = input.substring(startIndex, closeIndex + 1) + val tagName = input.substring(startIndex + 1, closeIndex) + when { + fullTag.isExitlessTag() -> { + val trim = fullTag.removeSurrounding("<", ">").trim() + tagFactory(trim, emptyMap(), "") + closeIndex + 1 + } + + fullTag.isSelfClosing() -> { + val trim = fullTag.removeSuffix("/>").removePrefix("<").trim() + tagFactory(trim, emptyMap(), "") + closeIndex + 1 + } + + else -> { + val exitTag = if (tagName.contains(' ')) { + "" + } else { + "" + } + + val exitIndex = input.findTagClose(tagName, exitTag, searchIndex = closeIndex + 1) + if (exitIndex == -1) { + -1 + } else { + val exitTagCloseIndex = exitIndex + exitTag.length + if (tagName.contains(' ')) { + val parts = tagName.split(' ') + val attributes = parts.drop(1).associate { + val (key, value) = it.split("=") + key to value.removeSurrounding("\"") + } + tagFactory(parts.first(), attributes, input.substring(closeIndex + 1, exitIndex)) + } else { + tagFactory(tagName, emptyMap(), input.substring(closeIndex + 1, exitIndex)) + } + exitTagCloseIndex + } + } + } + } + } + } + + private fun String.findTagClose(tagName: String, exitTag: String, searchIndex: Int, open: Int = 1): Int { + val exitIndex = this.indexOf(exitTag, startIndex = searchIndex) + val nextOpen = this.indexOf("<$tagName", startIndex = searchIndex) + return when { + open == 1 && (nextOpen == -1 || exitIndex < nextOpen) -> exitIndex + open > 8 || open < 1 -> { + // something has gone wrong, lets exit + -1 + } + + exitIndex == -1 -> -1 + nextOpen == -1 || nextOpen > exitIndex -> this.findTagClose(tagName, exitTag, exitIndex + 1, open - 1) + + nextOpen < exitIndex -> { + this.findTagClose(tagName, exitTag, nextOpen + 1, open + 1) + } + + else -> -1 + } + } +} + +private fun String.isExitlessTag() = this == "
          " || (this.startsWith("<@") && this.endsWith('>')) + +private fun String.isSelfClosing() = this.endsWith("/>") diff --git a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/UrlParser.kt b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/url/UrlParser.kt similarity index 59% rename from matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/UrlParser.kt rename to matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/url/UrlParser.kt index 9366120..290e05e 100644 --- a/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/UrlParser.kt +++ b/matrix/services/sync/src/main/kotlin/app/dapk/st/matrix/sync/internal/sync/message/url/UrlParser.kt @@ -1,18 +1,14 @@ -package app.dapk.st.matrix.sync.internal.sync.message +package app.dapk.st.matrix.sync.internal.sync.message.url +import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator + +private const val END_SEARCH = -1 private const val INVALID_TRAILING_CHARS = ",.:;?<>" internal class UrlParser { - private fun String.hasLookAhead(current: Int, value: String): Boolean { - return length > current + value.length && this.substring(current, current + value.length) == value - } - - fun parseUrl(input: String, linkStartIndex: Int, builder: PartBuilder): Int { - val urlIndex = input.indexOf("http", startIndex = linkStartIndex) + fun parseUrl(input: String, urlIndex: Int, accumulator: ContentAccumulator): Int { return if (urlIndex == END_SEARCH) END_SEARCH else { - builder.appendTextBeforeTag(linkStartIndex, urlIndex, input) - val originalUrl = input.substring(urlIndex) var index = 0 val maybeUrl = originalUrl.takeWhile { @@ -25,29 +21,27 @@ internal class UrlParser { when { urlContinuesUntilEnd -> { val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() - builder.appendLink(url = cleanedUrl, label = null) + accumulator.appendLink(url = cleanedUrl, label = null) if (cleanedUrl != originalUrl) { - builder.appendText(originalUrl.last().toString()) + accumulator.appendText(originalUrl.last().toString()) } - input.length.next() + input.length + 1 } else -> { val originalUrl = input.substring(urlIndex, urlEndIndex) val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() - builder.appendLink(url = cleanedUrl, label = null) + accumulator.appendLink(url = cleanedUrl, label = null) if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1 } } } } - - fun test(startingFrom: Int, input: String): Int { - return input.indexOf("http", startingFrom) - } - } +private fun String.hasLookAhead(current: Int, value: String): Boolean { + return length > current + value.length && this.substring(current, current + value.length) == value +} private fun String.bestGuessStripTrailingUrlChar(): String { val last = this.last() diff --git a/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt b/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichTextMessageParserTest.kt similarity index 91% rename from matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt rename to matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichTextMessageParserTest.kt index 796dcfd..54c8369 100644 --- a/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichMessageParserTest.kt +++ b/matrix/services/sync/src/test/kotlin/app/dapk/st/matrix/sync/internal/sync/RichTextMessageParserTest.kt @@ -5,10 +5,9 @@ import app.dapk.st.matrix.common.RichText.Part.* import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser import fixture.aUserId import org.amshove.kluent.shouldBeEqualTo -import org.junit.Ignore import org.junit.Test -class RichMessageParserTest { +class RichTextMessageParserTest { private val parser = RichMessageParser() @@ -18,6 +17,34 @@ class RichMessageParserTest { expected = RichText(listOf(Normal("Hello world!"))) ) + @Test + fun `parses strong tags`() = runParserTest( + Case( + input = """hello world""", + expected = RichText( + listOf( + Normal("hello "), + Bold("wor"), + Normal("ld"), + ) + ) + ), + ) + + @Test + fun `parses em tags`() = runParserTest( + Case( + input = """hello world""", + expected = RichText( + listOf( + Normal("hello "), + Italic("wor"), + Normal("ld"), + ) + ) + ), + ) + @Test fun `parses p tags`() = runParserTest( input = "

          Hello world!

          foo bar

          after paragraph", @@ -63,7 +90,7 @@ class RichMessageParserTest { @Test fun `replaces matrixdotto with person`() = runParserTest( input = """Hello a-name: world""", - expected = RichText(listOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(" world"))) + expected = RichText(listOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(": world"))) ) @Test @@ -122,6 +149,21 @@ class RichMessageParserTest { ), ) + + @Test + fun `parses nested lists`() = runParserTest( + input = """ +
            +
          • first item +
              +
            • nested item
            • +
            +
          • +
          + """.trimIndent().lines().joinToString("") { it.trim() }, + expected = RichText(listOf(Normal("- first item\n- nested item"))) + ) + @Test fun `parses urls`() = runParserTest( Case( @@ -178,58 +220,6 @@ class RichMessageParserTest { expected = RichText(listOf(Normal(">> ><>> << more content"))) ) - @Test - fun `parses strong tags`() = runParserTest( - Case( - input = """hello world""", - expected = RichText( - listOf( - Normal("hello "), - Bold("wor"), - Normal("ld"), - ) - ) - ), - ) - - @Test - fun `parses em tags`() = runParserTest( - Case( - input = """hello world""", - expected = RichText( - listOf( - Normal("hello "), - Italic("wor"), - Normal("ld"), - ) - ) - ), - ) - - @Ignore // TODO - @Test - fun `parses nested tags`() = runParserTest( - Case( - input = """hello world""", - expected = RichText( - listOf( - Normal("hello "), - BoldItalic("wor"), - Normal("ld"), - ) - ) - ), - Case( - input = """www.google.com""", - expected = RichText( - listOf( - Link(url = "www.google.com", label = "www.google.com"), - Link(url = "www.bing.com", label = "www.bing.com"), - ) - ) - ) - ) - @Test fun `parses 'a' tags`() = runParserTest( Case(