reduce duplication

This commit is contained in:
Adam Brown 2022-10-29 12:51:56 +01:00
parent 23b004ff02
commit 2fab30060f
3 changed files with 88 additions and 64 deletions

View File

@ -5,16 +5,19 @@ import app.dapk.st.matrix.common.UserId
private const val TAG_OPEN = '<' private const val TAG_OPEN = '<'
private const val TAG_CLOSE = '>' private const val TAG_CLOSE = '>'
private const val NO_RESULT_FOUND = -1 private const val NO_RESULT_FOUND = -1
private const val LI_VALUE_CAPTURE = "value=\"" private val SKIPPED_TAGS = setOf("mx-reply")
internal class HtmlParser { internal class HtmlParser {
/**
 * Locates the next potential HTML tag in [input].
 *
 * @param startingFrom index at which the scan begins.
 * @param input text to scan.
 * @return index of the first [TAG_OPEN] character found at or after [startingFrom],
 * or `-1` when there is none.
 */
fun test(startingFrom: Int, input: String): Int = input.indexOf(TAG_OPEN, startIndex = startingFrom)
fun parseHtmlTags(input: String, searchIndex: Int, builder: PartBuilder, nestingLevel: Int = 0): SearchIndex = input.findTag( fun parseHtmlTags(input: String, searchIndex: Int, builder: PartBuilder, nestingLevel: Int = 0): SearchIndex = input.findTag(
fromIndex = searchIndex, fromIndex = searchIndex,
onInvalidTag = { builder.appendText(input[it].toString()) }, onInvalidTag = { builder.appendText(input[it].toString()) },
onTag = { tagOpen, tagClose -> onTag = { tagOpen, tagClose ->
val wholeTag = input.substring(tagOpen, tagClose + 1) val (wholeTag, tagName) = parseTag(input, tagOpen, tagClose)
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
when { when {
tagName.startsWith('@') -> { tagName.startsWith('@') -> {
@ -29,31 +32,40 @@ internal class HtmlParser {
tagClose.next() tagClose.next()
} }
else -> { else -> parseTagWithContent(tagName, input, tagClose, builder, searchIndex, tagOpen, wholeTag, nestingLevel)
val exitTag = "</$tagName>"
val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
val exitTagCloseIndex = exitIndex + exitTag.length
if (exitIndex == END_SEARCH) {
builder.appendText(input[searchIndex].toString())
searchIndex.next()
} else {
when (tagName) {
"mx-reply" -> {
exitTagCloseIndex
}
else -> {
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
val tagContent = input.substring(tagClose + 1, exitIndex)
handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex, nestingLevel)
}
}
}
}
} }
} }
) )
/**
 * Handles a tag that is expected to have a matching `</tagName>` closing tag.
 *
 * Three outcomes are possible:
 * - no closing tag is found after [tagClose]: the single character at [searchIndex] is
 *   appended as text and the result of `searchIndex.next()` is returned;
 * - [tagName] is one of [SKIPPED_TAGS]: nothing is appended and the index just past the
 *   closing tag is returned;
 * - otherwise: the text preceding the tag is appended, then the tag body is handed to
 *   [handleTagWithContent], whose result is returned.
 *
 * @param tagName name of the tag, without angle brackets.
 * @param input text being parsed.
 * @param tagClose index of the `>` that ends the opening tag.
 * @param builder receiver of the parsed parts.
 * @param searchIndex index the current search started from.
 * @param tagOpen index of the `<` that starts the opening tag.
 * @param wholeTag the full opening tag text.
 * @param nestingLevel current nesting depth, forwarded to [handleTagWithContent].
 * @return the index value produced by whichever outcome above applied.
 */
private fun parseTagWithContent(
    tagName: String,
    input: String,
    tagClose: Int,
    builder: PartBuilder,
    searchIndex: Int,
    tagOpen: Int,
    wholeTag: String,
    nestingLevel: Int
): Int {
    val closingTag = "</$tagName>"
    val closingTagStart = input.indexOf(closingTag, startIndex = tagClose)

    // Unterminated tag: emit one character as plain text and move on.
    if (closingTagStart == NO_RESULT_FOUND) {
        builder.appendText(input[searchIndex].toString())
        return searchIndex.next()
    }

    val afterClosingTag = closingTagStart + closingTag.length

    // Skipped tags are dropped entirely, content included.
    if (tagName in SKIPPED_TAGS) {
        return afterClosingTag
    }

    appendTextBeforeTag(searchIndex, tagOpen, builder, input)
    val body = input.substring(tagClose + 1, closingTagStart)
    return handleTagWithContent(input, tagName, wholeTag, builder, body, afterClosingTag, nestingLevel)
}
private fun handleTagWithContent( private fun handleTagWithContent(
input: String, input: String,
tagName: String, tagName: String,
@ -64,14 +76,23 @@ internal class HtmlParser {
nestingLevel: Int, nestingLevel: Int,
) = when (tagName) { ) = when (tagName) {
"a" -> { "a" -> {
val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">") val findHrefUrl = wholeTag.findTagAttribute("href")
if (findHrefUrl.startsWith("https://matrix.to/#/@")) { when {
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\"")) findHrefUrl == null -> {
builder.appendPerson(userId, "@${tagContent.removePrefix("@")}") builder.appendText(tagContent)
ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex) exitTagCloseIndex
} else { }
builder.appendLink(findHrefUrl, label = tagContent)
exitTagCloseIndex findHrefUrl.startsWith("https://matrix.to/#/@") -> {
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex)
}
else -> {
builder.appendLink(findHrefUrl, label = tagContent)
exitTagCloseIndex
}
} }
} }
@ -83,7 +104,7 @@ internal class HtmlParser {
"p" -> { "p" -> {
if (tagContent.isNotEmpty() && nestingLevel < 2) { if (tagContent.isNotEmpty() && nestingLevel < 2) {
var lastIndex = 0 var lastIndex = 0
iterateIndex(start = 0) { searchIndex -> iterateSearchIndex { searchIndex ->
lastIndex = searchIndex lastIndex = searchIndex
parseHtmlTags(tagContent, searchIndex, builder, nestingLevel = nestingLevel + 1) parseHtmlTags(tagContent, searchIndex, builder, nestingLevel = nestingLevel + 1)
} }
@ -147,23 +168,13 @@ internal class HtmlParser {
} }
private fun parseList(parentTag: String, parentContent: String, builder: PartBuilder) { private fun parseList(parentTag: String, parentContent: String, builder: PartBuilder) {
var index = 1 var listIndex = 1
iterateIndex(start = 0) { nextIndex -> iterateSearchIndex { nextIndex ->
singleTagParser(parentContent, "li", nextIndex, builder) { wholeTag, tagContent -> singleTagParser(parentContent, "li", nextIndex, builder) { wholeTag, tagContent ->
val content = when (parentTag) { val content = when (parentTag) {
"ol" -> { "ol" -> {
index = wholeTag.indexOf(LI_VALUE_CAPTURE).let { listIndex = wholeTag.findTagAttribute("value")?.toInt() ?: listIndex
if (it == -1) { "$listIndex. $tagContent".also { listIndex++ }
index
} else {
val start = it + LI_VALUE_CAPTURE.length
wholeTag.substring(start).substringBefore('\"').toInt()
}
}
"$index. $tagContent".also {
index++
}
} }
else -> "- $tagContent" else -> "- $tagContent"
@ -179,8 +190,7 @@ internal class HtmlParser {
fromIndex = searchIndex, fromIndex = searchIndex,
onInvalidTag = { builder.appendText(content[it].toString()) }, onInvalidTag = { builder.appendText(content[it].toString()) },
onTag = { tagOpen, tagClose -> onTag = { tagOpen, tagClose ->
val wholeTag = content.substring(tagOpen, tagClose + 1) val (wholeTag, tagName) = parseTag(content, tagOpen, tagClose)
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
if (tagName == wantedTagName) { if (tagName == wantedTagName) {
val exitTag = "</$tagName>" val exitTag = "</$tagName>"
@ -201,16 +211,21 @@ internal class HtmlParser {
) )
} }
fun test(startingFrom: Int, intput: String): Int { private fun parseTag(input: String, tagOpen: Int, tagClose: Int): Pair<String, String> {
return intput.indexOf(TAG_OPEN, startingFrom) val wholeTag = input.substring(tagOpen, tagClose + 1)
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
return wholeTag to tagName
} }
}
private fun iterateIndex(start: SearchIndex, action: (SearchIndex) -> SearchIndex): SearchIndex { private fun String.findTagAttribute(name: String): String? {
var nextIndex = start val attribute = "$name="
while (nextIndex != END_SEARCH) { return this.indexOf(attribute).let {
nextIndex = action(nextIndex) if (it == NO_RESULT_FOUND) {
null
} else {
val start = it + attribute.length
this.substring(start).substringAfter('\"').substringBefore('\"')
} }
return nextIndex
} }
}
}

View File

@ -14,9 +14,11 @@ class RichMessageParser {
val input = source val input = source
.removeHtmlEntities() .removeHtmlEntities()
.dropTextFallback() .dropTextFallback()
val builder = PartBuilder() return RichText(collectRichText(input).build())
var nextIndex = 0 }
while (nextIndex != END_SEARCH) {
private fun collectRichText(input: String) = PartBuilder().also { builder ->
iterateSearchIndex { nextIndex ->
val htmlStart = htmlParser.test(nextIndex, input) val htmlStart = htmlParser.test(nextIndex, input)
val urlStart = urlParser.test(nextIndex, input) val urlStart = urlParser.test(nextIndex, input)
@ -37,9 +39,8 @@ class RichMessageParser {
if (hasReachedEnd && hasUnprocessedText(firstResult, secondResult, input)) { if (hasReachedEnd && hasUnprocessedText(firstResult, secondResult, input)) {
builder.appendText(input.substring(nextIndex)) builder.appendText(input.substring(nextIndex))
} }
nextIndex = if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult) if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult)
} }
return RichText(builder.build())
} }
private fun hasUnprocessedText(htmlResult: Int, urlResult: Int, input: String) = htmlResult < input.length && urlResult < input.length private fun hasUnprocessedText(htmlResult: Int, urlResult: Int, input: String) = htmlResult < input.length && urlResult < input.length
@ -60,3 +61,11 @@ private fun String.removeHtmlEntities() = this.replace("&quot;", "\"").replace("
private fun String.dropTextFallback() = this.lines() private fun String.dropTextFallback() = this.lines()
.dropWhile { it.startsWith("> ") || it.isEmpty() } .dropWhile { it.startsWith("> ") || it.isEmpty() }
.joinToString(separator = "\n") .joinToString(separator = "\n")
/**
 * Repeatedly applies [action], starting from index `0` and feeding each result back in as
 * the next input, until the current index equals [END_SEARCH].
 *
 * @param action receives the current index and returns the next one; returning
 * [END_SEARCH] stops the iteration.
 * @return the final index, which is the value equal to [END_SEARCH] that stopped the loop.
 */
internal fun iterateSearchIndex(action: (SearchIndex) -> SearchIndex): SearchIndex {
    tailrec fun advance(current: SearchIndex): SearchIndex =
        if (current == END_SEARCH) current else advance(action(current))

    return advance(0)
}

View File

@ -50,7 +50,7 @@ class RichMessageParserTest {
@Test @Test
fun `replaces matrixdotto with person`() = runParserTest( fun `replaces matrixdotto with person`() = runParserTest(
input = """Hello <a href="https://matrix.to/#/@a-name:foo.bar>a-name</a>: world""", input = """Hello <a href="https://matrix.to/#/@a-name:foo.bar">a-name</a>: world""",
expected = RichText(setOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(" world"))) expected = RichText(setOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(" world")))
) )