reduce duplication
This commit is contained in:
parent
23b004ff02
commit
2fab30060f
|
@ -5,16 +5,19 @@ import app.dapk.st.matrix.common.UserId
|
|||
private const val TAG_OPEN = '<'
|
||||
private const val TAG_CLOSE = '>'
|
||||
private const val NO_RESULT_FOUND = -1
|
||||
private const val LI_VALUE_CAPTURE = "value=\""
|
||||
private val SKIPPED_TAGS = setOf("mx-reply")
|
||||
|
||||
internal class HtmlParser {
|
||||
|
||||
fun test(startingFrom: Int, input: String): Int {
|
||||
return input.indexOf(TAG_OPEN, startingFrom)
|
||||
}
|
||||
|
||||
fun parseHtmlTags(input: String, searchIndex: Int, builder: PartBuilder, nestingLevel: Int = 0): SearchIndex = input.findTag(
|
||||
fromIndex = searchIndex,
|
||||
onInvalidTag = { builder.appendText(input[it].toString()) },
|
||||
onTag = { tagOpen, tagClose ->
|
||||
val wholeTag = input.substring(tagOpen, tagClose + 1)
|
||||
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
|
||||
val (wholeTag, tagName) = parseTag(input, tagOpen, tagClose)
|
||||
|
||||
when {
|
||||
tagName.startsWith('@') -> {
|
||||
|
@ -29,31 +32,40 @@ internal class HtmlParser {
|
|||
tagClose.next()
|
||||
}
|
||||
|
||||
else -> {
|
||||
val exitTag = "</$tagName>"
|
||||
val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
|
||||
val exitTagCloseIndex = exitIndex + exitTag.length
|
||||
if (exitIndex == END_SEARCH) {
|
||||
builder.appendText(input[searchIndex].toString())
|
||||
searchIndex.next()
|
||||
} else {
|
||||
when (tagName) {
|
||||
"mx-reply" -> {
|
||||
exitTagCloseIndex
|
||||
}
|
||||
|
||||
else -> {
|
||||
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
||||
val tagContent = input.substring(tagClose + 1, exitIndex)
|
||||
handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex, nestingLevel)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else -> parseTagWithContent(tagName, input, tagClose, builder, searchIndex, tagOpen, wholeTag, nestingLevel)
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
private fun parseTagWithContent(
|
||||
tagName: String,
|
||||
input: String,
|
||||
tagClose: Int,
|
||||
builder: PartBuilder,
|
||||
searchIndex: Int,
|
||||
tagOpen: Int,
|
||||
wholeTag: String,
|
||||
nestingLevel: Int
|
||||
): Int {
|
||||
val exitTag = "</$tagName>"
|
||||
val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
|
||||
val exitTagCloseIndex = exitIndex + exitTag.length
|
||||
return when {
|
||||
exitIndex == NO_RESULT_FOUND -> {
|
||||
builder.appendText(input[searchIndex].toString())
|
||||
searchIndex.next()
|
||||
}
|
||||
|
||||
SKIPPED_TAGS.contains(tagName) -> exitTagCloseIndex
|
||||
|
||||
else -> {
|
||||
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
||||
val tagContent = input.substring(tagClose + 1, exitIndex)
|
||||
handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex, nestingLevel)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun handleTagWithContent(
|
||||
input: String,
|
||||
tagName: String,
|
||||
|
@ -64,14 +76,23 @@ internal class HtmlParser {
|
|||
nestingLevel: Int,
|
||||
) = when (tagName) {
|
||||
"a" -> {
|
||||
val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">")
|
||||
if (findHrefUrl.startsWith("https://matrix.to/#/@")) {
|
||||
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
|
||||
builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
|
||||
ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex)
|
||||
} else {
|
||||
builder.appendLink(findHrefUrl, label = tagContent)
|
||||
exitTagCloseIndex
|
||||
val findHrefUrl = wholeTag.findTagAttribute("href")
|
||||
when {
|
||||
findHrefUrl == null -> {
|
||||
builder.appendText(tagContent)
|
||||
exitTagCloseIndex
|
||||
}
|
||||
|
||||
findHrefUrl.startsWith("https://matrix.to/#/@") -> {
|
||||
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
|
||||
builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
|
||||
ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex)
|
||||
}
|
||||
|
||||
else -> {
|
||||
builder.appendLink(findHrefUrl, label = tagContent)
|
||||
exitTagCloseIndex
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -83,7 +104,7 @@ internal class HtmlParser {
|
|||
"p" -> {
|
||||
if (tagContent.isNotEmpty() && nestingLevel < 2) {
|
||||
var lastIndex = 0
|
||||
iterateIndex(start = 0) { searchIndex ->
|
||||
iterateSearchIndex { searchIndex ->
|
||||
lastIndex = searchIndex
|
||||
parseHtmlTags(tagContent, searchIndex, builder, nestingLevel = nestingLevel + 1)
|
||||
}
|
||||
|
@ -147,23 +168,13 @@ internal class HtmlParser {
|
|||
}
|
||||
|
||||
private fun parseList(parentTag: String, parentContent: String, builder: PartBuilder) {
|
||||
var index = 1
|
||||
iterateIndex(start = 0) { nextIndex ->
|
||||
var listIndex = 1
|
||||
iterateSearchIndex { nextIndex ->
|
||||
singleTagParser(parentContent, "li", nextIndex, builder) { wholeTag, tagContent ->
|
||||
val content = when (parentTag) {
|
||||
"ol" -> {
|
||||
index = wholeTag.indexOf(LI_VALUE_CAPTURE).let {
|
||||
if (it == -1) {
|
||||
index
|
||||
} else {
|
||||
val start = it + LI_VALUE_CAPTURE.length
|
||||
wholeTag.substring(start).substringBefore('\"').toInt()
|
||||
}
|
||||
}
|
||||
|
||||
"$index. $tagContent".also {
|
||||
index++
|
||||
}
|
||||
listIndex = wholeTag.findTagAttribute("value")?.toInt() ?: listIndex
|
||||
"$listIndex. $tagContent".also { listIndex++ }
|
||||
}
|
||||
|
||||
else -> "- $tagContent"
|
||||
|
@ -179,8 +190,7 @@ internal class HtmlParser {
|
|||
fromIndex = searchIndex,
|
||||
onInvalidTag = { builder.appendText(content[it].toString()) },
|
||||
onTag = { tagOpen, tagClose ->
|
||||
val wholeTag = content.substring(tagOpen, tagClose + 1)
|
||||
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
|
||||
val (wholeTag, tagName) = parseTag(content, tagOpen, tagClose)
|
||||
|
||||
if (tagName == wantedTagName) {
|
||||
val exitTag = "</$tagName>"
|
||||
|
@ -201,16 +211,21 @@ internal class HtmlParser {
|
|||
)
|
||||
}
|
||||
|
||||
fun test(startingFrom: Int, intput: String): Int {
|
||||
return intput.indexOf(TAG_OPEN, startingFrom)
|
||||
private fun parseTag(input: String, tagOpen: Int, tagClose: Int): Pair<String, String> {
|
||||
val wholeTag = input.substring(tagOpen, tagClose + 1)
|
||||
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
|
||||
return wholeTag to tagName
|
||||
}
|
||||
}
|
||||
|
||||
private fun String.findTagAttribute(name: String): String? {
|
||||
val attribute = "$name="
|
||||
return this.indexOf(attribute).let {
|
||||
if (it == NO_RESULT_FOUND) {
|
||||
null
|
||||
} else {
|
||||
val start = it + attribute.length
|
||||
this.substring(start).substringAfter('\"').substringBefore('\"')
|
||||
}
|
||||
}
|
||||
|
||||
private fun iterateIndex(start: SearchIndex, action: (SearchIndex) -> SearchIndex): SearchIndex {
|
||||
var nextIndex = start
|
||||
while (nextIndex != END_SEARCH) {
|
||||
nextIndex = action(nextIndex)
|
||||
}
|
||||
return nextIndex
|
||||
}
|
||||
|
||||
}
|
|
@ -14,9 +14,11 @@ class RichMessageParser {
|
|||
val input = source
|
||||
.removeHtmlEntities()
|
||||
.dropTextFallback()
|
||||
val builder = PartBuilder()
|
||||
var nextIndex = 0
|
||||
while (nextIndex != END_SEARCH) {
|
||||
return RichText(collectRichText(input).build())
|
||||
}
|
||||
|
||||
private fun collectRichText(input: String) = PartBuilder().also { builder ->
|
||||
iterateSearchIndex { nextIndex ->
|
||||
val htmlStart = htmlParser.test(nextIndex, input)
|
||||
val urlStart = urlParser.test(nextIndex, input)
|
||||
|
||||
|
@ -37,9 +39,8 @@ class RichMessageParser {
|
|||
if (hasReachedEnd && hasUnprocessedText(firstResult, secondResult, input)) {
|
||||
builder.appendText(input.substring(nextIndex))
|
||||
}
|
||||
nextIndex = if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult)
|
||||
if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult)
|
||||
}
|
||||
return RichText(builder.build())
|
||||
}
|
||||
|
||||
private fun hasUnprocessedText(htmlResult: Int, urlResult: Int, input: String) = htmlResult < input.length && urlResult < input.length
|
||||
|
@ -60,3 +61,11 @@ private fun String.removeHtmlEntities() = this.replace(""", "\"").replace("
|
|||
private fun String.dropTextFallback() = this.lines()
|
||||
.dropWhile { it.startsWith("> ") || it.isEmpty() }
|
||||
.joinToString(separator = "\n")
|
||||
|
||||
internal fun iterateSearchIndex(action: (SearchIndex) -> SearchIndex): SearchIndex {
|
||||
var nextIndex = 0
|
||||
while (nextIndex != END_SEARCH) {
|
||||
nextIndex = action(nextIndex)
|
||||
}
|
||||
return nextIndex
|
||||
}
|
|
@ -50,7 +50,7 @@ class RichMessageParserTest {
|
|||
|
||||
@Test
|
||||
fun `replaces matrixdotto with person`() = runParserTest(
|
||||
input = """Hello <a href="https://matrix.to/#/@a-name:foo.bar>a-name</a>: world""",
|
||||
input = """Hello <a href="https://matrix.to/#/@a-name:foo.bar">a-name</a>: world""",
|
||||
expected = RichText(setOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(" world")))
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue