add support for more types of html tags and nesting

This commit is contained in:
Adam Brown 2022-11-05 09:30:02 +00:00
parent c6b9a997f3
commit c9a29b0b25
12 changed files with 417 additions and 421 deletions

View File

@ -0,0 +1,85 @@
package app.dapk.st.matrix.sync.internal.sync.message
import app.dapk.st.matrix.sync.internal.sync.message.html.HtmlProcessor
import app.dapk.st.matrix.sync.internal.sync.message.url.UrlParser
private const val MAX_NESTING_LIMIT = 20
/**
 * Walks an input string left to right, handing every `<` to the [HtmlProcessor] and every
 * `http` occurrence to the [UrlParser], and appending whatever lies in between as plain text.
 *
 * Once [MAX_NESTING_LIMIT] nested levels are reached the input is appended verbatim instead
 * of being parsed any further.
 */
class AccumulatingRichTextContentParser : AccumulatingContentParser {

    private val urlParser = UrlParser()
    private val tagProcessor = HtmlProcessor()

    /**
     * Parses [input] into [accumulator] and returns that same accumulator.
     *
     * @param nestingLevel how deep this call is inside nested tag content.
     */
    override fun parse(input: String, accumulator: ContentAccumulator, nestingLevel: Int): ContentAccumulator {
        if (nestingLevel >= MAX_NESTING_LIMIT) {
            accumulator.appendText(input)
            return accumulator
        }

        var cursor = 0
        while (cursor != -1) {
            val resumeAt = captureNext(input, cursor, accumulator, nestingLevel)
            if (resumeAt == -1) {
                // Nothing further was captured: flush the text after the current cursor.
                appendRemainingText(cursor, input, accumulator)
            }
            cursor = resumeAt
        }
        return accumulator
    }

    /**
     * Finds the closest tag or url at or after [cursor] and delegates to its processor.
     *
     * Returns the index to resume from, or -1 when neither a `<` nor `http` remains.
     */
    private fun captureNext(input: String, cursor: Int, accumulator: ContentAccumulator, nestingLevel: Int): Int {
        val tagOpen = input.indexOf('<', startIndex = cursor)
        val httpOpen = input.indexOf("http", startIndex = cursor)

        val tagComesFirst = when {
            tagOpen == -1 && httpOpen == -1 -> return -1
            httpOpen == -1 -> true
            tagOpen == -1 -> false
            // favour tags on a tie, as urls can exist within tags
            else -> tagOpen <= httpOpen
        }

        return if (tagComesFirst) {
            prependTextBeforeCapture(input, cursor, tagOpen, accumulator)
            tagProcessor.process(input, tagOpen, accumulator, nestingLevel, nestedParser = this)
        } else {
            prependTextBeforeCapture(input, cursor, httpOpen, accumulator)
            urlParser.parseUrl(input, httpOpen, accumulator)
        }
    }

    /** Appends the plain text sitting between [index] and the start of the capture, if any. */
    private fun prependTextBeforeCapture(input: String, index: Int, captureIndex: Int, accumulator: ContentAccumulator) {
        if (index >= captureIndex) return
        accumulator.appendText(input.substring(index, captureIndex))
    }

    /** Appends everything from [index] to the end of [input], if anything is left. */
    private fun appendRemainingText(index: Int, input: String, accumulator: ContentAccumulator) {
        if (index >= input.length) return
        accumulator.appendText(input.substring(index))
    }
}

View File

@ -1,248 +0,0 @@
package app.dapk.st.matrix.sync.internal.sync.message
import app.dapk.st.matrix.common.UserId
private const val TAG_OPEN = '<'
private const val TAG_CLOSE = '>'
private const val NO_RESULT_FOUND = -1
private val SKIPPED_TAGS = setOf("mx-reply")
/**
 * Index-based HTML tag parser that writes its output into a [PartBuilder].
 *
 * Every parsing function returns the index the caller should continue searching from,
 * or [END_SEARCH] when no further `<` exists in the input.
 */
internal class HtmlParser {
/** Returns the index of the first `<` at or after [startingFrom], or -1 when there is none. */
fun test(startingFrom: Int, input: String) = input.indexOf(TAG_OPEN, startingFrom)
/**
 * Parses the next tag found at or after [searchIndex].
 *
 * A `<` without any following `>` is treated as invalid: the character at [searchIndex]
 * is appended as text and the search advances by one. Tags whose name starts with `@`
 * become a person part, `br` becomes a newline, everything else is handled as a tag
 * with content.
 *
 * @param nestingLevel current depth, used to limit recursion into `p`/`blockquote` content.
 */
fun parseHtmlTags(input: String, searchIndex: Int, builder: PartBuilder, nestingLevel: Int = 0): SearchIndex = input.findTag(
fromIndex = searchIndex,
onInvalidTag = { builder.appendText(input[it].toString()) },
onTag = { tagOpen, tagClose ->
val (wholeTag, tagName) = parseTag(input, tagOpen, tagClose)
when {
tagName.startsWith('@') -> {
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
builder.appendPerson(UserId(tagName), tagName)
tagClose.next()
}
tagName == "br" -> {
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
builder.appendNewline()
tagClose.next()
}
else -> parseTagWithContent(input, tagName, tagClose, searchIndex, tagOpen, wholeTag, builder, nestingLevel)
}
}
)
/**
 * Handles a tag that is expected to have a matching `</tagName>`.
 *
 * Returns the index just past the exit tag, or `searchIndex + 1` when no exit tag exists.
 */
private fun parseTagWithContent(
input: String,
tagName: String,
tagClose: Int,
searchIndex: Int,
tagOpen: Int,
wholeTag: String,
builder: PartBuilder,
nestingLevel: Int
): Int {
val exitTag = "</$tagName>"
// First occurrence of the exit tag after the opening tag; nested same-name tags are not tracked.
val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
val exitTagCloseIndex = exitIndex + exitTag.length
return when {
exitIndex == NO_RESULT_FOUND -> {
// No exit tag: emit the single character at searchIndex as text and move on by one.
builder.appendText(input[searchIndex].toString())
searchIndex.next()
}
// Skipped tags are dropped together with their content.
SKIPPED_TAGS.contains(tagName) -> exitTagCloseIndex
else -> {
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
val tagContent = input.substring(tagClose + 1, exitIndex)
handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex, nestingLevel)
}
}
}
/**
 * Emits the rich text part(s) for [tagName] with [tagContent] and returns the index to continue from.
 *
 * Unknown tags fall through to the `else` branch, which appends their content as plain text.
 */
private fun handleTagWithContent(
input: String,
tagName: String,
wholeTag: String,
builder: PartBuilder,
tagContent: String,
exitTagCloseIndex: Int,
nestingLevel: Int,
) = when (tagName) {
"a" -> {
val findHrefUrl = wholeTag.findTagAttribute("href")
when {
findHrefUrl == null -> {
builder.appendText(tagContent)
exitTagCloseIndex
}
// matrix.to user links are rendered as a person mention rather than a link.
findHrefUrl.startsWith("https://matrix.to/#/@") -> {
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex)
}
else -> {
builder.appendLink(findHrefUrl, label = tagContent)
exitTagCloseIndex
}
}
}
"b", "strong" -> {
builder.appendBold(tagContent)
exitTagCloseIndex
}
"blockquote" -> {
// Content is only parsed recursively while nestingLevel is below 3.
if (tagContent.isNotEmpty() && nestingLevel < 3) {
var lastIndex = 0
val trimmedTagContent = tagContent.trim()
builder.appendText("> ")
iterateSearchIndex { searchIndex ->
// Remember where the final (unsuccessful) search started so the tail can be appended below.
lastIndex = searchIndex
parseHtmlTags(trimmedTagContent, searchIndex, builder, nestingLevel = nestingLevel + 1)
}
if (lastIndex < trimmedTagContent.length) {
builder.appendText(trimmedTagContent.substring(lastIndex))
}
}
builder.appendNewline()
exitTagCloseIndex
}
"p" -> {
// Content is only parsed recursively while nestingLevel is below 2.
if (tagContent.isNotEmpty() && nestingLevel < 2) {
var lastIndex = 0
iterateSearchIndex { searchIndex ->
lastIndex = searchIndex
parseHtmlTags(tagContent, searchIndex, builder, nestingLevel = nestingLevel + 1)
}
if (lastIndex < tagContent.length) {
builder.appendText(tagContent.substring(lastIndex))
}
}
builder.appendNewline()
exitTagCloseIndex
}
"ul", "ol" -> {
parseList(tagName, tagContent, builder)
exitTagCloseIndex
}
"h1", "h2", "h3", "h4", "h5" -> {
builder.appendBold(tagContent.trim())
builder.appendNewline()
exitTagCloseIndex
}
"i", "em" -> {
builder.appendItalic(tagContent)
exitTagCloseIndex
}
else -> {
builder.appendText(tagContent)
exitTagCloseIndex
}
}
/** Skips a single `:` that directly follows a mention's closing tag, if present. */
private fun ignoreMatrixColonMentionSuffix(input: String, exitTagCloseIndex: Int) = if (input.getOrNull(exitTagCloseIndex) == ':') {
exitTagCloseIndex.next()
} else {
exitTagCloseIndex
}
/** Appends the text between [searchIndex] and [tagOpen] unless the two indexes are equal. */
private fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String) {
if (searchIndex != tagOpen) {
builder.appendText(input.substring(searchIndex, tagOpen))
}
}
/**
 * Locates the next `<` ... `>` pair starting at [fromIndex].
 *
 * Returns [END_SEARCH] when there is no `<`; calls [onInvalidTag] with [fromIndex] and
 * returns `fromIndex + 1` when the `<` has no closing `>`; otherwise returns the result
 * of [onTag] called with the open and close indexes.
 */
private fun String.findTag(fromIndex: Int, onInvalidTag: (Int) -> Unit, onTag: (Int, Int) -> Int): Int {
return when (val foundIndex = this.indexOf(TAG_OPEN, startIndex = fromIndex)) {
NO_RESULT_FOUND -> END_SEARCH
else -> when (val closeIndex = indexOf(TAG_CLOSE, startIndex = foundIndex)) {
NO_RESULT_FOUND -> {
onInvalidTag(fromIndex)
fromIndex + 1
}
else -> onTag(foundIndex, closeIndex)
}
}
}
/**
 * Emits one line per `li` found in [parentContent]: `- item` for unordered lists and
 * `N. item` for ordered lists, where N starts at 1 and can be overridden by a `value` attribute.
 */
private fun parseList(parentTag: String, parentContent: String, builder: PartBuilder) {
var listIndex = 1
iterateSearchIndex { nextIndex ->
singleTagParser(parentContent, "li", nextIndex, builder) { wholeTag, tagContent ->
val content = when (parentTag) {
"ol" -> {
// NOTE(review): toInt() throws NumberFormatException for a non-numeric value attribute.
listIndex = wholeTag.findTagAttribute("value")?.toInt() ?: listIndex
"$listIndex. $tagContent".also { listIndex++ }
}
else -> "- $tagContent"
}
builder.appendText(content)
builder.appendNewline()
}
}
}
/**
 * Parses the next tag in [content] only when it is named [wantedTagName], passing the whole
 * opening tag and the tag content to [onTag]. Any other tag name ends the search.
 */
private fun singleTagParser(content: String, wantedTagName: String, searchIndex: Int, builder: PartBuilder, onTag: (String, String) -> Unit): SearchIndex {
return content.findTag(
fromIndex = searchIndex,
onInvalidTag = { builder.appendText(content[it].toString()) },
onTag = { tagOpen, tagClose ->
val (wholeTag, tagName) = parseTag(content, tagOpen, tagClose)
if (tagName == wantedTagName) {
val exitTag = "</$tagName>"
val exitIndex = content.indexOf(exitTag, startIndex = tagClose)
val exitTagCloseIndex = exitIndex + exitTag.length
if (exitIndex == END_SEARCH) {
builder.appendText(content[searchIndex].toString())
searchIndex.next()
} else {
val tagContent = content.substring(tagClose + 1, exitIndex)
onTag(wholeTag, tagContent)
exitTagCloseIndex
}
} else {
END_SEARCH
}
}
)
}
/**
 * Returns the whole opening tag (including `<` and `>`) paired with its name, where the name
 * is the text between `<` and the first space or `>`.
 */
private fun parseTag(input: String, tagOpen: Int, tagClose: Int): Pair<String, String> {
val wholeTag = input.substring(tagOpen, tagClose + 1)
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
return wholeTag to tagName
}
}
/**
 * Looks up the attribute called [name] inside this tag text.
 *
 * Returns `null` when `name=` does not occur; otherwise returns the text between the first
 * pair of double quotes that follows `name=`.
 */
private fun String.findTagAttribute(name: String): String? {
    val attribute = "$name="
    val attributeIndex = indexOf(attribute)
    if (attributeIndex == NO_RESULT_FOUND) return null

    val afterAttributeName = substring(attributeIndex + attribute.length)
    return afterAttributeName.substringAfter('\"').substringBefore('\"')
}

View File

@ -1,13 +0,0 @@
package app.dapk.st.matrix.sync.internal.sync.message
internal typealias SearchIndex = Int
/** Returns the index immediately after this one. */
internal fun Int.next(): Int = inc()
/**
 * Helper operations shared by index-based parsers.
 *
 * NOTE(review): no implementation of this interface is visible here; the member
 * descriptions below are inferred from the signatures only — confirm against implementers.
 */
internal interface ParserScope {
/** Presumably appends the text of [input] between [searchIndex] and [tagOpen] to [builder]. */
fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String)
/** Presumably returns the search index following the receiver. */
fun SearchIndex.next(): SearchIndex
}

View File

@ -1,71 +0,0 @@
package app.dapk.st.matrix.sync.internal.sync.message
import app.dapk.st.matrix.common.RichText
import kotlin.math.max
internal const val END_SEARCH = -1
/**
 * Converts a message body into [RichText] by repeatedly running the html and url parsers
 * over the input until both report that nothing is left to process.
 */
class RichMessageParser {
private val htmlParser = HtmlParser()
private val urlParser = UrlParser()
/** Decodes a fixed set of html entities, drops the leading quoted fallback lines and parses the rest. */
fun parse(source: String): RichText {
val input = source
.removeHtmlEntities()
.dropTextFallback()
return RichText(collectRichText(input).build())
}
/**
 * Runs both parsers on every iteration, in an order decided by comparing their next match
 * indexes, and continues from the larger of the two returned indexes.
 */
private fun collectRichText(input: String) = PartBuilder().also { builder ->
iterateSearchIndex { nextIndex ->
val htmlStart = htmlParser.test(nextIndex, input)
val urlStart = urlParser.test(nextIndex, input)
// NOTE(review): a missing tag yields htmlStart == -1, which also satisfies htmlStart < urlStart
// whenever a url exists, so the html parser still runs first in that case.
val firstResult = if (htmlStart < urlStart) {
htmlParser.parseHtmlTags(input, nextIndex, builder)
} else {
urlParser.parseUrl(input, nextIndex, builder)
}
// The second parser continues from where the first one stopped (or from nextIndex if it found nothing).
val secondStartIndex = findUrlStartIndex(firstResult, nextIndex)
val secondResult = if (htmlStart < urlStart) {
urlParser.parseUrl(input, secondStartIndex, builder)
} else {
htmlParser.parseHtmlTags(input, secondStartIndex, builder)
}
val hasReachedEnd = hasReachedEnd(firstResult, secondResult, input)
if (hasReachedEnd && hasUnprocessedText(firstResult, secondResult, input)) {
builder.appendText(input.substring(nextIndex))
}
if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult)
}
}
/** True when both results are still inside the bounds of [input]. */
private fun hasUnprocessedText(htmlResult: Int, urlResult: Int, input: String) = htmlResult < input.length && urlResult < input.length
/** Picks the index the second parser starts from: the first parser's result, or [searchIndex] when it found nothing. */
private fun findUrlStartIndex(htmlResult: Int, searchIndex: Int) = when {
htmlResult == END_SEARCH && searchIndex == 0 -> 0
htmlResult == END_SEARCH -> searchIndex
else -> htmlResult
}
/** True when both parsers returned [END_SEARCH] or either result is at or past the end of [input]. */
private fun hasReachedEnd(htmlResult: SearchIndex, urlResult: Int, input: String) =
(htmlResult == END_SEARCH && urlResult == END_SEARCH) || (htmlResult >= input.length || urlResult >= input.length)
}
/**
 * Decodes the `&quot;`, `&#39;`, `&apos;` and `&amp;` entities, in that order.
 *
 * `&amp;` is replaced last, so an escaped entity such as `&amp;quot;` is decoded only once.
 */
private fun String.removeHtmlEntities(): String {
    val replacements = listOf(
        "&quot;" to "\"",
        "&#39;" to "'",
        "&apos;" to "'",
        "&amp;" to "&",
    )
    return replacements.fold(this) { text, (entity, decoded) -> text.replace(entity, decoded) }
}
/**
 * Removes the leading run of lines that are empty or start with `"> "`, then joins the
 * remaining lines with `\n`.
 */
private fun String.dropTextFallback(): String {
    val allLines = lines()
    val firstKept = allLines.indexOfFirst { line -> line.isNotEmpty() && !line.startsWith("> ") }
    return if (firstKept == -1) {
        ""
    } else {
        allLines.subList(firstKept, allLines.size).joinToString(separator = "\n")
    }
}
/**
 * Calls [action] with a search index that starts at 0, feeding each returned index into the
 * next call, until [action] returns [END_SEARCH]. Returns the final index, i.e. [END_SEARCH].
 */
internal fun iterateSearchIndex(action: (SearchIndex) -> SearchIndex): SearchIndex {
    var cursor: SearchIndex = 0
    // The starting index is never END_SEARCH, so the action always runs at least once.
    do {
        cursor = action(cursor)
    } while (cursor != END_SEARCH)
    return cursor
}

View File

@ -0,0 +1,34 @@
package app.dapk.st.matrix.sync.internal.sync.message
import app.dapk.st.matrix.common.RichText
/**
 * Callback handed to a [TagParser] so it can have the content of a tag parsed in turn.
 */
fun interface NestedParser {
/** Parses [content] and writes the outcome into [accumulator]. */
fun parse(content: String, accumulator: ContentAccumulator)
}
/**
 * Turns a single captured tag into content on a [ContentAccumulator].
 */
fun interface TagParser {
/**
 * @param tagName name of the captured tag
 * @param attributes attribute name to value pairs of the tag
 * @param content text found between the opening and closing tag
 * @param accumulator receiver of the parsed output
 * @param parser callback for parsing nested [content]
 */
fun parse(tagName: String, attributes: Map<String, String>, content: String, accumulator: ContentAccumulator, parser: NestedParser)
}
/**
 * Parses text into a [ContentAccumulator] while being told how deeply nested the text is.
 */
fun interface AccumulatingContentParser {
/**
 * Parses [input] into [accumulator] and returns a [ContentAccumulator] holding the result.
 *
 * @param nestingLevel depth of [input] within nested content; top-level callers pass 0.
 */
fun parse(input: String, accumulator: ContentAccumulator, nestingLevel: Int): ContentAccumulator
}
/**
 * Entry point for turning a raw message body into [RichText].
 *
 * The body is entity-decoded and stripped of its leading quoted fallback lines before being
 * handed to the [AccumulatingContentParser] at nesting level 0.
 */
class RichMessageParser(
    private val accumulatingParser: AccumulatingContentParser = AccumulatingRichTextContentParser()
) {

    /** Parses [source] and returns the collected rich text parts. */
    fun parse(source: String): RichText {
        val sanitised = source.removeHtmlEntities().dropTextFallback()
        val accumulated = accumulatingParser.parse(sanitised, RichTextPartBuilder(), nestingLevel = 0)
        return RichText(accumulated.build())
    }
}
/**
 * Decodes the `&quot;`, `&#39;`, `&apos;` and `&amp;` entities, in that order.
 *
 * `&amp;` is replaced last, so an escaped entity such as `&amp;quot;` is decoded only once.
 */
private fun String.removeHtmlEntities(): String {
    val replacements = listOf(
        "&quot;" to "\"",
        "&#39;" to "'",
        "&apos;" to "'",
        "&amp;" to "&",
    )
    return replacements.fold(this) { text, (entity, decoded) -> text.replace(entity, decoded) }
}
/**
 * Removes the leading run of lines that are empty or start with `"> "`, then joins the
 * remaining lines with `\n`.
 */
private fun String.dropTextFallback(): String {
    val allLines = lines()
    val firstKept = allLines.indexOfFirst { line -> line.isNotEmpty() && !line.startsWith("> ") }
    return if (firstKept == -1) {
        ""
    } else {
        allLines.subList(firstKept, allLines.size).joinToString(separator = "\n")
    }
}

View File

@ -3,39 +3,48 @@ package app.dapk.st.matrix.sync.internal.sync.message
import app.dapk.st.matrix.common.RichText import app.dapk.st.matrix.common.RichText
import app.dapk.st.matrix.common.UserId import app.dapk.st.matrix.common.UserId
internal class PartBuilder { interface ContentAccumulator {
fun appendText(value: String)
fun appendItalic(value: String)
fun appendBold(value: String)
fun appendPerson(userId: UserId, displayName: String)
fun appendLink(url: String, label: String?)
fun build(): List<RichText.Part>
}
class RichTextPartBuilder : ContentAccumulator {
private var normalBuffer = StringBuilder() private var normalBuffer = StringBuilder()
private val parts = mutableListOf<RichText.Part>() private val parts = mutableListOf<RichText.Part>()
fun appendText(value: String) { override fun appendText(value: String) {
normalBuffer.append(value.cleanFirstTextLine()) normalBuffer.append(value.cleanFirstTextLine())
} }
fun appendItalic(value: String) { override fun appendItalic(value: String) {
flushNormalBuffer() flushNormalBuffer()
parts.add(RichText.Part.Italic(value.cleanFirstTextLine())) parts.add(RichText.Part.Italic(value.cleanFirstTextLine()))
} }
fun appendBold(value: String) { override fun appendBold(value: String) {
flushNormalBuffer() flushNormalBuffer()
parts.add(RichText.Part.Bold(value.cleanFirstTextLine())) parts.add(RichText.Part.Bold(value.cleanFirstTextLine()))
} }
private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this
fun appendPerson(userId: UserId, displayName: String) { override fun appendPerson(userId: UserId, displayName: String) {
flushNormalBuffer() flushNormalBuffer()
parts.add(RichText.Part.Person(userId, displayName)) parts.add(RichText.Part.Person(userId, displayName))
} }
fun appendLink(url: String, label: String?) { override fun appendLink(url: String, label: String?) {
flushNormalBuffer() flushNormalBuffer()
parts.add(RichText.Part.Link(url, label ?: url)) parts.add(RichText.Part.Link(url, label ?: url))
} }
fun build(): List<RichText.Part> { override fun build(): List<RichText.Part> {
flushNormalBuffer() flushNormalBuffer()
return when (parts.isEmpty()) { return when (parts.isEmpty()) {
true -> parts true -> parts
@ -59,16 +68,9 @@ internal class PartBuilder {
normalBuffer.clear() normalBuffer.clear()
} }
} }
} }
internal fun PartBuilder.appendTextBeforeTag(previousIndex: Int, tagOpenIndex: Int, input: String) { internal fun ContentAccumulator.appendNewline() {
if (previousIndex != tagOpenIndex) {
this.appendText(input.substring(previousIndex, tagOpenIndex))
}
}
internal fun PartBuilder.appendNewline() {
this.appendText("\n") this.appendText("\n")
} }

View File

@ -0,0 +1,27 @@
package app.dapk.st.matrix.sync.internal.sync.message.html
import app.dapk.st.matrix.sync.internal.sync.message.AccumulatingContentParser
import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator
/**
 * Captures the tag that starts at a given `<` and forwards it to the [RichTextHtmlTagParser].
 */
class HtmlProcessor {

    private val tagCaptor = TagCaptor()
    private val htmlTagParser = RichTextHtmlTagParser()

    /**
     * Processes the tag opening at [tagOpen] in [input].
     *
     * Nested tag content is parsed through [nestedParser] at `nestingLevel + 1`.
     *
     * @return the index to continue from: the index reported by the tag captor, or
     * `tagOpen + 1` after appending the character at [tagOpen] as text when nothing was captured.
     */
    fun process(input: String, tagOpen: Int, partBuilder: ContentAccumulator, nestingLevel: Int, nestedParser: AccumulatingContentParser): Int {
        val resumeIndex = tagCaptor.tagCapture(input, tagOpen) { name, tagAttributes, body ->
            htmlTagParser.parse(name, tagAttributes, body, partBuilder) { nestedContent, nestedAccumulator ->
                nestedParser.parse(nestedContent, nestedAccumulator, nestingLevel + 1)
            }
        }
        if (resumeIndex != -1) return resumeIndex

        // Not a capturable tag: keep the character as plain text and step past it.
        partBuilder.appendText(input[tagOpen].toString())
        return tagOpen + 1
    }
}

View File

@ -0,0 +1,23 @@
package app.dapk.st.matrix.sync.internal.sync.message.html
import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator
/**
 * An accumulator that can emit the marker placed in front of each list item.
 */
internal interface ListAccumulator {
/**
 * Appends the prefix for the next list line.
 *
 * @param index optional explicit item number; implementations may ignore it.
 */
fun appendLinePrefix(index: Int?)
}
/**
 * Wraps a [ContentAccumulator] and prefixes list lines with `N. `, counting up from 1.
 */
internal class OrderedListAccumulator(delegate: ContentAccumulator) : ContentAccumulator by delegate, ListAccumulator {

    private var nextIndex = 1

    /** Appends `"<number>. "`; a non-null [index] restarts the numbering from that value. */
    override fun appendLinePrefix(index: Int?) {
        val lineNumber = index ?: nextIndex
        appendText("$lineNumber. ")
        nextIndex = lineNumber + 1
    }
}
/**
 * Wraps a [ContentAccumulator] and prefixes every list line with `- `.
 */
internal class UnorderedListAccumulator(delegate: ContentAccumulator) : ContentAccumulator by delegate, ListAccumulator {

    /** Appends `"- "`; the [index] is not used for unordered lists. */
    override fun appendLinePrefix(index: Int?) {
        appendText("- ")
    }
}

View File

@ -0,0 +1,95 @@
package app.dapk.st.matrix.sync.internal.sync.message.html
import app.dapk.st.matrix.common.UserId
import app.dapk.st.matrix.sync.internal.sync.message.*
/**
 * Maps captured html tags onto [ContentAccumulator] calls.
 *
 * Tags that are not listed in [parse] are skipped together with their content.
 */
class RichTextHtmlTagParser : TagParser {

    /**
     * Emits the rich text for a single tag.
     *
     * @param tagName name of the tag; names starting with `@` are treated as user mentions
     * @param attributes attributes of the tag (`href` for anchors, `value` for list items)
     * @param content raw text between the opening and closing tag
     * @param accumulator receiver of the output
     * @param parser used to parse [content] when the tag can contain further markup
     */
    override fun parse(
        tagName: String,
        attributes: Map<String, String>,
        content: String,
        accumulator: ContentAccumulator,
        parser: NestedParser
    ) {
        when {
            tagName.startsWith('@') -> accumulator.appendPerson(UserId(tagName), tagName)

            else -> when (tagName) {
                "br" -> accumulator.appendNewline()

                "a" -> parseAnchor(attributes["href"], content, accumulator)

                "p" -> {
                    parser.parse(content.trim(), accumulator)
                    accumulator.appendNewline()
                }

                "blockquote" -> {
                    accumulator.appendText("> ")
                    parser.parse(content.trim(), accumulator)
                }

                "strong", "b" -> accumulator.appendBold(content)

                "em", "i" -> accumulator.appendItalic(content)

                "h1", "h2", "h3", "h4", "h5" -> {
                    accumulator.appendBold(content)
                    accumulator.appendNewline()
                }

                "ol" -> parser.parse(content, OrderedListAccumulator(accumulator))

                "ul" -> parser.parse(content, UnorderedListAccumulator(accumulator))

                "li" -> parseListItem(attributes, content, accumulator, parser)

                else -> {
                    // skip tag
                }
            }
        }
    }

    /** Emits a person for matrix.to user links, a link for other urls and plain text without `href`. */
    private fun parseAnchor(url: String?, content: String, accumulator: ContentAccumulator) {
        when {
            url == null -> accumulator.appendText(content)
            url.startsWith("https://matrix.to/#/@") -> {
                val userId = UserId(url.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
                accumulator.appendPerson(userId, "@${content.removePrefix("@")}")
            }
            else -> accumulator.appendLink(url, content)
        }
    }

    /**
     * Emits one list line, placing a newline between the item's own text and a nested list.
     *
     * The line prefix is only written when [accumulator] is a [ListAccumulator]; an `li`
     * found outside of `ul`/`ol` is emitted without a prefix instead of failing the whole
     * parse with a ClassCastException. A non-numeric `value` attribute is ignored rather
     * than throwing a NumberFormatException.
     */
    private fun parseListItem(
        attributes: Map<String, String>,
        content: String,
        accumulator: ContentAccumulator,
        parser: NestedParser
    ) {
        (accumulator as? ListAccumulator)?.appendLinePrefix(attributes["value"]?.toIntOrNull())

        val nestedList = when {
            content.contains("<ul>") -> "<ul>"
            content.contains("<ol>") -> "<ol>"
            else -> null
        }

        if (nestedList == null) {
            parser.parse(content.trim(), accumulator)
            accumulator.appendNewline()
        } else {
            val firstItemInNested = content.substringBefore(nestedList)
            parser.parse(firstItemInNested.trim(), accumulator)
            accumulator.appendNewline()
            parser.parse(content.substring(content.indexOf(nestedList)).trim(), accumulator)
        }
    }
}

View File

@ -0,0 +1,78 @@
package app.dapk.st.matrix.sync.internal.sync.message.html
/** Deepest level of same-named nested tags that is followed before giving up. */
private const val MAX_SAME_TAG_DEPTH = 8

/**
 * Matches one attribute: a name, optionally followed by `=` and a double-quoted,
 * single-quoted or bare value. Hoisted so the pattern is compiled once.
 */
private val ATTRIBUTE_PATTERN = Regex("""([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?""")

/**
 * Captures a single html tag (name, attributes and content) out of a larger string.
 */
class TagCaptor {

    /**
     * Captures the tag whose `<` is at [startIndex] and reports it through [tagFactory].
     *
     * `<br>`, `<@...>` and self-closing (`.../>`) tags are reported with their trimmed inner
     * text as the name, no attributes and empty content. Any other tag needs a matching
     * closing tag; same-named nested tags are taken into account when locating it.
     *
     * @param input text containing the tag
     * @param startIndex index of the opening `<`
     * @param tagFactory receives the tag name, its attributes and its content
     * @return the index just past the captured tag, or -1 when there is no `>` after
     * [startIndex] or no matching closing tag (in which case [tagFactory] is not called).
     */
    fun tagCapture(input: String, startIndex: Int, tagFactory: (String, Map<String, String>, String) -> Unit): Int {
        val closeIndex = input.indexOf('>', startIndex = startIndex)
        if (closeIndex == -1) return -1

        val fullTag = input.substring(startIndex, closeIndex + 1)
        return when {
            fullTag.isExitlessTag() -> {
                tagFactory(fullTag.removeSurrounding("<", ">").trim(), emptyMap(), "")
                closeIndex + 1
            }

            fullTag.isSelfClosing() -> {
                tagFactory(fullTag.removeSuffix("/>").removePrefix("<").trim(), emptyMap(), "")
                closeIndex + 1
            }

            else -> captureTagWithContent(input, startIndex, closeIndex, tagFactory)
        }
    }

    /** Captures a tag that has a separate closing tag; returns -1 when that closing tag is missing. */
    private fun captureTagWithContent(
        input: String,
        startIndex: Int,
        closeIndex: Int,
        tagFactory: (String, Map<String, String>, String) -> Unit
    ): Int {
        // Everything between '<' and '>': the tag name, optionally followed by attributes.
        val declaration = input.substring(startIndex + 1, closeIndex)
        val nameEnd = declaration.indexOfFirst { it.isWhitespace() }
        val tagName = if (nameEnd == -1) declaration else declaration.substring(0, nameEnd)

        val exitTag = "</$tagName>"
        val exitIndex = input.findTagClose(tagName, exitTag, searchIndex = closeIndex + 1)
        if (exitIndex == -1) return -1

        val attributes = if (nameEnd == -1) emptyMap() else parseAttributes(declaration.substring(nameEnd))
        tagFactory(tagName, attributes, input.substring(closeIndex + 1, exitIndex))
        return exitIndex + exitTag.length
    }

    /**
     * Reads `name="value"` pairs out of the attribute part of a tag.
     *
     * Quoted values may contain spaces and `=` (e.g. urls with query strings); attributes
     * without a value map to an empty string. Later duplicates override earlier ones.
     */
    private fun parseAttributes(attributeText: String): Map<String, String> =
        ATTRIBUTE_PATTERN.findAll(attributeText).associate { match ->
            val value = match.groups[2]?.value ?: match.groups[3]?.value ?: match.groups[4]?.value ?: ""
            match.groupValues[1] to value
        }

    /**
     * Finds the index of the [exitTag] that closes the tag opened just before [searchIndex].
     *
     * Every nested opening of [tagName] met before an exit tag raises the depth, every exit
     * tag lowers it; the exit tag seen at depth 1 is the match. Returns -1 when the input
     * runs out of exit tags or the depth exceeds [MAX_SAME_TAG_DEPTH].
     */
    private fun String.findTagClose(tagName: String, exitTag: String, searchIndex: Int): Int {
        var depth = 1
        var cursor = searchIndex
        while (true) {
            val exitIndex = indexOf(exitTag, startIndex = cursor)
            if (exitIndex == -1) return -1

            val nextOpen = indexOfOpeningTag(tagName, cursor)
            if (nextOpen != -1 && nextOpen < exitIndex) {
                depth++
                if (depth > MAX_SAME_TAG_DEPTH) return -1
                cursor = nextOpen + 1
            } else {
                if (depth == 1) return exitIndex
                depth--
                cursor = exitIndex + 1
            }
        }
    }

    /**
     * Finds the next opening tag named exactly [tagName] at or after [fromIndex].
     *
     * The name must be followed by `>` or whitespace, so `<br>` is not mistaken for a
     * nested `<b>` and `<pre>` is not mistaken for a nested `<p>`.
     */
    private fun String.indexOfOpeningTag(tagName: String, fromIndex: Int): Int {
        val opening = "<$tagName"
        var candidate = indexOf(opening, startIndex = fromIndex)
        while (candidate != -1) {
            val following = getOrNull(candidate + opening.length)
            if (following == '>' || following?.isWhitespace() == true) return candidate
            candidate = indexOf(opening, startIndex = candidate + 1)
        }
        return -1
    }
}

/** True for tags that never have a closing tag: `<br>` and `<@...>` mentions. */
private fun String.isExitlessTag() = this == "<br>" || (this.startsWith("<@") && this.endsWith('>'))

/** True for tags written in the self-closing `<.../>` form. */
private fun String.isSelfClosing() = this.endsWith("/>")

View File

@ -1,18 +1,14 @@
package app.dapk.st.matrix.sync.internal.sync.message package app.dapk.st.matrix.sync.internal.sync.message.url
import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator
private const val END_SEARCH = -1
private const val INVALID_TRAILING_CHARS = ",.:;?<>" private const val INVALID_TRAILING_CHARS = ",.:;?<>"
internal class UrlParser { internal class UrlParser {
private fun String.hasLookAhead(current: Int, value: String): Boolean { fun parseUrl(input: String, urlIndex: Int, accumulator: ContentAccumulator): Int {
return length > current + value.length && this.substring(current, current + value.length) == value
}
fun parseUrl(input: String, linkStartIndex: Int, builder: PartBuilder): Int {
val urlIndex = input.indexOf("http", startIndex = linkStartIndex)
return if (urlIndex == END_SEARCH) END_SEARCH else { return if (urlIndex == END_SEARCH) END_SEARCH else {
builder.appendTextBeforeTag(linkStartIndex, urlIndex, input)
val originalUrl = input.substring(urlIndex) val originalUrl = input.substring(urlIndex)
var index = 0 var index = 0
val maybeUrl = originalUrl.takeWhile { val maybeUrl = originalUrl.takeWhile {
@ -25,30 +21,28 @@ internal class UrlParser {
when { when {
urlContinuesUntilEnd -> { urlContinuesUntilEnd -> {
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
builder.appendLink(url = cleanedUrl, label = null) accumulator.appendLink(url = cleanedUrl, label = null)
if (cleanedUrl != originalUrl) { if (cleanedUrl != originalUrl) {
builder.appendText(originalUrl.last().toString()) accumulator.appendText(originalUrl.last().toString())
} }
input.length.next() input.length + 1
} }
else -> { else -> {
val originalUrl = input.substring(urlIndex, urlEndIndex) val originalUrl = input.substring(urlIndex, urlEndIndex)
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar() val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
builder.appendLink(url = cleanedUrl, label = null) accumulator.appendLink(url = cleanedUrl, label = null)
if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1 if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
} }
} }
} }
} }
fun test(startingFrom: Int, input: String): Int {
return input.indexOf("http", startingFrom)
} }
private fun String.hasLookAhead(current: Int, value: String): Boolean {
return length > current + value.length && this.substring(current, current + value.length) == value
} }
private fun String.bestGuessStripTrailingUrlChar(): String { private fun String.bestGuessStripTrailingUrlChar(): String {
val last = this.last() val last = this.last()
return if (INVALID_TRAILING_CHARS.contains(last)) { return if (INVALID_TRAILING_CHARS.contains(last)) {

View File

@ -5,10 +5,9 @@ import app.dapk.st.matrix.common.RichText.Part.*
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
import fixture.aUserId import fixture.aUserId
import org.amshove.kluent.shouldBeEqualTo import org.amshove.kluent.shouldBeEqualTo
import org.junit.Ignore
import org.junit.Test import org.junit.Test
class RichMessageParserTest { class RichTextMessageParserTest {
private val parser = RichMessageParser() private val parser = RichMessageParser()
@ -18,6 +17,34 @@ class RichMessageParserTest {
expected = RichText(listOf(Normal("Hello world!"))) expected = RichText(listOf(Normal("Hello world!")))
) )
@Test
fun `parses strong tags`() = runParserTest(
Case(
input = """hello <strong>wor</strong>ld""",
expected = RichText(
listOf(
Normal("hello "),
Bold("wor"),
Normal("ld"),
)
)
),
)
@Test
fun `parses em tags`() = runParserTest(
Case(
input = """hello <em>wor</em>ld""",
expected = RichText(
listOf(
Normal("hello "),
Italic("wor"),
Normal("ld"),
)
)
),
)
@Test @Test
fun `parses p tags`() = runParserTest( fun `parses p tags`() = runParserTest(
input = "<p>Hello world!</p><p>foo bar</p>after paragraph", input = "<p>Hello world!</p><p>foo bar</p>after paragraph",
@ -63,7 +90,7 @@ class RichMessageParserTest {
@Test @Test
fun `replaces matrixdotto with person`() = runParserTest( fun `replaces matrixdotto with person`() = runParserTest(
input = """Hello <a href="https://matrix.to/#/@a-name:foo.bar">a-name</a>: world""", input = """Hello <a href="https://matrix.to/#/@a-name:foo.bar">a-name</a>: world""",
expected = RichText(listOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(" world"))) expected = RichText(listOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(": world")))
) )
@Test @Test
@ -122,6 +149,21 @@ class RichMessageParserTest {
), ),
) )
@Test
fun `parses nested lists`() = runParserTest(
input = """
<ul>
<li>first item
<ul>
<li>nested item</li>
</ul>
</li>
</ul>
""".trimIndent().lines().joinToString("") { it.trim() },
expected = RichText(listOf(Normal("- first item\n- nested item")))
)
@Test @Test
fun `parses urls`() = runParserTest( fun `parses urls`() = runParserTest(
Case( Case(
@ -178,58 +220,6 @@ class RichMessageParserTest {
expected = RichText(listOf(Normal(">><foo> ><>> << more content"))) expected = RichText(listOf(Normal(">><foo> ><>> << more content")))
) )
@Test
fun `parses strong tags`() = runParserTest(
Case(
input = """hello <strong>wor</strong>ld""",
expected = RichText(
listOf(
Normal("hello "),
Bold("wor"),
Normal("ld"),
)
)
),
)
@Test
fun `parses em tags`() = runParserTest(
Case(
input = """hello <em>wor</em>ld""",
expected = RichText(
listOf(
Normal("hello "),
Italic("wor"),
Normal("ld"),
)
)
),
)
@Ignore // TODO
@Test
fun `parses nested tags`() = runParserTest(
Case(
input = """hello <b><i>wor<i/><b/>ld""",
expected = RichText(
listOf(
Normal("hello "),
BoldItalic("wor"),
Normal("ld"),
)
)
),
Case(
input = """<a href="www.google.com"><a href="www.google.com">www.google.com<a/><a/>""",
expected = RichText(
listOf(
Link(url = "www.google.com", label = "www.google.com"),
Link(url = "www.bing.com", label = "www.bing.com"),
)
)
)
)
@Test @Test
fun `parses 'a' tags`() = runParserTest( fun `parses 'a' tags`() = runParserTest(
Case( Case(