message parsing refactor

This commit is contained in:
Adam Brown 2022-10-28 22:19:16 +01:00
parent 89af610f58
commit 8e36efe0c2
14 changed files with 293 additions and 201 deletions

View File

@ -8,7 +8,7 @@ import app.dapk.st.matrix.sync.internal.DefaultSyncService
import app.dapk.st.matrix.sync.internal.request.*
import app.dapk.st.matrix.sync.internal.room.MessageDecrypter
import app.dapk.st.matrix.sync.internal.room.MissingMessageDecrypter
import app.dapk.st.matrix.sync.internal.sync.RichMessageParser
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.Flow

View File

@ -13,6 +13,7 @@ import app.dapk.st.matrix.sync.internal.room.RoomEventsDecrypter
import app.dapk.st.matrix.sync.internal.room.SyncEventDecrypter
import app.dapk.st.matrix.sync.internal.room.SyncSideEffects
import app.dapk.st.matrix.sync.internal.sync.*
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll

View File

@ -6,7 +6,7 @@ import app.dapk.st.matrix.sync.internal.request.ApiTimelineEvent
import app.dapk.st.matrix.sync.internal.request.ApiTimelineEvent.TimelineMessage.Content.Image
import app.dapk.st.matrix.sync.internal.request.ApiTimelineEvent.TimelineMessage.Content.Text
import app.dapk.st.matrix.sync.internal.request.DecryptedContent
import app.dapk.st.matrix.sync.internal.sync.RichMessageParser
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
import kotlinx.serialization.json.Json
internal class RoomEventsDecrypter(

View File

@ -1,197 +0,0 @@
package app.dapk.st.matrix.sync.internal.sync
import app.dapk.st.matrix.common.RichText
import app.dapk.st.matrix.common.RichText.Part.*
import app.dapk.st.matrix.common.UserId
// Punctuation that bestGuessStripTrailingUrlChar removes when it is the last character of a url.
private const val INVALID_TRAILING_CHARS = ",.:;?"
// Characters RichMessageParser searches for as the start and end of a tag.
private const val TAG_OPEN = '<'
private const val TAG_CLOSE = '>'
/**
 * Converts a message body containing a small set of HTML-style tags and bare urls into [RichText].
 */
class RichMessageParser {

    /**
     * Parses [source] into [RichText].
     *
     * Quote entities are decoded and leading fallback lines are dropped first. Tags are then processed one at a
     * time; once no `<` remains, the rest of the input is scanned for urls starting with `http`.
     */
    fun parse(source: String): RichText {
        val input = source
            .removeHtmlEntities()
            .dropTextFallback()
        val builder = PartBuilder()
        // openIndex is where the next search starts; lastStartIndex is where the final text/url scan copies from.
        var openIndex = 0
        var lastStartIndex = 0
        while (openIndex != -1) {
            val foundIndex = input.indexOf(TAG_OPEN, startIndex = openIndex)
            if (foundIndex != -1) {
                val closeIndex = input.indexOf(TAG_CLOSE, startIndex = foundIndex)
                if (closeIndex == -1) {
                    // '<' without a matching '>': step forward and search again
                    openIndex++
                } else {
                    val wholeTag = input.substring(foundIndex, closeIndex + 1)
                    val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
                    if (tagName.startsWith('@')) {
                        // <@user> style mention, there is no closing tag to look for
                        if (openIndex != foundIndex) {
                            builder.appendText(input.substring(openIndex, foundIndex))
                        }
                        builder.appendPerson(UserId(tagName), tagName)
                        openIndex = foundIndex + wholeTag.length
                        lastStartIndex = openIndex
                        continue
                    }
                    if (tagName == "br") {
                        if (openIndex != foundIndex) {
                            builder.appendText(input.substring(openIndex, foundIndex))
                        }
                        builder.appendText("\n")
                        openIndex = foundIndex + wholeTag.length
                        lastStartIndex = openIndex
                        continue
                    }
                    val exitTag = "</$tagName>"
                    val exitIndex = input.indexOf(exitTag, startIndex = closeIndex)
                    if (exitIndex == -1) {
                        // no closing tag: step forward and search again
                        openIndex++
                    } else {
                        if (tagName == "mx-reply") {
                            // the whole reply block is skipped, nothing is appended for it
                            openIndex = exitIndex + exitTag.length
                            lastStartIndex = openIndex
                            continue
                        }
                        if (openIndex != foundIndex) {
                            builder.appendText(input.substring(openIndex, foundIndex))
                        }
                        val tagContent = input.substring(closeIndex + 1, exitIndex)
                        openIndex = exitIndex + exitTag.length
                        lastStartIndex = openIndex
                        when (tagName) {
                            "a" -> {
                                val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">")
                                if (findHrefUrl.startsWith("https://matrix.to/#/@")) {
                                    val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
                                    builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
                                    // a ':' directly after the mention is skipped
                                    if (input.getOrNull(openIndex) == ':') {
                                        openIndex++
                                        lastStartIndex = openIndex
                                    }
                                } else {
                                    builder.appendLink(findHrefUrl, label = tagContent)
                                }
                            }
                            "b" -> builder.appendBold(tagContent)
                            "strong" -> builder.appendBold(tagContent)
                            "i" -> builder.appendItalic(tagContent)
                            "em" -> builder.appendItalic(tagContent)
                            else -> builder.appendText(tagContent)
                        }
                    }
                }
            } else {
                // no tags left, check for urls
                val urlIndex = input.indexOf("http", startIndex = openIndex)
                if (urlIndex != -1) {
                    if (lastStartIndex != urlIndex) {
                        builder.appendText(input.substring(lastStartIndex, urlIndex))
                    }
                    val remainingInput = input.substring(urlIndex)
                    val relativeUrlEnd = remainingInput.indexOfFirst { it == '\n' || it == ' ' }
                    if (relativeUrlEnd == -1) {
                        // the url runs to the end of the input
                        val cleanedUrl = remainingInput.bestGuessStripTrailingUrlChar()
                        builder.appendLink(url = cleanedUrl, label = null)
                        if (cleanedUrl != remainingInput) {
                            builder.appendText(remainingInput.last().toString())
                        }
                        break
                    } else {
                        // relativeUrlEnd was found within the substring starting at urlIndex, so it is shifted back
                        // into input coordinates before being used to slice input or to move openIndex
                        val urlEndIndex = urlIndex + relativeUrlEnd
                        val originalUrl = input.substring(urlIndex, urlEndIndex)
                        val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
                        builder.appendLink(url = cleanedUrl, label = null)
                        // when a trailing char was stripped, resume one earlier so it is kept as text
                        openIndex = if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
                        lastStartIndex = openIndex
                        continue
                    }
                }
                // exit
                if (lastStartIndex < input.length) {
                    builder.appendText(input.substring(lastStartIndex))
                }
                break
            }
        }
        return RichText(builder.build())
    }
}
/** Decodes the `&quot;` and `&#39;` entities into the quote characters they stand for. */
private fun String.removeHtmlEntities(): String {
    val doubleQuotesDecoded = replace("&quot;", "\"")
    return doubleQuotesDecoded.replace("&#39;", "'")
}
/**
 * Drops the leading run of lines that are empty or start with "> ", then joins the remaining lines with '\n'.
 */
private fun String.dropTextFallback(): String {
    val isFallbackLine: (String) -> Boolean = { line -> line.startsWith("> ") || line.isEmpty() }
    return lineSequence()
        .dropWhile(isFallbackLine)
        .joinToString(separator = "\n")
}
/**
 * Removes a single trailing character from the receiver when that character is one of [INVALID_TRAILING_CHARS].
 * An empty receiver is returned unchanged.
 */
private fun String.bestGuessStripTrailingUrlChar(): String {
    // lastOrNull avoids the NoSuchElementException that last() throws for an empty string
    val last = this.lastOrNull() ?: return this
    return if (INVALID_TRAILING_CHARS.contains(last)) this.dropLast(1) else this
}
/**
 * Collects the parts of a parsed message.
 *
 * Plain text is gathered in a buffer and only turned into a [Normal] part when another kind of part is appended
 * or when [build] is called, so consecutive [appendText] calls end up in one part.
 */
private class PartBuilder {

    // never reassigned, only appended to and cleared
    private val normalBuffer = StringBuilder()

    // NOTE(review): a set ignores an element equal to one it already holds, so if RichText.Part types compare by
    // value a repeated part (for example the same bold word twice) would be dropped. Confirm this is intended.
    private val parts = mutableSetOf<RichText.Part>()

    /** Adds [value] to the pending plain text. */
    fun appendText(value: String) {
        normalBuffer.append(value.cleanFirstTextLine())
    }

    /** Flushes pending plain text, then adds [value] as an [Italic] part. */
    fun appendItalic(value: String) {
        flushNormalBuffer()
        parts.add(Italic(value.cleanFirstTextLine()))
    }

    /** Flushes pending plain text, then adds [value] as a [Bold] part. */
    fun appendBold(value: String) {
        flushNormalBuffer()
        parts.add(Bold(value.cleanFirstTextLine()))
    }

    // Leading whitespace is trimmed only while nothing has been collected yet.
    private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this

    /** Flushes pending plain text, then adds a [Person] part. */
    fun appendPerson(userId: UserId, displayName: String) {
        flushNormalBuffer()
        parts.add(Person(userId, displayName))
    }

    /** Flushes pending plain text, then adds a [Link] part; [url] doubles as the label when [label] is null. */
    fun appendLink(url: String, label: String?) {
        flushNormalBuffer()
        parts.add(Link(url, label ?: url))
    }

    /** Flushes pending plain text and returns the collected parts. */
    fun build(): Set<RichText.Part> {
        flushNormalBuffer()
        return parts
    }

    private fun flushNormalBuffer() {
        if (normalBuffer.isNotEmpty()) {
            parts.add(Normal(normalBuffer.toString()))
            normalBuffer.clear()
        }
    }
}

View File

@ -12,6 +12,7 @@ import app.dapk.st.matrix.sync.RoomMembersService
import app.dapk.st.matrix.sync.find
import app.dapk.st.matrix.sync.internal.request.ApiEncryptedContent
import app.dapk.st.matrix.sync.internal.request.ApiTimelineEvent
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
// Suspending function type that takes an EventId and returns a LookupResult.
private typealias Lookup = suspend (EventId) -> LookupResult

View File

@ -6,6 +6,7 @@ import app.dapk.st.matrix.sync.RoomEvent
import app.dapk.st.matrix.sync.RoomMembersService
import app.dapk.st.matrix.sync.find
import app.dapk.st.matrix.sync.internal.request.ApiTimelineEvent
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
// RoomMember with the id "unknown" and no display name or avatar.
// NOTE(review): presumably substituted when an event's author cannot be resolved — confirm at the use site.
private val UNKNOWN_AUTHOR = RoomMember(id = UserId("unknown"), displayName = null, avatarUrl = null)

View File

@ -0,0 +1,118 @@
package app.dapk.st.matrix.sync.internal.sync.message
import app.dapk.st.matrix.common.UserId
// Characters that open and close a tag.
private const val TAG_OPEN = '<'
private const val TAG_CLOSE = '>'
// Value String.indexOf returns when there is no match.
private const val NO_RESULT_FOUND = -1
/**
 * Recognises the HTML-style tags that can appear in a message body and appends the matching parts to a
 * [PartBuilder].
 */
internal class HtmlParser {

    /**
     * Processes the first tag found in [input] at or after [searchIndex].
     *
     * Handled forms: `<@user>` mentions, `<br>`, `<mx-reply>` (skipped without appending anything), `<a>`,
     * `<b>`/`<strong>` and `<i>`/`<em>`. The content of any other paired tag is appended as plain text. When a
     * `<` has no matching `>`, or a tag has no closing tag, the single character at [searchIndex] is appended as
     * text and the search moves on by one.
     *
     * @return the index to continue from, or [END_SEARCH] when [input] has no `<` at or after [searchIndex].
     */
    fun parseHtmlTags(input: String, searchIndex: Int, builder: PartBuilder): Int = input.findTag(
        fromIndex = searchIndex,
        onInvalidTag = { builder.appendText(input[it].toString()) },
        onTag = { tagOpen, tagClose ->
            val wholeTag = input.substring(tagOpen, tagClose + 1)
            val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
            when {
                tagName.startsWith('@') -> {
                    // <@user> style mention, there is no closing tag to look for
                    appendTextBeforeTag(searchIndex, tagOpen, builder, input)
                    builder.appendPerson(UserId(tagName), tagName)
                    tagClose.next()
                }

                tagName == "br" -> {
                    appendTextBeforeTag(searchIndex, tagOpen, builder, input)
                    builder.appendText("\n")
                    tagClose.next()
                }

                else -> parsePairedTag(input, searchIndex, tagOpen, tagClose, wholeTag, tagName, builder)
            }
        }
    )

    /** Handles a tag that needs a matching `</tagName>`; returns the index to continue from. */
    private fun parsePairedTag(
        input: String,
        searchIndex: Int,
        tagOpen: Int,
        tagClose: Int,
        wholeTag: String,
        tagName: String,
        builder: PartBuilder,
    ): Int {
        val exitTag = "</$tagName>"
        val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
        if (exitIndex == NO_RESULT_FOUND) {
            builder.appendText(input[searchIndex].toString())
            return searchIndex.next()
        }
        val exitTagClose = exitIndex + exitTag.length
        // the reply block is skipped entirely, including any text in front of it
        if (tagName == "mx-reply") return exitTagClose

        appendTextBeforeTag(searchIndex, tagOpen, builder, input)
        val tagContent = input.substring(tagClose + 1, exitIndex)
        return when (tagName) {
            "a" -> appendAnchor(input, wholeTag, tagContent, exitTagClose, builder)
            "b", "strong" -> {
                builder.appendBold(tagContent)
                exitTagClose
            }

            "i", "em" -> {
                builder.appendItalic(tagContent)
                exitTagClose
            }

            else -> {
                builder.appendText(tagContent)
                exitTagClose
            }
        }
    }

    /** Appends an `<a>` tag as a person (matrix.to user links) or a link; returns the index to continue from. */
    private fun appendAnchor(input: String, wholeTag: String, tagContent: String, exitTagClose: Int, builder: PartBuilder): Int {
        val hrefUrl = wholeTag.findHrefUrl()
        return if (hrefUrl.startsWith("https://matrix.to/#/@")) {
            val userId = UserId(hrefUrl.substringAfter("https://matrix.to/#/"))
            builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
            // a ':' directly after the mention is skipped
            if (input.getOrNull(exitTagClose) == ':') exitTagClose.next() else exitTagClose
        } else {
            builder.appendLink(hrefUrl, label = tagContent)
            exitTagClose
        }
    }

    /**
     * Extracts the `href` value from an opening tag.
     *
     * A quoted value ends at its closing quote, so attributes that follow the url are not included in it. When
     * the value is not quoted, or the closing quote is missing, everything after `href=` is used with double
     * quotes and the closing `>` removed.
     */
    private fun String.findHrefUrl(): String {
        val afterHref = substringAfter("href=")
        val quote = afterHref.firstOrNull()?.takeIf { it == '"' || it == '\'' }
        val closingQuote = if (quote == null) NO_RESULT_FOUND else afterHref.indexOf(quote, startIndex = 1)
        return when (closingQuote) {
            NO_RESULT_FOUND -> afterHref.replace("\"", "").removeSuffix(">")
            else -> afterHref.substring(1, closingQuote)
        }
    }

    private fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String) {
        if (searchIndex != tagOpen) {
            builder.appendText(input.substring(searchIndex, tagOpen))
        }
    }

    /**
     * Finds the next `<` at or after [fromIndex] together with the first `>` that follows it.
     *
     * Returns [END_SEARCH] when there is no `<`. When there is no `>`, [onInvalidTag] is called with [fromIndex]
     * and the result is `fromIndex + 1`. Otherwise the result is whatever [onTag] returns for the two indexes.
     */
    private fun String.findTag(fromIndex: Int, onInvalidTag: (Int) -> Unit, onTag: (Int, Int) -> Int): Int {
        return when (val foundIndex = this.indexOf(TAG_OPEN, startIndex = fromIndex)) {
            NO_RESULT_FOUND -> END_SEARCH
            else -> when (val closeIndex = indexOf(TAG_CLOSE, startIndex = foundIndex)) {
                NO_RESULT_FOUND -> {
                    onInvalidTag(fromIndex)
                    fromIndex + 1
                }

                else -> onTag(foundIndex, closeIndex)
            }
        }
    }
}

View File

@ -0,0 +1,13 @@
package app.dapk.st.matrix.sync.internal.sync.message
/** Alias for [Int], used for positions in the text being parsed. */
internal typealias SearchIndex = Int
/** Returns the value one greater than the receiver. */
internal fun Int.next(): Int = inc()
/**
 * Operations offered to code that parses message text.
 *
 * NOTE(review): no implementation of this interface is visible here — confirm where it is used.
 */
internal interface ParserScope {
    /**
     * NOTE(review): presumably appends the text of [input] between [searchIndex] and [tagOpen] to [builder] —
     * confirm against the implementations.
     */
    fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String)

    /** NOTE(review): presumably the index following the receiver — confirm against the implementations. */
    fun SearchIndex.next(): SearchIndex
}

View File

@ -0,0 +1,56 @@
package app.dapk.st.matrix.sync.internal.sync.message
import app.dapk.st.matrix.common.RichText
import app.dapk.st.matrix.common.UserId
/**
 * Collects the parts of a parsed message.
 *
 * Plain text is gathered in a buffer and only turned into a [RichText.Part.Normal] when another kind of part is
 * appended or when [build] is called, so consecutive [appendText] calls end up in one part.
 */
internal class PartBuilder {

    // never reassigned, only appended to and cleared
    private val normalBuffer = StringBuilder()

    // NOTE(review): a set ignores an element equal to one it already holds, so if RichText.Part types compare by
    // value a repeated part (for example the same bold word twice) would be dropped. Confirm this is intended.
    private val parts = mutableSetOf<RichText.Part>()

    /** Adds [value] to the pending plain text. */
    fun appendText(value: String) {
        normalBuffer.append(value.cleanFirstTextLine())
    }

    /** Flushes pending plain text, then adds [value] as an italic part. */
    fun appendItalic(value: String) {
        flushNormalBuffer()
        parts.add(RichText.Part.Italic(value.cleanFirstTextLine()))
    }

    /** Flushes pending plain text, then adds [value] as a bold part. */
    fun appendBold(value: String) {
        flushNormalBuffer()
        parts.add(RichText.Part.Bold(value.cleanFirstTextLine()))
    }

    // Leading whitespace is trimmed only while nothing has been collected yet.
    private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this

    /** Flushes pending plain text, then adds a person part. */
    fun appendPerson(userId: UserId, displayName: String) {
        flushNormalBuffer()
        parts.add(RichText.Part.Person(userId, displayName))
    }

    /** Flushes pending plain text, then adds a link part; [url] doubles as the label when [label] is null. */
    fun appendLink(url: String, label: String?) {
        flushNormalBuffer()
        parts.add(RichText.Part.Link(url, label ?: url))
    }

    /** Flushes pending plain text and returns the collected parts. */
    fun build(): Set<RichText.Part> {
        flushNormalBuffer()
        return parts
    }

    private fun flushNormalBuffer() {
        if (normalBuffer.isNotEmpty()) {
            parts.add(RichText.Part.Normal(normalBuffer.toString()))
            normalBuffer.clear()
        }
    }
}
/**
 * Appends the slice of [input] from [previousIndex] up to (not including) [tagOpenIndex] as text.
 * Nothing is appended when the two indexes are equal.
 */
internal fun PartBuilder.appendTextBeforeTag(previousIndex: Int, tagOpenIndex: Int, input: String) {
    if (previousIndex == tagOpenIndex) return
    val textBeforeTag = input.substring(previousIndex, tagOpenIndex)
    appendText(textBeforeTag)
}

View File

@ -0,0 +1,50 @@
package app.dapk.st.matrix.sync.internal.sync.message
import app.dapk.st.matrix.common.RichText
import kotlin.math.max
/** Index value that ends the loop in [RichMessageParser.parse]; the parsers also return it when they find nothing. */
internal const val END_SEARCH = -1
/**
 * Turns a message body into [RichText] by repeatedly running the html tag parser and the url parser over it.
 */
class RichMessageParser {

    private val htmlParser = HtmlParser()
    private val urlParser = UrlParser()

    /**
     * Parses [source] into [RichText].
     *
     * Quote entities are decoded and leading fallback lines dropped, then each pass runs the tag parser followed
     * by the url parser until both report nothing found or either reaches the end of the input.
     */
    fun parse(source: String): RichText {
        val input = source
            .removeHtmlEntities()
            .dropTextFallback()
        val builder = PartBuilder()
        var cursor = 0
        while (cursor != END_SEARCH) {
            val htmlResult = htmlParser.parseHtmlTags(input, cursor, builder)
            val urlResult = urlParser.parseUrl(input, urlSearchStart(htmlResult, cursor), builder)
            val finished = isFinished(htmlResult, urlResult, input)
            if (finished && textRemains(htmlResult, urlResult, input)) {
                builder.appendText(input.substring(cursor))
            }
            cursor = when {
                finished -> END_SEARCH
                else -> max(htmlResult, urlResult)
            }
        }
        return RichText(builder.build())
    }

    // Neither parser has consumed the input through to its end.
    private fun textRemains(htmlResult: Int, urlResult: Int, input: String): Boolean {
        return htmlResult < input.length && urlResult < input.length
    }

    // The url search continues from where the tag parser stopped, or from the current position when no tag was found.
    private fun urlSearchStart(htmlResult: Int, searchIndex: Int): Int {
        return if (htmlResult == END_SEARCH) searchIndex else htmlResult
    }

    // Done when both parsers found nothing, or when either one has moved to or past the end of the input.
    private fun isFinished(htmlResult: SearchIndex, urlResult: Int, input: String): Boolean {
        val nothingFound = htmlResult == END_SEARCH && urlResult == END_SEARCH
        val pastEnd = htmlResult >= input.length || urlResult >= input.length
        return nothingFound || pastEnd
    }
}
/** Decodes the `&quot;` and `&#39;` entities into the quote characters they stand for. */
private fun String.removeHtmlEntities(): String {
    val doubleQuotesDecoded = replace("&quot;", "\"")
    return doubleQuotesDecoded.replace("&#39;", "'")
}
/**
 * Drops the leading run of lines that are empty or start with "> ", then joins the remaining lines with '\n'.
 */
private fun String.dropTextFallback(): String {
    val isFallbackLine: (String) -> Boolean = { line -> line.startsWith("> ") || line.isEmpty() }
    return lineSequence()
        .dropWhile(isFallbackLine)
        .joinToString(separator = "\n")
}

View File

@ -0,0 +1,47 @@
package app.dapk.st.matrix.sync.internal.sync.message
// Punctuation that bestGuessStripTrailingUrlChar removes when it is the last character of a url.
private const val INVALID_TRAILING_CHARS = ",.:;?"
/**
 * Finds bare urls in message text and records them as link parts.
 */
internal class UrlParser {

    /**
     * Searches [input] for the next occurrence of `http` at or after [linkStartIndex].
     *
     * When one is found, the text between [linkStartIndex] and the url is appended to [builder], followed by a
     * link for the url. The url runs until the next space or newline, or to the end of [input]. A single trailing
     * punctuation character is left out of the link and kept as text.
     *
     * @return [END_SEARCH] when no url is found, `input.length + 1` when the url runs to the end of [input],
     * otherwise the index in [input] at which parsing should continue.
     */
    fun parseUrl(input: String, linkStartIndex: Int, builder: PartBuilder): Int {
        val urlIndex = input.indexOf("http", startIndex = linkStartIndex)
        if (urlIndex == END_SEARCH) return END_SEARCH

        builder.appendTextBeforeTag(linkStartIndex, urlIndex, input)
        val remainingInput = input.substring(urlIndex)
        val relativeUrlEnd = remainingInput.indexOfFirst { it == '\n' || it == ' ' }
        return if (relativeUrlEnd == -1) {
            // the url runs to the end of the input
            val cleanedUrl = remainingInput.bestGuessStripTrailingUrlChar()
            builder.appendLink(url = cleanedUrl, label = null)
            if (cleanedUrl != remainingInput) {
                builder.appendText(remainingInput.last().toString())
            }
            input.length.next()
        } else {
            // relativeUrlEnd was found within the substring starting at urlIndex, so it is shifted back into
            // input coordinates before being used to slice input or returned as the next index
            val urlEndIndex = urlIndex + relativeUrlEnd
            val originalUrl = input.substring(urlIndex, urlEndIndex)
            val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
            builder.appendLink(url = cleanedUrl, label = null)
            // when a trailing char was stripped, resume one earlier so it is kept as text
            if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
        }
    }
}
/**
 * Removes a single trailing character from the receiver when that character is one of [INVALID_TRAILING_CHARS].
 * An empty receiver is returned unchanged.
 */
private fun String.bestGuessStripTrailingUrlChar(): String {
    // lastOrNull avoids the NoSuchElementException that last() throws for an empty string
    val last = this.lastOrNull() ?: return this
    return if (INVALID_TRAILING_CHARS.contains(last)) this.dropLast(1) else this
}

View File

@ -5,7 +5,7 @@ import app.dapk.st.matrix.common.JsonString
import app.dapk.st.matrix.common.RichText
import app.dapk.st.matrix.sync.RoomEvent
import app.dapk.st.matrix.sync.internal.request.DecryptedContent
import app.dapk.st.matrix.sync.internal.sync.RichMessageParser
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
import fake.FakeMatrixLogger
import fake.FakeMessageDecrypter
import fixture.*

View File

@ -2,6 +2,7 @@ package app.dapk.st.matrix.sync.internal.sync
import app.dapk.st.matrix.common.RichText
import app.dapk.st.matrix.common.RichText.Part.*
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
import fixture.aUserId
import org.amshove.kluent.shouldBeEqualTo
import org.junit.Ignore
@ -157,7 +158,7 @@ class RichMessageParserTest {
expected = RichText(
setOf(
Normal("hello "),
RichText.Part.BoldItalic("wor"),
BoldItalic("wor"),
Normal("ld"),
)
)

View File

@ -6,6 +6,7 @@ import app.dapk.st.matrix.common.asString
import app.dapk.st.matrix.sync.RoomEvent
import app.dapk.st.matrix.sync.internal.request.ApiEncryptedContent
import app.dapk.st.matrix.sync.internal.request.ApiTimelineEvent
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
import fake.FakeErrorTracker
import fake.FakeRoomMembersService
import fixture.*