add support for more types of html tags and nesting
This commit is contained in:
parent
c6b9a997f3
commit
c9a29b0b25
|
@ -0,0 +1,85 @@
|
||||||
|
package app.dapk.st.matrix.sync.internal.sync.message
|
||||||
|
|
||||||
|
import app.dapk.st.matrix.sync.internal.sync.message.html.HtmlProcessor
|
||||||
|
import app.dapk.st.matrix.sync.internal.sync.message.url.UrlParser
|
||||||
|
|
||||||
|
// Nesting depth at which parsing stops and the remaining input is appended verbatim.
private const val MAX_NESTING_LIMIT = 20

/**
 * Walks a message body left to right, handing every `<` it meets to [HtmlProcessor] and every
 * `http` it meets to [UrlParser], and appending whatever lies in between as plain text.
 */
class AccumulatingRichTextContentParser : AccumulatingContentParser {

    private val urlParser = UrlParser()
    private val tagProcessor = HtmlProcessor()

    /**
     * Parses [input] into [accumulator] and returns that same accumulator.
     *
     * Once [nestingLevel] reaches [MAX_NESTING_LIMIT] the input is no longer inspected and is
     * appended as plain text instead.
     */
    override fun parse(input: String, accumulator: ContentAccumulator, nestingLevel: Int): ContentAccumulator {
        if (nestingLevel >= MAX_NESTING_LIMIT) {
            accumulator.appendText(input)
            return accumulator
        }

        iterate { cursor ->
            val nextCursor = process(
                input,
                cursor,
                processTag = { tagStart ->
                    prependTextBeforeCapture(input, cursor, tagStart, accumulator)
                    tagProcessor.process(input, tagStart, accumulator, nestingLevel, nestedParser = this)
                },
                processUrl = { urlStart ->
                    prependTextBeforeCapture(input, cursor, urlStart, accumulator)
                    urlParser.parseUrl(input, urlStart, accumulator)
                }
            )
            // -1 means neither a tag nor a url remains, so flush the tail as plain text
            if (nextCursor == -1) {
                appendRemainingText(cursor, input, accumulator)
            }
            nextCursor
        }
        return accumulator
    }

    /** Feeds [action] its own previous result, starting from 0, until it answers -1. */
    private inline fun iterate(action: (Int) -> Int) {
        var cursor = 0
        do {
            cursor = action(cursor)
        } while (cursor != -1)
    }

    /**
     * Locates the next `<` and the next `http` at or after [searchIndex] and runs the callback
     * for whichever one is chosen by [selectProcessor]; returns that callback's result, or -1
     * when neither marker is present.
     */
    private fun process(input: String, searchIndex: Int, processTag: (Int) -> Int, processUrl: (Int) -> Int): Int {
        val tagOpen = input.indexOf('<', startIndex = searchIndex)
        val httpOpen = input.indexOf("http", startIndex = searchIndex)
        return selectProcessor(
            tagOpen,
            httpOpen,
            processTag = { processTag(tagOpen) },
            processUrl = { processUrl(httpOpen) }
        )
    }

    /**
     * Picks the processor for the marker that appears first. A missing marker (-1) never wins,
     * and a tie goes to the tag because urls can exist within tags.
     */
    private inline fun selectProcessor(tagOpen: Int, httpOpen: Int, processTag: () -> Int, processUrl: () -> Int) = when {
        tagOpen == -1 -> if (httpOpen == -1) -1 else processUrl()
        httpOpen == -1 -> processTag()
        tagOpen <= httpOpen -> processTag()
        else -> processUrl()
    }

    /** Appends the plain text sitting between [index] and the start of the capture, if any. */
    private fun prependTextBeforeCapture(input: String, index: Int, captureIndex: Int, accumulator: ContentAccumulator) {
        if (index >= captureIndex) return
        accumulator.appendText(input.substring(index, captureIndex))
    }

    /** Appends everything from [index] to the end of [input], if anything is left. */
    private fun appendRemainingText(index: Int, input: String, accumulator: ContentAccumulator) {
        if (index >= input.length) return
        accumulator.appendText(input.substring(index))
    }
}
|
|
@ -1,248 +0,0 @@
|
||||||
package app.dapk.st.matrix.sync.internal.sync.message
|
|
||||||
|
|
||||||
import app.dapk.st.matrix.common.UserId
|
|
||||||
|
|
||||||
private const val TAG_OPEN = '<'
|
|
||||||
private const val TAG_CLOSE = '>'
|
|
||||||
private const val NO_RESULT_FOUND = -1
|
|
||||||
private val SKIPPED_TAGS = setOf("mx-reply")
|
|
||||||
|
|
||||||
internal class HtmlParser {
|
|
||||||
|
|
||||||
fun test(startingFrom: Int, input: String) = input.indexOf(TAG_OPEN, startingFrom)
|
|
||||||
|
|
||||||
fun parseHtmlTags(input: String, searchIndex: Int, builder: PartBuilder, nestingLevel: Int = 0): SearchIndex = input.findTag(
|
|
||||||
fromIndex = searchIndex,
|
|
||||||
onInvalidTag = { builder.appendText(input[it].toString()) },
|
|
||||||
onTag = { tagOpen, tagClose ->
|
|
||||||
val (wholeTag, tagName) = parseTag(input, tagOpen, tagClose)
|
|
||||||
|
|
||||||
when {
|
|
||||||
tagName.startsWith('@') -> {
|
|
||||||
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
|
||||||
builder.appendPerson(UserId(tagName), tagName)
|
|
||||||
tagClose.next()
|
|
||||||
}
|
|
||||||
|
|
||||||
tagName == "br" -> {
|
|
||||||
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
|
||||||
builder.appendNewline()
|
|
||||||
tagClose.next()
|
|
||||||
}
|
|
||||||
|
|
||||||
else -> parseTagWithContent(input, tagName, tagClose, searchIndex, tagOpen, wholeTag, builder, nestingLevel)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
private fun parseTagWithContent(
|
|
||||||
input: String,
|
|
||||||
tagName: String,
|
|
||||||
tagClose: Int,
|
|
||||||
searchIndex: Int,
|
|
||||||
tagOpen: Int,
|
|
||||||
wholeTag: String,
|
|
||||||
builder: PartBuilder,
|
|
||||||
nestingLevel: Int
|
|
||||||
): Int {
|
|
||||||
val exitTag = "</$tagName>"
|
|
||||||
val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
|
|
||||||
val exitTagCloseIndex = exitIndex + exitTag.length
|
|
||||||
return when {
|
|
||||||
exitIndex == NO_RESULT_FOUND -> {
|
|
||||||
builder.appendText(input[searchIndex].toString())
|
|
||||||
searchIndex.next()
|
|
||||||
}
|
|
||||||
|
|
||||||
SKIPPED_TAGS.contains(tagName) -> exitTagCloseIndex
|
|
||||||
|
|
||||||
else -> {
|
|
||||||
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
|
||||||
val tagContent = input.substring(tagClose + 1, exitIndex)
|
|
||||||
handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex, nestingLevel)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun handleTagWithContent(
|
|
||||||
input: String,
|
|
||||||
tagName: String,
|
|
||||||
wholeTag: String,
|
|
||||||
builder: PartBuilder,
|
|
||||||
tagContent: String,
|
|
||||||
exitTagCloseIndex: Int,
|
|
||||||
nestingLevel: Int,
|
|
||||||
) = when (tagName) {
|
|
||||||
"a" -> {
|
|
||||||
val findHrefUrl = wholeTag.findTagAttribute("href")
|
|
||||||
when {
|
|
||||||
findHrefUrl == null -> {
|
|
||||||
builder.appendText(tagContent)
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
|
|
||||||
findHrefUrl.startsWith("https://matrix.to/#/@") -> {
|
|
||||||
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
|
|
||||||
builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
|
|
||||||
ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex)
|
|
||||||
}
|
|
||||||
|
|
||||||
else -> {
|
|
||||||
builder.appendLink(findHrefUrl, label = tagContent)
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
"b", "strong" -> {
|
|
||||||
builder.appendBold(tagContent)
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
|
|
||||||
"blockquote" -> {
|
|
||||||
if (tagContent.isNotEmpty() && nestingLevel < 3) {
|
|
||||||
var lastIndex = 0
|
|
||||||
val trimmedTagContent = tagContent.trim()
|
|
||||||
builder.appendText("> ")
|
|
||||||
iterateSearchIndex { searchIndex ->
|
|
||||||
lastIndex = searchIndex
|
|
||||||
parseHtmlTags(trimmedTagContent, searchIndex, builder, nestingLevel = nestingLevel + 1)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (lastIndex < trimmedTagContent.length) {
|
|
||||||
builder.appendText(trimmedTagContent.substring(lastIndex))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
builder.appendNewline()
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
|
|
||||||
"p" -> {
|
|
||||||
if (tagContent.isNotEmpty() && nestingLevel < 2) {
|
|
||||||
var lastIndex = 0
|
|
||||||
iterateSearchIndex { searchIndex ->
|
|
||||||
lastIndex = searchIndex
|
|
||||||
parseHtmlTags(tagContent, searchIndex, builder, nestingLevel = nestingLevel + 1)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (lastIndex < tagContent.length) {
|
|
||||||
builder.appendText(tagContent.substring(lastIndex))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
builder.appendNewline()
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
|
|
||||||
"ul", "ol" -> {
|
|
||||||
parseList(tagName, tagContent, builder)
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
|
|
||||||
"h1", "h2", "h3", "h4", "h5" -> {
|
|
||||||
builder.appendBold(tagContent.trim())
|
|
||||||
builder.appendNewline()
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
|
|
||||||
"i", "em" -> {
|
|
||||||
builder.appendItalic(tagContent)
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
|
|
||||||
else -> {
|
|
||||||
builder.appendText(tagContent)
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun ignoreMatrixColonMentionSuffix(input: String, exitTagCloseIndex: Int) = if (input.getOrNull(exitTagCloseIndex) == ':') {
|
|
||||||
exitTagCloseIndex.next()
|
|
||||||
} else {
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String) {
|
|
||||||
if (searchIndex != tagOpen) {
|
|
||||||
builder.appendText(input.substring(searchIndex, tagOpen))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun String.findTag(fromIndex: Int, onInvalidTag: (Int) -> Unit, onTag: (Int, Int) -> Int): Int {
|
|
||||||
return when (val foundIndex = this.indexOf(TAG_OPEN, startIndex = fromIndex)) {
|
|
||||||
NO_RESULT_FOUND -> END_SEARCH
|
|
||||||
|
|
||||||
else -> when (val closeIndex = indexOf(TAG_CLOSE, startIndex = foundIndex)) {
|
|
||||||
NO_RESULT_FOUND -> {
|
|
||||||
onInvalidTag(fromIndex)
|
|
||||||
fromIndex + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
else -> onTag(foundIndex, closeIndex)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun parseList(parentTag: String, parentContent: String, builder: PartBuilder) {
|
|
||||||
var listIndex = 1
|
|
||||||
iterateSearchIndex { nextIndex ->
|
|
||||||
singleTagParser(parentContent, "li", nextIndex, builder) { wholeTag, tagContent ->
|
|
||||||
val content = when (parentTag) {
|
|
||||||
"ol" -> {
|
|
||||||
listIndex = wholeTag.findTagAttribute("value")?.toInt() ?: listIndex
|
|
||||||
"$listIndex. $tagContent".also { listIndex++ }
|
|
||||||
}
|
|
||||||
|
|
||||||
else -> "- $tagContent"
|
|
||||||
}
|
|
||||||
builder.appendText(content)
|
|
||||||
builder.appendNewline()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun singleTagParser(content: String, wantedTagName: String, searchIndex: Int, builder: PartBuilder, onTag: (String, String) -> Unit): SearchIndex {
|
|
||||||
return content.findTag(
|
|
||||||
fromIndex = searchIndex,
|
|
||||||
onInvalidTag = { builder.appendText(content[it].toString()) },
|
|
||||||
onTag = { tagOpen, tagClose ->
|
|
||||||
val (wholeTag, tagName) = parseTag(content, tagOpen, tagClose)
|
|
||||||
|
|
||||||
if (tagName == wantedTagName) {
|
|
||||||
val exitTag = "</$tagName>"
|
|
||||||
val exitIndex = content.indexOf(exitTag, startIndex = tagClose)
|
|
||||||
val exitTagCloseIndex = exitIndex + exitTag.length
|
|
||||||
if (exitIndex == END_SEARCH) {
|
|
||||||
builder.appendText(content[searchIndex].toString())
|
|
||||||
searchIndex.next()
|
|
||||||
} else {
|
|
||||||
val tagContent = content.substring(tagClose + 1, exitIndex)
|
|
||||||
onTag(wholeTag, tagContent)
|
|
||||||
exitTagCloseIndex
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
END_SEARCH
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun parseTag(input: String, tagOpen: Int, tagClose: Int): Pair<String, String> {
|
|
||||||
val wholeTag = input.substring(tagOpen, tagClose + 1)
|
|
||||||
val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
|
|
||||||
return wholeTag to tagName
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun String.findTagAttribute(name: String): String? {
|
|
||||||
val attribute = "$name="
|
|
||||||
return this.indexOf(attribute).let {
|
|
||||||
if (it == NO_RESULT_FOUND) {
|
|
||||||
null
|
|
||||||
} else {
|
|
||||||
val start = it + attribute.length
|
|
||||||
this.substring(start).substringAfter('\"').substringBefore('\"')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,13 +0,0 @@
|
||||||
package app.dapk.st.matrix.sync.internal.sync.message
|
|
||||||
|
|
||||||
internal typealias SearchIndex = Int
|
|
||||||
|
|
||||||
internal fun Int.next() = this + 1
|
|
||||||
|
|
||||||
|
|
||||||
internal interface ParserScope {
|
|
||||||
fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String)
|
|
||||||
|
|
||||||
fun SearchIndex.next(): SearchIndex
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,71 +0,0 @@
|
||||||
package app.dapk.st.matrix.sync.internal.sync.message
|
|
||||||
|
|
||||||
import app.dapk.st.matrix.common.RichText
|
|
||||||
import kotlin.math.max
|
|
||||||
|
|
||||||
internal const val END_SEARCH = -1
|
|
||||||
|
|
||||||
class RichMessageParser {
|
|
||||||
|
|
||||||
private val htmlParser = HtmlParser()
|
|
||||||
private val urlParser = UrlParser()
|
|
||||||
|
|
||||||
fun parse(source: String): RichText {
|
|
||||||
val input = source
|
|
||||||
.removeHtmlEntities()
|
|
||||||
.dropTextFallback()
|
|
||||||
return RichText(collectRichText(input).build())
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun collectRichText(input: String) = PartBuilder().also { builder ->
|
|
||||||
iterateSearchIndex { nextIndex ->
|
|
||||||
val htmlStart = htmlParser.test(nextIndex, input)
|
|
||||||
val urlStart = urlParser.test(nextIndex, input)
|
|
||||||
|
|
||||||
val firstResult = if (htmlStart < urlStart) {
|
|
||||||
htmlParser.parseHtmlTags(input, nextIndex, builder)
|
|
||||||
} else {
|
|
||||||
urlParser.parseUrl(input, nextIndex, builder)
|
|
||||||
}
|
|
||||||
|
|
||||||
val secondStartIndex = findUrlStartIndex(firstResult, nextIndex)
|
|
||||||
val secondResult = if (htmlStart < urlStart) {
|
|
||||||
urlParser.parseUrl(input, secondStartIndex, builder)
|
|
||||||
} else {
|
|
||||||
htmlParser.parseHtmlTags(input, secondStartIndex, builder)
|
|
||||||
}
|
|
||||||
|
|
||||||
val hasReachedEnd = hasReachedEnd(firstResult, secondResult, input)
|
|
||||||
if (hasReachedEnd && hasUnprocessedText(firstResult, secondResult, input)) {
|
|
||||||
builder.appendText(input.substring(nextIndex))
|
|
||||||
}
|
|
||||||
if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun hasUnprocessedText(htmlResult: Int, urlResult: Int, input: String) = htmlResult < input.length && urlResult < input.length
|
|
||||||
|
|
||||||
private fun findUrlStartIndex(htmlResult: Int, searchIndex: Int) = when {
|
|
||||||
htmlResult == END_SEARCH && searchIndex == 0 -> 0
|
|
||||||
htmlResult == END_SEARCH -> searchIndex
|
|
||||||
else -> htmlResult
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun hasReachedEnd(htmlResult: SearchIndex, urlResult: Int, input: String) =
|
|
||||||
(htmlResult == END_SEARCH && urlResult == END_SEARCH) || (htmlResult >= input.length || urlResult >= input.length)
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun String.removeHtmlEntities() = this.replace(""", "\"").replace("'", "'").replace("'", "'").replace("&", "&")
|
|
||||||
|
|
||||||
private fun String.dropTextFallback() = this.lines()
|
|
||||||
.dropWhile { it.startsWith("> ") || it.isEmpty() }
|
|
||||||
.joinToString(separator = "\n")
|
|
||||||
|
|
||||||
internal fun iterateSearchIndex(action: (SearchIndex) -> SearchIndex): SearchIndex {
|
|
||||||
var nextIndex = 0
|
|
||||||
while (nextIndex != END_SEARCH) {
|
|
||||||
nextIndex = action(nextIndex)
|
|
||||||
}
|
|
||||||
return nextIndex
|
|
||||||
}
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
package app.dapk.st.matrix.sync.internal.sync.message
|
||||||
|
|
||||||
|
import app.dapk.st.matrix.common.RichText
|
||||||
|
|
||||||
|
/**
 * Callback a tag handler uses to have the inner content of a tag parsed in turn.
 */
fun interface NestedParser {
|
||||||
|
/** Parses [content] and writes the outcome into [accumulator]. */
fun parse(content: String, accumulator: ContentAccumulator)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Handles one captured tag, described by its name, its attributes and its inner content.
 */
fun interface TagParser {
|
||||||
|
/**
 * Writes whatever [tagName] stands for into [accumulator]; [parser] is available for
 * processing [content] when the tag can hold further markup.
 */
fun parse(tagName: String, attributes: Map<String, String>, content: String, accumulator: ContentAccumulator, parser: NestedParser)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Parses a piece of message content into a [ContentAccumulator].
 */
fun interface AccumulatingContentParser {
|
||||||
|
/**
 * Parses [input] into [accumulator]; [nestingLevel] is the depth of the current call
 * (top-level callers pass 0). Returns a [ContentAccumulator] holding the result.
 */
fun parse(input: String, accumulator: ContentAccumulator, nestingLevel: Int): ContentAccumulator
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Entry point that turns a raw message body into [RichText].
 */
class RichMessageParser(
    private val accumulatingParser: AccumulatingContentParser = AccumulatingRichTextContentParser()
) {

    /**
     * Cleans [source] (html entities first, then the leading text fallback), parses what is
     * left into a fresh [RichTextPartBuilder] at nesting level 0 and wraps the built parts.
     */
    fun parse(source: String): RichText {
        val cleaned = source.removeHtmlEntities().dropTextFallback()
        val parts = accumulatingParser.parse(cleaned, RichTextPartBuilder(), nestingLevel = 0).build()
        return RichText(parts)
    }
}
|
||||||
|
|
||||||
|
/**
 * Replaces the HTML character references for the double quote, the apostrophe and the
 * ampersand with the characters they stand for.
 *
 * `&amp;` is handled last so that an escaped reference such as `&amp;quot;` ends up as the
 * literal text `&quot;` instead of being decoded a second time.
 *
 * NOTE(review): in this listing the entity names had been decoded into the characters
 * themselves (leaving an unterminated `"""` literal and no-op replacements). The names below
 * are restored from the replacement values; the two apostrophe forms are assumed to be the
 * numeric `&#39;` and the named `&apos;` - confirm against the upstream file.
 */
private fun String.removeHtmlEntities() = this
    .replace("&quot;", "\"")
    .replace("&#39;", "'")
    .replace("&apos;", "'")
    .replace("&amp;", "&")
|
||||||
|
|
||||||
|
/**
 * Drops the leading run of lines that are empty or start with "> " and joins the remaining
 * lines back together with "\n". Lines after the first kept line are never dropped.
 */
private fun String.dropTextFallback(): String {
    val withoutFallback = lineSequence().dropWhile { line -> line.isEmpty() || line.startsWith("> ") }
    return withoutFallback.joinToString(separator = "\n")
}
|
|
@ -3,39 +3,48 @@ package app.dapk.st.matrix.sync.internal.sync.message
|
||||||
import app.dapk.st.matrix.common.RichText
|
import app.dapk.st.matrix.common.RichText
|
||||||
import app.dapk.st.matrix.common.UserId
|
import app.dapk.st.matrix.common.UserId
|
||||||
|
|
||||||
internal class PartBuilder {
|
interface ContentAccumulator {
|
||||||
|
fun appendText(value: String)
|
||||||
|
fun appendItalic(value: String)
|
||||||
|
fun appendBold(value: String)
|
||||||
|
fun appendPerson(userId: UserId, displayName: String)
|
||||||
|
fun appendLink(url: String, label: String?)
|
||||||
|
fun build(): List<RichText.Part>
|
||||||
|
}
|
||||||
|
|
||||||
|
class RichTextPartBuilder : ContentAccumulator {
|
||||||
|
|
||||||
private var normalBuffer = StringBuilder()
|
private var normalBuffer = StringBuilder()
|
||||||
|
|
||||||
private val parts = mutableListOf<RichText.Part>()
|
private val parts = mutableListOf<RichText.Part>()
|
||||||
|
|
||||||
fun appendText(value: String) {
|
override fun appendText(value: String) {
|
||||||
normalBuffer.append(value.cleanFirstTextLine())
|
normalBuffer.append(value.cleanFirstTextLine())
|
||||||
}
|
}
|
||||||
|
|
||||||
fun appendItalic(value: String) {
|
override fun appendItalic(value: String) {
|
||||||
flushNormalBuffer()
|
flushNormalBuffer()
|
||||||
parts.add(RichText.Part.Italic(value.cleanFirstTextLine()))
|
parts.add(RichText.Part.Italic(value.cleanFirstTextLine()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fun appendBold(value: String) {
|
override fun appendBold(value: String) {
|
||||||
flushNormalBuffer()
|
flushNormalBuffer()
|
||||||
parts.add(RichText.Part.Bold(value.cleanFirstTextLine()))
|
parts.add(RichText.Part.Bold(value.cleanFirstTextLine()))
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this
|
private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this
|
||||||
|
|
||||||
fun appendPerson(userId: UserId, displayName: String) {
|
override fun appendPerson(userId: UserId, displayName: String) {
|
||||||
flushNormalBuffer()
|
flushNormalBuffer()
|
||||||
parts.add(RichText.Part.Person(userId, displayName))
|
parts.add(RichText.Part.Person(userId, displayName))
|
||||||
}
|
}
|
||||||
|
|
||||||
fun appendLink(url: String, label: String?) {
|
override fun appendLink(url: String, label: String?) {
|
||||||
flushNormalBuffer()
|
flushNormalBuffer()
|
||||||
parts.add(RichText.Part.Link(url, label ?: url))
|
parts.add(RichText.Part.Link(url, label ?: url))
|
||||||
}
|
}
|
||||||
|
|
||||||
fun build(): List<RichText.Part> {
|
override fun build(): List<RichText.Part> {
|
||||||
flushNormalBuffer()
|
flushNormalBuffer()
|
||||||
return when (parts.isEmpty()) {
|
return when (parts.isEmpty()) {
|
||||||
true -> parts
|
true -> parts
|
||||||
|
@ -59,16 +68,9 @@ internal class PartBuilder {
|
||||||
normalBuffer.clear()
|
normalBuffer.clear()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal fun PartBuilder.appendTextBeforeTag(previousIndex: Int, tagOpenIndex: Int, input: String) {
|
internal fun ContentAccumulator.appendNewline() {
|
||||||
if (previousIndex != tagOpenIndex) {
|
|
||||||
this.appendText(input.substring(previousIndex, tagOpenIndex))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
internal fun PartBuilder.appendNewline() {
|
|
||||||
this.appendText("\n")
|
this.appendText("\n")
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
package app.dapk.st.matrix.sync.internal.sync.message.html
|
||||||
|
|
||||||
|
import app.dapk.st.matrix.sync.internal.sync.message.AccumulatingContentParser
|
||||||
|
import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator
|
||||||
|
|
||||||
|
/**
 * Glue between [TagCaptor], which cuts a tag out of the input, and [RichTextHtmlTagParser],
 * which turns the captured tag into accumulator calls.
 */
class HtmlProcessor {

    private val tagCaptor = TagCaptor()
    private val htmlTagParser = RichTextHtmlTagParser()

    /**
     * Processes the tag starting at [tagOpen] in [input].
     *
     * Nested content is routed back through [nestedParser] one nesting level deeper.
     *
     * @return the index to continue from: the position reported by the captor, or
     * `tagOpen + 1` after emitting the character at [tagOpen] as plain text when no tag could
     * be captured.
     */
    fun process(input: String, tagOpen: Int, partBuilder: ContentAccumulator, nestingLevel: Int, nestedParser: AccumulatingContentParser): Int {
        val resumeIndex = tagCaptor.tagCapture(input, tagOpen) { name, tagAttributes, innerContent ->
            htmlTagParser.parse(name, tagAttributes, innerContent, partBuilder) { nestedContent, target ->
                nestedParser.parse(nestedContent, target, nestingLevel + 1)
            }
        }
        if (resumeIndex != -1) {
            return resumeIndex
        }
        // capture failed: keep the opening character as text and move one step forward
        partBuilder.appendText(input[tagOpen].toString())
        return tagOpen + 1
    }
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
package app.dapk.st.matrix.sync.internal.sync.message.html
|
||||||
|
|
||||||
|
import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator
|
||||||
|
|
||||||
|
/**
 * Accumulator capability for writing the marker that precedes a list item.
 */
internal interface ListAccumulator {
|
||||||
|
/** Appends the list item prefix; [index] is an optional explicit item number, null when none was given. */
fun appendLinePrefix(index: Int?)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * [ContentAccumulator] decorator that numbers list items ("1. ", "2. ", ...). Everything
 * except the line prefix is forwarded to the wrapped accumulator.
 */
internal class OrderedListAccumulator(delegate: ContentAccumulator) : ContentAccumulator by delegate, ListAccumulator {

    // number the next item will receive
    private var currentIndex = 1

    /** Writes "<n>. " where n is [index] when given, otherwise the running counter; then advances the counter. */
    override fun appendLinePrefix(index: Int?) {
        if (index != null) {
            currentIndex = index
        }
        appendText("$currentIndex. ")
        currentIndex += 1
    }
}
|
||||||
|
|
||||||
|
/**
 * [ContentAccumulator] decorator that prefixes every list item with "- ". Everything except
 * the line prefix is forwarded to the wrapped accumulator.
 */
internal class UnorderedListAccumulator(delegate: ContentAccumulator) : ContentAccumulator by delegate, ListAccumulator {

    /** Writes the bullet marker; [index] is not used. */
    override fun appendLinePrefix(index: Int?) {
        appendText("- ")
    }
}
|
||||||
|
|
|
@ -0,0 +1,95 @@
|
||||||
|
package app.dapk.st.matrix.sync.internal.sync.message.html
|
||||||
|
|
||||||
|
import app.dapk.st.matrix.common.UserId
|
||||||
|
import app.dapk.st.matrix.sync.internal.sync.message.*
|
||||||
|
|
||||||
|
/**
 * Translates one captured HTML tag into calls on a [ContentAccumulator].
 *
 * Tag names and attributes come straight from message content, so nothing in here may throw
 * on unexpected markup: unknown tags are skipped and malformed list items degrade gracefully.
 */
class RichTextHtmlTagParser : TagParser {

    /**
     * @param tagName name of the captured tag; a name starting with '@' is treated as a user mention
     * @param attributes attributes of the tag (only `href` and `value` are read)
     * @param content raw text found between the opening and the closing tag
     * @param accumulator receives the resulting parts
     * @param parser used to parse [content] for tags that may hold further markup
     */
    override fun parse(
        tagName: String,
        attributes: Map<String, String>,
        content: String,
        accumulator: ContentAccumulator,
        parser: NestedParser
    ) {
        when {
            // the tag name doubles as both the user id and the display name
            tagName.startsWith('@') -> accumulator.appendPerson(UserId(tagName), tagName)

            else -> when (tagName) {
                "br" -> accumulator.appendNewline()

                "a" -> appendAnchor(attributes["href"], content, accumulator)

                "p" -> {
                    parser.parse(content.trim(), accumulator)
                    accumulator.appendNewline()
                }

                "blockquote" -> {
                    accumulator.appendText("> ")
                    parser.parse(content.trim(), accumulator)
                }

                "strong", "b" -> accumulator.appendBold(content)

                "em", "i" -> accumulator.appendItalic(content)

                // h6 is a heading as well and was previously dropped by the skip branch
                "h1", "h2", "h3", "h4", "h5", "h6" -> {
                    accumulator.appendBold(content)
                    accumulator.appendNewline()
                }

                // list items find their prefix style through the wrapping accumulator
                "ol" -> parser.parse(content, OrderedListAccumulator(accumulator))
                "ul" -> parser.parse(content, UnorderedListAccumulator(accumulator))

                "li" -> appendListItem(attributes, content, accumulator, parser)

                else -> {
                    // skip tag
                }
            }
        }
    }

    /** Emits a mention for matrix.to user links, a link for any other href, plain text without one. */
    private fun appendAnchor(href: String?, content: String, accumulator: ContentAccumulator) {
        when {
            href == null -> accumulator.appendText(content)

            href.startsWith("https://matrix.to/#/@") -> {
                val userId = UserId(href.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
                accumulator.appendPerson(userId, "@${content.removePrefix("@")}")
            }

            else -> accumulator.appendLink(href, content)
        }
    }

    /**
     * Emits one list item: its prefix, its own content, and then any list nested inside it.
     */
    private fun appendListItem(
        attributes: Map<String, String>,
        content: String,
        accumulator: ContentAccumulator,
        parser: NestedParser
    ) {
        // A stray <li> outside of <ol>/<ul> has no list accumulator, and "value" may not be a
        // number; both used to throw, now the prefix / explicit number is simply left out.
        (accumulator as? ListAccumulator)?.appendLinePrefix(attributes["value"]?.toIntOrNull())

        // Split at whichever nested list opens FIRST; preferring "<ul>" unconditionally cut
        // the item in the middle of an earlier "<ol>" when both kinds were present.
        val unorderedStart = content.indexOf("<ul>")
        val orderedStart = content.indexOf("<ol>")
        val nestedListStart = when {
            unorderedStart == -1 -> orderedStart
            orderedStart == -1 -> unorderedStart
            else -> minOf(unorderedStart, orderedStart)
        }

        if (nestedListStart == -1) {
            parser.parse(content.trim(), accumulator)
            accumulator.appendNewline()
        } else {
            parser.parse(content.substring(0, nestedListStart).trim(), accumulator)
            accumulator.appendNewline()
            parser.parse(content.substring(nestedListStart).trim(), accumulator)
        }
    }
}
|
|
@ -0,0 +1,78 @@
|
||||||
|
package app.dapk.st.matrix.sync.internal.sync.message.html
|
||||||
|
|
||||||
|
// Deepest run of same-named open tags followed while looking for the matching close tag.
private const val MAX_OPEN_TAGS = 8

// key=value pairs inside a tag; the value is either double-quoted (may contain spaces and '=')
// or a run of non-whitespace characters.
private val ATTRIBUTE_PATTERN = Regex("""([^\s=]+)\s*=\s*("[^"]*"|\S+)""")

/**
 * Cuts a single tag, with its attributes and inner content, out of a string.
 */
class TagCaptor {

    /**
     * Captures the tag whose `<` sits at [startIndex] in [input] and reports it to [tagFactory]
     * as (name, attributes, inner content).
     *
     * - `<br>` and `<@...>` have no closing tag and are reported with empty content.
     * - Tags ending in `/>` are reported with empty content.
     * - Every other tag needs a matching `</name>`; same-named tags nested inside are skipped
     *   over while looking for it.
     *
     * @return the index just behind the captured tag (behind its closing tag when it has one),
     * or -1 when there is no `>` after [startIndex] or no matching closing tag, in which case
     * [tagFactory] is not called.
     */
    fun tagCapture(input: String, startIndex: Int, tagFactory: (String, Map<String, String>, String) -> Unit): Int {
        val closeIndex = input.indexOf('>', startIndex = startIndex)
        if (closeIndex == -1) return -1

        val fullTag = input.substring(startIndex, closeIndex + 1)
        return when {
            fullTag.isExitlessTag() -> {
                tagFactory(fullTag.removeSurrounding("<", ">").trim(), emptyMap(), "")
                closeIndex + 1
            }

            fullTag.isSelfClosing() -> {
                // attributes used to be reported as part of the name, e.g. "img src=..."
                val tagBody = fullTag.removeSuffix("/>").removePrefix("<").trim()
                tagFactory(tagBody.tagName(), tagBody.tagAttributes(), "")
                closeIndex + 1
            }

            else -> {
                val tagBody = input.substring(startIndex + 1, closeIndex)
                val name = tagBody.tagName()
                val exitTag = "</$name>"
                val exitIndex = input.findTagClose(name, exitTag, searchIndex = closeIndex + 1)
                if (exitIndex == -1) {
                    -1
                } else {
                    tagFactory(name, tagBody.tagAttributes(), input.substring(closeIndex + 1, exitIndex))
                    exitIndex + exitTag.length
                }
            }
        }
    }
}

/** The tag name: everything in front of the first whitespace of a tag body such as `a href="x"`. */
private fun String.tagName() = takeWhile { !it.isWhitespace() }

/**
 * The attributes of a tag body such as `a href="x" title="y z"`.
 *
 * Tokens without a '=' (boolean attributes) are ignored instead of crashing, and a value is
 * only split from its key at the first '=', so urls with query strings survive intact.
 * Surrounding double quotes are removed from values.
 */
private fun String.tagAttributes(): Map<String, String> {
    val afterName = dropWhile { !it.isWhitespace() }
    return ATTRIBUTE_PATTERN.findAll(afterName).associate { match ->
        match.groupValues[1] to match.groupValues[2].removeSurrounding("\"")
    }
}

/**
 * Finds the index of the [exitTag] that closes a [tagName] tag opened just before
 * [searchIndex], stepping over same-named tags that open and close in between.
 *
 * Written as a loop: the number of steps grows with the number of same-named tags in the
 * content, which should not turn into call-stack depth.
 *
 * @return the index of the matching [exitTag], or -1 when there is none, when [tagName] is
 * empty, or when more than [MAX_OPEN_TAGS] same-named tags are open at once.
 */
private fun String.findTagClose(tagName: String, exitTag: String, searchIndex: Int): Int {
    if (tagName.isEmpty()) return -1

    var open = 1
    var from = searchIndex
    while (true) {
        val exitIndex = indexOf(exitTag, startIndex = from)
        if (exitIndex == -1) return -1

        val nextOpen = indexOfOpeningTag(tagName, from)
        if (nextOpen != -1 && nextOpen < exitIndex) {
            // another tag of the same name opens first, so the next exit tag belongs to it
            open++
            if (open > MAX_OPEN_TAGS) {
                // something has gone wrong, let's exit
                return -1
            }
            from = nextOpen + 1
        } else {
            if (open == 1) return exitIndex
            open--
            from = exitIndex + 1
        }
    }
}

/**
 * Index of the next opening tag named exactly [tagName] at or after [from], or -1.
 *
 * The character behind the name has to end the name; a bare prefix search would mistake
 * `<br>` or `<blockquote>` for an opening `<b>`.
 */
private fun String.indexOfOpeningTag(tagName: String, from: Int): Int {
    val prefix = "<$tagName"
    var candidate = indexOf(prefix, startIndex = from)
    while (candidate != -1) {
        val following = getOrNull(candidate + prefix.length)
        if (following == null || following == '>' || following == '/' || following.isWhitespace()) {
            return candidate
        }
        candidate = indexOf(prefix, startIndex = candidate + 1)
    }
    return -1
}

/** Tags that never have a closing counterpart: `<br>` and anything starting with `<@`. */
private fun String.isExitlessTag() = this == "<br>" || (this.startsWith("<@") && this.endsWith('>'))

/** Tags written in the `<name ... />` form. */
private fun String.isSelfClosing() = this.endsWith("/>")
|
|
@ -1,18 +1,14 @@
|
||||||
package app.dapk.st.matrix.sync.internal.sync.message
|
package app.dapk.st.matrix.sync.internal.sync.message.url
|
||||||
|
|
||||||
|
import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator
|
||||||
|
|
||||||
|
private const val END_SEARCH = -1
|
||||||
private const val INVALID_TRAILING_CHARS = ",.:;?<>"
|
private const val INVALID_TRAILING_CHARS = ",.:;?<>"
|
||||||
|
|
||||||
internal class UrlParser {
|
internal class UrlParser {
|
||||||
|
|
||||||
private fun String.hasLookAhead(current: Int, value: String): Boolean {
|
fun parseUrl(input: String, urlIndex: Int, accumulator: ContentAccumulator): Int {
|
||||||
return length > current + value.length && this.substring(current, current + value.length) == value
|
|
||||||
}
|
|
||||||
|
|
||||||
fun parseUrl(input: String, linkStartIndex: Int, builder: PartBuilder): Int {
|
|
||||||
val urlIndex = input.indexOf("http", startIndex = linkStartIndex)
|
|
||||||
return if (urlIndex == END_SEARCH) END_SEARCH else {
|
return if (urlIndex == END_SEARCH) END_SEARCH else {
|
||||||
builder.appendTextBeforeTag(linkStartIndex, urlIndex, input)
|
|
||||||
|
|
||||||
val originalUrl = input.substring(urlIndex)
|
val originalUrl = input.substring(urlIndex)
|
||||||
var index = 0
|
var index = 0
|
||||||
val maybeUrl = originalUrl.takeWhile {
|
val maybeUrl = originalUrl.takeWhile {
|
||||||
|
@ -25,30 +21,28 @@ internal class UrlParser {
|
||||||
when {
|
when {
|
||||||
urlContinuesUntilEnd -> {
|
urlContinuesUntilEnd -> {
|
||||||
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
|
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
|
||||||
builder.appendLink(url = cleanedUrl, label = null)
|
accumulator.appendLink(url = cleanedUrl, label = null)
|
||||||
if (cleanedUrl != originalUrl) {
|
if (cleanedUrl != originalUrl) {
|
||||||
builder.appendText(originalUrl.last().toString())
|
accumulator.appendText(originalUrl.last().toString())
|
||||||
}
|
}
|
||||||
input.length.next()
|
input.length + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
else -> {
|
else -> {
|
||||||
val originalUrl = input.substring(urlIndex, urlEndIndex)
|
val originalUrl = input.substring(urlIndex, urlEndIndex)
|
||||||
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
|
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
|
||||||
builder.appendLink(url = cleanedUrl, label = null)
|
accumulator.appendLink(url = cleanedUrl, label = null)
|
||||||
if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
|
if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun test(startingFrom: Int, input: String): Int {
|
|
||||||
return input.indexOf("http", startingFrom)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private fun String.hasLookAhead(current: Int, value: String): Boolean {
|
||||||
|
return length > current + value.length && this.substring(current, current + value.length) == value
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private fun String.bestGuessStripTrailingUrlChar(): String {
|
private fun String.bestGuessStripTrailingUrlChar(): String {
|
||||||
val last = this.last()
|
val last = this.last()
|
||||||
return if (INVALID_TRAILING_CHARS.contains(last)) {
|
return if (INVALID_TRAILING_CHARS.contains(last)) {
|
|
@ -5,10 +5,9 @@ import app.dapk.st.matrix.common.RichText.Part.*
|
||||||
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
|
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
|
||||||
import fixture.aUserId
|
import fixture.aUserId
|
||||||
import org.amshove.kluent.shouldBeEqualTo
|
import org.amshove.kluent.shouldBeEqualTo
|
||||||
import org.junit.Ignore
|
|
||||||
import org.junit.Test
|
import org.junit.Test
|
||||||
|
|
||||||
class RichMessageParserTest {
|
class RichTextMessageParserTest {
|
||||||
|
|
||||||
private val parser = RichMessageParser()
|
private val parser = RichMessageParser()
|
||||||
|
|
||||||
|
@ -18,6 +17,34 @@ class RichMessageParserTest {
|
||||||
expected = RichText(listOf(Normal("Hello world!")))
|
expected = RichText(listOf(Normal("Hello world!")))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `parses strong tags`() = runParserTest(
|
||||||
|
Case(
|
||||||
|
input = """hello <strong>wor</strong>ld""",
|
||||||
|
expected = RichText(
|
||||||
|
listOf(
|
||||||
|
Normal("hello "),
|
||||||
|
Bold("wor"),
|
||||||
|
Normal("ld"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `parses em tags`() = runParserTest(
|
||||||
|
Case(
|
||||||
|
input = """hello <em>wor</em>ld""",
|
||||||
|
expected = RichText(
|
||||||
|
listOf(
|
||||||
|
Normal("hello "),
|
||||||
|
Italic("wor"),
|
||||||
|
Normal("ld"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun `parses p tags`() = runParserTest(
|
fun `parses p tags`() = runParserTest(
|
||||||
input = "<p>Hello world!</p><p>foo bar</p>after paragraph",
|
input = "<p>Hello world!</p><p>foo bar</p>after paragraph",
|
||||||
|
@ -63,7 +90,7 @@ class RichMessageParserTest {
|
||||||
@Test
|
@Test
|
||||||
fun `replaces matrixdotto with person`() = runParserTest(
|
fun `replaces matrixdotto with person`() = runParserTest(
|
||||||
input = """Hello <a href="https://matrix.to/#/@a-name:foo.bar">a-name</a>: world""",
|
input = """Hello <a href="https://matrix.to/#/@a-name:foo.bar">a-name</a>: world""",
|
||||||
expected = RichText(listOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(" world")))
|
expected = RichText(listOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(": world")))
|
||||||
)
|
)
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -122,6 +149,21 @@ class RichMessageParserTest {
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `parses nested lists`() = runParserTest(
|
||||||
|
input = """
|
||||||
|
<ul>
|
||||||
|
<li>first item
|
||||||
|
<ul>
|
||||||
|
<li>nested item</li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
""".trimIndent().lines().joinToString("") { it.trim() },
|
||||||
|
expected = RichText(listOf(Normal("- first item\n- nested item")))
|
||||||
|
)
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun `parses urls`() = runParserTest(
|
fun `parses urls`() = runParserTest(
|
||||||
Case(
|
Case(
|
||||||
|
@ -178,58 +220,6 @@ class RichMessageParserTest {
|
||||||
expected = RichText(listOf(Normal(">><foo> ><>> << more content")))
|
expected = RichText(listOf(Normal(">><foo> ><>> << more content")))
|
||||||
)
|
)
|
||||||
|
|
||||||
@Test
|
|
||||||
fun `parses strong tags`() = runParserTest(
|
|
||||||
Case(
|
|
||||||
input = """hello <strong>wor</strong>ld""",
|
|
||||||
expected = RichText(
|
|
||||||
listOf(
|
|
||||||
Normal("hello "),
|
|
||||||
Bold("wor"),
|
|
||||||
Normal("ld"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
@Test
|
|
||||||
fun `parses em tags`() = runParserTest(
|
|
||||||
Case(
|
|
||||||
input = """hello <em>wor</em>ld""",
|
|
||||||
expected = RichText(
|
|
||||||
listOf(
|
|
||||||
Normal("hello "),
|
|
||||||
Italic("wor"),
|
|
||||||
Normal("ld"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
@Ignore // TODO
|
|
||||||
@Test
|
|
||||||
fun `parses nested tags`() = runParserTest(
|
|
||||||
Case(
|
|
||||||
input = """hello <b><i>wor<i/><b/>ld""",
|
|
||||||
expected = RichText(
|
|
||||||
listOf(
|
|
||||||
Normal("hello "),
|
|
||||||
BoldItalic("wor"),
|
|
||||||
Normal("ld"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
),
|
|
||||||
Case(
|
|
||||||
input = """<a href="www.google.com"><a href="www.google.com">www.google.com<a/><a/>""",
|
|
||||||
expected = RichText(
|
|
||||||
listOf(
|
|
||||||
Link(url = "www.google.com", label = "www.google.com"),
|
|
||||||
Link(url = "www.bing.com", label = "www.bing.com"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun `parses 'a' tags`() = runParserTest(
|
fun `parses 'a' tags`() = runParserTest(
|
||||||
Case(
|
Case(
|
Loading…
Reference in New Issue