add support for more types of html tags and nesting
This commit is contained in:
parent
c6b9a997f3
commit
c9a29b0b25
|
@ -0,0 +1,85 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message
|
||||
|
||||
import app.dapk.st.matrix.sync.internal.sync.message.html.HtmlProcessor
|
||||
import app.dapk.st.matrix.sync.internal.sync.message.url.UrlParser
|
||||
|
||||
private const val MAX_NESTING_LIMIT = 20
|
||||
|
||||
/**
 * Walks a message left to right, handing every `<` to the [HtmlProcessor] and every
 * `http` occurrence to the [UrlParser], while copying the text in between to the accumulator.
 *
 * Once [MAX_NESTING_LIMIT] is reached the input is appended verbatim instead of being parsed.
 */
class AccumulatingRichTextContentParser : AccumulatingContentParser {

    private val urlParser = UrlParser()
    private val tagProcessor = HtmlProcessor()

    /**
     * Parses [input] into [accumulator] and returns that same accumulator.
     *
     * @param nestingLevel current depth, compared against [MAX_NESTING_LIMIT]
     */
    override fun parse(input: String, accumulator: ContentAccumulator, nestingLevel: Int): ContentAccumulator {
        if (nestingLevel >= MAX_NESTING_LIMIT) {
            accumulator.appendText(input)
            return accumulator
        }

        var cursor = 0
        while (cursor != -1) {
            val resumeAt = consumeNext(input, cursor, accumulator, nestingLevel)
            if (resumeAt == -1) {
                // nothing further was captured, flush whatever trails the cursor
                flushTail(input, cursor, accumulator)
            }
            cursor = resumeAt
        }
        return accumulator
    }

    /**
     * Processes the nearest tag or url at or after [from].
     *
     * @return the index to continue from, or -1 when neither a `<` nor `http` remains
     */
    private fun consumeNext(input: String, from: Int, accumulator: ContentAccumulator, nestingLevel: Int): Int {
        val tagOpen = input.indexOf('<', startIndex = from)
        val httpOpen = input.indexOf("http", startIndex = from)

        // favour tags as urls can exist within tags
        val tagComesFirst = tagOpen != -1 && (httpOpen == -1 || tagOpen <= httpOpen)
        return when {
            tagComesFirst -> {
                flushBefore(input, from, tagOpen, accumulator)
                tagProcessor.process(input, tagOpen, accumulator, nestingLevel, nestedParser = this)
            }

            httpOpen != -1 -> {
                flushBefore(input, from, httpOpen, accumulator)
                urlParser.parseUrl(input, httpOpen, accumulator)
            }

            else -> -1
        }
    }

    /** Appends the plain text sitting between [from] and the capture start, if there is any. */
    private fun flushBefore(input: String, from: Int, captureIndex: Int, accumulator: ContentAccumulator) {
        if (from >= captureIndex) return
        accumulator.appendText(input.substring(from, captureIndex))
    }

    /** Appends everything from [from] to the end of [input], if there is any. */
    private fun flushTail(input: String, from: Int, accumulator: ContentAccumulator) {
        if (from >= input.length) return
        accumulator.appendText(input.substring(from, input.length))
    }
}
|
|
@ -1,248 +0,0 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message
|
||||
|
||||
import app.dapk.st.matrix.common.UserId
|
||||
|
||||
private const val TAG_OPEN = '<'
|
||||
private const val TAG_CLOSE = '>'
|
||||
private const val NO_RESULT_FOUND = -1
|
||||
private val SKIPPED_TAGS = setOf("mx-reply")
|
||||
|
||||
/**
 * Single-pass HTML tag parser that writes its findings into a [PartBuilder].
 *
 * Every parsing function returns the index the caller should continue searching from,
 * or [END_SEARCH] / [NO_RESULT_FOUND] (both -1) when no further tag exists.
 */
internal class HtmlParser {

    /** Returns the index of the next `<` at or after [startingFrom], or -1 when there is none. */
    fun test(startingFrom: Int, input: String) = input.indexOf(TAG_OPEN, startingFrom)

    /**
     * Finds the next tag at or after [searchIndex] and appends its representation to [builder].
     *
     * A `<` without a matching `>` is treated as text: the character at [searchIndex] is appended
     * and the search resumes one character later.
     *
     * @param nestingLevel depth of the current recursion, used to cap `<p>` / `<blockquote>` nesting
     * @return the index to resume from, or -1 when no `<` remains
     */
    fun parseHtmlTags(input: String, searchIndex: Int, builder: PartBuilder, nestingLevel: Int = 0): SearchIndex = input.findTag(
        fromIndex = searchIndex,
        onInvalidTag = { builder.appendText(input[it].toString()) },
        onTag = { tagOpen, tagClose ->
            val (wholeTag, tagName) = parseTag(input, tagOpen, tagClose)

            when {
                // `<@user:domain>` style mentions have no closing tag
                tagName.startsWith('@') -> {
                    appendTextBeforeTag(searchIndex, tagOpen, builder, input)
                    builder.appendPerson(UserId(tagName), tagName)
                    tagClose.next()
                }

                tagName == "br" -> {
                    appendTextBeforeTag(searchIndex, tagOpen, builder, input)
                    builder.appendNewline()
                    tagClose.next()
                }

                else -> parseTagWithContent(input, tagName, tagClose, searchIndex, tagOpen, wholeTag, builder, nestingLevel)
            }
        }
    )

    /**
     * Handles a tag that is expected to have a matching `</tagName>`.
     *
     * Without a closing tag a single character is emitted as text; tags listed in
     * [SKIPPED_TAGS] are dropped together with their content.
     */
    private fun parseTagWithContent(
        input: String,
        tagName: String,
        tagClose: Int,
        searchIndex: Int,
        tagOpen: Int,
        wholeTag: String,
        builder: PartBuilder,
        nestingLevel: Int
    ): Int {
        val exitTag = "</$tagName>"
        val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
        val exitTagCloseIndex = exitIndex + exitTag.length
        return when {
            exitIndex == NO_RESULT_FOUND -> {
                builder.appendText(input[searchIndex].toString())
                searchIndex.next()
            }

            SKIPPED_TAGS.contains(tagName) -> exitTagCloseIndex

            else -> {
                appendTextBeforeTag(searchIndex, tagOpen, builder, input)
                val tagContent = input.substring(tagClose + 1, exitIndex)
                handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex, nestingLevel)
            }
        }
    }

    /**
     * Appends the representation of a fully captured tag and returns the index after its closing tag.
     *
     * Unknown tags are reduced to their raw content.
     */
    private fun handleTagWithContent(
        input: String,
        tagName: String,
        wholeTag: String,
        builder: PartBuilder,
        tagContent: String,
        exitTagCloseIndex: Int,
        nestingLevel: Int,
    ) = when (tagName) {
        "a" -> {
            val findHrefUrl = wholeTag.findTagAttribute("href")
            when {
                findHrefUrl == null -> {
                    builder.appendText(tagContent)
                    exitTagCloseIndex
                }

                // matrix.to user links become person mentions
                findHrefUrl.startsWith("https://matrix.to/#/@") -> {
                    val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
                    builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
                    ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex)
                }

                else -> {
                    builder.appendLink(findHrefUrl, label = tagContent)
                    exitTagCloseIndex
                }
            }
        }

        "b", "strong" -> {
            builder.appendBold(tagContent)
            exitTagCloseIndex
        }

        "blockquote" -> {
            if (tagContent.isNotEmpty() && nestingLevel < 3) {
                var lastIndex = 0
                val trimmedTagContent = tagContent.trim()
                builder.appendText("> ")
                iterateSearchIndex { searchIndex ->
                    lastIndex = searchIndex
                    parseHtmlTags(trimmedTagContent, searchIndex, builder, nestingLevel = nestingLevel + 1)
                }

                // flush the text that follows the last nested tag
                if (lastIndex < trimmedTagContent.length) {
                    builder.appendText(trimmedTagContent.substring(lastIndex))
                }
            }

            builder.appendNewline()
            exitTagCloseIndex
        }

        "p" -> {
            if (tagContent.isNotEmpty() && nestingLevel < 2) {
                var lastIndex = 0
                iterateSearchIndex { searchIndex ->
                    lastIndex = searchIndex
                    parseHtmlTags(tagContent, searchIndex, builder, nestingLevel = nestingLevel + 1)
                }

                // flush the text that follows the last nested tag
                if (lastIndex < tagContent.length) {
                    builder.appendText(tagContent.substring(lastIndex))
                }
            }

            builder.appendNewline()
            exitTagCloseIndex
        }

        "ul", "ol" -> {
            parseList(tagName, tagContent, builder)
            exitTagCloseIndex
        }

        "h1", "h2", "h3", "h4", "h5" -> {
            builder.appendBold(tagContent.trim())
            builder.appendNewline()
            exitTagCloseIndex
        }

        "i", "em" -> {
            builder.appendItalic(tagContent)
            exitTagCloseIndex
        }

        else -> {
            builder.appendText(tagContent)
            exitTagCloseIndex
        }
    }

    /** Skips a `:` that directly follows a mention link, returning the index to resume from. */
    private fun ignoreMatrixColonMentionSuffix(input: String, exitTagCloseIndex: Int) = if (input.getOrNull(exitTagCloseIndex) == ':') {
        exitTagCloseIndex.next()
    } else {
        exitTagCloseIndex
    }

    /** Appends the text between [searchIndex] and [tagOpen] unless the two indexes are equal. */
    private fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String) {
        if (searchIndex != tagOpen) {
            builder.appendText(input.substring(searchIndex, tagOpen))
        }
    }

    /**
     * Locates the next `<` ... `>` pair at or after [fromIndex].
     *
     * @param onInvalidTag invoked with [fromIndex] when a `<` has no following `>`
     * @param onTag invoked with the indexes of `<` and `>`; its result becomes the return value
     * @return [END_SEARCH] when no `<` exists, `fromIndex + 1` for an unterminated `<`
     */
    private fun String.findTag(fromIndex: Int, onInvalidTag: (Int) -> Unit, onTag: (Int, Int) -> Int): Int {
        return when (val foundIndex = this.indexOf(TAG_OPEN, startIndex = fromIndex)) {
            NO_RESULT_FOUND -> END_SEARCH

            else -> when (val closeIndex = indexOf(TAG_CLOSE, startIndex = foundIndex)) {
                NO_RESULT_FOUND -> {
                    onInvalidTag(fromIndex)
                    fromIndex + 1
                }

                else -> onTag(foundIndex, closeIndex)
            }
        }
    }

    /**
     * Emits one line per `<li>` found in [parentContent], numbered for `ol` and dashed otherwise.
     *
     * A numeric `value` attribute on an item restarts the numbering from that value;
     * a non-numeric `value` is ignored rather than aborting the parse.
     */
    private fun parseList(parentTag: String, parentContent: String, builder: PartBuilder) {
        var listIndex = 1
        iterateSearchIndex { nextIndex ->
            singleTagParser(parentContent, "li", nextIndex, builder) { wholeTag, tagContent ->
                val content = when (parentTag) {
                    "ol" -> {
                        // toIntOrNull: message content is untrusted, `value="abc"` must not throw
                        listIndex = wholeTag.findTagAttribute("value")?.toIntOrNull() ?: listIndex
                        "$listIndex. $tagContent".also { listIndex++ }
                    }

                    else -> "- $tagContent"
                }
                builder.appendText(content)
                builder.appendNewline()
            }
        }
    }

    /**
     * Looks for the next tag in [content] and reports it through [onTag] when it is named [wantedTagName].
     *
     * @return the index after the closing tag, or [END_SEARCH] when the next tag has a different name
     */
    private fun singleTagParser(content: String, wantedTagName: String, searchIndex: Int, builder: PartBuilder, onTag: (String, String) -> Unit): SearchIndex {
        return content.findTag(
            fromIndex = searchIndex,
            onInvalidTag = { builder.appendText(content[it].toString()) },
            onTag = { tagOpen, tagClose ->
                val (wholeTag, tagName) = parseTag(content, tagOpen, tagClose)

                if (tagName == wantedTagName) {
                    val exitTag = "</$tagName>"
                    val exitIndex = content.indexOf(exitTag, startIndex = tagClose)
                    val exitTagCloseIndex = exitIndex + exitTag.length
                    if (exitIndex == END_SEARCH) {
                        builder.appendText(content[searchIndex].toString())
                        searchIndex.next()
                    } else {
                        val tagContent = content.substring(tagClose + 1, exitIndex)
                        onTag(wholeTag, tagContent)
                        exitTagCloseIndex
                    }
                } else {
                    END_SEARCH
                }
            }
        )
    }

    /** Splits the span `tagOpen..tagClose` into the whole tag and its name (text up to the first space or `>`). */
    private fun parseTag(input: String, tagOpen: Int, tagClose: Int): Pair<String, String> {
        val wholeTag = input.substring(tagOpen, tagClose + 1)
        val tagName = wholeTag.substring(1, wholeTag.indexOfFirst { it == '>' || it == ' ' })
        return wholeTag to tagName
    }
}
|
||||
|
||||
/**
 * Extracts the value of the attribute [name] from a tag string.
 *
 * The value is the text between the first pair of double quotes that follows `name=`.
 *
 * @return the attribute value, or null when `name=` does not occur in the receiver
 */
private fun String.findTagAttribute(name: String): String? {
    val marker = "$name="
    val markerIndex = indexOf(marker)
    if (markerIndex == NO_RESULT_FOUND) return null

    val afterMarker = substring(markerIndex + marker.length)
    return afterMarker.substringAfter('\"').substringBefore('\"')
}
|
|
@ -1,13 +0,0 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message
|
||||
|
||||
internal typealias SearchIndex = Int
|
||||
|
||||
/** Returns the index that directly follows the receiver. */
internal fun Int.next(): Int = inc()
|
||||
|
||||
|
||||
/**
 * Helper operations for parsers that move through an input by [SearchIndex].
 */
internal interface ParserScope {

    /**
     * Presumably appends the text of [input] between [searchIndex] and [tagOpen] to [builder];
     * no implementation is visible here, verify against implementers.
     */
    fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String)

    /** Returns the search index that follows the receiver. */
    fun SearchIndex.next(): SearchIndex
}
|
|
@ -1,71 +0,0 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message
|
||||
|
||||
import app.dapk.st.matrix.common.RichText
|
||||
import kotlin.math.max
|
||||
|
||||
internal const val END_SEARCH = -1
|
||||
|
||||
/**
 * Converts a raw message body into [RichText] by alternating between the [HtmlParser]
 * and the [UrlParser], whichever finds a match first on each iteration.
 */
class RichMessageParser {

    private val htmlParser = HtmlParser()
    private val urlParser = UrlParser()

    /** Decodes html entities, drops the leading reply fallback and parses the remainder. */
    fun parse(source: String): RichText {
        val sanitised = source.removeHtmlEntities().dropTextFallback()
        val parts = collectRichText(sanitised).build()
        return RichText(parts)
    }

    private fun collectRichText(input: String): PartBuilder {
        val builder = PartBuilder()
        iterateSearchIndex { cursor ->
            val htmlComesFirst = htmlParser.test(cursor, input) < urlParser.test(cursor, input)

            val firstResult = when {
                htmlComesFirst -> htmlParser.parseHtmlTags(input, cursor, builder)
                else -> urlParser.parseUrl(input, cursor, builder)
            }

            // the other parser continues from wherever the first one stopped
            val secondStart = findUrlStartIndex(firstResult, cursor)
            val secondResult = when {
                htmlComesFirst -> urlParser.parseUrl(input, secondStart, builder)
                else -> htmlParser.parseHtmlTags(input, secondStart, builder)
            }

            val finished = hasReachedEnd(firstResult, secondResult, input)
            if (finished && hasUnprocessedText(firstResult, secondResult, input)) {
                builder.appendText(input.substring(cursor))
            }
            if (finished) END_SEARCH else max(firstResult, secondResult)
        }
        return builder
    }

    /** True when both parser results stopped short of the end of [input]. */
    private fun hasUnprocessedText(htmlResult: Int, urlResult: Int, input: String): Boolean {
        val end = input.length
        return htmlResult < end && urlResult < end
    }

    /** Falls back to [searchIndex] when the first parser found nothing. */
    private fun findUrlStartIndex(htmlResult: Int, searchIndex: Int): Int =
        if (htmlResult == END_SEARCH) searchIndex else htmlResult

    /** True when neither parser found anything, or either one consumed the whole [input]. */
    private fun hasReachedEnd(htmlResult: SearchIndex, urlResult: Int, input: String): Boolean {
        val nothingFound = htmlResult == END_SEARCH && urlResult == END_SEARCH
        val inputConsumed = htmlResult >= input.length || urlResult >= input.length
        return nothingFound || inputConsumed
    }
}
|
||||
|
||||
/**
 * Decodes the quote, apostrophe and ampersand html entities into their literal characters.
 *
 * The ampersand entity is decoded last so an escaped entity such as `&amp;quot;`
 * is decoded exactly once.
 */
private fun String.removeHtmlEntities() = this
    .replace("&quot;", "\"")
    .replace("&#39;", "'")
    .replace("&apos;", "'")
    .replace("&amp;", "&")
|
||||
|
||||
/**
 * Removes the leading run of lines that are empty or start with `"> "`,
 * then joins the remaining lines with `\n`.
 */
private fun String.dropTextFallback(): String {
    val allLines = lines()
    val firstKept = allLines.indexOfFirst { line -> line.isNotEmpty() && !line.startsWith("> ") }
    if (firstKept == -1) return ""
    return allLines.subList(firstKept, allLines.size).joinToString(separator = "\n")
}
|
||||
|
||||
/**
 * Repeatedly feeds [action] the index it returned last time, starting at 0,
 * until it returns [END_SEARCH].
 *
 * @return the final index, which is always [END_SEARCH]
 */
internal fun iterateSearchIndex(action: (SearchIndex) -> SearchIndex): SearchIndex {
    var cursor: SearchIndex = 0
    do {
        cursor = action(cursor)
    } while (cursor != END_SEARCH)
    return cursor
}
|
|
@ -0,0 +1,34 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message
|
||||
|
||||
import app.dapk.st.matrix.common.RichText
|
||||
|
||||
/** Callback a [TagParser] uses to have the content nested inside a tag parsed. */
fun interface NestedParser {

    /** Parses [content], writing the outcome into [accumulator]. */
    fun parse(content: String, accumulator: ContentAccumulator)
}
|
||||
|
||||
/** Turns one captured tag into content on a [ContentAccumulator]. */
fun interface TagParser {

    /**
     * @param tagName name of the captured tag
     * @param attributes attributes declared on the tag, keyed by attribute name
     * @param content raw text found between the opening and closing tag
     * @param accumulator destination for the parsed content
     * @param parser callback for parsing nested [content]
     */
    fun parse(
        tagName: String,
        attributes: Map<String, String>,
        content: String,
        accumulator: ContentAccumulator,
        parser: NestedParser,
    )
}
|
||||
|
||||
/** Parser that writes what it finds in an input into a [ContentAccumulator]. */
fun interface AccumulatingContentParser {

    /**
     * Parses [input] into [accumulator].
     *
     * @param nestingLevel how deep the current parse is nested, 0 for the top level call
     * @return a [ContentAccumulator] holding the parsed content
     */
    fun parse(input: String, accumulator: ContentAccumulator, nestingLevel: Int): ContentAccumulator
}
|
||||
|
||||
/**
 * Entry point for turning a raw message body into [RichText].
 *
 * @param accumulatingParser the parser that does the actual work
 */
class RichMessageParser(
    private val accumulatingParser: AccumulatingContentParser = AccumulatingRichTextContentParser(),
) {

    /** Decodes html entities, drops the leading reply fallback and parses the remainder. */
    fun parse(source: String): RichText {
        val sanitised = source.removeHtmlEntities().dropTextFallback()
        val accumulated = accumulatingParser.parse(sanitised, RichTextPartBuilder(), nestingLevel = 0)
        return RichText(accumulated.build())
    }
}
|
||||
|
||||
/**
 * Decodes the quote, apostrophe and ampersand html entities into their literal characters.
 *
 * The ampersand entity is decoded last so an escaped entity such as `&amp;quot;`
 * is decoded exactly once.
 */
private fun String.removeHtmlEntities() = this
    .replace("&quot;", "\"")
    .replace("&#39;", "'")
    .replace("&apos;", "'")
    .replace("&amp;", "&")
|
||||
|
||||
/**
 * Removes the leading run of lines that are empty or start with `"> "`,
 * then joins the remaining lines with `\n`.
 */
private fun String.dropTextFallback(): String {
    val allLines = lines()
    val firstKept = allLines.indexOfFirst { line -> line.isNotEmpty() && !line.startsWith("> ") }
    if (firstKept == -1) return ""
    return allLines.subList(firstKept, allLines.size).joinToString(separator = "\n")
}
|
|
@ -3,41 +3,50 @@ package app.dapk.st.matrix.sync.internal.sync.message
|
|||
import app.dapk.st.matrix.common.RichText
|
||||
import app.dapk.st.matrix.common.UserId
|
||||
|
||||
internal class PartBuilder {
|
||||
/** Sink that collects parsed message content and turns it into [RichText.Part]s. */
interface ContentAccumulator {

    /** Adds plain text. */
    fun appendText(value: String)

    /** Adds italic text. */
    fun appendItalic(value: String)

    /** Adds bold text. */
    fun appendBold(value: String)

    /** Adds a mention of [userId], shown as [displayName]. */
    fun appendPerson(userId: UserId, displayName: String)

    /** Adds a link to [url] with an optional [label]. */
    fun appendLink(url: String, label: String?)

    /** Returns the parts collected so far. */
    fun build(): List<RichText.Part>
}
|
||||
|
||||
class RichTextPartBuilder : ContentAccumulator {
|
||||
|
||||
private var normalBuffer = StringBuilder()
|
||||
|
||||
private val parts = mutableListOf<RichText.Part>()
|
||||
|
||||
fun appendText(value: String) {
|
||||
override fun appendText(value: String) {
|
||||
normalBuffer.append(value.cleanFirstTextLine())
|
||||
}
|
||||
|
||||
fun appendItalic(value: String) {
|
||||
override fun appendItalic(value: String) {
|
||||
flushNormalBuffer()
|
||||
parts.add(RichText.Part.Italic(value.cleanFirstTextLine()))
|
||||
}
|
||||
|
||||
fun appendBold(value: String) {
|
||||
override fun appendBold(value: String) {
|
||||
flushNormalBuffer()
|
||||
parts.add(RichText.Part.Bold(value.cleanFirstTextLine()))
|
||||
}
|
||||
|
||||
private fun String.cleanFirstTextLine() = if (parts.isEmpty() && normalBuffer.isEmpty()) this.trimStart() else this
|
||||
|
||||
fun appendPerson(userId: UserId, displayName: String) {
|
||||
override fun appendPerson(userId: UserId, displayName: String) {
|
||||
flushNormalBuffer()
|
||||
parts.add(RichText.Part.Person(userId, displayName))
|
||||
}
|
||||
|
||||
fun appendLink(url: String, label: String?) {
|
||||
override fun appendLink(url: String, label: String?) {
|
||||
flushNormalBuffer()
|
||||
parts.add(RichText.Part.Link(url, label ?: url))
|
||||
}
|
||||
|
||||
fun build(): List<RichText.Part> {
|
||||
override fun build(): List<RichText.Part> {
|
||||
flushNormalBuffer()
|
||||
return when(parts.isEmpty()) {
|
||||
return when (parts.isEmpty()) {
|
||||
true -> parts
|
||||
else -> {
|
||||
val last = parts.last()
|
||||
|
@ -59,16 +68,9 @@ internal class PartBuilder {
|
|||
normalBuffer.clear()
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Appends the text of [input] between [previousIndex] and [tagOpenIndex], unless the two are equal. */
internal fun PartBuilder.appendTextBeforeTag(previousIndex: Int, tagOpenIndex: Int, input: String) {
    if (previousIndex == tagOpenIndex) return
    appendText(input.substring(previousIndex, tagOpenIndex))
}
|
||||
|
||||
internal fun PartBuilder.appendNewline() {
|
||||
/** Appends a single line break as plain text. */
internal fun ContentAccumulator.appendNewline() = appendText("\n")
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message.html
|
||||
|
||||
import app.dapk.st.matrix.sync.internal.sync.message.AccumulatingContentParser
|
||||
import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator
|
||||
|
||||
/**
 * Captures the tag starting at a given `<` with [TagCaptor] and hands it to
 * [RichTextHtmlTagParser], parsing nested content one nesting level deeper.
 */
class HtmlProcessor {

    private val tagCaptor = TagCaptor()
    private val htmlTagParser = RichTextHtmlTagParser()

    /**
     * Processes the tag opening at [tagOpen].
     *
     * When no tag can be captured, the single character at [tagOpen] is appended as text.
     *
     * @return the index to continue parsing from
     */
    fun process(input: String, tagOpen: Int, partBuilder: ContentAccumulator, nestingLevel: Int, nestedParser: AccumulatingContentParser): Int {
        val resumeIndex = tagCaptor.tagCapture(input, tagOpen) { name, tagAttributes, body ->
            htmlTagParser.parse(name, tagAttributes, body, partBuilder) { nestedContent, nestedAccumulator ->
                nestedParser.parse(nestedContent, nestedAccumulator, nestingLevel + 1)
            }
        }
        if (resumeIndex != -1) return resumeIndex

        partBuilder.appendText(input[tagOpen].toString())
        return tagOpen + 1
    }
}
|
|
@ -0,0 +1,23 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message.html
|
||||
|
||||
import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator
|
||||
|
||||
/** Accumulator that knows how to prefix the lines of a list. */
internal interface ListAccumulator {

    /**
     * Appends the prefix for the next list line.
     *
     * @param index explicit position requested by the item, or null when none was given
     */
    fun appendLinePrefix(index: Int?)
}
|
||||
|
||||
/**
 * Prefixes lines with `"<number>. "`, counting up from 1.
 * All other accumulator calls are forwarded to the delegate.
 */
internal class OrderedListAccumulator(delegate: ContentAccumulator) : ContentAccumulator by delegate, ListAccumulator {

    private var nextNumber = 1

    /** Uses [index] when given, otherwise the running number; counting continues from the number used. */
    override fun appendLinePrefix(index: Int?) {
        val number = index ?: nextNumber
        appendText("$number. ")
        nextNumber = number + 1
    }
}
|
||||
|
||||
/**
 * Prefixes every line with `"- "`.
 * All other accumulator calls are forwarded to the delegate.
 */
internal class UnorderedListAccumulator(delegate: ContentAccumulator) : ContentAccumulator by delegate, ListAccumulator {

    /** The requested [index] is ignored. */
    override fun appendLinePrefix(index: Int?) {
        appendText("- ")
    }
}
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message.html
|
||||
|
||||
import app.dapk.st.matrix.common.UserId
|
||||
import app.dapk.st.matrix.sync.internal.sync.message.*
|
||||
|
||||
/**
 * Maps captured html tags onto [ContentAccumulator] calls.
 *
 * Tags that are not handled below are skipped together with their content.
 */
class RichTextHtmlTagParser : TagParser {

    /**
     * Appends the representation of one tag to [accumulator].
     *
     * @param tagName name of the tag, or a `@user` mention
     * @param attributes attributes declared on the tag
     * @param content raw text between the opening and closing tag
     * @param accumulator destination for the parsed content
     * @param parser callback used for tags whose content is parsed recursively
     */
    override fun parse(
        tagName: String,
        attributes: Map<String, String>,
        content: String,
        accumulator: ContentAccumulator,
        parser: NestedParser
    ) {
        when {
            tagName.startsWith('@') -> accumulator.appendPerson(UserId(tagName), tagName)

            else -> when (tagName) {
                "br" -> accumulator.appendNewline()

                "a" -> parseAnchor(attributes["href"], content, accumulator)

                "p" -> {
                    parser.parse(content.trim(), accumulator)
                    accumulator.appendNewline()
                }

                "blockquote" -> {
                    accumulator.appendText("> ")
                    parser.parse(content.trim(), accumulator)
                }

                "strong", "b" -> accumulator.appendBold(content)

                "em", "i" -> accumulator.appendItalic(content)

                "h1", "h2", "h3", "h4", "h5", "h6" -> {
                    accumulator.appendBold(content)
                    accumulator.appendNewline()
                }

                // list items find their prefix through the wrapping accumulator
                "ol" -> parser.parse(content, OrderedListAccumulator(accumulator))

                "ul" -> parser.parse(content, UnorderedListAccumulator(accumulator))

                "li" -> parseListItem(attributes, content, accumulator, parser)

                else -> {
                    // skip tag
                }
            }
        }
    }

    /** Appends a person for matrix.to user links, a link for any other href, plain text without href. */
    private fun parseAnchor(url: String?, content: String, accumulator: ContentAccumulator) {
        when {
            url == null -> accumulator.appendText(content)

            url.startsWith("https://matrix.to/#/@") -> {
                val userId = UserId(url.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
                accumulator.appendPerson(userId, "@${content.removePrefix("@")}")
            }

            else -> accumulator.appendLink(url, content)
        }
    }

    /**
     * Appends one list line, followed by the nested list when the item contains one.
     *
     * An `<li>` outside of a list gets no prefix, and a non-numeric `value` attribute is ignored;
     * neither aborts the parse.
     */
    private fun parseListItem(
        attributes: Map<String, String>,
        content: String,
        accumulator: ContentAccumulator,
        parser: NestedParser,
    ) {
        // safe cast + toIntOrNull: message content is untrusted and must not throw here
        (accumulator as? ListAccumulator)?.appendLinePrefix(attributes["value"]?.toIntOrNull())

        val nestedList = when {
            content.contains("<ul>") -> "<ul>"
            content.contains("<ol>") -> "<ol>"
            else -> null
        }

        if (nestedList == null) {
            parser.parse(content.trim(), accumulator)
            accumulator.appendNewline()
        } else {
            // the text ahead of the nested list is this item's own line
            val firstItemInNested = content.substringBefore(nestedList)
            parser.parse(firstItemInNested.trim(), accumulator)
            accumulator.appendNewline()
            parser.parse(content.substring(content.indexOf(nestedList)).trim(), accumulator)
        }
    }
}
|
|
@ -0,0 +1,78 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message.html
|
||||
|
||||
/**
 * Captures a single html-like tag from an input string: its name, attributes and the raw
 * content up to the matching closing tag.
 */
class TagCaptor {

    /**
     * Captures the tag that opens at [startIndex], which must point at a `<`.
     *
     * - `<br>` and `<@mention>` tags have no closing tag and are reported with empty content.
     * - Self-closing tags (`<name ... />`) are reported with their attributes and empty content.
     * - Any other tag is reported with the content up to its matching `</name>`; nested tags
     *   with the same name are skipped over while searching for that closing tag.
     *
     * @param tagFactory invoked once with (tag name, attributes, content) when a tag is captured
     * @return the index directly after the captured tag, or -1 when no complete tag was found
     */
    fun tagCapture(input: String, startIndex: Int, tagFactory: (String, Map<String, String>, String) -> Unit): Int {
        val closeIndex = input.indexOf('>', startIndex = startIndex)
        if (closeIndex == -1) return -1

        val fullTag = input.substring(startIndex, closeIndex + 1)
        return when {
            fullTag.isExitlessTag() -> {
                tagFactory(fullTag.removeSurrounding("<", ">").trim(), emptyMap(), "")
                closeIndex + 1
            }

            fullTag.isSelfClosing() -> {
                val declaration = fullTag.removeSuffix("/>").removePrefix("<").trim()
                tagFactory(declaration.tagName(), declaration.tagAttributes(), "")
                closeIndex + 1
            }

            else -> {
                val declaration = input.substring(startIndex + 1, closeIndex)
                val name = declaration.tagName()
                val exitTag = "</$name>"
                val exitIndex = input.findTagClose(name, exitTag, searchIndex = closeIndex + 1)
                if (exitIndex == -1) {
                    -1
                } else {
                    tagFactory(name, declaration.tagAttributes(), input.substring(closeIndex + 1, exitIndex))
                    exitIndex + exitTag.length
                }
            }
        }
    }

    /**
     * Finds the [exitTag] that closes the tag named [tagName], starting the search at [searchIndex].
     *
     * Tracks how many tags of the same name are currently open so that nested tags are matched
     * with their own closing tags first. Gives up once more than 8 are open at the same time.
     *
     * @return the index of the matching [exitTag], or -1 when there is none
     */
    private fun String.findTagClose(tagName: String, exitTag: String, searchIndex: Int): Int {
        var open = 1
        var cursor = searchIndex
        while (true) {
            val exitIndex = indexOf(exitTag, startIndex = cursor)
            val nextOpen = indexOfOpeningTag(tagName, cursor)
            when {
                open == 1 && (nextOpen == -1 || exitIndex < nextOpen) -> return exitIndex

                // something has gone wrong, lets exit
                open > 8 || open < 1 -> return -1

                exitIndex == -1 -> return -1

                // a nested tag closes before another one opens
                nextOpen == -1 || nextOpen > exitIndex -> {
                    cursor = exitIndex + 1
                    open--
                }

                // another tag of the same name opens first
                nextOpen < exitIndex -> {
                    cursor = nextOpen + 1
                    open++
                }

                else -> return -1
            }
        }
    }

    /**
     * Returns the index of the next opening tag named exactly [tagName] at or after [searchIndex].
     *
     * The name must be followed by `>` or whitespace, so `<br>` is not mistaken for an opening `<b>`.
     */
    private fun String.indexOfOpeningTag(tagName: String, searchIndex: Int): Int {
        val prefix = "<$tagName"
        var candidate = indexOf(prefix, startIndex = searchIndex)
        while (candidate != -1) {
            val following = getOrNull(candidate + prefix.length)
            if (following == '>' || following?.isWhitespace() == true) return candidate
            candidate = indexOf(prefix, startIndex = candidate + 1)
        }
        return -1
    }
}

/** Matches `key="value"`, `key='value'` and unquoted `key=value` attribute declarations. */
private val ATTRIBUTE_PATTERN = Regex("""([^\s=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']+))""")

/** The tag name is everything in a tag declaration up to the first whitespace. */
private fun String.tagName(): String = takeWhile { !it.isWhitespace() }

/**
 * Reads the attributes that follow the tag name in a tag declaration.
 *
 * Attributes without a value are left out; values may contain spaces and `=` when quoted.
 */
private fun String.tagAttributes(): Map<String, String> {
    val declared = dropWhile { !it.isWhitespace() }
    return ATTRIBUTE_PATTERN.findAll(declared).associate { match ->
        val value = match.groups[2] ?: match.groups[3] ?: match.groups[4]
        match.groupValues[1] to value?.value.orEmpty()
    }
}

/** `<br>` and `<@mention>` tags never have a closing tag. */
private fun String.isExitlessTag() = this == "<br>" || (this.startsWith("<@") && this.endsWith('>'))

/** True for tags written as `<name ... />`. */
private fun String.isSelfClosing() = this.endsWith("/>")
|
|
@ -1,18 +1,14 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message
|
||||
package app.dapk.st.matrix.sync.internal.sync.message.url
|
||||
|
||||
import app.dapk.st.matrix.sync.internal.sync.message.ContentAccumulator
|
||||
|
||||
private const val END_SEARCH = -1
|
||||
private const val INVALID_TRAILING_CHARS = ",.:;?<>"
|
||||
|
||||
internal class UrlParser {
|
||||
|
||||
private fun String.hasLookAhead(current: Int, value: String): Boolean {
|
||||
return length > current + value.length && this.substring(current, current + value.length) == value
|
||||
}
|
||||
|
||||
fun parseUrl(input: String, linkStartIndex: Int, builder: PartBuilder): Int {
|
||||
val urlIndex = input.indexOf("http", startIndex = linkStartIndex)
|
||||
fun parseUrl(input: String, urlIndex: Int, accumulator: ContentAccumulator): Int {
|
||||
return if (urlIndex == END_SEARCH) END_SEARCH else {
|
||||
builder.appendTextBeforeTag(linkStartIndex, urlIndex, input)
|
||||
|
||||
val originalUrl = input.substring(urlIndex)
|
||||
var index = 0
|
||||
val maybeUrl = originalUrl.takeWhile {
|
||||
|
@ -25,29 +21,27 @@ internal class UrlParser {
|
|||
when {
|
||||
urlContinuesUntilEnd -> {
|
||||
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
|
||||
builder.appendLink(url = cleanedUrl, label = null)
|
||||
accumulator.appendLink(url = cleanedUrl, label = null)
|
||||
if (cleanedUrl != originalUrl) {
|
||||
builder.appendText(originalUrl.last().toString())
|
||||
accumulator.appendText(originalUrl.last().toString())
|
||||
}
|
||||
input.length.next()
|
||||
input.length + 1
|
||||
}
|
||||
|
||||
else -> {
|
||||
val originalUrl = input.substring(urlIndex, urlEndIndex)
|
||||
val cleanedUrl = originalUrl.bestGuessStripTrailingUrlChar()
|
||||
builder.appendLink(url = cleanedUrl, label = null)
|
||||
accumulator.appendLink(url = cleanedUrl, label = null)
|
||||
if (originalUrl == cleanedUrl) urlEndIndex else urlEndIndex - 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fun test(startingFrom: Int, input: String): Int {
|
||||
return input.indexOf("http", startingFrom)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Checks whether [value] occurs at position [current].
 *
 * Only reports a match when at least one more character follows it in the receiver.
 */
private fun String.hasLookAhead(current: Int, value: String): Boolean {
    val end = current + value.length
    if (length <= end) return false
    return substring(current, end) == value
}
|
||||
|
||||
private fun String.bestGuessStripTrailingUrlChar(): String {
|
||||
val last = this.last()
|
|
@ -5,10 +5,9 @@ import app.dapk.st.matrix.common.RichText.Part.*
|
|||
import app.dapk.st.matrix.sync.internal.sync.message.RichMessageParser
|
||||
import fixture.aUserId
|
||||
import org.amshove.kluent.shouldBeEqualTo
|
||||
import org.junit.Ignore
|
||||
import org.junit.Test
|
||||
|
||||
class RichMessageParserTest {
|
||||
class RichTextMessageParserTest {
|
||||
|
||||
private val parser = RichMessageParser()
|
||||
|
||||
|
@ -18,6 +17,34 @@ class RichMessageParserTest {
|
|||
expected = RichText(listOf(Normal("Hello world!")))
|
||||
)
|
||||
|
||||
@Test
|
||||
fun `parses strong tags`() = runParserTest(
|
||||
Case(
|
||||
input = """hello <strong>wor</strong>ld""",
|
||||
expected = RichText(
|
||||
listOf(
|
||||
Normal("hello "),
|
||||
Bold("wor"),
|
||||
Normal("ld"),
|
||||
)
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
@Test
|
||||
fun `parses em tags`() = runParserTest(
|
||||
Case(
|
||||
input = """hello <em>wor</em>ld""",
|
||||
expected = RichText(
|
||||
listOf(
|
||||
Normal("hello "),
|
||||
Italic("wor"),
|
||||
Normal("ld"),
|
||||
)
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
@Test
|
||||
fun `parses p tags`() = runParserTest(
|
||||
input = "<p>Hello world!</p><p>foo bar</p>after paragraph",
|
||||
|
@ -63,7 +90,7 @@ class RichMessageParserTest {
|
|||
@Test
|
||||
fun `replaces matrixdotto with person`() = runParserTest(
|
||||
input = """Hello <a href="https://matrix.to/#/@a-name:foo.bar">a-name</a>: world""",
|
||||
expected = RichText(listOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(" world")))
|
||||
expected = RichText(listOf(Normal("Hello "), Person(aUserId("@a-name:foo.bar"), "@a-name"), Normal(": world")))
|
||||
)
|
||||
|
||||
@Test
|
||||
|
@ -122,6 +149,21 @@ class RichMessageParserTest {
|
|||
),
|
||||
)
|
||||
|
||||
|
||||
@Test
|
||||
fun `parses nested lists`() = runParserTest(
|
||||
input = """
|
||||
<ul>
|
||||
<li>first item
|
||||
<ul>
|
||||
<li>nested item</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
""".trimIndent().lines().joinToString("") { it.trim() },
|
||||
expected = RichText(listOf(Normal("- first item\n- nested item")))
|
||||
)
|
||||
|
||||
@Test
|
||||
fun `parses urls`() = runParserTest(
|
||||
Case(
|
||||
|
@ -178,58 +220,6 @@ class RichMessageParserTest {
|
|||
expected = RichText(listOf(Normal(">><foo> ><>> << more content")))
|
||||
)
|
||||
|
||||
@Test
|
||||
fun `parses strong tags`() = runParserTest(
|
||||
Case(
|
||||
input = """hello <strong>wor</strong>ld""",
|
||||
expected = RichText(
|
||||
listOf(
|
||||
Normal("hello "),
|
||||
Bold("wor"),
|
||||
Normal("ld"),
|
||||
)
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
@Test
|
||||
fun `parses em tags`() = runParserTest(
|
||||
Case(
|
||||
input = """hello <em>wor</em>ld""",
|
||||
expected = RichText(
|
||||
listOf(
|
||||
Normal("hello "),
|
||||
Italic("wor"),
|
||||
Normal("ld"),
|
||||
)
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
@Ignore // TODO
|
||||
@Test
|
||||
fun `parses nested tags`() = runParserTest(
|
||||
Case(
|
||||
input = """hello <b><i>wor<i/><b/>ld""",
|
||||
expected = RichText(
|
||||
listOf(
|
||||
Normal("hello "),
|
||||
BoldItalic("wor"),
|
||||
Normal("ld"),
|
||||
)
|
||||
)
|
||||
),
|
||||
Case(
|
||||
input = """<a href="www.google.com"><a href="www.google.com">www.google.com<a/><a/>""",
|
||||
expected = RichText(
|
||||
listOf(
|
||||
Link(url = "www.google.com", label = "www.google.com"),
|
||||
Link(url = "www.bing.com", label = "www.bing.com"),
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@Test
|
||||
fun `parses 'a' tags`() = runParserTest(
|
||||
Case(
|
Loading…
Reference in New Issue