more robust url parsing
This commit is contained in:
parent
8e36efe0c2
commit
55745b9c41
|
@ -24,68 +24,27 @@ internal class HtmlParser {
|
|||
|
||||
tagName == "br" -> {
|
||||
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
||||
builder.appendText("\n")
|
||||
builder.appendNewline()
|
||||
tagClose.next()
|
||||
}
|
||||
|
||||
else -> {
|
||||
val exitTag = "</$tagName>"
|
||||
val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
|
||||
val exitTagClose = exitIndex + exitTag.length
|
||||
val exitTagCloseIndex = exitIndex + exitTag.length
|
||||
if (exitIndex == END_SEARCH) {
|
||||
builder.appendText(input[searchIndex].toString())
|
||||
searchIndex.next()
|
||||
} else {
|
||||
when (tagName) {
|
||||
"mx-reply" -> {
|
||||
exitTagClose
|
||||
exitTagCloseIndex
|
||||
}
|
||||
|
||||
else -> {
|
||||
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
||||
val tagContent = input.substring(tagClose + 1, exitIndex)
|
||||
when (tagName) {
|
||||
"a" -> {
|
||||
val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">")
|
||||
if (findHrefUrl.startsWith("https://matrix.to/#/@")) {
|
||||
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
|
||||
builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
|
||||
if (input.getOrNull(exitTagClose) == ':') {
|
||||
exitTagClose.next()
|
||||
} else {
|
||||
exitTagClose
|
||||
}
|
||||
} else {
|
||||
builder.appendLink(findHrefUrl, label = tagContent)
|
||||
exitTagClose
|
||||
}
|
||||
}
|
||||
|
||||
"b" -> {
|
||||
builder.appendBold(tagContent)
|
||||
exitTagClose
|
||||
}
|
||||
|
||||
"strong" -> {
|
||||
builder.appendBold(tagContent)
|
||||
exitTagClose
|
||||
}
|
||||
|
||||
"i" -> {
|
||||
builder.appendItalic(tagContent)
|
||||
exitTagClose
|
||||
}
|
||||
|
||||
"em" -> {
|
||||
builder.appendItalic(tagContent)
|
||||
exitTagClose
|
||||
}
|
||||
|
||||
else -> {
|
||||
builder.appendText(tagContent)
|
||||
exitTagClose
|
||||
}
|
||||
}
|
||||
handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -94,6 +53,65 @@ internal class HtmlParser {
|
|||
}
|
||||
)
|
||||
|
||||
/**
 * Appends the content of a closed HTML tag to [builder] according to [tagName]
 * and yields the index from which parsing should continue.
 *
 * - `a`: a `https://matrix.to/#/@…` href is appended as a person mention, any other href as a link.
 * - `b` / `strong`: appended as bold.
 * - `i` / `em`: appended as italic.
 * - `p`: appended as text followed by two `appendNewline()` calls.
 * - anything else: appended as plain text.
 *
 * @param input the full text being parsed; only consulted for the character after a mention
 * @param tagName name of the tag being handled
 * @param wholeTag the opening tag text, used to read the `href` attribute
 * @param builder receiver of the parsed parts
 * @param tagContent text between the opening and closing tag
 * @param exitTagCloseIndex index just past the closing tag
 */
private fun handleTagWithContent(
    input: String,
    tagName: String,
    wholeTag: String,
    builder: PartBuilder,
    tagContent: String,
    exitTagCloseIndex: Int
) = when (tagName) {
    "a" -> {
        // NOTE(review): everything after `href=` is taken as the url, so further
        // attributes in the tag would end up inside it — confirm against wholeTag's shape.
        val href = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">")
        val isUserMention = href.startsWith("https://matrix.to/#/@")
        if (!isUserMention) {
            builder.appendLink(href, label = tagContent)
            exitTagCloseIndex
        } else {
            // All quotes were stripped above, so substringBeforeLast finds no delimiter
            // and returns its receiver unchanged.
            val mentionedUser = UserId(href.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
            builder.appendPerson(mentionedUser, "@${tagContent.removePrefix("@")}")
            ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex)
        }
    }

    "b", "strong" -> {
        builder.appendBold(tagContent)
        exitTagCloseIndex
    }

    "i", "em" -> {
        builder.appendItalic(tagContent)
        exitTagCloseIndex
    }

    "p" -> {
        builder.appendText(tagContent)
        repeat(2) { builder.appendNewline() }
        exitTagCloseIndex
    }

    else -> {
        builder.appendText(tagContent)
        exitTagCloseIndex
    }
}
|
||||
|
||||
/**
 * Chooses the index to continue from after a mention's closing tag.
 *
 * When the character at [exitTagCloseIndex] in [input] is `':'`, the result is
 * `exitTagCloseIndex.next()` (`next()` is defined elsewhere — presumably the following index);
 * otherwise, including when the index is out of bounds, it is [exitTagCloseIndex] itself.
 */
private fun ignoreMatrixColonMentionSuffix(input: String, exitTagCloseIndex: Int) =
    when (input.getOrNull(exitTagCloseIndex)) {
        ':' -> exitTagCloseIndex.next()
        else -> exitTagCloseIndex
    }
|
||||
|
||||
private fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String) {
|
||||
if (searchIndex != tagOpen) {
|
||||
builder.appendText(input.substring(searchIndex, tagOpen))
|
||||
|
@ -115,4 +133,8 @@ internal class HtmlParser {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Locates the next `'<'` character.
 *
 * @param startingFrom index at which the search begins
 * @param intput text to search
 * @return index of the first `'<'` at or after [startingFrom], or `-1` when there is none
 */
fun test(startingFrom: Int, intput: String): Int = intput.indexOf('<', startIndex = startingFrom)
|
||||
|
||||
}
|
|
@ -54,3 +54,8 @@ internal fun PartBuilder.appendTextBeforeTag(previousIndex: Int, tagOpenIndex: I
|
|||
this.appendText(input.substring(previousIndex, tagOpenIndex))
|
||||
}
|
||||
}
|
||||
|
||||
/** Appends a single line break (`"\n"`) as text. */
internal fun PartBuilder.appendNewline() {
    appendText("\n")
}
|
||||
|
||||
|
|
|
@ -17,15 +17,27 @@ class RichMessageParser {
|
|||
val builder = PartBuilder()
|
||||
var nextIndex = 0
|
||||
while (nextIndex != END_SEARCH) {
|
||||
val htmlResult = htmlParser.parseHtmlTags(input, nextIndex, builder)
|
||||
val linkStartIndex = findUrlStartIndex(htmlResult, nextIndex)
|
||||
val urlResult = urlParser.parseUrl(input, linkStartIndex, builder)
|
||||
val htmlStart = htmlParser.test(nextIndex, input)
|
||||
val urlStart = urlParser.test(nextIndex, input)
|
||||
|
||||
val hasReachedEnd = hasReachedEnd(htmlResult, urlResult, input)
|
||||
if (hasReachedEnd && hasUnprocessedText(htmlResult, urlResult, input)) {
|
||||
val firstResult = if (htmlStart < urlStart) {
|
||||
htmlParser.parseHtmlTags(input, nextIndex, builder)
|
||||
} else {
|
||||
urlParser.parseUrl(input, nextIndex, builder)
|
||||
}
|
||||
|
||||
val secondStartIndex = findUrlStartIndex(firstResult, nextIndex)
|
||||
val secondResult = if (htmlStart < urlStart) {
|
||||
urlParser.parseUrl(input, secondStartIndex, builder)
|
||||
} else {
|
||||
htmlParser.parseHtmlTags(input, secondStartIndex, builder)
|
||||
}
|
||||
|
||||
val hasReachedEnd = hasReachedEnd(firstResult, secondResult, input)
|
||||
if (hasReachedEnd && hasUnprocessedText(firstResult, secondResult, input)) {
|
||||
builder.appendText(input.substring(nextIndex))
|
||||
}
|
||||
nextIndex = if (hasReachedEnd) END_SEARCH else max(htmlResult, urlResult)
|
||||
nextIndex = if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult)
|
||||
}
|
||||
return RichText(builder.build())
|
||||
}
|
||||
|
|
|
@ -1,16 +1,25 @@
|
|||
package app.dapk.st.matrix.sync.internal.sync.message
|
||||
|
||||
private const val INVALID_TRAILING_CHARS = ",.:;?"
|
||||
private const val INVALID_TRAILING_CHARS = ",.:;?<>"
|
||||
|
||||
internal class UrlParser {
|
||||
|
||||
/**
 * Tells whether [value] occurs in this string starting exactly at index [current].
 *
 * The previous length guard used a strict `>` and therefore missed a match that
 * ended precisely at the end of the string; `startsWith` with a start index
 * handles that boundary and simply returns `false` when [current] is out of range.
 *
 * @param current index in this string at which [value] is expected to begin
 * @param value the text to look for
 * @return `true` when the characters from [current] onwards begin with [value]
 */
private fun String.hasLookAhead(current: Int, value: String): Boolean {
    return startsWith(value, startIndex = current)
}
|
||||
|
||||
fun parseUrl(input: String, linkStartIndex: Int, builder: PartBuilder): Int {
|
||||
val urlIndex = input.indexOf("http", startIndex = linkStartIndex)
|
||||
val urlResult = if (urlIndex == END_SEARCH) END_SEARCH else {
|
||||
return if (urlIndex == END_SEARCH) END_SEARCH else {
|
||||
builder.appendTextBeforeTag(linkStartIndex, urlIndex, input)
|
||||
|
||||
val originalUrl = input.substring(urlIndex)
|
||||
val urlEndIndex = originalUrl.indexOfFirst { it == '\n' || it == ' ' }
|
||||
var index = 0
|
||||
val maybeUrl = originalUrl.takeWhile {
|
||||
it != '\n' && it != ' ' && !originalUrl.hasLookAhead(index++, "<br")
|
||||
}
|
||||
|
||||
val urlEndIndex = maybeUrl.length + urlIndex
|
||||
val urlContinuesUntilEnd = urlEndIndex == -1
|
||||
|
||||
when {
|
||||
|
@ -31,7 +40,10 @@ internal class UrlParser {
|
|||
}
|
||||
}
|
||||
}
|
||||
return urlResult
|
||||
}
|
||||
|
||||
/**
 * Locates the next occurrence of `"http"`.
 *
 * @param startingFrom index at which the search begins
 * @param input text to search
 * @return index of the first `"http"` at or after [startingFrom], or `-1` when there is none
 */
fun test(startingFrom: Int, input: String): Int = input.indexOf("http", startIndex = startingFrom)
|
||||
|
||||
}
|
||||
|
|
|
@ -19,9 +19,9 @@ class RichMessageParserTest {
|
|||
)
|
||||
|
||||
@Test
|
||||
fun `skips p tags`() = runParserTest(
|
||||
input = "Hello world! <p>foo bar</p> after paragraph",
|
||||
expected = RichText(setOf(Normal("Hello world! foo bar after paragraph")))
|
||||
fun `parses p tags`() = runParserTest(
|
||||
input = "<p>Hello world!</p><p>foo bar</p>after paragraph",
|
||||
expected = RichText(setOf(Normal("Hello world!\n\nfoo bar\n\nafter paragraph")))
|
||||
)
|
||||
|
||||
@Test
|
||||
|
@ -84,6 +84,10 @@ class RichMessageParserTest {
|
|||
input = "ending sentence with url https://google.com.",
|
||||
expected = RichText(setOf(Normal("ending sentence with url "), Link("https://google.com", "https://google.com"), Normal(".")))
|
||||
),
|
||||
Case(
|
||||
input = "https://google.com<br>html after url",
|
||||
expected = RichText(setOf(Link("https://google.com", "https://google.com"), Normal("\nhtml after url")))
|
||||
),
|
||||
)
|
||||
|
||||
@Test
|
||||
|
|
|
@ -111,7 +111,7 @@ internal class RoomEventCreatorTest {
|
|||
result shouldBeEqualTo aMatrixRoomMessageEvent(
|
||||
eventId = editEvent.id,
|
||||
utcTimestamp = editEvent.utcTimestamp,
|
||||
content = RichText.of(editEvent.asTextContent().body!!),
|
||||
content = RichText.of(editEvent.asTextContent().body!!.trimStart()),
|
||||
author = A_SENDER,
|
||||
edited = true
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue