more robust url parsing
This commit is contained in:
parent
8e36efe0c2
commit
55745b9c41
|
@ -24,68 +24,27 @@ internal class HtmlParser {
|
||||||
|
|
||||||
tagName == "br" -> {
|
tagName == "br" -> {
|
||||||
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
||||||
builder.appendText("\n")
|
builder.appendNewline()
|
||||||
tagClose.next()
|
tagClose.next()
|
||||||
}
|
}
|
||||||
|
|
||||||
else -> {
|
else -> {
|
||||||
val exitTag = "</$tagName>"
|
val exitTag = "</$tagName>"
|
||||||
val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
|
val exitIndex = input.indexOf(exitTag, startIndex = tagClose)
|
||||||
val exitTagClose = exitIndex + exitTag.length
|
val exitTagCloseIndex = exitIndex + exitTag.length
|
||||||
if (exitIndex == END_SEARCH) {
|
if (exitIndex == END_SEARCH) {
|
||||||
builder.appendText(input[searchIndex].toString())
|
builder.appendText(input[searchIndex].toString())
|
||||||
searchIndex.next()
|
searchIndex.next()
|
||||||
} else {
|
} else {
|
||||||
when (tagName) {
|
when (tagName) {
|
||||||
"mx-reply" -> {
|
"mx-reply" -> {
|
||||||
exitTagClose
|
exitTagCloseIndex
|
||||||
}
|
}
|
||||||
|
|
||||||
else -> {
|
else -> {
|
||||||
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
appendTextBeforeTag(searchIndex, tagOpen, builder, input)
|
||||||
val tagContent = input.substring(tagClose + 1, exitIndex)
|
val tagContent = input.substring(tagClose + 1, exitIndex)
|
||||||
when (tagName) {
|
handleTagWithContent(input, tagName, wholeTag, builder, tagContent, exitTagCloseIndex)
|
||||||
"a" -> {
|
|
||||||
val findHrefUrl = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">")
|
|
||||||
if (findHrefUrl.startsWith("https://matrix.to/#/@")) {
|
|
||||||
val userId = UserId(findHrefUrl.substringAfter("https://matrix.to/#/").substringBeforeLast("\""))
|
|
||||||
builder.appendPerson(userId, "@${tagContent.removePrefix("@")}")
|
|
||||||
if (input.getOrNull(exitTagClose) == ':') {
|
|
||||||
exitTagClose.next()
|
|
||||||
} else {
|
|
||||||
exitTagClose
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
builder.appendLink(findHrefUrl, label = tagContent)
|
|
||||||
exitTagClose
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
"b" -> {
|
|
||||||
builder.appendBold(tagContent)
|
|
||||||
exitTagClose
|
|
||||||
}
|
|
||||||
|
|
||||||
"strong" -> {
|
|
||||||
builder.appendBold(tagContent)
|
|
||||||
exitTagClose
|
|
||||||
}
|
|
||||||
|
|
||||||
"i" -> {
|
|
||||||
builder.appendItalic(tagContent)
|
|
||||||
exitTagClose
|
|
||||||
}
|
|
||||||
|
|
||||||
"em" -> {
|
|
||||||
builder.appendItalic(tagContent)
|
|
||||||
exitTagClose
|
|
||||||
}
|
|
||||||
|
|
||||||
else -> {
|
|
||||||
builder.appendText(tagContent)
|
|
||||||
exitTagClose
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -94,6 +53,65 @@ internal class HtmlParser {
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/**
 * Appends the part that corresponds to a tag with content and yields the index
 * parsing should continue from.
 *
 * Handling per [tagName]:
 * - `a`: an href starting with `https://matrix.to/#/@` is appended as a person
 *   mention; any other href is appended as a link labelled with [tagContent].
 * - `b` / `strong`: [tagContent] is appended as bold.
 * - `i` / `em`: [tagContent] is appended as italic.
 * - `p`: [tagContent] is appended as text followed by two newlines.
 * - anything else: [tagContent] is appended as plain text.
 *
 * @param input the text being parsed; only inspected for the character at [exitTagCloseIndex] after a mention.
 * @param tagName name of the tag being handled.
 * @param wholeTag tag text the `href` attribute is extracted from.
 * @param builder receives the appended parts.
 * @param tagContent text found between the opening and closing tag.
 * @param exitTagCloseIndex index returned for every branch except mentions, which may step past a trailing `:`.
 */
private fun handleTagWithContent(
    input: String,
    tagName: String,
    wholeTag: String,
    builder: PartBuilder,
    tagContent: String,
    exitTagCloseIndex: Int
) = when (tagName) {
    "a" -> {
        // Everything after `href=` with all quotes and a trailing `>` removed.
        val href = wholeTag.substringAfter("href=").replace("\"", "").removeSuffix(">")
        val isMention = href.startsWith("https://matrix.to/#/@")
        if (!isMention) {
            builder.appendLink(href, label = tagContent)
            exitTagCloseIndex
        } else {
            // Quotes were already stripped from href above, so the remainder is the user id as-is.
            val mentionedUser = UserId(href.substringAfter("https://matrix.to/#/"))
            builder.appendPerson(mentionedUser, "@${tagContent.removePrefix("@")}")
            ignoreMatrixColonMentionSuffix(input, exitTagCloseIndex)
        }
    }

    "b", "strong" -> {
        builder.appendBold(tagContent)
        exitTagCloseIndex
    }

    "p" -> {
        builder.appendText(tagContent)
        // A paragraph is terminated by two consecutive newlines.
        repeat(2) { builder.appendNewline() }
        exitTagCloseIndex
    }

    "i", "em" -> {
        builder.appendItalic(tagContent)
        exitTagCloseIndex
    }

    else -> {
        builder.appendText(tagContent)
        exitTagCloseIndex
    }
}
|
||||||
|
|
||||||
|
/**
 * Decides where parsing continues after a mention.
 *
 * When the character at [exitTagCloseIndex] in [input] is `:`, the result is
 * `exitTagCloseIndex.next()`; otherwise (including when the index is out of
 * bounds) [exitTagCloseIndex] is returned unchanged.
 */
private fun ignoreMatrixColonMentionSuffix(input: String, exitTagCloseIndex: Int) =
    when (input.getOrNull(exitTagCloseIndex)) {
        ':' -> exitTagCloseIndex.next()
        else -> exitTagCloseIndex
    }
|
||||||
|
|
||||||
private fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String) {
|
private fun appendTextBeforeTag(searchIndex: Int, tagOpen: Int, builder: PartBuilder, input: String) {
|
||||||
if (searchIndex != tagOpen) {
|
if (searchIndex != tagOpen) {
|
||||||
builder.appendText(input.substring(searchIndex, tagOpen))
|
builder.appendText(input.substring(searchIndex, tagOpen))
|
||||||
|
@ -115,4 +133,8 @@ internal class HtmlParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Locates the next `<` character in [intput], searching from [startingFrom].
 *
 * @return the index of the first `<` at or after [startingFrom], or -1 when there is none.
 */
fun test(startingFrom: Int, intput: String): Int = intput.indexOf('<', startIndex = startingFrom)
|
||||||
|
|
||||||
}
|
}
|
|
@ -54,3 +54,8 @@ internal fun PartBuilder.appendTextBeforeTag(previousIndex: Int, tagOpenIndex: I
|
||||||
this.appendText(input.substring(previousIndex, tagOpenIndex))
|
this.appendText(input.substring(previousIndex, tagOpenIndex))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Appends a single line break (`"\n"`) to this builder via [appendText]. */
internal fun PartBuilder.appendNewline() {
    appendText("\n")
}
|
||||||
|
|
||||||
|
|
|
@ -17,15 +17,27 @@ class RichMessageParser {
|
||||||
val builder = PartBuilder()
|
val builder = PartBuilder()
|
||||||
var nextIndex = 0
|
var nextIndex = 0
|
||||||
while (nextIndex != END_SEARCH) {
|
while (nextIndex != END_SEARCH) {
|
||||||
val htmlResult = htmlParser.parseHtmlTags(input, nextIndex, builder)
|
val htmlStart = htmlParser.test(nextIndex, input)
|
||||||
val linkStartIndex = findUrlStartIndex(htmlResult, nextIndex)
|
val urlStart = urlParser.test(nextIndex, input)
|
||||||
val urlResult = urlParser.parseUrl(input, linkStartIndex, builder)
|
|
||||||
|
|
||||||
val hasReachedEnd = hasReachedEnd(htmlResult, urlResult, input)
|
val firstResult = if (htmlStart < urlStart) {
|
||||||
if (hasReachedEnd && hasUnprocessedText(htmlResult, urlResult, input)) {
|
htmlParser.parseHtmlTags(input, nextIndex, builder)
|
||||||
|
} else {
|
||||||
|
urlParser.parseUrl(input, nextIndex, builder)
|
||||||
|
}
|
||||||
|
|
||||||
|
val secondStartIndex = findUrlStartIndex(firstResult, nextIndex)
|
||||||
|
val secondResult = if (htmlStart < urlStart) {
|
||||||
|
urlParser.parseUrl(input, secondStartIndex, builder)
|
||||||
|
} else {
|
||||||
|
htmlParser.parseHtmlTags(input, secondStartIndex, builder)
|
||||||
|
}
|
||||||
|
|
||||||
|
val hasReachedEnd = hasReachedEnd(firstResult, secondResult, input)
|
||||||
|
if (hasReachedEnd && hasUnprocessedText(firstResult, secondResult, input)) {
|
||||||
builder.appendText(input.substring(nextIndex))
|
builder.appendText(input.substring(nextIndex))
|
||||||
}
|
}
|
||||||
nextIndex = if (hasReachedEnd) END_SEARCH else max(htmlResult, urlResult)
|
nextIndex = if (hasReachedEnd) END_SEARCH else max(firstResult, secondResult)
|
||||||
}
|
}
|
||||||
return RichText(builder.build())
|
return RichText(builder.build())
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,16 +1,25 @@
|
||||||
package app.dapk.st.matrix.sync.internal.sync.message
|
package app.dapk.st.matrix.sync.internal.sync.message
|
||||||
|
|
||||||
private const val INVALID_TRAILING_CHARS = ",.:;?"
|
private const val INVALID_TRAILING_CHARS = ",.:;?<>"
|
||||||
|
|
||||||
internal class UrlParser {
|
internal class UrlParser {
|
||||||
|
|
||||||
|
/**
 * Checks whether [value] occurs in this string starting exactly at index [current].
 *
 * A match that ends on the last character of the string is accepted; the
 * previous strict `length > current + value.length` guard rejected it, so a
 * lookahead sitting at the very end of the input was never detected.
 *
 * @param current index in this string at which [value] must begin.
 * @param value the text to look for.
 * @return `true` when this string contains [value] at [current]; `false` otherwise,
 * including when [current] is out of range.
 */
private fun String.hasLookAhead(current: Int, value: String): Boolean {
    // startsWith performs the bounds check itself and returns false for out-of-range offsets.
    return startsWith(value, current)
}
|
||||||
|
|
||||||
fun parseUrl(input: String, linkStartIndex: Int, builder: PartBuilder): Int {
|
fun parseUrl(input: String, linkStartIndex: Int, builder: PartBuilder): Int {
|
||||||
val urlIndex = input.indexOf("http", startIndex = linkStartIndex)
|
val urlIndex = input.indexOf("http", startIndex = linkStartIndex)
|
||||||
val urlResult = if (urlIndex == END_SEARCH) END_SEARCH else {
|
return if (urlIndex == END_SEARCH) END_SEARCH else {
|
||||||
builder.appendTextBeforeTag(linkStartIndex, urlIndex, input)
|
builder.appendTextBeforeTag(linkStartIndex, urlIndex, input)
|
||||||
|
|
||||||
val originalUrl = input.substring(urlIndex)
|
val originalUrl = input.substring(urlIndex)
|
||||||
val urlEndIndex = originalUrl.indexOfFirst { it == '\n' || it == ' ' }
|
var index = 0
|
||||||
|
val maybeUrl = originalUrl.takeWhile {
|
||||||
|
it != '\n' && it != ' ' && !originalUrl.hasLookAhead(index++, "<br")
|
||||||
|
}
|
||||||
|
|
||||||
|
val urlEndIndex = maybeUrl.length + urlIndex
|
||||||
val urlContinuesUntilEnd = urlEndIndex == -1
|
val urlContinuesUntilEnd = urlEndIndex == -1
|
||||||
|
|
||||||
when {
|
when {
|
||||||
|
@ -31,7 +40,10 @@ internal class UrlParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return urlResult
|
}
|
||||||
|
|
||||||
|
fun test(startingFrom: Int, input: String): Int {
|
||||||
|
return input.indexOf("http", startingFrom)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,9 +19,9 @@ class RichMessageParserTest {
|
||||||
)
|
)
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun `skips p tags`() = runParserTest(
|
fun `parses p tags`() = runParserTest(
|
||||||
input = "Hello world! <p>foo bar</p> after paragraph",
|
input = "<p>Hello world!</p><p>foo bar</p>after paragraph",
|
||||||
expected = RichText(setOf(Normal("Hello world! foo bar after paragraph")))
|
expected = RichText(setOf(Normal("Hello world!\n\nfoo bar\n\nafter paragraph")))
|
||||||
)
|
)
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -84,6 +84,10 @@ class RichMessageParserTest {
|
||||||
input = "ending sentence with url https://google.com.",
|
input = "ending sentence with url https://google.com.",
|
||||||
expected = RichText(setOf(Normal("ending sentence with url "), Link("https://google.com", "https://google.com"), Normal(".")))
|
expected = RichText(setOf(Normal("ending sentence with url "), Link("https://google.com", "https://google.com"), Normal(".")))
|
||||||
),
|
),
|
||||||
|
Case(
|
||||||
|
input = "https://google.com<br>html after url",
|
||||||
|
expected = RichText(setOf(Link("https://google.com", "https://google.com"), Normal("\nhtml after url")))
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -111,7 +111,7 @@ internal class RoomEventCreatorTest {
|
||||||
result shouldBeEqualTo aMatrixRoomMessageEvent(
|
result shouldBeEqualTo aMatrixRoomMessageEvent(
|
||||||
eventId = editEvent.id,
|
eventId = editEvent.id,
|
||||||
utcTimestamp = editEvent.utcTimestamp,
|
utcTimestamp = editEvent.utcTimestamp,
|
||||||
content = RichText.of(editEvent.asTextContent().body!!),
|
content = RichText.of(editEvent.asTextContent().body!!.trimStart()),
|
||||||
author = A_SENDER,
|
author = A_SENDER,
|
||||||
edited = true
|
edited = true
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue