diff --git a/app/src/main/java/app/pachli/util/SpanUtils.kt b/app/src/main/java/app/pachli/util/SpanUtils.kt
index 81143d1c9..e182da69a 100644
--- a/app/src/main/java/app/pachli/util/SpanUtils.kt
+++ b/app/src/main/java/app/pachli/util/SpanUtils.kt
@@ -1,68 +1,33 @@
+/*
+ * Copyright 2024 Pachli Association
+ *
+ * This file is a part of Pachli.
+ *
+ * This program is free software; you can redistribute it and/or modify it under the terms of the
+ * GNU General Public License as published by the Free Software Foundation; either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * Pachli is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
+ * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ * Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with Pachli; if not,
+ * see <http://www.gnu.org/licenses>.
+ */
+
 package app.pachli.util
 
 import android.text.Spannable
 import android.text.Spanned
-import android.text.style.CharacterStyle
 import android.text.style.ForegroundColorSpan
 import android.text.style.URLSpan
 import app.pachli.core.ui.MentionSpan
 import app.pachli.core.ui.NoUnderlineURLSpan
-import java.util.regex.Pattern
-import kotlin.math.max
-
-/**
- * @see <a href="https://github.com/tootsuite/mastodon/blob/master/app/models/tag.rb">
- *     Tag#HASHTAG_RE</a>.
- */
-private const val HASHTAG_SEPARATORS = "_\\u00B7\\u200c"
-private const val UNICODE_WORD = "\\p{L}\\p{Mn}\\p{Nd}\\p{Nl}\\p{Pc}" // Ugh, java ( https://stackoverflow.com/questions/4304928/unicode-equivalents-for-w-and-b-in-java-regular-expressions )
-private const val TAG_REGEX = "(?:^|[^/)\\w])#(([${UNICODE_WORD}_][$UNICODE_WORD$HASHTAG_SEPARATORS]*[\\p{Alpha}$HASHTAG_SEPARATORS][$UNICODE_WORD$HASHTAG_SEPARATORS]*[${UNICODE_WORD}_])|([${UNICODE_WORD}_]*[\\p{Alpha}][${UNICODE_WORD}_]*))"
-
-/**
- * @see <a href="https://github.com/tootsuite/mastodon/blob/master/app/models/account.rb">
- *     Account#MENTION_RE</a>
- */
-private const val USERNAME_REGEX = "[\\w]+([\\w\\.-]+[\\w]+)?"
-private const val MENTION_REGEX = "(?<=^|[^\\/$UNICODE_WORD])@(($USERNAME_REGEX)(?:@[$UNICODE_WORD\\.\\-]+[$UNICODE_WORD]+)?)"
-
-private const val HTTP_URL_REGEX = "(?:(^|\\b)http://[^\\s]+)"
-private const val HTTPS_URL_REGEX = "(?:(^|\\b)https://[^\\s]+)"
-
-/**
- * Dump of android.util.Patterns.WEB_URL
- */
-private val STRICT_WEB_URL_PATTERN = Pattern.compile("(((?:(?i:http|https|rtsp)://(?:(?:[a-zA-Z0-9\\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?(?:(([a-zA-Z0-9[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 　]]](?:[a-zA-Z0-9[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 　]]_\\-]{0,61}[a-zA-Z0-9[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 　]]]){0,1}\\.)+(xn\\-\\-[\\w\\-]{0,58}\\w|[a-zA-Z[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 　
]]]{2,63})|((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9]))))(?:\\:\\d{1,5})?)([/\\?](?:(?:[a-zA-Z0-9[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 　]];/\\?:@&=#~\\-\\.\\+!\\*'\\(\\),_\\\$])|(?:%[a-fA-F0-9]{2}))*)?(?:\\b|\$|^))")
+import com.twitter.twittertext.Extractor
 
 private val spanClasses = listOf(ForegroundColorSpan::class.java, URLSpan::class.java)
-private val finders = mapOf(
-    FoundMatchType.HTTP_URL to PatternFinder(':', HTTP_URL_REGEX, 5, Character::isWhitespace),
-    FoundMatchType.HTTPS_URL to PatternFinder(':', HTTPS_URL_REGEX, 6, Character::isWhitespace),
-    FoundMatchType.TAG to PatternFinder('#', TAG_REGEX, 1, ::isValidForTagPrefix),
-    // TODO: We also need a proper validator for mentions
-    FoundMatchType.MENTION to PatternFinder('@', MENTION_REGEX, 1, Character::isWhitespace),
-)
 
-private enum class FoundMatchType {
-    HTTP_URL,
-    HTTPS_URL,
-    TAG,
-    MENTION,
-}
-
-private class FindCharsResult {
-    lateinit var matchType: FoundMatchType
-    var start: Int = -1
-    var end: Int = -1
-}
-
-private class PatternFinder(
-    val searchCharacter: Char,
-    regex: String,
-    val searchPrefixWidth: Int,
-    val prefixValidator: (Int) -> Boolean,
-) {
-    val pattern: Pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE)
-}
+private val extractor = Extractor().apply { isExtractURLWithoutProtocol = false }
 
 /**
  * Takes text containing mentions and hashtags and urls and makes them the given colour.
@@ -75,18 +40,16 @@ fun highlightSpans(text: Spannable, colour: Int) {
 
     // Colour the mentions and hashtags.
     val string = text.toString()
-    val length = text.length
-    var start = 0
-    var end = 0
-    while (end in 0 until length && start >= 0) {
-        // Search for url first because it can contain the other characters
-        val found = findPattern(string, end)
-        start = found.start
-        end = found.end
-        if (start in 0 until end) {
-            text.setSpan(getSpan(found.matchType, string, colour, start, end), start, end, Spanned.SPAN_INCLUSIVE_EXCLUSIVE)
-            start += finders[found.matchType]!!.searchPrefixWidth
+
+    val entities = extractor.extractEntitiesWithIndices(string)
+
+    for (entity in entities) {
+        val span = when (entity.type) {
+            Extractor.Entity.Type.URL -> NoUnderlineURLSpan(string.substring(entity.start, entity.end))
+            Extractor.Entity.Type.HASHTAG -> ForegroundColorSpan(colour)
+            Extractor.Entity.Type.MENTION -> MentionSpan(string.substring(entity.start, entity.end))
         }
+        text.setSpan(span, entity.start, entity.end, Spanned.SPAN_INCLUSIVE_EXCLUSIVE)
     }
 }
 
@@ -95,87 +58,3 @@ private fun <T> clearSpans(text: Spannable, spanClass: Class<T>) {
         text.removeSpan(span)
     }
 }
-
-private fun findPattern(string: String, fromIndex: Int): FindCharsResult {
-    val result = FindCharsResult()
-    for (i in fromIndex..string.lastIndex) {
-        val c = string[i]
-        for (matchType in FoundMatchType.entries) {
-            val finder = finders[matchType]
-            if (finder!!.searchCharacter == c &&
-                (
-                    (i - fromIndex) < finder.searchPrefixWidth ||
-                        finder.prefixValidator(string.codePointAt(i - finder.searchPrefixWidth))
-                    )
-            ) {
-                result.matchType = matchType
-                result.start = max(0, i - finder.searchPrefixWidth)
-                findEndOfPattern(string, result, finder.pattern)
-                if (result.start + finder.searchPrefixWidth <= i + 1 && // The found result is actually triggered by the correct search character
-                    result.end >= result.start
-                ) { // ...and we actually found a valid result
-                    return result
-                }
-            }
-        }
-    }
-    return result
-}
-
-private fun findEndOfPattern(string: String, result: FindCharsResult, pattern: Pattern) {
-    val matcher = pattern.matcher(string)
-    if (matcher.find(result.start)) {
-        // Once we have API level 26+, we can use named captures...
-        val end = matcher.end()
-        result.start = matcher.start()
-        when (result.matchType) {
-            FoundMatchType.TAG -> {
-                if (isValidForTagPrefix(string.codePointAt(result.start))) {
-                    if (string[result.start] != '#' ||
-                        (string[result.start] == '#' && string[result.start + 1] == '#')
-                    ) {
-                        ++result.start
-                    }
-                }
-            }
-            else -> {
-                if (Character.isWhitespace(string.codePointAt(result.start))) {
-                    ++result.start
-                }
-            }
-        }
-        when (result.matchType) {
-            FoundMatchType.HTTP_URL, FoundMatchType.HTTPS_URL -> {
-                // Preliminary url patterns are fast/permissive, now we'll do full validation
-                if (STRICT_WEB_URL_PATTERN.matcher(string.substring(result.start, end)).matches()) {
-                    result.end = end
-                }
-            }
-            else -> result.end = end
-        }
-    }
-}
-
-private fun getSpan(matchType: FoundMatchType, string: String, colour: Int, start: Int, end: Int): CharacterStyle {
-    return when (matchType) {
-        FoundMatchType.HTTP_URL -> NoUnderlineURLSpan(string.substring(start, end))
-        FoundMatchType.HTTPS_URL -> NoUnderlineURLSpan(string.substring(start, end))
-        FoundMatchType.MENTION -> MentionSpan(string.substring(start, end))
-        else -> ForegroundColorSpan(colour)
-    }
-}
-
-private fun isWordCharacters(codePoint: Int): Boolean {
-    return (codePoint in 0x30..0x39) || // [0-9]
-        (codePoint in 0x41..0x5a) || // [A-Z]
-        (codePoint == 0x5f) || // _
-        (codePoint in 0x61..0x7a) // [a-z]
-}
-
-private fun isValidForTagPrefix(codePoint: Int): Boolean {
-    return !(
-        isWordCharacters(codePoint) || // \w
-            (codePoint == 0x2f) || // /
-            (codePoint == 0x29)
-        ) // )
-}
diff --git a/app/src/test/java/app/pachli/SpanUtilsTest.kt b/app/src/test/java/app/pachli/SpanUtilsTest.kt
index 0888e4f80..f06da2201 100644
--- a/app/src/test/java/app/pachli/SpanUtilsTest.kt
+++ b/app/src/test/java/app/pachli/SpanUtilsTest.kt
@@ -4,9 +4,11 @@ import app.pachli.core.testing.fakes.FakeSpannable
 import app.pachli.util.highlightSpans
 import org.junit.Assert
 import org.junit.Test
+import org.junit.experimental.runners.Enclosed
 import org.junit.runner.RunWith
 import org.junit.runners.Parameterized
 
+@RunWith(Enclosed::class)
 class SpanUtilsTest {
     @Test
     fun matchesMixedSpans() {
@@ -19,8 +21,8 @@ class SpanUtilsTest {
 
     @Test
     fun doesntMergeAdjacentURLs() {
-        val firstURL = "http://first.thing"
-        val secondURL = "https://second.thing"
+        val firstURL = "http://first.bar"
+        val secondURL = "https://second.bar"
         val inputSpannable = FakeSpannable("$firstURL $secondURL")
         highlightSpans(inputSpannable, 0xffffff)
         val spans = inputSpannable.spans
@@ -71,14 +73,6 @@ class SpanUtilsTest {
             Assert.assertTrue(spans.isEmpty())
         }
 
-        @Test
-        fun doesNotMatchSpanEmbeddedInAnotherSpan() {
-            val inputSpannable = FakeSpannable("@aa${thingToHighlight}aa")
-            highlightSpans(inputSpannable, 0xffffff)
-            val spans = inputSpannable.spans
-            Assert.assertEquals(1, spans.size)
-        }
-
         @Test
         fun spansDoNotOverlap() {
             val begin = "@begin"
diff --git a/app/src/test/java/app/pachli/components/compose/ComposeActivityTest.kt b/app/src/test/java/app/pachli/components/compose/ComposeActivityTest.kt
index 6c9f2606b..a122b68b5 100644
--- a/app/src/test/java/app/pachli/components/compose/ComposeActivityTest.kt
+++ b/app/src/test/java/app/pachli/components/compose/ComposeActivityTest.kt
@@ -288,7 +288,7 @@ class ComposeActivityTest {
 
     @Test
     fun whenTextContainsUrl_onlyEllipsizedURLIsCounted() {
-        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
+        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM%3A"
         val additionalContent = "Check out this @image #search result: "
         rule.launch()
         rule.getScenario().onActivity {
@@ -303,7 +303,7 @@ class ComposeActivityTest {
     @Test
     fun whenTextContainsShortUrls_allUrlsGetEllipsized() {
         val shortUrl = "https://pachli.app"
-        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
+        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM%3A"
         val additionalContent = " Check out this @image #search result: "
         rule.launch()
         rule.getScenario().onActivity {
@@ -317,7 +317,7 @@ class ComposeActivityTest {
 
     @Test
     fun whenTextContainsMultipleURLs_allURLsGetEllipsized() {
-        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
+        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM%3A"
         val additionalContent = " Check out this @image #search result: "
         rule.launch()
         rule.getScenario().onActivity {
@@ -331,7 +331,7 @@ class ComposeActivityTest {
 
     @Test
     fun whenTextContainsUrl_onlyEllipsizedURLIsCounted_withCustomConfiguration() {
-        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
+        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM%3A"
         val additionalContent = "Check out this @image #search result: "
         val customUrlLength = 16
         getInstanceCallback = { getInstanceWithCustomConfiguration(configuration = getCustomInstanceConfiguration(charactersReservedPerUrl = customUrlLength)) }
@@ -348,7 +348,7 @@ class ComposeActivityTest {
     @Test
     fun whenTextContainsShortUrls_allUrlsGetEllipsized_withCustomConfiguration() {
         val shortUrl = "https://pachli.app"
-        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
+        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM%3A"
         val additionalContent = " Check out this @image #search result: "
         val customUrlLength = 18 // The intention is that this is longer than shortUrl.length
         getInstanceCallback = { getInstanceWithCustomConfiguration(configuration = getCustomInstanceConfiguration(charactersReservedPerUrl = customUrlLength)) }
@@ -364,7 +364,7 @@ class ComposeActivityTest {
 
     @Test
     fun whenTextContainsMultipleURLs_allURLsGetEllipsized_withCustomConfiguration() {
-        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
+        val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM%3A"
         val additionalContent = " Check out this @image #search result: "
         val customUrlLength = 16
         getInstanceCallback = { getInstanceWithCustomConfiguration(configuration = getCustomInstanceConfiguration(charactersReservedPerUrl = customUrlLength)) }
diff --git a/app/src/test/java/app/pachli/components/compose/StatusLengthTest.kt b/app/src/test/java/app/pachli/components/compose/StatusLengthTest.kt
index 4f39f8bbf..bf047fcff 100644
--- a/app/src/test/java/app/pachli/components/compose/StatusLengthTest.kt
+++ b/app/src/test/java/app/pachli/components/compose/StatusLengthTest.kt
@@ -40,9 +40,12 @@ class StatusLengthTest(
                 // "@user@server" should be treated as "@user"
                 arrayOf("123 @example@example.org", 12),
                 // URLs under 23 chars are treated as 23 chars
-                arrayOf("123 http://example.url", 27),
+                arrayOf("123 http://example.org", 27),
                 // URLs over 23 chars are treated as 23 chars
                 arrayOf("123 http://urlthatislongerthan23characters.example.org", 27),
+                // URLs end when they should (the ")." should be part of the status
+                // length, not considered to be part of the URL)
+                arrayOf("test (https://example.com). test", 36),
                 // Short hashtags are treated as is
                 arrayOf("123 #basictag", 13),
                 // Long hashtags are *also* treated as is (not treated as 23, like URLs)
diff --git a/core/ui/src/main/kotlin/com/twitter/twittertext/Extractor.kt b/core/ui/src/main/kotlin/com/twitter/twittertext/Extractor.kt
new file mode 100644
index 000000000..ef46a7056
--- /dev/null
+++ b/core/ui/src/main/kotlin/com/twitter/twittertext/Extractor.kt
@@ -0,0 +1,325 @@
+// Copyright 2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0
+// http://www.apache.org/licenses/LICENSE-2.0
+
+package com.twitter.twittertext
+
+import java.net.IDN
+import java.util.regex.Matcher
+
+/**
+ * A class to extract usernames, hashtags and URLs from Mastodon text.
+ */
+open class Extractor {
+    /** A URL, hashtag or mention found in a text, with its start and end indices. */
+    data class Entity(
+        var start: Int,
+        var end: Int,
+        val value: String,
+        val type: Type,
+    ) {
+        /** The kinds of entity that can be extracted. */
+        enum class Type { URL, HASHTAG, MENTION }
+
+        /** Builds from group [groupNumber] of [matcher]; [startOffset] is added to the start. */
+        @JvmOverloads
+        constructor(matcher: Matcher, type: Type, groupNumber: Int, startOffset: Int = -1) : this(
+            start = matcher.start(groupNumber) + startOffset,
+            end = matcher.end(groupNumber),
+            value = matcher.group(groupNumber)!!,
+            type = type,
+        )
+    }
+
+    /** When false, [extractURLsWithIndices] skips URL matches that have no protocol. */
+    var isExtractURLWithoutProtocol = true
+
+    /**
+     * Sorts [entities] by start index, then removes each entity that starts before the most
+     * recently kept entity has ended.
+     *
+     * Two entities overlap only when one is a URL and the other is a hashtag/mention that is
+     * part of that URL. Keeping the entity with the smaller start index keeps the URL.
+     */
+    private fun removeOverlappingEntities(entities: MutableList<Entity>) {
+        entities.sortBy { it.start }
+        // End of the most recently kept entity; MIN_VALUE guarantees the first one is kept.
+        var keptEnd = Int.MIN_VALUE
+        val iterator = entities.iterator()
+        while (iterator.hasNext()) {
+            val candidate = iterator.next()
+            if (keptEnd > candidate.start) {
+                iterator.remove()
+            } else {
+                keptEnd = candidate.end
+            }
+        }
+    }
+
+    /**
+     * Finds the URLs, #hashtags and @mentions (including @user/list references) in [text].
+     *
+     * @return the entities ordered by start index, with overlapping entities removed
+     */
+    fun extractEntitiesWithIndices(text: String): List<Entity> {
+        val found = extractURLsWithIndices(text).toMutableList()
+        found += extractHashtagsWithIndices(text, checkUrlOverlap = false)
+        found += extractMentionsOrListsWithIndices(text)
+        removeOverlappingEntities(found)
+        return found
+    }
+
+    /**
+     * Extracts @username mentions and @username/list references from [text].
+     *
+     * Every returned entity has type [Entity.Type.MENTION] and holds the text captured by the
+     * username group as its value. Its start index is one less than the start of that group
+     * (presumably so the range covers the leading @ sign; confirm against
+     * [Regex.VALID_MENTION_OR_LIST]). For a list reference the end index is the end of the
+     * list group instead of the end of the username group.
+     *
+     * @param text the text to extract mentions from
+     * @return the mention entities in the order the matcher found them
+     */
+    private fun extractMentionsOrListsWithIndices(text: String): List<Entity> {
+        // Performance optimization: text without @/＠ cannot contain a mention, so the regex
+        // is never run for it. An empty string takes this path as well.
+        val hasMentionMarker = text.any { it == '@' || it == '＠' }
+        if (!hasMentionMarker) {
+            return emptyList()
+        }
+
+        val mentions = mutableListOf<Entity>()
+        val matcher: Matcher = Regex.VALID_MENTION_OR_LIST.matcher(text)
+        while (matcher.find()) {
+            // Discard the match when the invalid-ending pattern is found in the text after it.
+            val remainder = text.substring(matcher.end())
+            if (Regex.INVALID_MENTION_MATCH_END.matcher(remainder).find()) {
+                continue
+            }
+
+            val isPlainMention = matcher.group(Regex.VALID_MENTION_OR_LIST_GROUP_LIST) == null
+            val mention = if (isPlainMention) {
+                // No list part: the username group alone defines the entity.
+                Entity(
+                    matcher,
+                    Entity.Type.MENTION,
+                    Regex.VALID_MENTION_OR_LIST_GROUP_USERNAME,
+                )
+            } else {
+                // List reference: the range runs on to the end of the list group.
+                Entity(
+                    matcher.start(Regex.VALID_MENTION_OR_LIST_GROUP_USERNAME) - 1,
+                    matcher.end(Regex.VALID_MENTION_OR_LIST_GROUP_LIST),
+                    matcher.group(Regex.VALID_MENTION_OR_LIST_GROUP_USERNAME),
+                    Entity.Type.MENTION,
+                )
+            }
+            mentions.add(mention)
+        }
+        return mentions
+    }
+
+    /**
+     * Extracts URLs from [text].
+     *
+     * A match without a protocol is only kept when [isExtractURLWithoutProtocol] is true and
+     * the text before it does not match [Regex.INVALID_URL_WITHOUT_PROTOCOL_MATCH_BEGIN].
+     * Every kept match must also pass [isValidHostAndLength].
+     *
+     * @param text the text to extract URLs from; null or empty text yields an empty list
+     * @return the URL entities in the order the matcher found them
+     */
+    private fun extractURLsWithIndices(text: String?): List<Entity> {
+        // Performance optimization: when protocol-less URLs are allowed the text must contain
+        // a '.', otherwise it must contain a ':'. Without that character there is no URL.
+        val requiredChar = if (isExtractURLWithoutProtocol) '.' else ':'
+        if (text.isNullOrEmpty() || text.indexOf(requiredChar) == -1) return emptyList()
+
+        val urls = mutableListOf<Entity>()
+        val matcher: Matcher = Regex.VALID_URL.matcher(text)
+        while (matcher.find()) {
+            val protocol = matcher.group(Regex.VALID_URL_GROUP_PROTOCOL)
+            // Skip a protocol-less match if such URLs are disabled, or if the URL is preceded
+            // by an invalid character.
+            val skip = protocol.isNullOrEmpty() && (
+                !isExtractURLWithoutProtocol ||
+                    Regex.INVALID_URL_WITHOUT_PROTOCOL_MATCH_BEGIN
+                        .matcher(matcher.group(Regex.VALID_URL_GROUP_BEFORE))
+                        .matches()
+                )
+            if (skip) continue
+
+            val url = matcher.group(Regex.VALID_URL_GROUP_URL)
+            val host = matcher.group(Regex.VALID_URL_GROUP_DOMAIN)
+            if (!isValidHostAndLength(url.length, protocol, host)) continue
+
+            val start = matcher.start(Regex.VALID_URL_GROUP_URL)
+            val end = matcher.end(Regex.VALID_URL_GROUP_URL)
+            urls.add(Entity(start, end, url, Entity.Type.URL))
+        }
+        return urls
+    }
+
+    /**
+     * Extracts #hashtags from [text].
+     *
+     * Every returned entity has type [Entity.Type.HASHTAG] and is built from the tag group
+     * with the default start offset of -1 (presumably so the range covers the leading # sign;
+     * confirm against [Regex.VALID_HASHTAG]).
+     *
+     * A match is discarded when [Regex.INVALID_HASHTAG_MATCH_END] is found in the text that
+     * follows it.
+     *
+     * @param text the text to extract hashtags from
+     * @param checkUrlOverlap if true, URLs are extracted as well and any hashtag that overlaps
+     * a URL is removed from the result
+     * @return the hashtag entities
+     */
+    private fun extractHashtagsWithIndices(text: String, checkUrlOverlap: Boolean): List<Entity> {
+        // Performance optimization: text without #/＃ cannot contain a hashtag, so the regex is
+        // never run for it. An empty string takes this path as well.
+        val hasHashtagMarker = text.any { it == '#' || it == '＃' }
+        if (!hasHashtagMarker) {
+            return emptyList()
+        }
+
+        val hashtags = mutableListOf<Entity>()
+        val matcher: Matcher = Regex.VALID_HASHTAG.matcher(text)
+        while (matcher.find()) {
+            // Discard the match when the invalid-ending pattern is found in the text after it.
+            val remainder = text.substring(matcher.end())
+            if (Regex.INVALID_HASHTAG_MATCH_END.matcher(remainder).find()) {
+                continue
+            }
+            hashtags.add(
+                Entity(
+                    matcher,
+                    Entity.Type.HASHTAG,
+                    Regex.VALID_HASHTAG_GROUP_TAG,
+                ),
+            )
+        }
+
+        if (!checkUrlOverlap) {
+            return hashtags
+        }
+
+        // Merge in the URLs so that overlap removal can compare hashtags against them.
+        // removeOverlappingEntities keeps whichever entity starts first, so a hashtag that
+        // begins inside a URL is dropped. Afterwards the URLs themselves are filtered out
+        // again, leaving only the surviving hashtags.
+        val urls = extractURLsWithIndices(text)
+        if (urls.isNotEmpty()) {
+            hashtags.addAll(urls)
+            removeOverlappingEntities(hashtags)
+            // Only hashtags belong in the result; the URLs were needed for the comparison only.
+            hashtags.retainAll { it.type == Entity.Type.HASHTAG }
+        }
+        return hashtags
+    }
+
+    /**
+     * Converts indices into [text] between code units and code points, reusing earlier counting.
+     */
+    private class IndexConverter(val text: String) {
+        // A single cached pair of matching code unit / code point offsets. Each conversion
+        // counts from this pair instead of from the start of the text and then updates it,
+        // which saves work when consecutive requests are close together.
+        private var codePointIndex = 0
+        private var charIndex = 0
+
+        /**
+         * Converts a code unit index to a code point index and moves the cached pair there.
+         *
+         * @param charIndex Index into the string measured in code units.
+         * @return The code point index that corresponds to the specified character index.
+         */
+        fun codeUnitsToCodePoints(charIndex: Int): Int {
+            if (charIndex < this.charIndex) {
+                codePointIndex -= text.codePointCount(charIndex, this.charIndex)
+            } else {
+                codePointIndex += text.codePointCount(this.charIndex, charIndex)
+            }
+            this.charIndex = charIndex
+
+            // If a supplementary code point starts at charIndex - 1 then charIndex is the second
+            // code unit of its surrogate pair; step the cached index back so it never points there.
+            if (charIndex > 0 && Character.isSupplementaryCodePoint(text.codePointAt(charIndex - 1))) {
+                this.charIndex -= 1
+            }
+            return codePointIndex
+        }
+
+        /**
+         * Converts a code point index to a code unit index and moves the cached pair there.
+         *
+         * @param codePointIndex Index into the string measured in code points.
+         * @return the code unit index that corresponds to the specified code point index.
+         */
+        fun codePointsToCodeUnits(codePointIndex: Int): Int {
+            // The offset passed to offsetByCodePoints may be negative, so moving backwards works.
+            charIndex = text.offsetByCodePoints(charIndex, codePointIndex - this.codePointIndex)
+            this.codePointIndex = codePointIndex
+            return charIndex
+        }
+    }
+
+    companion object {
+        /**
+         * The maximum URL length that the Twitter backend supports.
+         */
+        const val MAX_URL_LENGTH = 4096
+
+        /**
+         * Length assumed for the protocol of a URL that has none.
+         *
+         * The backend adds http:// for normal links and https to *.twitter.com URLs (it also
+         * rewrites http to https for URLs matching *.twitter.com). We're better off adding
+         * https:// all the time. By assuming the protocol is https, the trade off is we'll
+         * disallow a http URL that is 4096 characters.
+         */
+        private const val URL_GROUP_PROTOCOL_LENGTH = "https://".length
+
+        /**
+         * Checks that [originalHost] can be converted to ASCII by [IDN.toASCII], and that the
+         * URL, measured with the converted host and a protocol, fits in [MAX_URL_LENGTH].
+         *
+         * @param originalUrlLength the length of the entire URL, including the protocol if any
+         * @param protocol the protocol used, or null if the URL has none
+         * @param originalHost the host name to validate
+         * @return true if the host is valid and the URL is not too long
+         */
+        fun isValidHostAndLength(
+            originalUrlLength: Int,
+            protocol: String?,
+            originalHost: String?,
+        ): Boolean {
+            if (originalHost.isNullOrEmpty()) {
+                return false
+            }
+
+            // Use IDN for all host names; if the host is all ASCII, it returns unchanged. It
+            // comes with an added benefit of checking host length to be between 1 and 63
+            // characters. toASCII can throw IndexOutOfBoundsException when the domain name is
+            // longer than 256 characters, instead of the documented IllegalArgumentException.
+            val asciiHost: String = try {
+                IDN.toASCII(originalHost, IDN.ALLOW_UNASSIGNED)
+            } catch (e: IllegalArgumentException) {
+                return false
+            } catch (e: IndexOutOfBoundsException) {
+                return false
+            }
+            if (asciiHost.isEmpty()) {
+                return false
+            }
+
+            // Converting the host may change its length; apply the difference to the URL length.
+            val hostLengthChange = asciiHost.length - originalHost.length
+            // Add the protocol to our length check, if there isn't one, to ensure it doesn't go
+            // over the limit.
+            val assumedProtocolLength = if (protocol == null) URL_GROUP_PROTOCOL_LENGTH else 0
+            return originalUrlLength + hostLengthChange + assumedProtocolLength <= MAX_URL_LENGTH
+        }
+    }
+}
diff --git a/core/ui/src/main/kotlin/com/twitter/twittertext/Regex.kt b/core/ui/src/main/kotlin/com/twitter/twittertext/Regex.kt
new file mode 100644
index 000000000..0d65b4f6b
--- /dev/null
+++ b/core/ui/src/main/kotlin/com/twitter/twittertext/Regex.kt
@@ -0,0 +1,296 @@
+// Copyright 2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0
+// http://www.apache.org/licenses/LICENSE-2.0
+
+package com.twitter.twittertext
+
+import java.util.regex.Pattern
+
+/**
+ * Regular expressions for finding hashtags, mentions, and URLs in text. Every [Pattern] is
+ * compiled once, in the `init` block below.
+ */
+object Regex {
+    private val URL_VALID_GTLD = "(?:(?:" +
+        join(TldLists.GTLDS) +
+        ")(?=[^a-z0-9@+-]|$))"
+    private val URL_VALID_CCTLD = "(?:(?:" +
+        join(TldLists.CTLDS) +
+        ")(?=[^a-z0-9@+-]|$))"
+    private const val INVALID_CHARACTERS = "\\uFFFE" + // BOM
+        "\\uFEFF" + // BOM
+        "\\uFFFF" // Special
+    private const val DIRECTIONAL_CHARACTERS = "\\u061C" + // ARABIC LETTER MARK (ALM)
+        "\\u200E" + // LEFT-TO-RIGHT MARK (LRM)
+        "\\u200F" + // RIGHT-TO-LEFT MARK (RLM)
+        "\\u202A" + // LEFT-TO-RIGHT EMBEDDING (LRE)
+        "\\u202B" + // RIGHT-TO-LEFT EMBEDDING (RLE)
+        "\\u202C" + // POP DIRECTIONAL FORMATTING (PDF)
+        "\\u202D" + // LEFT-TO-RIGHT OVERRIDE (LRO)
+        "\\u202E" + // RIGHT-TO-LEFT OVERRIDE (RLO)
+        "\\u2066" + // LEFT-TO-RIGHT ISOLATE (LRI)
+        "\\u2067" + // RIGHT-TO-LEFT ISOLATE (RLI)
+        "\\u2068" + // FIRST STRONG ISOLATE (FSI)
+        "\\u2069" // POP DIRECTIONAL ISOLATE (PDI)
+    private const val UNICODE_SPACES = "[" +
+        "\\u0009-\\u000d" + //  # White_Space # Cc   [5] <control-0009>..<control-000D>
+        "\\u0020" + // White_Space # Zs       SPACE
+        "\\u0085" + // White_Space # Cc       <control-0085>
+        "\\u00a0" + // White_Space # Zs       NO-BREAK SPACE
+        "\\u1680" + // White_Space # Zs       OGHAM SPACE MARK
+        "\\u180E" + // White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
+        "\\u2000-\\u200a" + // # White_Space # Zs  [11] EN QUAD..HAIR SPACE
+        "\\u2028" + // White_Space # Zl       LINE SEPARATOR
+        "\\u2029" + // White_Space # Zp       PARAGRAPH SEPARATOR
+        "\\u202F" + // White_Space # Zs       NARROW NO-BREAK SPACE
+        "\\u205F" + // White_Space # Zs       MEDIUM MATHEMATICAL SPACE
+        "\\u3000" + // White_Space # Zs       IDEOGRAPHIC SPACE
+        "]"
+    private const val LATIN_ACCENTS_CHARS =
+        "\\u00c0-\\u00d6\\u00d8-\\u00f6\\u00f8-\\u00ff" + // Latin-1
+            "\\u0100-\\u024f" + // Latin Extended A and B
+            "\\u0253\\u0254\\u0256\\u0257\\u0259\\u025b\\u0263\\u0268\\u026f\\u0272\\u0289\\u028b" + // IPA Extensions
+            "\\u02bb" + // Hawaiian
+            "\\u0300-\\u036f" + // Combining diacritics
+            "\\u1e00-\\u1eff" // Latin Extended Additional (mostly for Vietnamese)
+    private const val CYRILLIC_CHARS = "\\u0400-\\u04ff"
+
+    // Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Java's \p{L}\p{M}
+    private const val HASHTAG_LETTERS_AND_MARKS = "\\p{L}\\p{M}" +
+        "\\u037f\\u0528-\\u052f\\u08a0-\\u08b2\\u08e4-\\u08ff\\u0978\\u0980\\u0c00\\u0c34\\u0c81" +
+        "\\u0d01\\u0ede\\u0edf\\u10c7\\u10cd\\u10fd-\\u10ff\\u16f1-\\u16f8\\u17b4\\u17b5\\u191d" +
+        "\\u191e\\u1ab0-\\u1abe\\u1bab-\\u1bad\\u1bba-\\u1bbf\\u1cf3-\\u1cf6\\u1cf8\\u1cf9" +
+        "\\u1de7-\\u1df5\\u2cf2\\u2cf3\\u2d27\\u2d2d\\u2d66\\u2d67\\u9fcc\\ua674-\\ua67b\\ua698" +
+        "-\\ua69d\\ua69f\\ua792-\\ua79f\\ua7aa-\\ua7ad\\ua7b0\\ua7b1\\ua7f7-\\ua7f9\\ua9e0-" +
+        "\\ua9ef\\ua9fa-\\ua9fe\\uaa7c-\\uaa7f\\uaae0-\\uaaef\\uaaf2-\\uaaf6\\uab30-\\uab5a" +
+        "\\uab5c-\\uab5f\\uab64\\uab65\\uf870-\\uf87f\\uf882\\uf884-\\uf89f\\uf8b8\\uf8c1-" +
+        "\\uf8d6\\ufa2e\\ufa2f\\ufe27-\\ufe2d\\ud800\\udee0\\ud800\\udf1f\\ud800\\udf50-\\ud800" +
+        "\\udf7a\\ud801\\udd00-\\ud801\\udd27\\ud801\\udd30-\\ud801\\udd63\\ud801\\ude00-\\ud801" +
+        "\\udf36\\ud801\\udf40-\\ud801\\udf55\\ud801\\udf60-\\ud801\\udf67\\ud802\\udc60-\\ud802" +
+        "\\udc76\\ud802\\udc80-\\ud802\\udc9e\\ud802\\udd80-\\ud802\\uddb7\\ud802\\uddbe\\ud802" +
+        "\\uddbf\\ud802\\ude80-\\ud802\\ude9c\\ud802\\udec0-\\ud802\\udec7\\ud802\\udec9-\\ud802" +
+        "\\udee6\\ud802\\udf80-\\ud802\\udf91\\ud804\\udc7f\\ud804\\udcd0-\\ud804\\udce8\\ud804" +
+        "\\udd00-\\ud804\\udd34\\ud804\\udd50-\\ud804\\udd73\\ud804\\udd76\\ud804\\udd80-\\ud804" +
+        "\\uddc4\\ud804\\uddda\\ud804\\ude00-\\ud804\\ude11\\ud804\\ude13-\\ud804\\ude37\\ud804" +
+        "\\udeb0-\\ud804\\udeea\\ud804\\udf01-\\ud804\\udf03\\ud804\\udf05-\\ud804\\udf0c\\ud804" +
+        "\\udf0f\\ud804\\udf10\\ud804\\udf13-\\ud804\\udf28\\ud804\\udf2a-\\ud804\\udf30\\ud804" +
+        "\\udf32\\ud804\\udf33\\ud804\\udf35-\\ud804\\udf39\\ud804\\udf3c-\\ud804\\udf44\\ud804" +
+        "\\udf47\\ud804\\udf48\\ud804\\udf4b-\\ud804\\udf4d\\ud804\\udf57\\ud804\\udf5d-\\ud804" +
+        "\\udf63\\ud804\\udf66-\\ud804\\udf6c\\ud804\\udf70-\\ud804\\udf74\\ud805\\udc80-\\ud805" +
+        "\\udcc5\\ud805\\udcc7\\ud805\\udd80-\\ud805\\uddb5\\ud805\\uddb8-\\ud805\\uddc0\\ud805" +
+        "\\ude00-\\ud805\\ude40\\ud805\\ude44\\ud805\\ude80-\\ud805\\udeb7\\ud806\\udca0-\\ud806" +
+        "\\udcdf\\ud806\\udcff\\ud806\\udec0-\\ud806\\udef8\\ud808\\udf6f-\\ud808\\udf98\\ud81a" +
+        "\\ude40-\\ud81a\\ude5e\\ud81a\\uded0-\\ud81a\\udeed\\ud81a\\udef0-\\ud81a\\udef4\\ud81a" +
+        "\\udf00-\\ud81a\\udf36\\ud81a\\udf40-\\ud81a\\udf43\\ud81a\\udf63-\\ud81a\\udf77\\ud81a" +
+        "\\udf7d-\\ud81a\\udf8f\\ud81b\\udf00-\\ud81b\\udf44\\ud81b\\udf50-\\ud81b\\udf7e\\ud81b" +
+        "\\udf8f-\\ud81b\\udf9f\\ud82f\\udc00-\\ud82f\\udc6a\\ud82f\\udc70-\\ud82f\\udc7c\\ud82f" +
+        "\\udc80-\\ud82f\\udc88\\ud82f\\udc90-\\ud82f\\udc99\\ud82f\\udc9d\\ud82f\\udc9e\\ud83a" +
+        "\\udc00-\\ud83a\\udcc4\\ud83a\\udcd0-\\ud83a\\udcd6\\ud83b\\ude00-\\ud83b\\ude03\\ud83b" +
+        "\\ude05-\\ud83b\\ude1f\\ud83b\\ude21\\ud83b\\ude22\\ud83b\\ude24\\ud83b\\ude27\\ud83b" +
+        "\\ude29-\\ud83b\\ude32\\ud83b\\ude34-\\ud83b\\ude37\\ud83b\\ude39\\ud83b\\ude3b\\ud83b" +
+        "\\ude42\\ud83b\\ude47\\ud83b\\ude49\\ud83b\\ude4b\\ud83b\\ude4d-\\ud83b\\ude4f\\ud83b" +
+        "\\ude51\\ud83b\\ude52\\ud83b\\ude54\\ud83b\\ude57\\ud83b\\ude59\\ud83b\\ude5b\\ud83b" +
+        "\\ude5d\\ud83b\\ude5f\\ud83b\\ude61\\ud83b\\ude62\\ud83b\\ude64\\ud83b\\ude67-\\ud83b" +
+        "\\ude6a\\ud83b\\ude6c-\\ud83b\\ude72\\ud83b\\ude74-\\ud83b\\ude77\\ud83b\\ude79-\\ud83b" +
+        "\\ude7c\\ud83b\\ude7e\\ud83b\\ude80-\\ud83b\\ude89\\ud83b\\ude8b-\\ud83b\\ude9b\\ud83b" +
+        "\\udea1-\\ud83b\\udea3\\ud83b\\udea5-\\ud83b\\udea9\\ud83b\\udeab-\\ud83b\\udebb"
+
+    // Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Java's \p{Nd}
+    private const val HASHTAG_NUMERALS = "\\p{Nd}" +
+        "\\u0de6-\\u0def\\ua9f0-\\ua9f9\\ud804\\udcf0-\\ud804\\udcf9\\ud804\\udd36-\\ud804" +
+        "\\udd3f\\ud804\\uddd0-\\ud804\\uddd9\\ud804\\udef0-\\ud804\\udef9\\ud805\\udcd0-\\ud805" +
+        "\\udcd9\\ud805\\ude50-\\ud805\\ude59\\ud805\\udec0-\\ud805\\udec9\\ud806\\udce0-\\ud806" +
+        "\\udce9\\ud81a\\ude60-\\ud81a\\ude69\\ud81a\\udf50-\\ud81a\\udf59"
+    private const val HASHTAG_SPECIAL_CHARS = "_" + // underscore
+        "\\u200c" + // ZERO WIDTH NON-JOINER (ZWNJ)
+        "\\u200d" + // ZERO WIDTH JOINER (ZWJ)
+        "\\ua67e" + // CYRILLIC KAVYKA
+        "\\u05be" + // HEBREW PUNCTUATION MAQAF
+        "\\u05f3" + // HEBREW PUNCTUATION GERESH
+        "\\u05f4" + // HEBREW PUNCTUATION GERSHAYIM
+        "\\uff5e" + // FULLWIDTH TILDE
+        "\\u301c" + // WAVE DASH
+        "\\u309b" + // KATAKANA-HIRAGANA VOICED SOUND MARK
+        "\\u309c" + // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+        "\\u30a0" + // KATAKANA-HIRAGANA DOUBLE HYPHEN
+        "\\u30fb" + // KATAKANA MIDDLE DOT
+        "\\u3003" + // DITTO MARK
+        "\\u0f0b" + // TIBETAN MARK INTERSYLLABIC TSHEG
+        "\\u0f0c" + // TIBETAN MARK DELIMITER TSHEG BSTAR
+        "\\u00b7" // MIDDLE DOT
+    private const val HASHTAG_LETTERS_NUMERALS =
+        HASHTAG_LETTERS_AND_MARKS + HASHTAG_NUMERALS + HASHTAG_SPECIAL_CHARS
+    private const val HASHTAG_LETTERS_SET = "[$HASHTAG_LETTERS_AND_MARKS]"
+    private const val HASHTAG_LETTERS_NUMERALS_SET = "[$HASHTAG_LETTERS_NUMERALS]"
+
+    /* URL related hash regex collection */
+    private const val URL_VALID_PRECEDING_CHARS =
+        "(?:[^a-z0-9@＠$#＃$INVALID_CHARACTERS]|[$DIRECTIONAL_CHARACTERS]|^)"
+    private const val URL_VALID_CHARS = "[a-z0-9$LATIN_ACCENTS_CHARS]"
+    private const val URL_VALID_SUBDOMAIN =
+        "(?>(?:$URL_VALID_CHARS[$URL_VALID_CHARS\\-_]*)?$URL_VALID_CHARS\\.)"
+    private const val URL_VALID_DOMAIN_NAME =
+        "(?:(?:$URL_VALID_CHARS[$URL_VALID_CHARS\\-]*)?$URL_VALID_CHARS\\.)"
+    private const val PUNCTUATION_CHARS = "-_!\"#$%&'\\(\\)*+,./:;<=>?@\\[\\]^`\\{|}~"
+
+    // Any non-space, non-punctuation characters.
+    // \p{Z} = any kind of whitespace or invisible separator.
+    private const val URL_VALID_UNICODE_CHARS =
+        "[^$PUNCTUATION_CHARS\\s\\p{Z}\\p{InGeneralPunctuation}]"
+    private const val URL_VALID_UNICODE_DOMAIN_NAME =
+        "(?:(?:" + URL_VALID_UNICODE_CHARS + "[" + URL_VALID_UNICODE_CHARS + "\\-]*)?" +
+            URL_VALID_UNICODE_CHARS + "\\.)"
+    private const val URL_PUNYCODE = "(?:xn--[-0-9a-z]+)"
+    private val URL_VALID_DOMAIN = "(?:" + // optional sub-domain + domain + TLD
+        URL_VALID_SUBDOMAIN + "*" + URL_VALID_DOMAIN_NAME + // e.g. twitter.com, foo.co.jp ...
+        "(?:" + URL_VALID_GTLD + "|" + URL_VALID_CCTLD + "|" + URL_PUNYCODE + ")" +
+        ")" +
+        "|(?:" + "(?<=https?://)" +
+        "(?:" +
+        "(?:" + URL_VALID_DOMAIN_NAME + URL_VALID_CCTLD + ")" + // protocol + domain + ccTLD
+        "|(?:" +
+        URL_VALID_UNICODE_DOMAIN_NAME + // protocol + unicode domain + TLD
+        "(?:" + URL_VALID_GTLD + "|" + URL_VALID_CCTLD + ")" +
+        ")" +
+        ")" +
+        ")" +
+        "|(?:" + // domain + ccTLD + '/'
+        URL_VALID_DOMAIN_NAME + URL_VALID_CCTLD + "(?=/)" + // e.g. t.co/
+        ")"
+    private const val URL_VALID_PORT_NUMBER = "[0-9]++"
+    private const val URL_VALID_GENERAL_PATH_CHARS =
+        "[a-z0-9!\\*';:=\\+,.\\$/%#\\[\\]\\-\\u2013_~\\|&@" +
+            LATIN_ACCENTS_CHARS + CYRILLIC_CHARS + "]"
+
+    /**
+     * Allow URL paths to contain up to two nested levels of balanced parens
+     * 1. Used in Wikipedia URLs like /Primer_(film)
+     * 2. Used in IIS sessions like /S(dfd346)/
+     * 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/
+     */
+    private const val URL_BALANCED_PARENS = "\\(" +
+        "(?:" +
+        URL_VALID_GENERAL_PATH_CHARS + "+" +
+        "|" + // allow one nested level of balanced parentheses
+        "(?:" +
+        URL_VALID_GENERAL_PATH_CHARS + "*" +
+        "\\(" +
+        URL_VALID_GENERAL_PATH_CHARS + "+" +
+        "\\)" +
+        URL_VALID_GENERAL_PATH_CHARS + "*" +
+        ")" +
+        ")" +
+        "\\)"
+
+    /**
+     * Valid end-of-path characters (so /foo. does not gobble the period).
+     * 2. Allow =&# for empty URL parameters and other URL-join artifacts
+     */
+    private const val URL_VALID_PATH_ENDING_CHARS =
+        "[a-z0-9=_#/\\-\\+" + LATIN_ACCENTS_CHARS + CYRILLIC_CHARS + "]|(?:" +
+            URL_BALANCED_PARENS + ")"
+    private const val URL_VALID_PATH = "(?:" +
+        "(?:" +
+        URL_VALID_GENERAL_PATH_CHARS + "*" +
+        "(?:" + URL_BALANCED_PARENS + URL_VALID_GENERAL_PATH_CHARS + "*)*" +
+        URL_VALID_PATH_ENDING_CHARS +
+        ")|(?:@" + URL_VALID_GENERAL_PATH_CHARS + "+/)" +
+        ")"
+    private const val URL_VALID_URL_QUERY_CHARS =
+        "[a-z0-9!?\\*'\\(\\);:&=\\+\\$/%#\\[\\]\\-_\\.,~\\|@]"
+    private const val URL_VALID_URL_QUERY_ENDING_CHARS = "[a-z0-9\\-_&=#/]"
+    private val VALID_URL_PATTERN_STRING = "(" + //  $1 total match
+        "(" + URL_VALID_PRECEDING_CHARS + ")" + //  $2 Preceding character
+        "(" + //  $3 URL
+        "(https?://)?" + //  $4 Protocol (optional)
+        "(" + URL_VALID_DOMAIN + ")" + //  $5 Domain(s)
+        "(?::(" + URL_VALID_PORT_NUMBER + "))?" + //  $6 Port number (optional)
+        "(/" +
+        URL_VALID_PATH + "*+" +
+        ")?" + //  $7 URL Path and anchor
+        "(\\?" + URL_VALID_URL_QUERY_CHARS + "*" + //  $8 Query String
+        URL_VALID_URL_QUERY_ENDING_CHARS + ")?" +
+        ")" +
+        ")"
+    private const val AT_SIGNS_CHARS = "@\uFF20"
+
+    /* Begin public constants */
+    private val INVALID_CHARACTERS_PATTERN: Pattern
+    val VALID_HASHTAG: Pattern
+    const val VALID_HASHTAG_GROUP_TAG = 1
+    val INVALID_HASHTAG_MATCH_END: Pattern
+    private val RTL_CHARACTERS: Pattern
+    private val AT_SIGNS: Pattern
+    val VALID_MENTION_OR_LIST: Pattern
+    const val VALID_MENTION_OR_LIST_GROUP_USERNAME = 3
+    const val VALID_MENTION_OR_LIST_GROUP_LIST = 4
+    private val VALID_REPLY: Pattern
+    val INVALID_MENTION_MATCH_END: Pattern
+
+    /**
+     * Regex to extract URL (it also includes the text preceding the url).
+     *
+     * This regex does not reflect its name and [Regex.VALID_URL_GROUP_URL] match
+     * should be checked in order to match a valid url. This is not ideal, but the behavior is
+     * being kept to ensure backwards compatibility. Ideally this regex should be
+     * implemented with a negative lookbehind as opposed to a negated character class
+     * but lack of JS support increases maint overhead if the logic is different by
+     * platform.
+     */
+    val VALID_URL: Pattern
+    const val VALID_URL_GROUP_BEFORE = 2
+    const val VALID_URL_GROUP_URL = 3
+    const val VALID_URL_GROUP_PROTOCOL = 4
+    const val VALID_URL_GROUP_DOMAIN = 5
+    val INVALID_URL_WITHOUT_PROTOCOL_MATCH_BEGIN: Pattern
+    private val VALID_DOMAIN: Pattern
+
+    // Mastodon hashtag regular expressions. Different from the Twitter ones, in particular,
+    // they can be preceded by e.g., Hiragana characters without an intervening space.
+    // See HASHTAG_RE in https://github.com/mastodon/mastodon/blob/main/app/models/tag.rb
+    // (which is VALID_HASHTAG in this file).
+    // NOTE(review): on a stock JVM `\w` and `\p{Alpha}` only match ASCII unless
+    // Pattern.UNICODE_CHARACTER_CLASS is set; presumably Android's regex engine treats them as
+    // Unicode-aware -- confirm, including for unit tests that run on the host JVM.
+    private const val HASHTAG_SEPARATORS = "_\u00B7\u30FB\u200c"
+    private const val HASHTAG_FIRST_SEQUENCE_CHUNK_ONE = """[\w_][\w$HASHTAG_SEPARATORS]*[\p{Alpha}$HASHTAG_SEPARATORS]"""
+    private const val HASHTAG_FIRST_SEQUENCE_CHUNK_TWO = """[\w$HASHTAG_SEPARATORS]*[\w_]"""
+    private const val HASHTAG_FIRST_SEQUENCE = "($HASHTAG_FIRST_SEQUENCE_CHUNK_ONE$HASHTAG_FIRST_SEQUENCE_CHUNK_TWO)"
+    private const val HASHTAG_LAST_SEQUENCE = """([\w_]*[\p{L}][\w_]*)"""
+    private const val HASHTAG_NAME_PAT: String = "$HASHTAG_FIRST_SEQUENCE|$HASHTAG_LAST_SEQUENCE"
+
+    // initializing in a static synchronized block,
+    // there appears to be thread safety issues with Pattern.compile in android
+    init {
+        synchronized(Regex::class.java) {
+            INVALID_CHARACTERS_PATTERN = Pattern.compile(".*[$INVALID_CHARACTERS].*")
+            VALID_HASHTAG = Pattern.compile(
+                "(?<![=/)a-zA-Z0-9_])[#＃]($HASHTAG_NAME_PAT)",
+                Pattern.CASE_INSENSITIVE,
+            )
+            INVALID_HASHTAG_MATCH_END = Pattern.compile("^(?:[#＃]|://)")
+            RTL_CHARACTERS =
+                Pattern.compile("[\u0600-\u06FF\u0750-\u077F\u0590-\u05FF\uFE70-\uFEFF]")
+            AT_SIGNS = Pattern.compile("[$AT_SIGNS_CHARS]")
+            // $AT_SIGNS interpolates the compiled Pattern; its toString() is the source regex.
+            VALID_MENTION_OR_LIST = Pattern.compile(
+                "([^a-z0-9_!#$%&*=$AT_SIGNS_CHARS]|^|(?:^|[^a-z0-9_+~.-])RT:?)($AT_SIGNS+)([a-z0-9_]+)($AT_SIGNS[a-z][a-z0-9_\\-.]+)?",
+                Pattern.CASE_INSENSITIVE,
+            )
+            // DIRECTIONAL_CHARACTERS is a bare run of code points, so it needs its own [...]
+            // to match any one of them rather than the whole run in sequence.
+            VALID_REPLY = Pattern.compile(
+                "^(?:" + UNICODE_SPACES + "|[" + DIRECTIONAL_CHARACTERS + "])*" +
+                    AT_SIGNS + "([a-z0-9_]{1,20})",
+                Pattern.CASE_INSENSITIVE,
+            )
+            INVALID_MENTION_MATCH_END = Pattern.compile("^(?:[$AT_SIGNS_CHARS$LATIN_ACCENTS_CHARS]|://)")
+            INVALID_URL_WITHOUT_PROTOCOL_MATCH_BEGIN = Pattern.compile("[-_./]$")
+            VALID_URL = Pattern.compile(VALID_URL_PATTERN_STRING, Pattern.CASE_INSENSITIVE)
+            VALID_DOMAIN = Pattern.compile(URL_VALID_DOMAIN, Pattern.CASE_INSENSITIVE)
+        }
+    }
+
+    /** Joins the string form of each element of [col] with "|", giving a regex alternation. */
+    private fun join(col: Collection<*>): String = col.joinToString("|")
+}
diff --git a/core/ui/src/main/kotlin/com/twitter/twittertext/TldLists.kt b/core/ui/src/main/kotlin/com/twitter/twittertext/TldLists.kt
new file mode 100644
index 000000000..dbb01b783
--- /dev/null
+++ b/core/ui/src/main/kotlin/com/twitter/twittertext/TldLists.kt
@@ -0,0 +1,1586 @@
+// Copyright 2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0
+// http://www.apache.org/licenses/LICENSE-2.0
+// Auto-generated by conformance/Rakefile
+package com.twitter.twittertext
+
+object TldLists {
+    val GTLDS: List<String> = mutableListOf(
+        "삼성",
+        "닷컴",
+        "닷넷",
+        "香格里拉",
+        "餐厅",
+        "食品",
+        "飞利浦",
+        "電訊盈科",
+        "集团",
+        "通販",
+        "购物",
+        "谷歌",
+        "诺基亚",
+        "联通",
+        "网络",
+        "网站",
+        "网店",
+        "网址",
+        "组织机构",
+        "移动",
+        "珠宝",
+        "点看",
+        "游戏",
+        "淡马锡",
+        "机构",
+        "書籍",
+        "时尚",
+        "新闻",
+        "政府",
+        "政务",
+        "招聘",
+        "手表",
+        "手机",
+        "我爱你",
+        "慈善",
+        "微博",
+        "广东",
+        "工行",
+        "家電",
+        "娱乐",
+        "天主教",
+        "大拿",
+        "大众汽车",
+        "在线",
+        "嘉里大酒店",
+        "嘉里",
+        "商标",
+        "商店",
+        "商城",
+        "公益",
+        "公司",
+        "八卦",
+        "健康",
+        "信息",
+        "佛山",
+        "企业",
+        "中文网",
+        "中信",
+        "世界",
+        "ポイント",
+        "ファッション",
+        "セール",
+        "ストア",
+        "コム",
+        "グーグル",
+        "クラウド",
+        "みんな",
+        "คอม",
+        "संगठन",
+        "नेट",
+        "कॉम",
+        "همراه",
+        "موقع",
+        "موبايلي",
+        "كوم",
+        "كاثوليك",
+        "عرب",
+        "شبكة",
+        "بيتك",
+        "بازار",
+        "العليان",
+        "ارامكو",
+        "اتصالات",
+        "ابوظبي",
+        "קום",
+        "сайт",
+        "рус",
+        "орг",
+        "онлайн",
+        "москва",
+        "ком",
+        "католик",
+        "дети",
+        "zuerich",
+        "zone",
+        "zippo",
+        "zip",
+        "zero",
+        "zara",
+        "zappos",
+        "yun",
+        "youtube",
+        "you",
+        "yokohama",
+        "yoga",
+        "yodobashi",
+        "yandex",
+        "yamaxun",
+        "yahoo",
+        "yachts",
+        "xyz",
+        "xxx",
+        "xperia",
+        "xin",
+        "xihuan",
+        "xfinity",
+        "xerox",
+        "xbox",
+        "wtf",
+        "wtc",
+        "wow",
+        "world",
+        "works",
+        "work",
+        "woodside",
+        "wolterskluwer",
+        "wme",
+        "winners",
+        "wine",
+        "windows",
+        "win",
+        "williamhill",
+        "wiki",
+        "wien",
+        "whoswho",
+        "weir",
+        "weibo",
+        "wedding",
+        "wed",
+        "website",
+        "weber",
+        "webcam",
+        "weatherchannel",
+        "weather",
+        "watches",
+        "watch",
+        "warman",
+        "wanggou",
+        "wang",
+        "walter",
+        "walmart",
+        "wales",
+        "vuelos",
+        "voyage",
+        "voto",
+        "voting",
+        "vote",
+        "volvo",
+        "volkswagen",
+        "vodka",
+        "vlaanderen",
+        "vivo",
+        "viva",
+        "vistaprint",
+        "vista",
+        "vision",
+        "visa",
+        "virgin",
+        "vip",
+        "vin",
+        "villas",
+        "viking",
+        "vig",
+        "video",
+        "viajes",
+        "vet",
+        "versicherung",
+        "vermögensberatung",
+        "vermögensberater",
+        "verisign",
+        "ventures",
+        "vegas",
+        "vanguard",
+        "vana",
+        "vacations",
+        "ups",
+        "uol",
+        "uno",
+        "university",
+        "unicom",
+        "uconnect",
+        "ubs",
+        "ubank",
+        "tvs",
+        "tushu",
+        "tunes",
+        "tui",
+        "tube",
+        "trv",
+        "trust",
+        "travelersinsurance",
+        "travelers",
+        "travelchannel",
+        "travel",
+        "training",
+        "trading",
+        "trade",
+        "toys",
+        "toyota",
+        "town",
+        "tours",
+        "total",
+        "toshiba",
+        "toray",
+        "top",
+        "tools",
+        "tokyo",
+        "today",
+        "tmall",
+        "tkmaxx",
+        "tjx",
+        "tjmaxx",
+        "tirol",
+        "tires",
+        "tips",
+        "tiffany",
+        "tienda",
+        "tickets",
+        "tiaa",
+        "theatre",
+        "theater",
+        "thd",
+        "teva",
+        "tennis",
+        "temasek",
+        "telefonica",
+        "telecity",
+        "tel",
+        "technology",
+        "tech",
+        "team",
+        "tdk",
+        "tci",
+        "taxi",
+        "tax",
+        "tattoo",
+        "tatar",
+        "tatamotors",
+        "target",
+        "taobao",
+        "talk",
+        "taipei",
+        "tab",
+        "systems",
+        "symantec",
+        "sydney",
+        "swiss",
+        "swiftcover",
+        "swatch",
+        "suzuki",
+        "surgery",
+        "surf",
+        "support",
+        "supply",
+        "supplies",
+        "sucks",
+        "style",
+        "study",
+        "studio",
+        "stream",
+        "store",
+        "storage",
+        "stockholm",
+        "stcgroup",
+        "stc",
+        "statoil",
+        "statefarm",
+        "statebank",
+        "starhub",
+        "star",
+        "staples",
+        "stada",
+        "srt",
+        "srl",
+        "spreadbetting",
+        "spot",
+        "sport",
+        "spiegel",
+        "space",
+        "soy",
+        "sony",
+        "song",
+        "solutions",
+        "solar",
+        "sohu",
+        "software",
+        "softbank",
+        "social",
+        "soccer",
+        "sncf",
+        "smile",
+        "smart",
+        "sling",
+        "skype",
+        "sky",
+        "skin",
+        "ski",
+        "site",
+        "singles",
+        "sina",
+        "silk",
+        "shriram",
+        "showtime",
+        "show",
+        "shouji",
+        "shopping",
+        "shop",
+        "shoes",
+        "shiksha",
+        "shia",
+        "shell",
+        "shaw",
+        "sharp",
+        "shangrila",
+        "sfr",
+        "sexy",
+        "sex",
+        "sew",
+        "seven",
+        "ses",
+        "services",
+        "sener",
+        "select",
+        "seek",
+        "security",
+        "secure",
+        "seat",
+        "search",
+        "scot",
+        "scor",
+        "scjohnson",
+        "science",
+        "schwarz",
+        "schule",
+        "school",
+        "scholarships",
+        "schmidt",
+        "schaeffler",
+        "scb",
+        "sca",
+        "sbs",
+        "sbi",
+        "saxo",
+        "save",
+        "sas",
+        "sarl",
+        "sapo",
+        "sap",
+        "sanofi",
+        "sandvikcoromant",
+        "sandvik",
+        "samsung",
+        "samsclub",
+        "salon",
+        "sale",
+        "sakura",
+        "safety",
+        "safe",
+        "saarland",
+        "ryukyu",
+        "rwe",
+        "run",
+        "ruhr",
+        "rugby",
+        "rsvp",
+        "room",
+        "rogers",
+        "rodeo",
+        "rocks",
+        "rocher",
+        "rmit",
+        "rip",
+        "rio",
+        "ril",
+        "rightathome",
+        "ricoh",
+        "richardli",
+        "rich",
+        "rexroth",
+        "reviews",
+        "review",
+        "restaurant",
+        "rest",
+        "republican",
+        "report",
+        "repair",
+        "rentals",
+        "rent",
+        "ren",
+        "reliance",
+        "reit",
+        "reisen",
+        "reise",
+        "rehab",
+        "redumbrella",
+        "redstone",
+        "red",
+        "recipes",
+        "realty",
+        "realtor",
+        "realestate",
+        "read",
+        "raid",
+        "radio",
+        "racing",
+        "qvc",
+        "quest",
+        "quebec",
+        "qpon",
+        "pwc",
+        "pub",
+        "prudential",
+        "pru",
+        "protection",
+        "property",
+        "properties",
+        "promo",
+        "progressive",
+        "prof",
+        "productions",
+        "prod",
+        "pro",
+        "prime",
+        "press",
+        "praxi",
+        "pramerica",
+        "post",
+        "porn",
+        "politie",
+        "poker",
+        "pohl",
+        "pnc",
+        "plus",
+        "plumbing",
+        "playstation",
+        "play",
+        "place",
+        "pizza",
+        "pioneer",
+        "pink",
+        "ping",
+        "pin",
+        "pid",
+        "pictures",
+        "pictet",
+        "pics",
+        "piaget",
+        "physio",
+        "photos",
+        "photography",
+        "photo",
+        "phone",
+        "philips",
+        "phd",
+        "pharmacy",
+        "pfizer",
+        "pet",
+        "pccw",
+        "pay",
+        "passagens",
+        "party",
+        "parts",
+        "partners",
+        "pars",
+        "paris",
+        "panerai",
+        "panasonic",
+        "pamperedchef",
+        "page",
+        "ovh",
+        "ott",
+        "otsuka",
+        "osaka",
+        "origins",
+        "orientexpress",
+        "organic",
+        "org",
+        "orange",
+        "oracle",
+        "open",
+        "ooo",
+        "onyourside",
+        "online",
+        "onl",
+        "ong",
+        "one",
+        "omega",
+        "ollo",
+        "oldnavy",
+        "olayangroup",
+        "olayan",
+        "okinawa",
+        "office",
+        "off",
+        "observer",
+        "obi",
+        "nyc",
+        "ntt",
+        "nrw",
+        "nra",
+        "nowtv",
+        "nowruz",
+        "now",
+        "norton",
+        "northwesternmutual",
+        "nokia",
+        "nissay",
+        "nissan",
+        "ninja",
+        "nikon",
+        "nike",
+        "nico",
+        "nhk",
+        "ngo",
+        "nfl",
+        "nexus",
+        "nextdirect",
+        "next",
+        "news",
+        "newholland",
+        "new",
+        "neustar",
+        "network",
+        "netflix",
+        "netbank",
+        "net",
+        "nec",
+        "nba",
+        "navy",
+        "natura",
+        "nationwide",
+        "name",
+        "nagoya",
+        "nadex",
+        "nab",
+        "mutuelle",
+        "mutual",
+        "museum",
+        "mtr",
+        "mtpc",
+        "mtn",
+        "msd",
+        "movistar",
+        "movie",
+        "mov",
+        "motorcycles",
+        "moto",
+        "moscow",
+        "mortgage",
+        "mormon",
+        "mopar",
+        "montblanc",
+        "monster",
+        "money",
+        "monash",
+        "mom",
+        "moi",
+        "moe",
+        "moda",
+        "mobily",
+        "mobile",
+        "mobi",
+        "mma",
+        "mls",
+        "mlb",
+        "mitsubishi",
+        "mit",
+        "mint",
+        "mini",
+        "mil",
+        "microsoft",
+        "miami",
+        "metlife",
+        "merckmsd",
+        "meo",
+        "menu",
+        "men",
+        "memorial",
+        "meme",
+        "melbourne",
+        "meet",
+        "media",
+        "med",
+        "mckinsey",
+        "mcdonalds",
+        "mcd",
+        "mba",
+        "mattel",
+        "maserati",
+        "marshalls",
+        "marriott",
+        "markets",
+        "marketing",
+        "market",
+        "map",
+        "mango",
+        "management",
+        "man",
+        "makeup",
+        "maison",
+        "maif",
+        "madrid",
+        "macys",
+        "luxury",
+        "luxe",
+        "lupin",
+        "lundbeck",
+        "ltda",
+        "ltd",
+        "lplfinancial",
+        "lpl",
+        "love",
+        "lotto",
+        "lotte",
+        "london",
+        "lol",
+        "loft",
+        "locus",
+        "locker",
+        "loans",
+        "loan",
+        "llp",
+        "llc",
+        "lixil",
+        "living",
+        "live",
+        "lipsy",
+        "link",
+        "linde",
+        "lincoln",
+        "limo",
+        "limited",
+        "lilly",
+        "like",
+        "lighting",
+        "lifestyle",
+        "lifeinsurance",
+        "life",
+        "lidl",
+        "liaison",
+        "lgbt",
+        "lexus",
+        "lego",
+        "legal",
+        "lefrak",
+        "leclerc",
+        "lease",
+        "lds",
+        "lawyer",
+        "law",
+        "latrobe",
+        "latino",
+        "lat",
+        "lasalle",
+        "lanxess",
+        "landrover",
+        "land",
+        "lancome",
+        "lancia",
+        "lancaster",
+        "lamer",
+        "lamborghini",
+        "ladbrokes",
+        "lacaixa",
+        "kyoto",
+        "kuokgroup",
+        "kred",
+        "krd",
+        "kpn",
+        "kpmg",
+        "kosher",
+        "komatsu",
+        "koeln",
+        "kiwi",
+        "kitchen",
+        "kindle",
+        "kinder",
+        "kim",
+        "kia",
+        "kfh",
+        "kerryproperties",
+        "kerrylogistics",
+        "kerryhotels",
+        "kddi",
+        "kaufen",
+        "juniper",
+        "juegos",
+        "jprs",
+        "jpmorgan",
+        "joy",
+        "jot",
+        "joburg",
+        "jobs",
+        "jnj",
+        "jmp",
+        "jll",
+        "jlc",
+        "jio",
+        "jewelry",
+        "jetzt",
+        "jeep",
+        "jcp",
+        "jcb",
+        "java",
+        "jaguar",
+        "iwc",
+        "iveco",
+        "itv",
+        "itau",
+        "istanbul",
+        "ist",
+        "ismaili",
+        "iselect",
+        "irish",
+        "ipiranga",
+        "investments",
+        "intuit",
+        "international",
+        "intel",
+        "int",
+        "insure",
+        "insurance",
+        "institute",
+        "ink",
+        "ing",
+        "info",
+        "infiniti",
+        "industries",
+        "inc",
+        "immobilien",
+        "immo",
+        "imdb",
+        "imamat",
+        "ikano",
+        "iinet",
+        "ifm",
+        "ieee",
+        "icu",
+        "ice",
+        "icbc",
+        "ibm",
+        "hyundai",
+        "hyatt",
+        "hughes",
+        "htc",
+        "hsbc",
+        "how",
+        "house",
+        "hotmail",
+        "hotels",
+        "hoteles",
+        "hot",
+        "hosting",
+        "host",
+        "hospital",
+        "horse",
+        "honeywell",
+        "honda",
+        "homesense",
+        "homes",
+        "homegoods",
+        "homedepot",
+        "holiday",
+        "holdings",
+        "hockey",
+        "hkt",
+        "hiv",
+        "hitachi",
+        "hisamitsu",
+        "hiphop",
+        "hgtv",
+        "hermes",
+        "here",
+        "helsinki",
+        "help",
+        "healthcare",
+        "health",
+        "hdfcbank",
+        "hdfc",
+        "hbo",
+        "haus",
+        "hangout",
+        "hamburg",
+        "hair",
+        "guru",
+        "guitars",
+        "guide",
+        "guge",
+        "gucci",
+        "guardian",
+        "group",
+        "grocery",
+        "gripe",
+        "green",
+        "gratis",
+        "graphics",
+        "grainger",
+        "gov",
+        "got",
+        "gop",
+        "google",
+        "goog",
+        "goodyear",
+        "goodhands",
+        "goo",
+        "golf",
+        "goldpoint",
+        "gold",
+        "godaddy",
+        "gmx",
+        "gmo",
+        "gmbh",
+        "gmail",
+        "globo",
+        "global",
+        "gle",
+        "glass",
+        "glade",
+        "giving",
+        "gives",
+        "gifts",
+        "gift",
+        "ggee",
+        "george",
+        "genting",
+        "gent",
+        "gea",
+        "gdn",
+        "gbiz",
+        "gay",
+        "garden",
+        "gap",
+        "games",
+        "game",
+        "gallup",
+        "gallo",
+        "gallery",
+        "gal",
+        "fyi",
+        "futbol",
+        "furniture",
+        "fund",
+        "fun",
+        "fujixerox",
+        "fujitsu",
+        "ftr",
+        "frontier",
+        "frontdoor",
+        "frogans",
+        "frl",
+        "fresenius",
+        "free",
+        "fox",
+        "foundation",
+        "forum",
+        "forsale",
+        "forex",
+        "ford",
+        "football",
+        "foodnetwork",
+        "food",
+        "foo",
+        "fly",
+        "flsmidth",
+        "flowers",
+        "florist",
+        "flir",
+        "flights",
+        "flickr",
+        "fitness",
+        "fit",
+        "fishing",
+        "fish",
+        "firmdale",
+        "firestone",
+        "fire",
+        "financial",
+        "finance",
+        "final",
+        "film",
+        "fido",
+        "fidelity",
+        "fiat",
+        "ferrero",
+        "ferrari",
+        "feedback",
+        "fedex",
+        "fast",
+        "fashion",
+        "farmers",
+        "farm",
+        "fans",
+        "fan",
+        "family",
+        "faith",
+        "fairwinds",
+        "fail",
+        "fage",
+        "extraspace",
+        "express",
+        "exposed",
+        "expert",
+        "exchange",
+        "everbank",
+        "events",
+        "eus",
+        "eurovision",
+        "etisalat",
+        "esurance",
+        "estate",
+        "esq",
+        "erni",
+        "ericsson",
+        "equipment",
+        "epson",
+        "epost",
+        "enterprises",
+        "engineering",
+        "engineer",
+        "energy",
+        "emerck",
+        "email",
+        "education",
+        "edu",
+        "edeka",
+        "eco",
+        "eat",
+        "earth",
+        "dvr",
+        "dvag",
+        "durban",
+        "dupont",
+        "duns",
+        "dunlop",
+        "duck",
+        "dubai",
+        "dtv",
+        "drive",
+        "download",
+        "dot",
+        "doosan",
+        "domains",
+        "doha",
+        "dog",
+        "dodge",
+        "doctor",
+        "docs",
+        "dnp",
+        "diy",
+        "dish",
+        "discover",
+        "discount",
+        "directory",
+        "direct",
+        "digital",
+        "diet",
+        "diamonds",
+        "dhl",
+        "dev",
+        "design",
+        "desi",
+        "dentist",
+        "dental",
+        "democrat",
+        "delta",
+        "deloitte",
+        "dell",
+        "delivery",
+        "degree",
+        "deals",
+        "dealer",
+        "deal",
+        "dds",
+        "dclk",
+        "day",
+        "datsun",
+        "dating",
+        "date",
+        "data",
+        "dance",
+        "dad",
+        "dabur",
+        "cyou",
+        "cymru",
+        "cuisinella",
+        "csc",
+        "cruises",
+        "cruise",
+        "crs",
+        "crown",
+        "cricket",
+        "creditunion",
+        "creditcard",
+        "credit",
+        "cpa",
+        "courses",
+        "coupons",
+        "coupon",
+        "country",
+        "corsica",
+        "coop",
+        "cool",
+        "cookingchannel",
+        "cooking",
+        "contractors",
+        "contact",
+        "consulting",
+        "construction",
+        "condos",
+        "comsec",
+        "computer",
+        "compare",
+        "company",
+        "community",
+        "commbank",
+        "comcast",
+        "com",
+        "cologne",
+        "college",
+        "coffee",
+        "codes",
+        "coach",
+        "clubmed",
+        "club",
+        "cloud",
+        "clothing",
+        "clinique",
+        "clinic",
+        "click",
+        "cleaning",
+        "claims",
+        "cityeats",
+        "city",
+        "citic",
+        "citi",
+        "citadel",
+        "cisco",
+        "circle",
+        "cipriani",
+        "church",
+        "chrysler",
+        "chrome",
+        "christmas",
+        "chloe",
+        "chintai",
+        "cheap",
+        "chat",
+        "chase",
+        "charity",
+        "channel",
+        "chanel",
+        "cfd",
+        "cfa",
+        "cern",
+        "ceo",
+        "center",
+        "ceb",
+        "cbs",
+        "cbre",
+        "cbn",
+        "cba",
+        "catholic",
+        "catering",
+        "cat",
+        "casino",
+        "cash",
+        "caseih",
+        "case",
+        "casa",
+        "cartier",
+        "cars",
+        "careers",
+        "career",
+        "care",
+        "cards",
+        "caravan",
+        "car",
+        "capitalone",
+        "capital",
+        "capetown",
+        "canon",
+        "cancerresearch",
+        "camp",
+        "camera",
+        "cam",
+        "calvinklein",
+        "call",
+        "cal",
+        "cafe",
+        "cab",
+        "bzh",
+        "buzz",
+        "buy",
+        "business",
+        "builders",
+        "build",
+        "bugatti",
+        "budapest",
+        "brussels",
+        "brother",
+        "broker",
+        "broadway",
+        "bridgestone",
+        "bradesco",
+        "box",
+        "boutique",
+        "bot",
+        "boston",
+        "bostik",
+        "bosch",
+        "boots",
+        "booking",
+        "book",
+        "boo",
+        "bond",
+        "bom",
+        "bofa",
+        "boehringer",
+        "boats",
+        "bnpparibas",
+        "bnl",
+        "bmw",
+        "bms",
+        "blue",
+        "bloomberg",
+        "blog",
+        "blockbuster",
+        "blanco",
+        "blackfriday",
+        "black",
+        "biz",
+        "bio",
+        "bingo",
+        "bing",
+        "bike",
+        "bid",
+        "bible",
+        "bharti",
+        "bet",
+        "bestbuy",
+        "best",
+        "berlin",
+        "bentley",
+        "beer",
+        "beauty",
+        "beats",
+        "bcn",
+        "bcg",
+        "bbva",
+        "bbt",
+        "bbc",
+        "bayern",
+        "bauhaus",
+        "basketball",
+        "baseball",
+        "bargains",
+        "barefoot",
+        "barclays",
+        "barclaycard",
+        "barcelona",
+        "bar",
+        "bank",
+        "band",
+        "bananarepublic",
+        "banamex",
+        "baidu",
+        "baby",
+        "azure",
+        "axa",
+        "aws",
+        "avianca",
+        "autos",
+        "auto",
+        "author",
+        "auspost",
+        "audio",
+        "audible",
+        "audi",
+        "auction",
+        "attorney",
+        "athleta",
+        "associates",
+        "asia",
+        "asda",
+        "arte",
+        "art",
+        "arpa",
+        "army",
+        "archi",
+        "aramco",
+        "arab",
+        "aquarelle",
+        "apple",
+        "app",
+        "apartments",
+        "aol",
+        "anz",
+        "anquan",
+        "android",
+        "analytics",
+        "amsterdam",
+        "amica",
+        "amfam",
+        "amex",
+        "americanfamily",
+        "americanexpress",
+        "alstom",
+        "alsace",
+        "ally",
+        "allstate",
+        "allfinanz",
+        "alipay",
+        "alibaba",
+        "alfaromeo",
+        "akdn",
+        "airtel",
+        "airforce",
+        "airbus",
+        "aigo",
+        "aig",
+        "agency",
+        "agakhan",
+        "africa",
+        "afl",
+        "afamilycompany",
+        "aetna",
+        "aero",
+        "aeg",
+        "adult",
+        "ads",
+        "adac",
+        "actor",
+        "active",
+        "aco",
+        "accountants",
+        "accountant",
+        "accenture",
+        "academy",
+        "abudhabi",
+        "abogado",
+        "able",
+        "abc",
+        "abbvie",
+        "abbott",
+        "abb",
+        "abarth",
+        "aarp",
+        "aaa",
+        "onion",
+    )
+    /**
+     * Country-code top-level domains, including internationalised (non-ASCII) forms.
+     *
+     * Built with [listOf] rather than `mutableListOf` so that this shared constant cannot be
+     * grown or shrunk by a caller that casts it back to a mutable list. The entries and their
+     * order are kept exactly as they were.
+     */
+    val CTLDS: List<String> = listOf(
+        "한국",
+        "香港",
+        "澳門",
+        "新加坡",
+        "台灣",
+        "台湾",
+        "中國",
+        "中国",
+        "გე",
+        "ລາວ",
+        "ไทย",
+        "ලංකා",
+        "ഭാരതം",
+        "ಭಾರತ",
+        "భారత్",
+        "சிங்கப்பூர்",
+        "இலங்கை",
+        "இந்தியா",
+        "ଭାରତ",
+        "ભારત",
+        "ਭਾਰਤ",
+        "ভাৰত",
+        "ভারত",
+        "বাংলা",
+        "भारोत",
+        "भारतम्",
+        "भारत",
+        "ڀارت",
+        "پاکستان",
+        "موريتانيا",
+        "مليسيا",
+        "مصر",
+        "قطر",
+        "فلسطين",
+        "عمان",
+        "عراق",
+        "سورية",
+        "سودان",
+        "تونس",
+        "بھارت",
+        "بارت",
+        "ایران",
+        "امارات",
+        "المغرب",
+        "السعودية",
+        "الجزائر",
+        "البحرين",
+        "الاردن",
+        "հայ",
+        "қаз",
+        "укр",
+        "срб",
+        "рф",
+        "мон",
+        "мкд",
+        "ею",
+        "бел",
+        "бг",
+        "ευ",
+        "ελ",
+        "zw",
+        "zm",
+        "za",
+        "yt",
+        "ye",
+        "ws",
+        "wf",
+        "vu",
+        "vn",
+        "vi",
+        "vg",
+        "ve",
+        "vc",
+        "va",
+        "uz",
+        "uy",
+        "us",
+        "um",
+        "uk",
+        "ug",
+        "ua",
+        "tz",
+        "tw",
+        "tv",
+        "tt",
+        "tr",
+        "tp",
+        "to",
+        "tn",
+        "tm",
+        "tl",
+        "tk",
+        "tj",
+        "th",
+        "tg",
+        "tf",
+        "td",
+        "tc",
+        "sz",
+        "sy",
+        "sx",
+        "sv",
+        "su",
+        "st",
+        "ss",
+        "sr",
+        "so",
+        "sn",
+        "sm",
+        "sl",
+        "sk",
+        "sj",
+        "si",
+        "sh",
+        "sg",
+        "se",
+        "sd",
+        "sc",
+        "sb",
+        "sa",
+        "rw",
+        "ru",
+        "rs",
+        "ro",
+        "re",
+        "qa",
+        "py",
+        "pw",
+        "pt",
+        "ps",
+        "pr",
+        "pn",
+        "pm",
+        "pl",
+        "pk",
+        "ph",
+        "pg",
+        "pf",
+        "pe",
+        "pa",
+        "om",
+        "nz",
+        "nu",
+        "nr",
+        "np",
+        "no",
+        "nl",
+        "ni",
+        "ng",
+        "nf",
+        "ne",
+        "nc",
+        "na",
+        "mz",
+        "my",
+        "mx",
+        "mw",
+        "mv",
+        "mu",
+        "mt",
+        "ms",
+        "mr",
+        "mq",
+        "mp",
+        "mo",
+        "mn",
+        "mm",
+        "ml",
+        "mk",
+        "mh",
+        "mg",
+        "mf",
+        "me",
+        "md",
+        "mc",
+        "ma",
+        "ly",
+        "lv",
+        "lu",
+        "lt",
+        "ls",
+        "lr",
+        "lk",
+        "li",
+        "lc",
+        "lb",
+        "la",
+        "kz",
+        "ky",
+        "kw",
+        "kr",
+        "kp",
+        "kn",
+        "km",
+        "ki",
+        "kh",
+        "kg",
+        "ke",
+        "jp",
+        "jo",
+        "jm",
+        "je",
+        "it",
+        "is",
+        "ir",
+        "iq",
+        "io",
+        "in",
+        "im",
+        "il",
+        "ie",
+        "id",
+        "hu",
+        "ht",
+        "hr",
+        "hn",
+        "hm",
+        "hk",
+        "gy",
+        "gw",
+        "gu",
+        "gt",
+        "gs",
+        "gr",
+        "gq",
+        "gp",
+        "gn",
+        "gm",
+        "gl",
+        "gi",
+        "gh",
+        "gg",
+        "gf",
+        "ge",
+        "gd",
+        "gb",
+        "ga",
+        "fr",
+        "fo",
+        "fm",
+        "fk",
+        "fj",
+        "fi",
+        "eu",
+        "et",
+        "es",
+        "er",
+        "eh",
+        "eg",
+        "ee",
+        "ec",
+        "dz",
+        "do",
+        "dm",
+        "dk",
+        "dj",
+        "de",
+        "cz",
+        "cy",
+        "cx",
+        "cw",
+        "cv",
+        "cu",
+        "cr",
+        "co",
+        "cn",
+        "cm",
+        "cl",
+        "ck",
+        "ci",
+        "ch",
+        "cg",
+        "cf",
+        "cd",
+        "cc",
+        "ca",
+        "bz",
+        "by",
+        "bw",
+        "bv",
+        "bt",
+        "bs",
+        "br",
+        "bq",
+        "bo",
+        "bn",
+        "bm",
+        "bl",
+        "bj",
+        "bi",
+        "bh",
+        "bg",
+        "bf",
+        "be",
+        "bd",
+        "bb",
+        "ba",
+        "az",
+        "ax",
+        "aw",
+        "au",
+        "at",
+        "as",
+        "ar",
+        "aq",
+        "ao",
+        "an",
+        "am",
+        "al",
+        "ai",
+        "ag",
+        "af",
+        "ae",
+        "ad",
+        "ac",
+    )
+}
diff --git a/licenses/libraries/twittertext.json b/licenses/libraries/twittertext.json
new file mode 100644
index 000000000..59a71dcaa
--- /dev/null
+++ b/licenses/libraries/twittertext.json
@@ -0,0 +1,17 @@
+{
+    "uniqueId": "com.twitter:twittertext",
+    "developers": [
+        {
+            "name": "Twitter",
+            "organisationUrl": "https://www.twitter.com"
+        }
+    ],
+    "artifactVersion": "1.14.3",
+    "description": "Libraries and conformance tests to standardize parsing of Tweet text.",
+    "name": "Twitter text parsing",
+    "tag": "text",
+    "licenses": [
+        "Apache_2_0"
+    ],
+    "website": "https://github.com/twitter/twitter-text"
+}