improve span highlighting (#4480)
At first I thought simply changing the regex might help, but then I found more and more differences between Mastodon and Tusky, so I decided to reimplement the thing. I added 74 test cases, all of which I compared against Mastodon to make sure they are correct. On a Fairphone 4 the new implementation is faster; on a Samsung Galaxy Tab S3 it is slower. Test cases for the benchmark: ``` test of a status with #one hashtag http ``` ``` test http:// #hashtag https://connyduck.at/ http://example.org this is a #test and this is a @mention@test.com @test @test@test456@test.com ``` ``` @mention@test.social Just your ordinary mention with a hashtag #test ``` ``` @mention@test.social Just your ordinary mention with a url https://riot.im/app/#/room/#Tusky:matrix.org ``` FP4: ``` 11.159 ns 15 allocs Benchmark.new_1 119.701 ns 43 allocs Benchmark.new_2 21.895 ns 24 allocs Benchmark.new_3 87.512 ns 32 allocs Benchmark.new_4 16.592 ns 46 allocs Benchmark.old_1 134.381 ns 169 allocs Benchmark.old_2 28.355 ns 68 allocs Benchmark.old_3 45.221 ns 77 allocs Benchmark.old_4 ``` SGT3: ``` 43,785 ns 18 allocs Benchmark.new_1 446,074 ns 43 allocs Benchmark.new_2 78,802 ns 26 allocs Benchmark.new_3 315,478 ns 32 allocs Benchmark.new_4 42,186 ns 45 allocs Benchmark.old_1 353,570 ns 157 allocs Benchmark.old_2 72,376 ns 66 allocs Benchmark.old_3 122,985 ns 74 allocs Benchmark.old_4 ``` The benchmark code is here: https://github.com/tuskyapp/tusky-span-benchmark Closes https://github.com/tuskyapp/Tusky/issues/4425
This commit is contained in:
parent
e05ded2e32
commit
8aaca3bb2c
|
@ -90,6 +90,7 @@ import com.keylesspalace.tusky.settings.PrefKeys
|
|||
import com.keylesspalace.tusky.settings.PrefKeys.APP_THEME
|
||||
import com.keylesspalace.tusky.util.MentionSpan
|
||||
import com.keylesspalace.tusky.util.PickMediaFiles
|
||||
import com.keylesspalace.tusky.util.defaultFinders
|
||||
import com.keylesspalace.tusky.util.getInitialLanguages
|
||||
import com.keylesspalace.tusky.util.getLocaleList
|
||||
import com.keylesspalace.tusky.util.getMediaSize
|
||||
|
@ -144,6 +145,9 @@ class ComposeActivity :
|
|||
|
||||
private var photoUploadUri: Uri? = null
|
||||
|
||||
@VisibleForTesting
|
||||
var highlightFinders = defaultFinders
|
||||
|
||||
@VisibleForTesting
|
||||
var maximumTootCharacters = InstanceInfoRepository.DEFAULT_CHARACTER_LIMIT
|
||||
var charactersReservedPerUrl = InstanceInfoRepository.DEFAULT_CHARACTERS_RESERVED_PER_URL
|
||||
|
@ -468,9 +472,9 @@ class ComposeActivity :
|
|||
binding.composeEditField.setSelection(binding.composeEditField.length())
|
||||
|
||||
val mentionColour = binding.composeEditField.linkTextColors.defaultColor
|
||||
highlightSpans(binding.composeEditField.text, mentionColour)
|
||||
binding.composeEditField.text.highlightSpans(mentionColour, highlightFinders)
|
||||
binding.composeEditField.doAfterTextChanged { editable ->
|
||||
highlightSpans(editable!!, mentionColour)
|
||||
editable!!.highlightSpans(mentionColour, highlightFinders)
|
||||
updateVisibleCharactersLeft()
|
||||
viewModel.updateContent(editable.toString())
|
||||
}
|
||||
|
|
|
@ -9,105 +9,99 @@ import android.text.style.DynamicDrawableSpan
|
|||
import android.text.style.ForegroundColorSpan
|
||||
import android.text.style.ImageSpan
|
||||
import android.text.style.URLSpan
|
||||
import com.keylesspalace.tusky.util.twittertext.Regex
|
||||
import com.mikepenz.iconics.IconicsDrawable
|
||||
import com.mikepenz.iconics.typeface.library.googlematerial.GoogleMaterial
|
||||
import java.util.regex.Pattern
|
||||
import kotlin.math.max
|
||||
|
||||
/**
|
||||
* @see <a href="https://github.com/tootsuite/mastodon/blob/master/app/models/tag.rb">
|
||||
* Tag#HASHTAG_RE</a>.
|
||||
*/
|
||||
private const val HASHTAG_SEPARATORS = "_\\u00B7\\u200c"
|
||||
private const val UNICODE_WORD = "\\p{L}\\p{Mn}\\p{Nd}\\p{Nl}\\p{Pc}" // Ugh, java ( https://stackoverflow.com/questions/4304928/unicode-equivalents-for-w-and-b-in-java-regular-expressions )
|
||||
private const val TAG_REGEX = "(?:^|[^/)\\w])#(([${UNICODE_WORD}_][$UNICODE_WORD$HASHTAG_SEPARATORS]*[\\p{Alpha}$HASHTAG_SEPARATORS][$UNICODE_WORD$HASHTAG_SEPARATORS]*[${UNICODE_WORD}_])|([${UNICODE_WORD}_]*[\\p{Alpha}][${UNICODE_WORD}_]*))"
|
||||
private const val HASHTAG_SEPARATORS = "_\\u00B7\\u30FB\\u200c"
|
||||
internal const val TAG_PATTERN_STRING = "(?<![=/)\\p{Alnum}])(#(([\\w_][\\w$HASHTAG_SEPARATORS]*[\\p{Alpha}$HASHTAG_SEPARATORS][\\w$HASHTAG_SEPARATORS]*[\\w_])|([\\w_]*[\\p{Alpha}][\\w_]*)))"
|
||||
private val TAG_PATTERN = TAG_PATTERN_STRING.toPattern(Pattern.CASE_INSENSITIVE)
|
||||
|
||||
/**
|
||||
* @see <a href="https://github.com/tootsuite/mastodon/blob/master/app/models/account.rb">
|
||||
* Account#MENTION_RE</a>
|
||||
*/
|
||||
private const val USERNAME_REGEX = "[\\w]+([\\w\\.-]+[\\w]+)?"
|
||||
private const val MENTION_REGEX = "(?<=^|[^\\/$UNICODE_WORD])@(($USERNAME_REGEX)(?:@[$UNICODE_WORD\\.\\-]+[$UNICODE_WORD]+)?)"
|
||||
private const val USERNAME_PATTERN_STRING = "[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?"
|
||||
internal const val MENTION_PATTERN_STRING = "(?<![=/\\w])(@($USERNAME_PATTERN_STRING)(?:@[\\w.-]+[\\w]+)?)"
|
||||
private val MENTION_PATTERN = MENTION_PATTERN_STRING.toPattern(Pattern.CASE_INSENSITIVE)
|
||||
|
||||
private const val HTTP_URL_REGEX = "(?:(^|\\b)http://[^\\s]+)"
|
||||
private const val HTTPS_URL_REGEX = "(?:(^|\\b)https://[^\\s]+)"
|
||||
|
||||
/**
|
||||
* Dump of android.util.Patterns.WEB_URL
|
||||
*/
|
||||
private val STRICT_WEB_URL_PATTERN = Pattern.compile(
|
||||
"(((?:(?i:http|https|rtsp)://(?:(?:[a-zA-Z0-9\\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?(?:(([a-zA-Z0-9[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 ]]](?:[a-zA-Z0-9[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 ]]_\\-]{0,61}[a-zA-Z0-9[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 ]]]){0,1}\\.)+(xn\\-\\-[\\w\\-]{0,58}\\w|[a-zA-Z[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 
]]]{2,63})|((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9]))))(?:\\:\\d{1,5})?)([/\\?](?:(?:[a-zA-Z0-9[ -\uD7FF豈-\uFDCFﷰ-\uFFEF\uD800\uDC00-\uD83F\uDFFD\uD840\uDC00-\uD87F\uDFFD\uD880\uDC00-\uD8BF\uDFFD\uD8C0\uDC00-\uD8FF\uDFFD\uD900\uDC00-\uD93F\uDFFD\uD940\uDC00-\uD97F\uDFFD\uD980\uDC00-\uD9BF\uDFFD\uD9C0\uDC00-\uD9FF\uDFFD\uDA00\uDC00-\uDA3F\uDFFD\uDA40\uDC00-\uDA7F\uDFFD\uDA80\uDC00-\uDABF\uDFFD\uDAC0\uDC00-\uDAFF\uDFFD\uDB00\uDC00-\uDB3F\uDFFD\uDB44\uDC00-\uDB7F\uDFFD&&[^ [ - ]\u2028\u2029 ]];/\\?:@&=#~\\-\\.\\+!\\*'\\(\\),_\\\$])|(?:%[a-fA-F0-9]{2}))*)?(?:\\b|\$|^))"
|
||||
)
|
||||
private val VALID_URL_PATTERN = Regex.VALID_URL_PATTERN_STRING.toPattern(Pattern.CASE_INSENSITIVE)
|
||||
|
||||
private val spanClasses = listOf(ForegroundColorSpan::class.java, URLSpan::class.java)
|
||||
private val finders = mapOf(
|
||||
FoundMatchType.HTTP_URL to PatternFinder(':', HTTP_URL_REGEX, 5, Character::isWhitespace),
|
||||
FoundMatchType.HTTPS_URL to PatternFinder(':', HTTPS_URL_REGEX, 6, Character::isWhitespace),
|
||||
FoundMatchType.TAG to PatternFinder('#', TAG_REGEX, 1, ::isValidForTagPrefix),
|
||||
// TODO: We also need a proper validator for mentions
|
||||
FoundMatchType.MENTION to PatternFinder('@', MENTION_REGEX, 1, Character::isWhitespace)
|
||||
|
||||
// url must come first, it may contain the other patterns
|
||||
val defaultFinders = listOf(
|
||||
PatternFinder("http", FoundMatchType.HTTP_URL, VALID_URL_PATTERN),
|
||||
PatternFinder("#", FoundMatchType.TAG, TAG_PATTERN),
|
||||
PatternFinder("@", FoundMatchType.MENTION, MENTION_PATTERN)
|
||||
)
|
||||
|
||||
private enum class FoundMatchType {
|
||||
enum class FoundMatchType {
|
||||
HTTP_URL,
|
||||
HTTPS_URL,
|
||||
TAG,
|
||||
MENTION
|
||||
}
|
||||
|
||||
/**
 * Mutable holder describing one candidate match inside the scanned text.
 *
 * [start] and [end] are offsets into the scanned string and both begin at -1;
 * [matchType] has no initial value and must be assigned before it is read.
 */
private class FindCharsResult {
    /** Kind of pattern this result refers to. */
    lateinit var matchType: FoundMatchType

    /** Offset of the first matched character, or -1 while unset. */
    var start = -1

    /** Offset just past the last matched character, or -1 while unset. */
    var end = -1
}
|
||||
|
||||
/**
 * Describes how to locate one kind of highlightable token.
 *
 * @property searchCharacter character whose presence triggers an attempt to match [pattern].
 * @property searchPrefixWidth number of characters in front of [searchCharacter] that may belong
 * to the match.
 * @property prefixValidator predicate applied to the code point located [searchPrefixWidth]
 * characters before the trigger character.
 * @param regex source of [pattern]; it is compiled case-insensitively.
 */
private class PatternFinder(
    val searchCharacter: Char,
    regex: String,
    val searchPrefixWidth: Int,
    val prefixValidator: (Int) -> Boolean,
) {
    /** Case-insensitive compiled form of the `regex` constructor argument. */
    val pattern: Pattern = regex.toPattern(Pattern.CASE_INSENSITIVE)
}
|
||||
/**
 * Bundles everything needed to highlight one kind of token.
 *
 * @property searchString literal that must occur in the text for [pattern] to have any chance of
 * matching; it is used as a cheap pre-check before the regular expression runs.
 * @property type kind of span to create for each match.
 * @property pattern regular expression whose first capture group delimits the text to highlight.
 */
class PatternFinder(
    val searchString: String,
    val type: FoundMatchType,
    val pattern: Pattern,
)
|
||||
|
||||
/**
|
||||
* Takes text containing mentions and hashtags and urls and makes them the given colour.
|
||||
* @param finders The finders to use. This is here so they can be overridden from unit tests.
|
||||
*/
|
||||
fun highlightSpans(text: Spannable, colour: Int) {
|
||||
fun Spannable.highlightSpans(colour: Int, finders: List<PatternFinder> = defaultFinders) {
|
||||
// Strip all existing colour spans.
|
||||
for (spanClass in spanClasses) {
|
||||
clearSpans(text, spanClass)
|
||||
clearSpans(spanClass)
|
||||
}
|
||||
|
||||
// Colour the mentions and hashtags.
|
||||
val string = text.toString()
|
||||
val length = text.length
|
||||
var start = 0
|
||||
var end = 0
|
||||
while (end in 0 until length && start >= 0) {
|
||||
// Search for url first because it can contain the other characters
|
||||
val found = findPattern(string, end)
|
||||
start = found.start
|
||||
end = found.end
|
||||
if (start in 0 until end) {
|
||||
text.setSpan(
|
||||
getSpan(found.matchType, string, colour, start, end),
|
||||
start,
|
||||
end,
|
||||
Spanned.SPAN_INCLUSIVE_EXCLUSIVE
|
||||
)
|
||||
start += finders[found.matchType]!!.searchPrefixWidth
|
||||
for (finder in finders) {
|
||||
// before running the regular expression, check if there is even a chance of it finding something
|
||||
if (this.contains(finder.searchString)) {
|
||||
val matcher = finder.pattern.matcher(this)
|
||||
|
||||
while (matcher.find()) {
|
||||
// we found a match
|
||||
val start = matcher.start(1)
|
||||
|
||||
val end = matcher.end(1)
|
||||
|
||||
// only add a span if no URL span covers this range yet (e.g. the #anchor part of a URL might match as a hashtag, but must be ignored)
|
||||
if (this.getSpans(start, end, URLSpan::class.java).isEmpty()) {
|
||||
this.setSpan(
|
||||
getSpan(finder.type, this, colour, start, end),
|
||||
start,
|
||||
end,
|
||||
Spanned.SPAN_INCLUSIVE_EXCLUSIVE
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Detaches every span of type [spanClass] found between offset 0 and the end of this text.
 */
private fun <T> Spannable.clearSpans(spanClass: Class<T>) {
    getSpans(0, length, spanClass).forEach { attached -> removeSpan(attached) }
}
|
||||
|
||||
/**
|
||||
* Replaces text of the form [iconics name] with their spanned counterparts (ImageSpan).
|
||||
*/
|
||||
fun addDrawables(text: CharSequence, color: Int, size: Int, context: Context): Spannable {
|
||||
val builder = SpannableStringBuilder(text)
|
||||
|
||||
val pattern = Pattern.compile("\\[iconics ([0-9a-z_]+)\\]")
|
||||
val pattern = Pattern.compile("\\[iconics ([0-9a-z_]+)]")
|
||||
val matcher = pattern.matcher(builder)
|
||||
while (matcher.find()) {
|
||||
val resourceName = matcher.group(1)
|
||||
|
@ -123,98 +117,16 @@ fun addDrawables(text: CharSequence, color: Int, size: Int, context: Context): S
|
|||
return builder
|
||||
}
|
||||
|
||||
/**
 * Detaches every span of type [spanClass] found anywhere in [text].
 */
private fun <T> clearSpans(text: Spannable, spanClass: Class<T>) {
    val attachedSpans = text.getSpans(0, text.length, spanClass)
    attachedSpans.forEach { text.removeSpan(it) }
}
|
||||
|
||||
/**
 * Scans [string] from [fromIndex] for the first character that triggers one of the registered
 * finders and resolves into an accepted match.
 *
 * For every position, each [FoundMatchType] is tried in declaration order. A finder is attempted
 * when its search character equals the current character and either the position is closer to
 * [fromIndex] than the finder's prefix width, or the code point that many characters earlier
 * passes the finder's prefix validator.
 *
 * @param string text to scan.
 * @param fromIndex offset at which scanning starts.
 * @return the result of the first accepted candidate. If no candidate is accepted, the returned
 * object carries the state of the last attempt (or the initial -1 offsets), so callers must
 * validate `start`/`end` before using them.
 */
private fun findPattern(string: String, fromIndex: Int): FindCharsResult {
    val result = FindCharsResult()
    for (i in fromIndex..string.lastIndex) {
        val c = string[i]
        for (matchType in FoundMatchType.entries) {
            val finder = finders[matchType]!!
            val triggered = finder.searchCharacter == c &&
                (
                    (i - fromIndex) < finder.searchPrefixWidth ||
                        finder.prefixValidator(string.codePointAt(i - finder.searchPrefixWidth))
                    )
            if (!triggered) {
                continue
            }

            result.matchType = matchType
            result.start = max(0, i - finder.searchPrefixWidth)
            // findEndOfPattern only writes `end` on success. Without this reset, an end offset
            // left behind by an earlier, rejected candidate would make a failed attempt look valid.
            result.end = -1
            findEndOfPattern(string, result, finder.pattern)

            // The found result is actually triggered by the correct search character...
            val triggeredByThisCharacter = result.start + finder.searchPrefixWidth <= i + 1
            // ...and we actually found a valid result
            if (triggeredByThisCharacter && result.end >= result.start) {
                return result
            }
        }
    }
    return result
}
|
||||
|
||||
/**
 * Runs [pattern] against [string], starting at `result.start`, and refines [result] in place.
 *
 * If nothing matches, [result] is left untouched. Otherwise `result.start` becomes the start of
 * the match, moved forward by one when the match begins with a separator character rather than
 * with the token itself. `result.end` is set to the end of the match, except for URL types,
 * where it is only set if the matched text also satisfies the strict URL pattern.
 *
 * @param string text being scanned.
 * @param result candidate to refine; its `matchType` and `start` must already be set.
 * @param pattern preliminary pattern for the candidate's match type.
 */
private fun findEndOfPattern(string: String, result: FindCharsResult, pattern: Pattern) {
    val matcher = pattern.matcher(string)
    if (!matcher.find(result.start)) {
        return
    }

    // Once we have API level 26+, we can use named captures...
    val matchEnd = matcher.end()
    result.start = matcher.start()

    val type = result.matchType
    val leadingCodePoint = string.codePointAt(result.start)

    // Skip the separator in front of the token when the match includes one.
    if (type == FoundMatchType.TAG) {
        val startsWithSeparator = isValidForTagPrefix(leadingCodePoint) &&
            (string[result.start] != '#' || string[result.start + 1] == '#')
        if (startsWithSeparator) {
            ++result.start
        }
    } else if (Character.isWhitespace(leadingCodePoint)) {
        ++result.start
    }

    val isUrl = type == FoundMatchType.HTTP_URL || type == FoundMatchType.HTTPS_URL
    // Preliminary url patterns are fast/permissive, so URLs additionally get full validation.
    if (!isUrl || STRICT_WEB_URL_PATTERN.matcher(string.substring(result.start, matchEnd)).matches()) {
        result.end = matchEnd
    }
}
|
||||
|
||||
private fun getSpan(
|
||||
matchType: FoundMatchType,
|
||||
string: String,
|
||||
string: CharSequence,
|
||||
colour: Int,
|
||||
start: Int,
|
||||
end: Int
|
||||
): CharacterStyle {
|
||||
return when (matchType) {
|
||||
FoundMatchType.HTTP_URL -> NoUnderlineURLSpan(string.substring(start, end))
|
||||
FoundMatchType.HTTPS_URL -> NoUnderlineURLSpan(string.substring(start, end))
|
||||
FoundMatchType.HTTP_URL, FoundMatchType.HTTPS_URL -> NoUnderlineURLSpan(string.substring(start, end))
|
||||
FoundMatchType.MENTION -> MentionSpan(string.substring(start, end))
|
||||
else -> ForegroundColorSpan(colour)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tells whether [codePoint] is an ASCII word character, i.e. one of `[0-9A-Za-z_]`.
 */
private fun isWordCharacters(codePoint: Int): Boolean = when (codePoint) {
    in 0x30..0x39 -> true // [0-9]
    in 0x41..0x5a -> true // [A-Z]
    0x5f -> true // _
    in 0x61..0x7a -> true // [a-z]
    else -> false
}
|
||||
|
||||
/**
 * Tells whether [codePoint] may directly precede the `#` of a hashtag.
 *
 * Everything is accepted except ASCII word characters, `/` and `)`.
 */
private fun isValidForTagPrefix(codePoint: Int): Boolean {
    if (isWordCharacters(codePoint)) { // \w
        return false
    }
    val isSlash = codePoint == 0x2f // /
    val isClosingParenthesis = codePoint == 0x29 // )
    return !isSlash && !isClosingParenthesis
}
|
||||
|
|
|
@ -0,0 +1,348 @@
|
|||
// Copyright 2018 Twitter, Inc.
|
||||
// Licensed under the Apache License, Version 2.0
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
// Tusky changed: slight adaptions for Mastodon compatibility
|
||||
|
||||
package com.keylesspalace.tusky.util.twittertext;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.regex.Pattern;
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
public class Regex {
|
||||
|
||||
protected Regex() {
|
||||
}
|
||||
|
||||
private static final String URL_VALID_GTLD =
|
||||
"(?:(?:" +
|
||||
join(TldLists.GTLDS) +
|
||||
")(?=[^a-z0-9@+-]|$))";
|
||||
private static final String URL_VALID_CCTLD =
|
||||
"(?:(?:" +
|
||||
join(TldLists.CTLDS) +
|
||||
")(?=[^a-z0-9@+-]|$))";
|
||||
|
||||
private static final String INVALID_CHARACTERS =
|
||||
"\\uFFFE" + // BOM
|
||||
"\\uFEFF" + // BOM
|
||||
"\\uFFFF"; // Special
|
||||
|
||||
private static final String DIRECTIONAL_CHARACTERS =
|
||||
"\\u061C" + // ARABIC LETTER MARK (ALM)
|
||||
"\\u200E" + // LEFT-TO-RIGHT MARK (LRM)
|
||||
"\\u200F" + // RIGHT-TO-LEFT MARK (RLM)
|
||||
"\\u202A" + // LEFT-TO-RIGHT EMBEDDING (LRE)
|
||||
"\\u202B" + // RIGHT-TO-LEFT EMBEDDING (RLE)
|
||||
"\\u202C" + // POP DIRECTIONAL FORMATTING (PDF)
|
||||
"\\u202D" + // LEFT-TO-RIGHT OVERRIDE (LRO)
|
||||
"\\u202E" + // RIGHT-TO-LEFT OVERRIDE (RLO)
|
||||
"\\u2066" + // LEFT-TO-RIGHT ISOLATE (LRI)
|
||||
"\\u2067" + // RIGHT-TO-LEFT ISOLATE (RLI)
|
||||
"\\u2068" + // FIRST STRONG ISOLATE (FSI)
|
||||
"\\u2069"; // POP DIRECTIONAL ISOLATE (PDI)
|
||||
|
||||
|
||||
private static final String UNICODE_SPACES = "[" +
|
||||
"\\u0009-\\u000d" + // # White_Space # Cc [5] <control-0009>..<control-000D>
|
||||
"\\u0020" + // White_Space # Zs SPACE
|
||||
"\\u0085" + // White_Space # Cc <control-0085>
|
||||
"\\u00a0" + // White_Space # Zs NO-BREAK SPACE
|
||||
"\\u1680" + // White_Space # Zs OGHAM SPACE MARK
|
||||
"\\u180E" + // White_Space # Zs MONGOLIAN VOWEL SEPARATOR
|
||||
"\\u2000-\\u200a" + // # White_Space # Zs [11] EN QUAD..HAIR SPACE
|
||||
"\\u2028" + // White_Space # Zl LINE SEPARATOR
|
||||
"\\u2029" + // White_Space # Zp PARAGRAPH SEPARATOR
|
||||
"\\u202F" + // White_Space # Zs NARROW NO-BREAK SPACE
|
||||
"\\u205F" + // White_Space # Zs MEDIUM MATHEMATICAL SPACE
|
||||
"\\u3000" + // White_Space # Zs IDEOGRAPHIC SPACE
|
||||
"]";
|
||||
|
||||
private static final String LATIN_ACCENTS_CHARS =
|
||||
// Latin-1
|
||||
"\\u00c0-\\u00d6\\u00d8-\\u00f6\\u00f8-\\u00ff" +
|
||||
// Latin Extended A and B
|
||||
"\\u0100-\\u024f" +
|
||||
// IPA Extensions
|
||||
"\\u0253\\u0254\\u0256\\u0257\\u0259\\u025b\\u0263\\u0268\\u026f\\u0272\\u0289\\u028b" +
|
||||
// Hawaiian
|
||||
"\\u02bb" +
|
||||
// Combining diacritics
|
||||
"\\u0300-\\u036f" +
|
||||
// Latin Extended Additional (mostly for Vietnamese)
|
||||
"\\u1e00-\\u1eff";
|
||||
|
||||
private static final String CYRILLIC_CHARS = "\\u0400-\\u04ff";
|
||||
|
||||
// Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Java's \p{L}\p{M}
|
||||
private static final String HASHTAG_LETTERS_AND_MARKS = "\\p{L}\\p{M}" +
|
||||
"\\u037f\\u0528-\\u052f\\u08a0-\\u08b2\\u08e4-\\u08ff\\u0978\\u0980\\u0c00\\u0c34\\u0c81" +
|
||||
"\\u0d01\\u0ede\\u0edf\\u10c7\\u10cd\\u10fd-\\u10ff\\u16f1-\\u16f8\\u17b4\\u17b5\\u191d" +
|
||||
"\\u191e\\u1ab0-\\u1abe\\u1bab-\\u1bad\\u1bba-\\u1bbf\\u1cf3-\\u1cf6\\u1cf8\\u1cf9" +
|
||||
"\\u1de7-\\u1df5\\u2cf2\\u2cf3\\u2d27\\u2d2d\\u2d66\\u2d67\\u9fcc\\ua674-\\ua67b\\ua698" +
|
||||
"-\\ua69d\\ua69f\\ua792-\\ua79f\\ua7aa-\\ua7ad\\ua7b0\\ua7b1\\ua7f7-\\ua7f9\\ua9e0-" +
|
||||
"\\ua9ef\\ua9fa-\\ua9fe\\uaa7c-\\uaa7f\\uaae0-\\uaaef\\uaaf2-\\uaaf6\\uab30-\\uab5a" +
|
||||
"\\uab5c-\\uab5f\\uab64\\uab65\\uf870-\\uf87f\\uf882\\uf884-\\uf89f\\uf8b8\\uf8c1-" +
|
||||
"\\uf8d6\\ufa2e\\ufa2f\\ufe27-\\ufe2d\\ud800\\udee0\\ud800\\udf1f\\ud800\\udf50-\\ud800" +
|
||||
"\\udf7a\\ud801\\udd00-\\ud801\\udd27\\ud801\\udd30-\\ud801\\udd63\\ud801\\ude00-\\ud801" +
|
||||
"\\udf36\\ud801\\udf40-\\ud801\\udf55\\ud801\\udf60-\\ud801\\udf67\\ud802\\udc60-\\ud802" +
|
||||
"\\udc76\\ud802\\udc80-\\ud802\\udc9e\\ud802\\udd80-\\ud802\\uddb7\\ud802\\uddbe\\ud802" +
|
||||
"\\uddbf\\ud802\\ude80-\\ud802\\ude9c\\ud802\\udec0-\\ud802\\udec7\\ud802\\udec9-\\ud802" +
|
||||
"\\udee6\\ud802\\udf80-\\ud802\\udf91\\ud804\\udc7f\\ud804\\udcd0-\\ud804\\udce8\\ud804" +
|
||||
"\\udd00-\\ud804\\udd34\\ud804\\udd50-\\ud804\\udd73\\ud804\\udd76\\ud804\\udd80-\\ud804" +
|
||||
"\\uddc4\\ud804\\uddda\\ud804\\ude00-\\ud804\\ude11\\ud804\\ude13-\\ud804\\ude37\\ud804" +
|
||||
"\\udeb0-\\ud804\\udeea\\ud804\\udf01-\\ud804\\udf03\\ud804\\udf05-\\ud804\\udf0c\\ud804" +
|
||||
"\\udf0f\\ud804\\udf10\\ud804\\udf13-\\ud804\\udf28\\ud804\\udf2a-\\ud804\\udf30\\ud804" +
|
||||
"\\udf32\\ud804\\udf33\\ud804\\udf35-\\ud804\\udf39\\ud804\\udf3c-\\ud804\\udf44\\ud804" +
|
||||
"\\udf47\\ud804\\udf48\\ud804\\udf4b-\\ud804\\udf4d\\ud804\\udf57\\ud804\\udf5d-\\ud804" +
|
||||
"\\udf63\\ud804\\udf66-\\ud804\\udf6c\\ud804\\udf70-\\ud804\\udf74\\ud805\\udc80-\\ud805" +
|
||||
"\\udcc5\\ud805\\udcc7\\ud805\\udd80-\\ud805\\uddb5\\ud805\\uddb8-\\ud805\\uddc0\\ud805" +
|
||||
"\\ude00-\\ud805\\ude40\\ud805\\ude44\\ud805\\ude80-\\ud805\\udeb7\\ud806\\udca0-\\ud806" +
|
||||
"\\udcdf\\ud806\\udcff\\ud806\\udec0-\\ud806\\udef8\\ud808\\udf6f-\\ud808\\udf98\\ud81a" +
|
||||
"\\ude40-\\ud81a\\ude5e\\ud81a\\uded0-\\ud81a\\udeed\\ud81a\\udef0-\\ud81a\\udef4\\ud81a" +
|
||||
"\\udf00-\\ud81a\\udf36\\ud81a\\udf40-\\ud81a\\udf43\\ud81a\\udf63-\\ud81a\\udf77\\ud81a" +
|
||||
"\\udf7d-\\ud81a\\udf8f\\ud81b\\udf00-\\ud81b\\udf44\\ud81b\\udf50-\\ud81b\\udf7e\\ud81b" +
|
||||
"\\udf8f-\\ud81b\\udf9f\\ud82f\\udc00-\\ud82f\\udc6a\\ud82f\\udc70-\\ud82f\\udc7c\\ud82f" +
|
||||
"\\udc80-\\ud82f\\udc88\\ud82f\\udc90-\\ud82f\\udc99\\ud82f\\udc9d\\ud82f\\udc9e\\ud83a" +
|
||||
"\\udc00-\\ud83a\\udcc4\\ud83a\\udcd0-\\ud83a\\udcd6\\ud83b\\ude00-\\ud83b\\ude03\\ud83b" +
|
||||
"\\ude05-\\ud83b\\ude1f\\ud83b\\ude21\\ud83b\\ude22\\ud83b\\ude24\\ud83b\\ude27\\ud83b" +
|
||||
"\\ude29-\\ud83b\\ude32\\ud83b\\ude34-\\ud83b\\ude37\\ud83b\\ude39\\ud83b\\ude3b\\ud83b" +
|
||||
"\\ude42\\ud83b\\ude47\\ud83b\\ude49\\ud83b\\ude4b\\ud83b\\ude4d-\\ud83b\\ude4f\\ud83b" +
|
||||
"\\ude51\\ud83b\\ude52\\ud83b\\ude54\\ud83b\\ude57\\ud83b\\ude59\\ud83b\\ude5b\\ud83b" +
|
||||
"\\ude5d\\ud83b\\ude5f\\ud83b\\ude61\\ud83b\\ude62\\ud83b\\ude64\\ud83b\\ude67-\\ud83b" +
|
||||
"\\ude6a\\ud83b\\ude6c-\\ud83b\\ude72\\ud83b\\ude74-\\ud83b\\ude77\\ud83b\\ude79-\\ud83b" +
|
||||
"\\ude7c\\ud83b\\ude7e\\ud83b\\ude80-\\ud83b\\ude89\\ud83b\\ude8b-\\ud83b\\ude9b\\ud83b" +
|
||||
"\\udea1-\\ud83b\\udea3\\ud83b\\udea5-\\ud83b\\udea9\\ud83b\\udeab-\\ud83b\\udebb";
|
||||
|
||||
// Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Java's \p{Nd}
|
||||
private static final String HASHTAG_NUMERALS = "\\p{Nd}" +
|
||||
"\\u0de6-\\u0def\\ua9f0-\\ua9f9\\ud804\\udcf0-\\ud804\\udcf9\\ud804\\udd36-\\ud804" +
|
||||
"\\udd3f\\ud804\\uddd0-\\ud804\\uddd9\\ud804\\udef0-\\ud804\\udef9\\ud805\\udcd0-\\ud805" +
|
||||
"\\udcd9\\ud805\\ude50-\\ud805\\ude59\\ud805\\udec0-\\ud805\\udec9\\ud806\\udce0-\\ud806" +
|
||||
"\\udce9\\ud81a\\ude60-\\ud81a\\ude69\\ud81a\\udf50-\\ud81a\\udf59";
|
||||
|
||||
private static final String HASHTAG_SPECIAL_CHARS = "_" + //underscore
|
||||
"\\u200c" + // ZERO WIDTH NON-JOINER (ZWNJ)
|
||||
"\\u200d" + // ZERO WIDTH JOINER (ZWJ)
|
||||
"\\ua67e" + // CYRILLIC KAVYKA
|
||||
"\\u05be" + // HEBREW PUNCTUATION MAQAF
|
||||
"\\u05f3" + // HEBREW PUNCTUATION GERESH
|
||||
"\\u05f4" + // HEBREW PUNCTUATION GERSHAYIM
|
||||
"\\uff5e" + // FULLWIDTH TILDE
|
||||
"\\u301c" + // WAVE DASH
|
||||
"\\u309b" + // KATAKANA-HIRAGANA VOICED SOUND MARK
|
||||
"\\u309c" + // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
"\\u30a0" + // KATAKANA-HIRAGANA DOUBLE HYPHEN
|
||||
"\\u30fb" + // KATAKANA MIDDLE DOT
|
||||
"\\u3003" + // DITTO MARK
|
||||
"\\u0f0b" + // TIBETAN MARK INTERSYLLABIC TSHEG
|
||||
"\\u0f0c" + // TIBETAN MARK DELIMITER TSHEG BSTAR
|
||||
"\\u00b7"; // MIDDLE DOT
|
||||
|
||||
private static final String HASHTAG_LETTERS_NUMERALS =
|
||||
HASHTAG_LETTERS_AND_MARKS + HASHTAG_NUMERALS + HASHTAG_SPECIAL_CHARS;
|
||||
private static final String HASHTAG_LETTERS_SET = "[" + HASHTAG_LETTERS_AND_MARKS + "]";
|
||||
private static final String HASHTAG_LETTERS_NUMERALS_SET = "[" + HASHTAG_LETTERS_NUMERALS + "]";
|
||||
|
||||
/* URL related hash regex collection */
|
||||
private static final String URL_VALID_PRECEDING_CHARS =
|
||||
"(?:[^a-z0-9@@$##" + INVALID_CHARACTERS + "]|[" + DIRECTIONAL_CHARACTERS + "]|^)";
|
||||
|
||||
private static final String URL_VALID_CHARS = "[a-z0-9" + LATIN_ACCENTS_CHARS + "]";
|
||||
private static final String URL_VALID_SUBDOMAIN =
|
||||
"(?>(?:" + URL_VALID_CHARS + "[" + URL_VALID_CHARS + "\\-_]*)?" + URL_VALID_CHARS + "\\.)";
|
||||
private static final String URL_VALID_DOMAIN_NAME =
|
||||
"(?:(?:" + URL_VALID_CHARS + "[" + URL_VALID_CHARS + "\\-]*)?" + URL_VALID_CHARS + "\\.)";
|
||||
|
||||
private static final String PUNCTUATION_CHARS = "-_!\"#$%&'\\(\\)*+,./:;<=>?@\\[\\]^`\\{|}~";
|
||||
|
||||
// Any non-space, non-punctuation characters.
|
||||
// \p{Z} = any kind of whitespace or invisible separator.
|
||||
private static final String URL_VALID_UNICODE_CHARS =
|
||||
"[^" + PUNCTUATION_CHARS + "\\s\\p{Z}\\p{InGeneralPunctuation}]";
|
||||
private static final String URL_VALID_UNICODE_DOMAIN_NAME =
|
||||
"(?:(?:" + URL_VALID_UNICODE_CHARS + "[" + URL_VALID_UNICODE_CHARS + "\\-]*)?" +
|
||||
URL_VALID_UNICODE_CHARS + "\\.)";
|
||||
|
||||
private static final String URL_PUNYCODE = "(?:xn--[-0-9a-z]+)";
|
||||
|
||||
private static final String URL_VALID_DOMAIN =
|
||||
"(?:" + // optional sub-domain + domain + TLD
|
||||
URL_VALID_SUBDOMAIN + "*" + URL_VALID_DOMAIN_NAME + // e.g. twitter.com, foo.co.jp ...
|
||||
"(?:" + URL_VALID_GTLD + "|" + URL_VALID_CCTLD + "|" + URL_PUNYCODE + ")" +
|
||||
")" +
|
||||
"|(?:" + "(?<=https?://)" +
|
||||
"(?:" +
|
||||
"(?:" + URL_VALID_DOMAIN_NAME + URL_VALID_CCTLD + ")" + // protocol + domain + ccTLD
|
||||
"|(?:" +
|
||||
URL_VALID_UNICODE_DOMAIN_NAME + // protocol + unicode domain + TLD
|
||||
"(?:" + URL_VALID_GTLD + "|" + URL_VALID_CCTLD + ")" +
|
||||
")" +
|
||||
")" +
|
||||
")" +
|
||||
"|(?:" + // domain + ccTLD + '/'
|
||||
URL_VALID_DOMAIN_NAME + URL_VALID_CCTLD + "(?=/)" + // e.g. t.co/
|
||||
")";
|
||||
|
||||
private static final String URL_VALID_PORT_NUMBER = "[0-9]++";
|
||||
|
||||
private static final String URL_VALID_GENERAL_PATH_CHARS =
|
||||
"[a-z0-9!\\*';:=\\+,.\\$/%#\\[\\]\\-\\u2013_~\\|&@" +
|
||||
LATIN_ACCENTS_CHARS + CYRILLIC_CHARS + "]";
|
||||
|
||||
/**
|
||||
* Allow URL paths to contain up to two nested levels of balanced parens
|
||||
* 1. Used in Wikipedia URLs like /Primer_(film)
|
||||
* 2. Used in IIS sessions like /S(dfd346)/
|
||||
* 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/
|
||||
*/
|
||||
private static final String URL_BALANCED_PARENS = "\\(" +
|
||||
"(?:" +
|
||||
URL_VALID_GENERAL_PATH_CHARS + "+" +
|
||||
"|" +
|
||||
// allow one nested level of balanced parentheses
|
||||
"(?:" +
|
||||
URL_VALID_GENERAL_PATH_CHARS + "*" +
|
||||
"\\(" +
|
||||
URL_VALID_GENERAL_PATH_CHARS + "+" +
|
||||
"\\)" +
|
||||
URL_VALID_GENERAL_PATH_CHARS + "*" +
|
||||
")" +
|
||||
")" +
|
||||
"\\)";
|
||||
|
||||
/**
|
||||
* Valid end-of-path characters (so /foo. does not gobble the period).
|
||||
* 2. Allow =&# for empty URL parameters and other URL-join artifacts
|
||||
*/
|
||||
private static final String URL_VALID_PATH_ENDING_CHARS =
|
||||
"[a-z0-9=_#/\\-\\+" + LATIN_ACCENTS_CHARS + CYRILLIC_CHARS + "]|(?:" +
|
||||
URL_BALANCED_PARENS + ")";
|
||||
|
||||
private static final String URL_VALID_PATH = "(?:" +
|
||||
"(?:" +
|
||||
URL_VALID_GENERAL_PATH_CHARS + "*" +
|
||||
"(?:" + URL_BALANCED_PARENS + URL_VALID_GENERAL_PATH_CHARS + "*)*" +
|
||||
URL_VALID_PATH_ENDING_CHARS +
|
||||
")|(?:@" + URL_VALID_GENERAL_PATH_CHARS + "+/)" +
|
||||
")";
|
||||
|
||||
private static final String URL_VALID_URL_QUERY_CHARS =
|
||||
"[a-z0-9!?\\*'\\(\\);:&=\\+\\$/%#\\[\\]\\-_\\.,~\\|@]";
|
||||
private static final String URL_VALID_URL_QUERY_ENDING_CHARS = "[a-z0-9\\-_&=#/]";
|
||||
public static final String VALID_URL_PATTERN_STRING =
|
||||
URL_VALID_PRECEDING_CHARS +
|
||||
"(" +
|
||||
"https?://" +
|
||||
"(" + URL_VALID_DOMAIN + ")" +
|
||||
"(?::(" + URL_VALID_PORT_NUMBER + "))?" +
|
||||
"(/" +
|
||||
URL_VALID_PATH + "*+" +
|
||||
")?" +
|
||||
"(\\?" + URL_VALID_URL_QUERY_CHARS + "*" +
|
||||
URL_VALID_URL_QUERY_ENDING_CHARS + ")?" +
|
||||
")";
|
||||
|
||||
private static final String AT_SIGNS_CHARS = "@\uFF20";
|
||||
private static final String DOLLAR_SIGN_CHAR = "\\$";
|
||||
private static final String CASHTAG = "[a-z]{1,6}(?:[._][a-z]{1,2})?";
|
||||
|
||||
/* Begin public constants */
|
||||
|
||||
public static final Pattern INVALID_CHARACTERS_PATTERN;
|
||||
public static final Pattern VALID_HASHTAG;
|
||||
public static final int VALID_HASHTAG_GROUP_BEFORE = 1;
|
||||
public static final int VALID_HASHTAG_GROUP_HASH = 2;
|
||||
public static final int VALID_HASHTAG_GROUP_TAG = 3;
|
||||
public static final Pattern INVALID_HASHTAG_MATCH_END;
|
||||
public static final Pattern RTL_CHARACTERS;
|
||||
|
||||
public static final Pattern AT_SIGNS;
|
||||
public static final Pattern VALID_MENTION_OR_LIST;
|
||||
public static final int VALID_MENTION_OR_LIST_GROUP_BEFORE = 1;
|
||||
public static final int VALID_MENTION_OR_LIST_GROUP_AT = 2;
|
||||
public static final int VALID_MENTION_OR_LIST_GROUP_USERNAME = 3;
|
||||
public static final int VALID_MENTION_OR_LIST_GROUP_LIST = 4;
|
||||
|
||||
public static final Pattern VALID_REPLY;
|
||||
public static final int VALID_REPLY_GROUP_USERNAME = 1;
|
||||
|
||||
public static final Pattern INVALID_MENTION_MATCH_END;
|
||||
|
||||
/**
|
||||
* Regex to extract URL (it also includes the text preceding the url).
|
||||
*
|
||||
* This regex does not reflect its name and {@link Regex#VALID_URL_GROUP_URL} match
|
||||
* should be checked in order to match a valid url. This is not ideal, but the behavior is
|
||||
* being kept to ensure backwards compatibility. Ideally this regex should be
|
||||
* implemented with a negative lookbehind as opposed to a negated character class
|
||||
* but lack of JS support increases maint overhead if the logic is different by
|
||||
* platform.
|
||||
*/
|
||||
|
||||
public static final Pattern VALID_URL;
|
||||
public static final int VALID_URL_GROUP_ALL = 1;
|
||||
public static final int VALID_URL_GROUP_BEFORE = 2;
|
||||
public static final int VALID_URL_GROUP_URL = 3;
|
||||
public static final int VALID_URL_GROUP_PROTOCOL = 4;
|
||||
public static final int VALID_URL_GROUP_DOMAIN = 5;
|
||||
public static final int VALID_URL_GROUP_PORT = 6;
|
||||
public static final int VALID_URL_GROUP_PATH = 7;
|
||||
public static final int VALID_URL_GROUP_QUERY_STRING = 8;
|
||||
|
||||
public static final Pattern VALID_TCO_URL;
|
||||
public static final Pattern INVALID_URL_WITHOUT_PROTOCOL_MATCH_BEGIN;
|
||||
|
||||
public static final Pattern VALID_CASHTAG;
|
||||
public static final int VALID_CASHTAG_GROUP_BEFORE = 1;
|
||||
public static final int VALID_CASHTAG_GROUP_DOLLAR = 2;
|
||||
public static final int VALID_CASHTAG_GROUP_CASHTAG = 3;
|
||||
|
||||
public static final Pattern VALID_DOMAIN;
|
||||
|
||||
// initializing in a static synchronized block,
|
||||
// there appear to be thread-safety issues with Pattern.compile on Android
|
||||
static {
    // All patterns are compiled while holding the Regex.class monitor.
    synchronized (Regex.class) {
        final String invalidCharacters = ".*[" + INVALID_CHARACTERS + "].*";
        INVALID_CHARACTERS_PATTERN = Pattern.compile(invalidCharacters);

        // Groups: (1) preceding character or start, (2) hash sign, (3) hashtag text.
        final String validHashtag =
            "(^|\\uFE0E|\\uFE0F|[^&" + HASHTAG_LETTERS_NUMERALS
                + "])([#\uFF03])(?![\uFE0F\u20E3])("
                + HASHTAG_LETTERS_NUMERALS_SET + "*"
                + HASHTAG_LETTERS_SET
                + HASHTAG_LETTERS_NUMERALS_SET + "*)";
        VALID_HASHTAG = Pattern.compile(validHashtag, Pattern.CASE_INSENSITIVE);

        INVALID_HASHTAG_MATCH_END = Pattern.compile("^(?:[##]|://)");
        RTL_CHARACTERS = Pattern.compile("[\u0600-\u06FF\u0750-\u077F\u0590-\u05FF\uFE70-\uFEFF]");

        // AT_SIGNS is concatenated into the two patterns below, so it has to be assigned first.
        final String atSigns = "[" + AT_SIGNS_CHARS + "]";
        AT_SIGNS = Pattern.compile(atSigns);

        final String validMentionOrList =
            "([^a-z0-9_!#$%&*" + AT_SIGNS_CHARS
                + "]|^|(?:^|[^a-z0-9_+~.-])RT:?)(" + AT_SIGNS
                + "+)([a-z0-9_]{1,20})(/[a-z][a-z0-9_\\-]{0,24})?";
        VALID_MENTION_OR_LIST = Pattern.compile(validMentionOrList, Pattern.CASE_INSENSITIVE);

        final String validReply =
            "^(?:" + UNICODE_SPACES + "|" + DIRECTIONAL_CHARACTERS + ")*"
                + AT_SIGNS + "([a-z0-9_]{1,20})";
        VALID_REPLY = Pattern.compile(validReply, Pattern.CASE_INSENSITIVE);

        final String invalidMentionMatchEnd =
            "^(?:[" + AT_SIGNS_CHARS + LATIN_ACCENTS_CHARS + "]|://)";
        INVALID_MENTION_MATCH_END = Pattern.compile(invalidMentionMatchEnd);

        INVALID_URL_WITHOUT_PROTOCOL_MATCH_BEGIN = Pattern.compile("[-_./]$");

        VALID_URL = Pattern.compile(VALID_URL_PATTERN_STRING, Pattern.CASE_INSENSITIVE);

        final String validTcoUrl =
            "^https?://t\\.co/([a-z0-9]+)(?:\\?"
                + URL_VALID_URL_QUERY_CHARS + "*" + URL_VALID_URL_QUERY_ENDING_CHARS + ")?";
        VALID_TCO_URL = Pattern.compile(validTcoUrl, Pattern.CASE_INSENSITIVE);

        // Groups: (1) preceding whitespace/directional character or start, (2) dollar sign, (3) cashtag.
        final String validCashtag =
            "(^|" + UNICODE_SPACES + "|" + DIRECTIONAL_CHARACTERS + ")("
                + DOLLAR_SIGN_CHAR + ")(" + CASHTAG + ")" + "(?=$|\\s|\\p{Punct})";
        VALID_CASHTAG = Pattern.compile(validCashtag, Pattern.CASE_INSENSITIVE);

        VALID_DOMAIN = Pattern.compile(URL_VALID_DOMAIN, Pattern.CASE_INSENSITIVE);
    }
}
|
||||
|
||||
private static String join(@Nonnull Collection<?> col) {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
final Iterator<?> iter = col.iterator();
|
||||
if (iter.hasNext()) {
|
||||
sb.append(iter.next().toString());
|
||||
}
|
||||
while (iter.hasNext()) {
|
||||
sb.append("|");
|
||||
sb.append(iter.next().toString());
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,182 +1,187 @@
|
|||
package com.keylesspalace.tusky
|
||||
|
||||
import android.text.Spannable
|
||||
import com.keylesspalace.tusky.util.FoundMatchType
|
||||
import com.keylesspalace.tusky.util.MENTION_PATTERN_STRING
|
||||
import com.keylesspalace.tusky.util.PatternFinder
|
||||
import com.keylesspalace.tusky.util.TAG_PATTERN_STRING
|
||||
import com.keylesspalace.tusky.util.highlightSpans
|
||||
import org.junit.Assert
|
||||
import com.keylesspalace.tusky.util.twittertext.Regex
|
||||
import java.util.regex.Pattern
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
import org.junit.runners.Parameterized
|
||||
|
||||
class SpanUtilsTest {
|
||||
@Test
|
||||
fun matchesMixedSpans() {
|
||||
val input = "one #one two: @two three : https://thr.ee/meh?foo=bar&wat=@at#hmm four #four five @five ろく#six"
|
||||
val inputSpannable = FakeSpannable(input)
|
||||
highlightSpans(inputSpannable, 0xffffff)
|
||||
val spans = inputSpannable.spans
|
||||
Assert.assertEquals(6, spans.size)
|
||||
/** The [Pattern.UNICODE_CHARACTER_CLASS] flag is not supported on Android; there it is effectively always on.
|
||||
* Since these tests run on a regular JVM, we need to set this flag or they would behave differently.
|
||||
* */
|
||||
private val patternFlags = Pattern.CASE_INSENSITIVE or Pattern.UNICODE_CHARACTER_CLASS

private val urlPattern = Regex.VALID_URL_PATTERN_STRING.toPattern(patternFlags)
private val tagPattern = TAG_PATTERN_STRING.toPattern(patternFlags)
private val mentionPattern = MENTION_PATTERN_STRING.toPattern(patternFlags)

/**
 * One [PatternFinder] per kind of highlight (URL, hashtag, mention), each using a
 * pattern compiled with [patternFlags].
 */
val finders = listOf(
    PatternFinder("http", FoundMatchType.HTTPS_URL, urlPattern),
    PatternFinder("#", FoundMatchType.TAG, tagPattern),
    PatternFinder("@", FoundMatchType.MENTION, mentionPattern),
)
|
||||
|
||||
@RunWith(Parameterized::class)
|
||||
class SpanUtilsTest(
|
||||
private val stringToHighlight: String,
|
||||
private val highlights: List<Pair<Int, Int>>
|
||||
) {
|
||||
|
||||
companion object {
|
||||
@Parameterized.Parameters(name = "{0}")
|
||||
@JvmStatic
|
||||
fun data() = listOf(
|
||||
arrayOf("@mention", listOf(0 to 8)),
|
||||
arrayOf("@mention@server.com", listOf(0 to 19)),
|
||||
arrayOf("#tag", listOf(0 to 4)),
|
||||
arrayOf("#tåg", listOf(0 to 4)),
|
||||
arrayOf("https://thr.ee/meh?foo=bar&wat=@at#hmm", listOf(0 to 38)),
|
||||
arrayOf("http://thr.ee/meh?foo=bar&wat=@at#hmm", listOf(0 to 37)),
|
||||
arrayOf(
|
||||
"one #one two: @two three : https://thr.ee/meh?foo=bar&wat=@at#hmm four #four five @five 6 #six",
|
||||
listOf(4 to 8, 14 to 18, 27 to 65, 71 to 76, 82 to 87, 90 to 94)
|
||||
),
|
||||
arrayOf("http://first.link https://second.link", listOf(0 to 17, 18 to 37)),
|
||||
arrayOf("#test", listOf(0 to 5)),
|
||||
arrayOf(" #AfterSpace", listOf(1 to 12)),
|
||||
arrayOf("#BeforeSpace ", listOf(0 to 12)),
|
||||
arrayOf("@#after_at", listOf(1 to 10)),
|
||||
arrayOf("あいうえお#after_hiragana", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("##DoubleHash", listOf(1 to 12)),
|
||||
arrayOf("###TripleHash", listOf(2 to 13)),
|
||||
arrayOf("something#notAHashtag", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("test##maybeAHashtag", listOf(5 to 19)),
|
||||
arrayOf("testhttp://not.a.url.com", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("test@notAMention", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("test@notAMention#notAHashtag", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("test@notAMention@server.com", listOf<Pair<Int, Int>>()),
|
||||
// Mastodon will not highlight this mention, although it would be valid according to their regex
|
||||
// arrayOf("@test@notAMention@server.com", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("testhttps://not.a.url.com", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("#hashtag1", listOf(0 to 9)),
|
||||
arrayOf("#1hashtag", listOf(0 to 9)),
|
||||
arrayOf("#サイクリング", listOf(0 to 7)),
|
||||
arrayOf("#自転車に乗る", listOf(0 to 7)),
|
||||
arrayOf("(#test)", listOf(1 to 6)),
|
||||
arrayOf(")#test(", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("{#test}", listOf(1 to 6)),
|
||||
arrayOf("[#test]", listOf(1 to 6)),
|
||||
arrayOf("}#test{", listOf(1 to 6)),
|
||||
arrayOf("]#test[", listOf(1 to 6)),
|
||||
arrayOf("<#test>", listOf(1 to 6)),
|
||||
arrayOf(">#test<", listOf(1 to 6)),
|
||||
arrayOf("((#Test))", listOf(2 to 7)),
|
||||
arrayOf("((##Te)st)", listOf(3 to 6)),
|
||||
arrayOf("[@ConnyDuck]", listOf(1 to 11)),
|
||||
arrayOf("(@ConnyDuck)", listOf(1 to 11)),
|
||||
arrayOf("(@ConnyDuck@chaos.social)", listOf(1 to 24)),
|
||||
arrayOf("Test(https://test.xyz/blubb(test)))))))))))", listOf(5 to 33)),
|
||||
arrayOf("Test https://test.xyz/blubb(test)))))))))))", listOf(5 to 33)),
|
||||
arrayOf("Test https://test.xyz/blubbtest)))))))))))", listOf(5 to 31)),
|
||||
arrayOf("#https://test.com", listOf(0 to 6)),
|
||||
arrayOf("#https://t", listOf(0 to 6)),
|
||||
arrayOf("(https://blubb.com", listOf(1 to 18)),
|
||||
arrayOf("https://example.com/path#anchor", listOf(0 to 31)),
|
||||
arrayOf("test httpx2345://wrong.protocol.com", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("test https://nonexistent.topleveldomain.testtest", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("test https://example.com:1234 domain with port", listOf(5 to 29)),
|
||||
arrayOf("http://1.1.1.1", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("http://foo.bar/?q=Test%20URL-encoded%20stuff", listOf(0 to 44)),
|
||||
arrayOf("http://userid:password@example.com", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("http://userid@example.com", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("http://foo.com/blah_blah_(brackets)_(again)", listOf(0 to 43)),
|
||||
arrayOf("test example.com/no/protocol", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("protocol only https://", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("no tld https://test", listOf<Pair<Int, Int>>()),
|
||||
arrayOf("mention in url https://test.com/@test@domain.cat", listOf(15 to 48)),
|
||||
arrayOf("#hash_tag", listOf(0 to 9)),
|
||||
arrayOf("#hashtag_", listOf(0 to 9)),
|
||||
arrayOf("#hashtag_#tag", listOf(0 to 9, 9 to 13)),
|
||||
arrayOf("#hash_tag#tag", listOf(0 to 9)),
|
||||
arrayOf("_#hashtag", listOf(1 to 9)),
|
||||
arrayOf("@@ConnyDuck@chaos.social", listOf(1 to 24)),
|
||||
arrayOf("http://https://connyduck.at", listOf(7 to 27)),
|
||||
arrayOf("https://https://connyduck.at", listOf(8 to 28)),
|
||||
arrayOf("http:// http://connyduck.at", listOf(8 to 27)),
|
||||
arrayOf("https:// https://connyduck.at", listOf(9 to 29)),
|
||||
arrayOf("https:// #test https://connyduck.at", listOf(9 to 14, 15 to 35)),
|
||||
arrayOf("http:// @connyduck http://connyduck.at", listOf(8 to 18, 19 to 38)),
|
||||
// emojis count as multiple characters
|
||||
arrayOf("😜https://connyduck.at", listOf(2 to 22)),
|
||||
arrayOf("😜#tag", listOf(2 to 6)),
|
||||
arrayOf("😜@user@mastodon.example", listOf(2 to 24)),
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun doesntMergeAdjacentURLs() {
|
||||
val firstURL = "http://first.thing"
|
||||
val secondURL = "https://second.thing"
|
||||
val inputSpannable = FakeSpannable("$firstURL $secondURL")
|
||||
highlightSpans(inputSpannable, 0xffffff)
|
||||
val spans = inputSpannable.spans
|
||||
Assert.assertEquals(2, spans.size)
|
||||
Assert.assertEquals(firstURL.length, spans[0].end - spans[0].start)
|
||||
Assert.assertEquals(secondURL.length, spans[1].end - spans[1].start)
|
||||
}
|
||||
fun testHighlighting() {
|
||||
val inputSpannable = FakeSpannable(stringToHighlight)
|
||||
inputSpannable.highlightSpans(0xffffff, finders)
|
||||
|
||||
@RunWith(Parameterized::class)
|
||||
class MatchingTests(private val thingToHighlight: String) {
|
||||
companion object {
|
||||
@Parameterized.Parameters(name = "{0}")
|
||||
@JvmStatic
|
||||
fun data(): Iterable<Any> {
|
||||
return listOf(
|
||||
"@mention",
|
||||
"#tag",
|
||||
"#tåg",
|
||||
"https://thr.ee/meh?foo=bar&wat=@at#hmm",
|
||||
"http://thr.ee/meh?foo=bar&wat=@at#hmm"
|
||||
)
|
||||
assertEquals(highlights.size, inputSpannable.spans.size)
|
||||
|
||||
inputSpannable.spans
|
||||
.sortedBy { span -> span.start }
|
||||
.forEachIndexed { index, span ->
|
||||
assertEquals(highlights[index].first, span.start)
|
||||
assertEquals(highlights[index].second, span.end)
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun matchesSpanAtStart() {
|
||||
val inputSpannable = FakeSpannable(thingToHighlight)
|
||||
highlightSpans(inputSpannable, 0xffffff)
|
||||
val spans = inputSpannable.spans
|
||||
Assert.assertEquals(1, spans.size)
|
||||
Assert.assertEquals(thingToHighlight.length, spans[0].end - spans[0].start)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun matchesSpanNotAtStart() {
|
||||
val inputSpannable = FakeSpannable(" $thingToHighlight")
|
||||
highlightSpans(inputSpannable, 0xffffff)
|
||||
val spans = inputSpannable.spans
|
||||
Assert.assertEquals(1, spans.size)
|
||||
Assert.assertEquals(thingToHighlight.length, spans[0].end - spans[0].start)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun doesNotMatchSpanEmbeddedInText() {
|
||||
val inputSpannable = FakeSpannable("aa${thingToHighlight}aa")
|
||||
highlightSpans(inputSpannable, 0xffffff)
|
||||
val spans = inputSpannable.spans
|
||||
Assert.assertTrue(spans.isEmpty())
|
||||
}
|
||||
|
||||
@Test
|
||||
fun doesNotMatchSpanEmbeddedInAnotherSpan() {
|
||||
val inputSpannable = FakeSpannable("@aa${thingToHighlight}aa")
|
||||
highlightSpans(inputSpannable, 0xffffff)
|
||||
val spans = inputSpannable.spans
|
||||
Assert.assertEquals(1, spans.size)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun spansDoNotOverlap() {
|
||||
val begin = "@begin"
|
||||
val end = "#end"
|
||||
val inputSpannable = FakeSpannable("$begin $thingToHighlight $end")
|
||||
highlightSpans(inputSpannable, 0xffffff)
|
||||
val spans = inputSpannable.spans
|
||||
Assert.assertEquals(3, spans.size)
|
||||
|
||||
val middleSpan = spans.single { span -> span.start > 0 && span.end < inputSpannable.lastIndex }
|
||||
Assert.assertEquals(begin.length + 1, middleSpan.start)
|
||||
Assert.assertEquals(inputSpannable.length - end.length - 1, middleSpan.end)
|
||||
}
|
||||
}
|
||||
|
||||
@RunWith(Parameterized::class)
|
||||
class HighlightingTestsForTag(
|
||||
private val text: String,
|
||||
private val expectedStartIndex: Int,
|
||||
private val expectedEndIndex: Int
|
||||
) {
|
||||
companion object {
|
||||
@Parameterized.Parameters(name = "{0}")
|
||||
@JvmStatic
|
||||
fun data(): Iterable<Any> {
|
||||
return listOf(
|
||||
arrayOf("#test", 0, 5),
|
||||
arrayOf(" #AfterSpace", 1, 12),
|
||||
arrayOf("#BeforeSpace ", 0, 12),
|
||||
arrayOf("@#after_at", 1, 10),
|
||||
arrayOf("あいうえお#after_hiragana", 5, 20),
|
||||
arrayOf("##DoubleHash", 1, 12),
|
||||
arrayOf("###TripleHash", 2, 13)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun matchExpectations() {
|
||||
val inputSpannable = FakeSpannable(text)
|
||||
highlightSpans(inputSpannable, 0xffffff)
|
||||
val spans = inputSpannable.spans
|
||||
Assert.assertEquals(1, spans.size)
|
||||
val span = spans.first()
|
||||
Assert.assertEquals(expectedStartIndex, span.start)
|
||||
Assert.assertEquals(expectedEndIndex, span.end)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Minimal [Spannable] test double that only records spans.
 *
 * Spans set through [setSpan] are stored in [spans]; the `flags` argument is ignored.
 * Character access ([get], [subSequence]) and the span query methods other than
 * [getSpans] are not implemented and throw [NotImplementedError].
 */
class FakeSpannable(private val text: String) : Spannable {
    /** A span object together with the range it was attached to. */
    class BoundedSpan(val span: Any?, val start: Int, val end: Int)

    /** Every span currently attached, in the order it was added. */
    val spans = mutableListOf<BoundedSpan>()

    override val length: Int
        get() = text.length

    override fun toString(): String = text

    override fun setSpan(what: Any?, start: Int, end: Int, flags: Int) {
        spans += BoundedSpan(what, start, end)
    }

    override fun removeSpan(what: Any?) {
        spans.removeIf { bounded -> bounded.span == what }
    }

    /** Returns the spans of [type] that lie entirely within `start..end`. */
    @Suppress("UNCHECKED_CAST")
    override fun <T : Any> getSpans(start: Int, end: Int, type: Class<T>): Array<T> {
        val hits = spans.filter { bounded ->
            bounded.start >= start && bounded.end <= end && type.isInstance(bounded.span)
        }
        return hits.map { bounded -> bounded.span }.toTypedArray() as Array<T>
    }

    override fun get(index: Int): Char = throw NotImplementedError()

    override fun subSequence(startIndex: Int, endIndex: Int): CharSequence = throw NotImplementedError()

    override fun getSpanStart(tag: Any?): Int = throw NotImplementedError()

    override fun getSpanEnd(tag: Any?): Int = throw NotImplementedError()

    override fun getSpanFlags(tag: Any?): Int = throw NotImplementedError()

    override fun nextSpanTransition(start: Int, limit: Int, type: Class<*>?): Int = throw NotImplementedError()
}
|
||||
}
|
||||
|
||||
/**
 * Minimal [Spannable] test double backed by a plain [String].
 *
 * Spans set through [setSpan] are recorded in [spans]; the `flags` argument is ignored.
 * Character access ([get], [subSequence], [length]) delegates to the backing string.
 * The span query methods other than [getSpans] are not implemented and throw
 * [NotImplementedError].
 */
class FakeSpannable(private val text: String) : Spannable {
    /** A span object together with the range it was attached to. */
    class BoundedSpan(val span: Any?, val start: Int, val end: Int)

    /** Every span currently attached, in the order it was added. */
    val spans = mutableListOf<BoundedSpan>()

    override val length: Int
        get() = text.length

    override fun get(index: Int): Char = text[index]

    override fun subSequence(startIndex: Int, endIndex: Int): CharSequence =
        text.subSequence(startIndex, endIndex)

    override fun toString(): String = text

    override fun setSpan(what: Any?, start: Int, end: Int, flags: Int) {
        spans += BoundedSpan(what, start, end)
    }

    override fun removeSpan(what: Any?) {
        spans.removeIf { candidate -> candidate.span == what }
    }

    /** Returns the spans of [type] that lie entirely within `start..end`. */
    @Suppress("UNCHECKED_CAST")
    override fun <T : Any> getSpans(start: Int, end: Int, type: Class<T>): Array<T> {
        val contained = spans.filter { candidate ->
            candidate.start >= start && candidate.end <= end && type.isInstance(candidate.span)
        }
        val spanObjects = contained.map { candidate -> candidate.span }
        return spanObjects.toTypedArray() as Array<T>
    }

    override fun getSpanStart(tag: Any?): Int = throw NotImplementedError()

    override fun getSpanEnd(tag: Any?): Int = throw NotImplementedError()

    override fun getSpanFlags(tag: Any?): Int = throw NotImplementedError()

    override fun nextSpanTransition(start: Int, limit: Int, type: Class<*>?): Int = throw NotImplementedError()
}
|
||||
|
|
|
@ -38,6 +38,7 @@ import com.keylesspalace.tusky.entity.InstanceConfiguration
|
|||
import com.keylesspalace.tusky.entity.InstanceV1
|
||||
import com.keylesspalace.tusky.entity.SearchResult
|
||||
import com.keylesspalace.tusky.entity.StatusConfiguration
|
||||
import com.keylesspalace.tusky.finders
|
||||
import com.keylesspalace.tusky.network.MastodonApi
|
||||
import com.squareup.moshi.adapter
|
||||
import java.util.Locale
|
||||
|
@ -171,6 +172,8 @@ class ComposeActivityTest {
|
|||
}
|
||||
})
|
||||
|
||||
activity.highlightFinders = finders
|
||||
|
||||
controller.create().start()
|
||||
shadowOf(getMainLooper()).idle()
|
||||
}
|
||||
|
@ -285,7 +288,7 @@ class ComposeActivityTest {
|
|||
|
||||
@Test
|
||||
fun whenTextContainsUrl_onlyEllipsizedURLIsCounted() {
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM"
|
||||
val additionalContent = "Check out this @image #search result: "
|
||||
insertSomeTextInContent(additionalContent + url)
|
||||
assertEquals(additionalContent.length + InstanceInfoRepository.DEFAULT_CHARACTERS_RESERVED_PER_URL, activity.calculateTextLength())
|
||||
|
@ -294,7 +297,7 @@ class ComposeActivityTest {
|
|||
@Test
|
||||
fun whenTextContainsShortUrls_allUrlsGetEllipsized() {
|
||||
val shortUrl = "https://tusky.app"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM"
|
||||
val additionalContent = " Check out this @image #search result: "
|
||||
insertSomeTextInContent(shortUrl + additionalContent + url)
|
||||
assertEquals(additionalContent.length + (InstanceInfoRepository.DEFAULT_CHARACTERS_RESERVED_PER_URL * 2), activity.calculateTextLength())
|
||||
|
@ -302,7 +305,7 @@ class ComposeActivityTest {
|
|||
|
||||
@Test
|
||||
fun whenTextContainsMultipleURLs_allURLsGetEllipsized() {
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM"
|
||||
val additionalContent = " Check out this @image #search result: "
|
||||
insertSomeTextInContent(url + additionalContent + url)
|
||||
assertEquals(additionalContent.length + (InstanceInfoRepository.DEFAULT_CHARACTERS_RESERVED_PER_URL * 2), activity.calculateTextLength())
|
||||
|
@ -310,7 +313,7 @@ class ComposeActivityTest {
|
|||
|
||||
@Test
|
||||
fun whenTextContainsUrl_onlyEllipsizedURLIsCounted_withCustomConfiguration() {
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM"
|
||||
val additionalContent = "Check out this @image #search result: "
|
||||
val customUrlLength = 16
|
||||
instanceResponseCallback = { getInstanceWithCustomConfiguration(null, customUrlLength) }
|
||||
|
@ -322,7 +325,7 @@ class ComposeActivityTest {
|
|||
|
||||
@Test
|
||||
fun whenTextContainsUrl_onlyEllipsizedURLIsCounted_withCustomConfigurationV1() {
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM"
|
||||
val additionalContent = "Check out this @image #search result: "
|
||||
val customUrlLength = 16
|
||||
instanceV1ResponseCallback = { getInstanceV1WithCustomConfiguration(configuration = getCustomInstanceConfiguration(charactersReservedPerUrl = customUrlLength)) }
|
||||
|
@ -335,7 +338,7 @@ class ComposeActivityTest {
|
|||
@Test
|
||||
fun whenTextContainsShortUrls_allUrlsGetEllipsized_withCustomConfiguration() {
|
||||
val shortUrl = "https://tusky.app"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM"
|
||||
val additionalContent = " Check out this @image #search result: "
|
||||
val customUrlLength = 18 // The intention is that this is longer than shortUrl.length
|
||||
instanceResponseCallback = { getInstanceWithCustomConfiguration(null, customUrlLength) }
|
||||
|
@ -348,7 +351,7 @@ class ComposeActivityTest {
|
|||
@Test
|
||||
fun whenTextContainsShortUrls_allUrlsGetEllipsized_withCustomConfigurationV1() {
|
||||
val shortUrl = "https://tusky.app"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM"
|
||||
val additionalContent = " Check out this @image #search result: "
|
||||
val customUrlLength = 18 // The intention is that this is longer than shortUrl.length
|
||||
instanceV1ResponseCallback = { getInstanceV1WithCustomConfiguration(configuration = getCustomInstanceConfiguration(charactersReservedPerUrl = customUrlLength)) }
|
||||
|
@ -360,7 +363,7 @@ class ComposeActivityTest {
|
|||
|
||||
@Test
|
||||
fun whenTextContainsMultipleURLs_allURLsGetEllipsized_withCustomConfiguration() {
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM"
|
||||
val additionalContent = " Check out this @image #search result: "
|
||||
val customUrlLength = 16
|
||||
instanceResponseCallback = { getInstanceWithCustomConfiguration(null, customUrlLength) }
|
||||
|
@ -372,7 +375,7 @@ class ComposeActivityTest {
|
|||
|
||||
@Test
|
||||
fun whenTextContainsMultipleURLs_allURLsGetEllipsized_withCustomConfigurationV1() {
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM:"
|
||||
val url = "https://www.google.dk/search?biw=1920&bih=990&tbm=isch&sa=1&ei=bmDrWuOoKMv6kwWOkIaoDQ&q=indiana+jones+i+hate+snakes+animated&oq=indiana+jones+i+hate+snakes+animated&gs_l=psy-ab.3...54174.55443.0.55553.9.7.0.0.0.0.255.333.1j0j1.2.0....0...1c.1.64.psy-ab..7.0.0....0.40G-kcDkC6A#imgdii=PSp15hQjN1JqvM:&imgrc=H0hyE2JW5wrpBM"
|
||||
val additionalContent = " Check out this @image #search result: "
|
||||
val customUrlLength = 16
|
||||
instanceV1ResponseCallback = { getInstanceV1WithCustomConfiguration(configuration = getCustomInstanceConfiguration(charactersReservedPerUrl = customUrlLength)) }
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
|
||||
package com.keylesspalace.tusky.components.compose
|
||||
|
||||
import com.keylesspalace.tusky.SpanUtilsTest
|
||||
import com.keylesspalace.tusky.FakeSpannable
|
||||
import com.keylesspalace.tusky.finders
|
||||
import com.keylesspalace.tusky.util.highlightSpans
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Test
|
||||
|
@ -43,21 +44,23 @@ class StatusLengthTest(
|
|||
// "@user@server" should be treated as "@user"
|
||||
arrayOf("123 @example@example.org", 12),
|
||||
// URLs under 23 chars are treated as 23 chars
|
||||
arrayOf("123 http://example.url", 27),
|
||||
arrayOf("123 http://example.org", 27),
|
||||
// URLs over 23 chars are treated as 23 chars
|
||||
arrayOf("123 http://urlthatislongerthan23characters.example.org", 27),
|
||||
// Short hashtags are treated as is
|
||||
arrayOf("123 #basictag", 13),
|
||||
// Long hashtags are *also* treated as is (not treated as 23, like URLs)
|
||||
arrayOf("123 #atagthatislongerthan23characters", 37)
|
||||
arrayOf("123 #atagthatislongerthan23characters", 37),
|
||||
// URLs may contain balanced parentheses; unbalanced ones are not counted as part of the URL https://github.com/tuskyapp/Tusky/issues/4425
|
||||
arrayOf("(https://en.wikipedia.org/wiki/Beethoven_(horse))", 25)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun statusLength_matchesExpectations() {
|
||||
val spannedText = SpanUtilsTest.FakeSpannable(text)
|
||||
highlightSpans(spannedText, 0)
|
||||
val spannedText = FakeSpannable(text)
|
||||
spannedText.highlightSpans(0, finders)
|
||||
|
||||
assertEquals(
|
||||
expectedLength,
|
||||
|
@ -67,10 +70,10 @@ class StatusLengthTest(
|
|||
|
||||
@Test
|
||||
fun statusLength_withCwText_matchesExpectations() {
|
||||
val spannedText = SpanUtilsTest.FakeSpannable(text)
|
||||
highlightSpans(spannedText, 0)
|
||||
val spannedText = FakeSpannable(text)
|
||||
spannedText.highlightSpans(0, finders)
|
||||
|
||||
val cwText = SpanUtilsTest.FakeSpannable(
|
||||
val cwText = FakeSpannable(
|
||||
"a @example@example.org #hashtagmention and http://example.org URL"
|
||||
)
|
||||
assertEquals(
|
||||
|
|
Loading…
Reference in New Issue