Pattern.compile に指定する正規表現を変換する

This commit is contained in:
tateisu 2020-02-04 11:04:07 +09:00
parent 5e12494ae2
commit 5658679180
39 changed files with 521 additions and 254 deletions

View File

@ -0,0 +1,84 @@
package jp.juggler.subwaytooter
import androidx.test.runner.AndroidJUnit4
import jp.juggler.util.asciiPattern
import jp.juggler.util.asciiPatternInternal
import org.junit.Assert
import org.junit.Assert.assertEquals
import org.junit.Test
import org.junit.runner.RunWith
// Android instrumentation tests fail with an "Empty tests" error unless the run configuration is edited.
/**
 * Instrumentation tests for the `asciiPatternInternal()` / `asciiPattern()` string extensions,
 * which rewrite `\w`, `\W`, `\d` and `\D` into explicit ASCII character classes before the
 * pattern is compiled.
 */
@RunWith(AndroidJUnit4::class)
class TestMisskeyMentionAndroid {

    //	@Test
    //	@Throws(Exception::class)
    //	fun test1() {
    //		fun findMention(str:String):String?{
    //			val m = TootAccount.reMention.matcher(str)
    //			return if(m.find()) m.group(0) else null
    //		}
    //		assertEquals(null, findMention(""))
    //		assertEquals(null, findMention("tateisu"))
    //		assertEquals("@tateisu", findMention("@tateisu"))
    //		assertEquals("@tateisu", findMention("@tateisuほげ"))
    //		assertEquals(
    //			"@tateisu@mastodon.juggler.jp",
    //			findMention("@tateisu@mastodon.juggler.jp")
    //		)
    //		assertEquals(
    //			"@tateisu@mastodon.juggler.jp",
    //			findMention("@tateisu@mastodon.juggler.jpほげ")
    //		)
    //		assertEquals("@tateisu", findMention("@tateisu@マストドン3.juggler.jp"))
    //		assertEquals(
    //			"@tateisu@xn--3-pfuzbe6htf.juggler.jp",
    //			findMention("@tateisu@xn--3-pfuzbe6htf.juggler.jp")
    //		)
    //	}

    /**
     * Verifies the textual rewrite done by `asciiPatternInternal()`: which escapes are
     * expanded into ASCII character classes and which input is passed through untouched.
     */
    @Test
    @Throws(Exception::class)
    fun testAsciiPatternInternal() {
        // Anything other than \w \d \W \D passes through unchanged.
        assertEquals("""ab\c\\""", """ab\c\\""".asciiPatternInternal())

        // \w and \W, both bare and inside a character set.
        assertEquals("""[A-Za-z0-9_]""", """\w""".asciiPatternInternal())
        assertEquals("""[A-Za-z0-9_-]""", """[\w-]""".asciiPatternInternal())
        assertEquals("""[^A-Za-z0-9_]""", """\W""".asciiPatternInternal())

        // \d and \D, both bare and inside a character set.
        assertEquals("""[0-9]""", """\d""".asciiPatternInternal())
        assertEquals("""[0-9:-]""", """[\d:-]""".asciiPatternInternal())
        assertEquals("""[^0-9]""", """\D""".asciiPatternInternal())

        // \W and \D inside a character set cannot be converted, so they pass through unchanged.
        assertEquals("""[\W]""", """[\W]""".asciiPatternInternal())
        assertEquals("""[\D]""", """[\D]""".asciiPatternInternal())

        // A trailing escape character with nothing after it also passes through unchanged.
        assertEquals("""\""", """\""".asciiPatternInternal())
    }

    /**
     * Verifies that patterns compiled through `asciiPattern()` match only ASCII word
     * characters and ASCII digits for `\w` and `\d`.
     */
    @Test
    @Throws(Exception::class)
    fun test2() {
        // val pu = Pattern.compile("""\w+""",Pattern.UNICODE_CHARACTER_CLASS)
        // on Android: java.lang.IllegalArgumentException: Unsupported flags: 256

        // Returns the first match of [pattern] in [input], or null when nothing matches.
        fun matchOrNull(pattern : String, input : String) : String? {
            // no UNICODE_CHARACTER_CLASS
            val m = pattern.asciiPattern().matcher(input)
            return if(m.find()) m.group(0) else null
        }

        // \w : ASCII letters, digits and underscore only.
        assertEquals(null, matchOrNull("\\w+", "-"))
        assertEquals(null, matchOrNull("\\w+", ""))
        assertEquals("a", matchOrNull("\\w+", "a"))
        assertEquals("a", matchOrNull("\\w+", "aあ"))
        // A lone non-ASCII letter must not match at all.
        assertEquals(null, matchOrNull("\\w+", "あ"))
        assertEquals("0", matchOrNull("\\w+", "0"))
        // U+FF10 FULLWIDTH DIGIT ZERO is not an ASCII word character.
        assertEquals(null, matchOrNull("\\w+", "\uFF10"))

        // \d : ASCII digits only.
        assertEquals("0", matchOrNull("\\d+", "0"))
        assertEquals(null, matchOrNull("\\d+", ""))
        // U+FF10 FULLWIDTH DIGIT ZERO is not an ASCII digit.
        assertEquals(null, matchOrNull("\\d+", "\uFF10"))
    }
}

View File

@ -20,7 +20,7 @@ class TestDuplicateMap {
MockContext(),
SavedAccount(
db_id = 1,
acct = "user1@host1",
acctArg = "user1@host1",
hostArg = null
)
)

View File

@ -4,6 +4,7 @@ package jp.juggler.subwaytooter.api
import androidx.test.InstrumentationRegistry
import androidx.test.runner.AndroidJUnit4
import jp.juggler.subwaytooter.api.entity.Host
import jp.juggler.subwaytooter.api.entity.TootInstance
import jp.juggler.subwaytooter.table.SavedAccount
import jp.juggler.subwaytooter.util.CurrentCallCallback
@ -993,7 +994,7 @@ class TestTootApiClient {
httpClient = createHttpClientNormal(),
callback = callback
)
val instance = "unit-test"
val instance = Host.parse("unit-test")
client.instance = instance
val clientName = "SubwayTooterUnitTest"
val scope_string = "read+write+follow+push"
@ -1070,7 +1071,7 @@ class TestTootApiClient {
httpClient = createHttpClientNormal(),
callback = callback
)
val instance = "unit-test"
val instance = Host.parse("unit-test")
client.instance = instance
val (instanceInfo, instanceResult) = TootInstance.get(client)
assertNotNull(instanceInfo)
@ -1101,7 +1102,7 @@ class TestTootApiClient {
val accessInfo = SavedAccount(
db_id = 1,
acct = "user1@host1",
acctArg = "user1@host1",
hostArg = null,
token_info = tokenInfo
)
@ -1128,7 +1129,7 @@ class TestTootApiClient {
val accessInfo = SavedAccount(
db_id = 1,
acct = "user1@host1",
acctArg = "user1@host1",
hostArg = null,
token_info = tokenInfo
)

View File

@ -25,8 +25,8 @@ class TestTootAccount {
assertEquals("host",TootAccount.findHostFromUrl("user@HOST",null,null))
// find from accessHost
assertEquals("",TootAccount.findHostFromUrl(null,"",null))
assertEquals("any string is allowed",TootAccount.findHostFromUrl(null,"any string is allowed",null))
assertEquals("",TootAccount.findHostFromUrl(null,Host.parse(""),null))
assertEquals("any string is allowed",TootAccount.findHostFromUrl(null,Host.parse("any string is allowed"),null))
// find from url
assertEquals(null,TootAccount.findHostFromUrl(null,null,""))

View File

@ -2,6 +2,7 @@ package jp.juggler.subwaytooter.util
import androidx.test.InstrumentationRegistry
import androidx.test.runner.AndroidJUnit4
import jp.juggler.subwaytooter.api.entity.Host
import jp.juggler.util.neatSpaces
import org.junit.Assert.assertEquals
import org.junit.Test
@ -25,7 +26,7 @@ class TestHtmlDecoder {
// Context of the app under test.
val appContext = InstrumentationRegistry.getTargetContext()
val options = DecodeOptions(appContext,LinkHelper.newLinkHelper("instance.test"))
val options = DecodeOptions(appContext,LinkHelper.newLinkHelper(Host.parse("instance.test")))
val html = """
日本語で楽しめるMastodonサーバを提供しています

View File

@ -1310,7 +1310,7 @@ class ActAccountSetting
val sv = etNote.text.toString()
if(! bConfirmed) {
val length = TootStatus.countText(sv)
val length = TootAccount.countText(sv)
if(length > max_length_note) {
AlertDialog.Builder(this)
.setMessage(

View File

@ -6,6 +6,8 @@ import android.view.ViewGroup
import android.widget.*
import androidx.appcompat.app.AppCompatActivity
import jp.juggler.util.LogCategory
import jp.juggler.util.asciiPattern
import jp.juggler.util.replaceFirst
import kotlinx.coroutines.*
import kotlin.coroutines.CoroutineContext
@ -51,14 +53,15 @@ class ActDrawableList : AppCompatActivity(), CoroutineScope {
private fun load() = launch {
try {
val rePackageSpec = """.+/""".toRegex()
val reSkipName = """^(abc_|avd_|btn_checkbox_|btn_radio_|googleg_|ic_keyboard_arrow_|ic_menu_arrow_|notification_|common_|emj_|cpv_|design_|exo_|mtrl_|ic_mtrl_)""".toRegex()
val rePackageSpec = """.+/""".asciiPattern()
val reSkipName = """^(abc_|avd_|btn_checkbox_|btn_radio_|googleg_|ic_keyboard_arrow_|ic_menu_arrow_|notification_|common_|emj_|cpv_|design_|exo_|mtrl_|ic_mtrl_)"""
.asciiPattern()
val list = withContext(Dispatchers.IO) {
R.drawable::class.java.fields
.mapNotNull {
val id = it.get(null) as? Int ?: return@mapNotNull null
val name = resources.getResourceName(id).replaceFirst(rePackageSpec, "")
if(reSkipName.find(name)!=null) return@mapNotNull null
if(reSkipName.matcher(name).find() ) return@mapNotNull null
MyItem(id, name)
}
.toMutableList()

View File

@ -747,8 +747,8 @@ class ActMediaViewer : AppCompatActivity(), View.OnClickListener {
if(fileName == null) {
fileName = url
.replaceFirst("https?://".toRegex(), "")
.replace("[^.\\w\\d]+".toRegex(), "-")
.replaceFirst("https?://".asciiPattern(), "")
.replaceAll("[^.\\w\\d]+".asciiPattern(), "-")
}
if(fileName.length >= 20) fileName = fileName.substring(fileName.length - 20)

View File

@ -1346,11 +1346,11 @@ class ActPost : AppCompatActivity(),
private fun updateTextCount() {
var length = 0
length += TootStatus.countText(
length += TootAccount.countText(
EmojiDecoder.decodeShortCode(etContent.text.toString())
)
length += TootStatus.countText(
length += TootAccount.countText(
if(cbContentWarning.isChecked)
EmojiDecoder.decodeShortCode(etContentWarning.text.toString())
else
@ -1361,7 +1361,7 @@ class ActPost : AppCompatActivity(),
fun checkEnqueteLength() {
for(et in list_etChoice) {
length += TootStatus.countText(
length += TootAccount.countText(
EmojiDecoder.decodeShortCode(et.text.toString())
)
}
@ -2165,7 +2165,7 @@ class ActPost : AppCompatActivity(),
fun fixDocumentName(s : String) : String {
val s_length = s.length
val m = Pattern.compile("""([^\x20-\x7f])""").matcher(s)
val m = """([^\x20-\x7f])""".asciiPattern().matcher(s)
m.reset()
val sb = StringBuilder(s_length)
var lastEnd = 0

View File

@ -219,7 +219,7 @@ class App1 : Application() {
// return maxSize * 1024;
// }
val reNotAllowedInUserAgent : Pattern = Pattern.compile("[^\\x21-\\x7e]+")
val reNotAllowedInUserAgent ="[^\\x21-\\x7e]+".asciiPattern()
val userAgentDefault =
"SubwayTooter/${BuildConfig.VERSION_NAME} Android/${Build.VERSION.RELEASE}"

View File

@ -432,7 +432,7 @@ object AppDataExporter {
}
}
private val reBackgroundImage = Pattern.compile("background-image/(.+)")
private val reBackgroundImage = "background-image/(.+)".asciiPattern()
// エントリが背景画像のソレなら真を返す
// column.column_bg_image を更新する場合がある

View File

@ -54,7 +54,7 @@ class AppState(internal val context : Context, internal val pref : SharedPrefere
private const val tts_speak_wait_expire = 1000L * 100
private val random = Random()
private val reSpaces = Pattern.compile("[\\s ]+")
private val reSpaces = "[\\s ]+".asciiPattern()
private var utteranceIdSeed = 0

View File

@ -246,16 +246,13 @@ class Column(
private val channelIdSeed = AtomicInteger(0)
// より古いデータの取得に使う
internal val reMaxId =
Pattern.compile("""[&?]max_id=([^&?;\s]+)""")
internal val reMaxId ="""[&?]max_id=([^&?;\s]+)""".asciiPattern()
// より新しいデータの取得に使う (マストドン2.6.0以降)
private val reMinId =
Pattern.compile("""[&?]min_id=([^&?;\s]+)""")
private val reMinId ="""[&?]min_id=([^&?;\s]+)""".asciiPattern()
// より新しいデータの取得に使う(マストドン2.6.0未満)
private val reSinceId =
Pattern.compile("""[&?]since_id=([^&?;\s]+)""")
private val reSinceId ="""[&?]since_id=([^&?;\s]+)""".asciiPattern()
val COLUMN_REGEX_FILTER_DEFAULT : (CharSequence?) -> Boolean = { false }
@ -1468,7 +1465,7 @@ class Column(
val regex_text = this.regex_text
if(regex_text.isNotEmpty()) {
try {
val re = Pattern.compile(regex_text)
val re = regex_text.asciiPattern()
column_regex_filter =
{ text : CharSequence? ->
if(text?.isEmpty() != false) false else re.matcher(

View File

@ -184,7 +184,7 @@ class ColumnViewHolder(
if(src.isEmpty()) {
return null
}
val m = Pattern.compile(src).matcher("")
val m = src.asciiPattern().matcher("")
if(m.find()) {
// 空文字列にマッチする正規表現はエラー扱いにする
// そうしないとCWの警告テキストにマッチしてしまう

View File

@ -40,8 +40,7 @@ internal class StreamReader(
const val MISSKEY_ALIVE_INTERVAL = 60000L
@Suppress("HasPlatformType")
val reAuthorizeError = Pattern.compile("authorize", Pattern.CASE_INSENSITIVE)
val reAuthorizeError = "authorize".asciiPattern(Pattern.CASE_INSENSITIVE )
}
private val reader_list = LinkedList<Reader>()

View File

@ -17,7 +17,7 @@ import java.util.regex.Pattern
object Action_ListMember {
private val reFollowError = Pattern.compile("follow", Pattern.CASE_INSENSITIVE)
private val reFollowError ="follow".asciiPattern(Pattern.CASE_INSENSITIVE)
interface Callback {
fun onListMemberUpdated(willRegistered : Boolean, bSuccess : Boolean)

View File

@ -14,7 +14,6 @@ import jp.juggler.subwaytooter.util.SavedAccountCallback
import jp.juggler.util.*
import okhttp3.Request
import java.util.*
import java.util.regex.Pattern
import kotlin.math.max
object Action_Toot {
@ -22,7 +21,8 @@ object Action_Toot {
private val log = LogCategory("Action_Toot")
private val reDetailedStatusTime =
Pattern.compile("""<a\b[^>]*?\bdetailed-status__datetime\b[^>]*href="https://[^/]+/@[^/]+/([^\s?#/"]+)""")
"""<a\b[^>]*?\bdetailed-status__datetime\b[^>]*href="https://[^/]+/@[^/]+/([^\s?#/"]+)"""
.asciiPattern()
// アカウントを選んでお気に入り
fun favouriteFromAnotherAccount(

View File

@ -66,9 +66,9 @@ class TootApiClient(
private const val NO_INFORMATION = "(no information)"
private val reStartJsonArray = Pattern.compile("""\A\s*\[""")
private val reStartJsonObject = Pattern.compile("""\A\s*\{""")
private val reWhiteSpace = Pattern.compile("""\s+""")
private val reStartJsonArray = """\A\s*\[""".asciiPattern()
private val reStartJsonObject = """\A\s*\{""".asciiPattern()
private val reWhiteSpace = """\s+""".asciiPattern()
private const val mspTokenUrl = "http://mastodonsearch.jp/api/v1.0.1/utoken"
private const val mspSearchUrl = "http://mastodonsearch.jp/api/v1.0.1/cross"

View File

@ -1,11 +1,8 @@
package jp.juggler.subwaytooter.api
import jp.juggler.util.JsonArray
import jp.juggler.util.JsonObject
import jp.juggler.util.*
import java.util.regex.Pattern
import jp.juggler.util.LogCategory
import jp.juggler.util.groupEx
import okhttp3.Response
import okhttp3.WebSocket
@ -18,7 +15,7 @@ open class TootApiResult(
companion object {
private val log = LogCategory("TootApiResult")
private val reLinkURL = Pattern.compile("<([^>]+)>;\\s*rel=\"([^\"]+)\"")
private val reLinkURL = """<([^>]+)>;\s*rel="([^"]+)"""".asciiPattern()
private const val NO_INSTANCE = "missing instance name"

View File

@ -174,7 +174,7 @@ open class TootAccount(parser : TootParser, src : JsonObject) {
this.time_created_at = TootStatus.parseTime(this.created_at)
// https://github.com/syuilo/misskey/blob/develop/src/client/scripts/get-static-image-url.ts
fun String.getStaticImageUrl():String?{
fun String.getStaticImageUrl() : String? {
val uri = this.mayUri() ?: return null
val dummy = "${uri.encodedAuthority}${uri.encodedPath}"
return "https://${parser.linkHelper.host?.ascii}/proxy/$dummy?url=${encodePercent()}&static=1"
@ -466,29 +466,81 @@ open class TootAccount(parser : TootParser, src : JsonObject) {
companion object {
private val log = LogCategory("TootAccount")
internal val reWhitespace : Pattern = Pattern.compile("[\\s\\t\\x0d\\x0a]+")
internal val reWhitespace = "[\\s\\t\\x0d\\x0a]+".asciiPattern()
// noteをディレクトリに表示する際、制御文字や空白を変換する
private val reNoteLineFeed : Pattern = Pattern.compile("""[\x00-\x20\x7f ]+""")
private val reNoteLineFeed : Pattern = """[\x00-\x20\x7f ]+""".asciiPattern()
// IDNドメインを含むホスト名の正規表現
const val reHostIdn = """(?:(?:[\p{L}\p{N}][\p{L}\p{N}-_]*\.)+[\p{L}\p{N}]{2,})"""
internal val reHostInUrl : Pattern = """\Ahttps://($reHostIdn)/"""
.asciiPattern()
// 文字数カウントに使う正規表現
private val reCountLink = """(https?://$reHostIdn[\w/:%#@${'$'}&?!()\[\]~.=+\-]*)"""
.asciiPattern()
// URLs in a post are counted as 23 characters, so every link is replaced by
// exactly 23 spaces before the text length is measured.
// String.repeat is used instead of joinToString because joinToString inserts
// its default ", " separator between elements, which produced 67 characters.
private val strUrlReplacement = " ".repeat(23)
// \p{L} : アルファベット (Letter)。
//   Ll(小文字)、Lm(擬似文字)、Lo(その他の文字)、Lt(タイトル文字)、Lu(大文字アルファベット)を含む
// \p{M} : 記号 (Mark)
// \p{Nd} : 10 進数字 (Decimal number)
// \p{Pc} : 連結用句読記号 (Connector punctuation)
// rubyの [:word:] 単語構成文字 (Letter | Mark | Decimal_Number | Connector_Punctuation)
const val reRubyWord = """\p{L}\p{M}\p{Nd}\p{Pc}"""
// rubyの [:alpha:] : 英字 (Letter | Mark)
const val reRubyAlpha = """\p{L}\p{M}"""
private const val reMastodonUserName = """[A-Za-z0-9_]+(?:[A-Za-z0-9_.-]+[A-Za-z0-9_]+)?"""
private const val reMastodonMention =
"""(?<=^|[^/$reRubyWord])@(($reMastodonUserName)(?:@[$reRubyWord.-]+[A-Za-z0-9]+)?)"""
private val reCountMention = reMastodonMention.asciiPattern()
/**
 * Returns the length of [s] as used for the post character counter.
 *
 * Every match of [reCountLink] is replaced by the fixed placeholder
 * [strUrlReplacement], and every match of [reCountMention] is replaced by
 * "@" followed by capture group 2 of the mention pattern (the user name
 * without its host part). The result is measured in Unicode code points.
 */
fun countText(s : String) : Int {
    val linksNormalized = s.replaceAll(reCountLink, strUrlReplacement)
    val mentionsShortened = linksNormalized.replaceAll(reCountMention, "@$2")
    return mentionsShortened.codePointCount()
}
// MisskeyのMFMのメンションのドメイン部分はIDN非対応
private const val reMisskeyHost = """\w[\w.-]*\w"""
// https://misskey.io/@tateisu@%E3%83%9E%E3%82%B9%E3%83%88%E3%83%89%E3%83%B33.juggler.jp
// のようなURLがMisskeyのメンションから生成されることがある
// %エンコーディングのデコードが必要
private const val reMisskeyHostEncoded = """[\w%][\w.%-]*[\w%]"""
// MFMのメンション @username @username@host
// (Mastodonのカラムでは使われていない)
internal val reMention = Pattern.compile("""\A@(\w+(?:[\w-]*\w)?)(?:@(\w[\w.-]*\w))?""")
// MisskeyのMFMはIDNをサポートしていない
private val reMisskeyMentionBase = """@(\w+(?:[\w-]*\w)?)(?:@($reMisskeyHost))?"""
.asciiPattern()
// MFMパース時に使う
internal val reMisskeyMentionMFM = """\A$reMisskeyMentionBase"""
.asciiPattern()
// for IDN domain... Misskeyはまだサポートしていない
// internal val reMention = Pattern.compile("""\A@(\w+(?:[\w-]*\w)?)(?:@([${TootTag.w}][${TootTag.w}.-]*[${TootTag.w}]))?""")
internal val reUrlHost : Pattern =
Pattern.compile("""\Ahttps://(\w[\w.-]*\w)/""")
// 投稿送信時にメンションを見つけてuserIdを調べるために使う
internal val reMisskeyMentionPost = """(?:\A|\s)$reMisskeyMentionBase"""
.asciiPattern()
// host, user ,(instance)
// Misskeyだけではないのでusernameの定義が違う
internal val reAccountUrl : Pattern =
Pattern.compile("""\Ahttps://(\w[\w.-]*\w)/@(\w+[\w-]*)(?:@(\w[\w.-]*\w))?(?=\z|[?#])""")
internal val reAccountUrl =
"""\Ahttps://($reHostIdn)/@(\w+[\w-]*)(?:@($reMisskeyHostEncoded))?(?=\z|[?#])"""
.asciiPattern()
// host,user
internal val reAccountUrl2 : Pattern =
Pattern.compile("""\Ahttps://(\w[\w.-]*\w)/users/(\w|\w+[\w-]*\w)(?=\z|[?#])""")
internal val reAccountUrl2 =
"""\Ahttps://($reHostIdn)/users/(\w|\w+[\w-]*\w)(?=\z|[?#])"""
.asciiPattern()
fun getAcctFromUrl(url : String?) : Acct? {
@ -497,7 +549,7 @@ open class TootAccount(parser : TootParser, src : JsonObject) {
var m = reAccountUrl.matcher(url)
if(m.find()) {
val host = m.groupEx(1)
val user = m.groupEx(2) !!.decodePercent()
val user = m.groupEx(2) !!
val instance = m.groupEx(3)?.decodePercent()
return Acct.parse(user, instance?.notEmpty() ?: host)
}
@ -505,7 +557,7 @@ open class TootAccount(parser : TootParser, src : JsonObject) {
m = reAccountUrl2.matcher(url)
if(m.find()) {
val host = m.groupEx(1)
val user = m.groupEx(2) !!.decodePercent()
val user = m.groupEx(2) !!
return Acct.parse(user, host)
}
@ -529,7 +581,7 @@ open class TootAccount(parser : TootParser, src : JsonObject) {
// acctから調べる
if(acctArg != null) {
val acct = Acct.parse(acctArg)
if( acct.host != null) return acct.host
if(acct.host != null) return acct.host
}
// accessHostから調べる
@ -619,6 +671,5 @@ open class TootAccount(parser : TootParser, src : JsonObject) {
return if(dst?.isNotEmpty() == true) dst else null
}
}
}

View File

@ -10,6 +10,7 @@ import jp.juggler.subwaytooter.table.SavedAccount
import jp.juggler.subwaytooter.util.LinkHelper
import jp.juggler.subwaytooter.util.VersionString
import jp.juggler.util.JsonObject
import jp.juggler.util.asciiPattern
import jp.juggler.util.groupEx
import jp.juggler.util.toPostRequestBuilder
import okhttp3.Request
@ -168,8 +169,8 @@ class TootInstance(parser : TootParser, src : JsonObject) {
}
companion object {
private val rePleroma = Pattern.compile("""\bpleroma\b""", Pattern.CASE_INSENSITIVE)
private val rePixelfed = Pattern.compile("""\bpixelfed\b""", Pattern.CASE_INSENSITIVE)
private val rePleroma = """\bpleroma\b""".asciiPattern(Pattern.CASE_INSENSITIVE)
private val rePixelfed = """\bpixelfed\b""".asciiPattern( Pattern.CASE_INSENSITIVE)
val VERSION_1_6 = VersionString("1.6")
val VERSION_2_4_0_rc1 = VersionString("2.4.0rc1")
@ -184,7 +185,7 @@ class TootInstance(parser : TootParser, src : JsonObject) {
val MISSKEY_VERSION_11 = VersionString("11.0")
private val reDigits = Pattern.compile("(\\d+)")
private val reDigits = """(\d+)""".asciiPattern()
private const val EXPIRE = (1000 * 3600).toLong()

View File

@ -44,7 +44,7 @@ class TootList(parser:TootParser,src : JsonObject): TimelineItem(), Comparable<T
companion object {
private var log = LogCategory("TootList")
private val reNumber = Pattern.compile("(\\d+)")
private val reNumber = """(\d+)""".asciiPattern()
private fun makeTitleForSort(title : String?) : ArrayList<Any> {
val list = ArrayList<Any>()

View File

@ -3,7 +3,6 @@ package jp.juggler.subwaytooter.api.entity
import jp.juggler.subwaytooter.api.TootParser
import jp.juggler.subwaytooter.api.entity.TootAnnouncement.Reaction
import jp.juggler.util.*
import java.util.regex.Pattern
object TootPayload {
@ -11,8 +10,7 @@ object TootPayload {
private const val PAYLOAD = "payload"
@Suppress("HasPlatformType")
private val reNumber = Pattern.compile("([-]?\\d+)")
private val reNumber = "([-]?\\d+)".asciiPattern()
// ストリーミングAPIのペイロード部分をTootStatus,TootNotification,整数IDのどれかに解釈する
fun parsePayload(

View File

@ -221,7 +221,7 @@ class TootPolls private constructor(
const val TYPE_ENQUETE_RESULT = "enquete_result"
@Suppress("HasPlatformType")
private val reWhitespace = Pattern.compile("[\\s\\t\\x0d\\x0a]+")
private val reWhitespace = """[\s\t\x0d\x0a]+""".asciiPattern()
fun parse(
parser : TootParser,

View File

@ -307,10 +307,10 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
val sv = src.string("cw")?.cleanCW()
this.spoiler_text = when {
sv == null -> "" // CWなし
sv.replace('\u0323',' ').isBlank() ->
sv.replace('\u0323', ' ').isBlank() ->
parser.context.getString(R.string.blank_cw)
else -> sv
}
@ -804,9 +804,10 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
class FindStatusIdFromUrlResult(
val statusId : EntityId?, // may null
hostArg:String,
hostArg : String,
val url : String
){
) {
val host = Host.parse(hostArg)
}
@ -825,42 +826,40 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
val EMPTY_SPANNABLE = SpannableString("")
val reHostIdn = TootAccount.reHostIdn
// OStatus
private val reTootUriOS = Pattern.compile(
"tag:([^,]*),[^:]*:objectId=([^:?#/\\s]+):objectType=Status",
Pattern.CASE_INSENSITIVE
)
private val reTootUriOS = """tag:([^,]*),[^:]*:objectId=([^:?#/\s]+):objectType=Status"""
.asciiPattern(Pattern.CASE_INSENSITIVE)
// ActivityPub 1
private val reTootUriAP1 =
Pattern.compile("https?://([^/]+)/users/[A-Za-z0-9_]+/statuses/([^?#/\\s]+)")
private val reTootUriAP1 = """https?://([^/]+)/users/\w+/statuses/([^?#/\s]+)"""
.asciiPattern()
// ActivityPub 2
private val reTootUriAP2 =
Pattern.compile("https?://([^/]+)/@[A-Za-z0-9_]+/([^?#/\\s]+)")
private val reTootUriAP2 = """https?://([^/]+)/@\w+/([^?#/\s]+)"""
.asciiPattern()
// 公開ステータスページのURL マストドン
private val reStatusPage =
Pattern.compile("""\Ahttps://([^/]+)/@([A-Za-z0-9_]+)/([^?#/\s]+)(?:\z|[?#])""")
private val reStatusPage ="""\Ahttps://([^/]+)/@(\w+)/([^?#/\s]+)(?:\z|[?#])"""
.asciiPattern()
// 公開ステータスページのURL Misskey
internal val reStatusPageMisskey = Pattern.compile(
"""\Ahttps://([^/]+)/notes/([0-9a-f]{24}|[0-9a-z]{10})\b""",
Pattern.CASE_INSENSITIVE
)
internal val reStatusPageMisskey = """\Ahttps://([^/]+)/notes/([0-9a-f]{24}|[0-9a-z]{10})\b"""
.asciiPattern(Pattern.CASE_INSENSITIVE )
// PleromaのStatusのUri
private val reStatusPageObjects =
Pattern.compile("""\Ahttps://([^/]+)/objects/([^?#/\s]+)(?:\z|[?#])""")
private val reStatusPageObjects ="""\Ahttps://([^/]+)/objects/([^?#/\s]+)(?:\z|[?#])"""
.asciiPattern()
// PleromaのStatusの公開ページ
private val reStatusPageNotice =
Pattern.compile("""\Ahttps://([^/]+)/notice/([^?#/\s]+)(?:\z|[?#])""")
private val reStatusPageNotice ="""\Ahttps://([^/]+)/notice/([^?#/\s]+)(?:\z|[?#])"""
.asciiPattern()
// PixelfedのStatusの公開ページ
// https://pixelfed.tokyo/p/tateisu/84169185147621376
private val reStatusPagePixelfed =
Pattern.compile("""\Ahttps://([^/]+)/p/([A-Za-z0-9_]+)/([^?#/\s]+)(?:\z|[?#])""")
private val reStatusPagePixelfed ="""\Ahttps://([^/]+)/p/([A-Za-z0-9_]+)/([^?#/\s]+)(?:\z|[?#])"""
.asciiPattern()
// returns null or pair( status_id, host ,url )
fun String.findStatusIdFromUrl() : FindStatusIdFromUrlResult? {
@ -934,11 +933,11 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
private val tz_utc = TimeZone.getTimeZone("UTC")
private val reTime =
Pattern.compile("\\A(\\d+)\\D+(\\d+)\\D+(\\d+)\\D+(\\d+)\\D+(\\d+)\\D+(\\d+)\\D+(\\d+)")
private val reTime ="""\A(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)"""
.asciiPattern()
private val reMSPTime =
Pattern.compile("\\A(\\d+)\\D+(\\d+)\\D+(\\d+)\\D+(\\d+)\\D+(\\d+)\\D+(\\d+)")
private val reMSPTime = """\A(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)"""
.asciiPattern()
fun parseTime(strTime : String?) : Long {
if(strTime != null && strTime.isNotEmpty()) {
@ -998,7 +997,7 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
@SuppressLint("SimpleDateFormat")
internal val date_format = SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
@SuppressLint("SimpleDateFormat")
internal val date_format2 = SimpleDateFormat("yyyy-MM-dd")
@ -1056,21 +1055,21 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
}
}
return formatDate(t,date_format,omitZeroSecond = false,omitYear = false)
return formatDate(t, date_format, omitZeroSecond = false, omitYear = false)
}
// 告知の開始/終了日付
private fun formatDate(
t : Long,
format:SimpleDateFormat ,
omitZeroSecond:Boolean,
omitYear:Boolean
format : SimpleDateFormat,
omitZeroSecond : Boolean,
omitYear : Boolean
) : String {
var dateTarget = format.format(Date(t))
// 秒の部分を省略する
if( omitZeroSecond && dateTarget.endsWith(":00")){
dateTarget = dateTarget.substring(0,dateTarget.length -3)
if(omitZeroSecond && dateTarget.endsWith(":00")) {
dateTarget = dateTarget.substring(0, dateTarget.length - 3)
}
// 年の部分が現在と同じなら省略する
@ -1084,31 +1083,31 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
dateTarget = dateTarget.substring(delm + 1)
}
}
return dateTarget
}
fun formatTimeRange(start : Long, end : Long, allDay : Boolean):Pair<String,String>{
fun formatTimeRange(start : Long, end : Long, allDay : Boolean) : Pair<String, String> {
val strStart = when {
start <= 0L -> ""
allDay-> formatDate(start,date_format2,omitZeroSecond = false,omitYear = true)
else -> formatDate(start, date_format,omitZeroSecond = true,omitYear = true)
allDay -> formatDate(start, date_format2, omitZeroSecond = false, omitYear = true)
else -> formatDate(start, date_format, omitZeroSecond = true, omitYear = true)
}
val strEnd = when {
end <= 0L -> ""
allDay-> formatDate(end,date_format2,omitZeroSecond = false,omitYear = true)
else -> formatDate(end, date_format,omitZeroSecond = true,omitYear = true)
allDay -> formatDate(end, date_format2, omitZeroSecond = false, omitYear = true)
else -> formatDate(end, date_format, omitZeroSecond = true, omitYear = true)
}
// 終了日は先頭と同じ部分を省略する
var skip = 0
for(i in 0 until min(strStart.length,strEnd.length)){
val c =strStart[i]
if(c != strEnd[i] ) break
if( c.isDigit() ) continue
skip= i+1
if( c == ' ') break // 時間以降は省略しない
for(i in 0 until min(strStart.length, strEnd.length)) {
val c = strStart[i]
if(c != strEnd[i]) break
if(c.isDigit()) continue
skip = i + 1
if(c == ' ') break // 時間以降は省略しない
}
return Pair( strStart,strEnd.substring(skip,strEnd.length))
return Pair(strStart, strEnd.substring(skip, strEnd.length))
}
fun parseStringArray(src : JsonArray?) : ArrayList<String>? {
@ -1126,14 +1125,14 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
}
private fun parseReactionCounts(src : JsonObject?) : LinkedHashMap<String, Int>? {
// カスタム絵文字などが含まれるようになったので、内容のバリデーションはできない
var rv : LinkedHashMap<String, Int>? = null
src?.entries?.forEach { entry ->
val key = entry.key.notEmpty() ?: return@forEach
val v = src.int(key)?.notZero() ?: return@forEach
if(rv == null) rv = LinkedHashMap()
rv!![key] = v
rv !![key] = v
}
return rv
}
@ -1152,7 +1151,6 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
return rv
}
fun validStatusId(src : EntityId?) : EntityId? =
when {
src == null -> null
@ -1237,19 +1235,6 @@ class TootStatus(parser : TootParser, src : JsonObject) : TimelineItem() {
return null
}
private val reLinkUrl = Pattern.compile("""(https?://[\w/:%#@${'$'}&?!()\[\]~.=+\-]+)""")
private val reMention = Pattern.compile(
"""(?<=^|[^/\w\p{Pc}])@((\w+([\w.-]+\w+)?)(?:@[a-z0-9.\-]+[a-z0-9]+)?)""",
Pattern.CASE_INSENSITIVE
)
private val strUrlReplacement = (0 until 23).map { ' ' }.joinToString()
fun countText(s : String) : Int {
return s
.replaceAll(reLinkUrl, strUrlReplacement)
.replaceAll(reMention, "@$2")
.codePointCount()
}
}
}

View File

@ -104,21 +104,16 @@ open class TootTag constructor(
return result
}
// \p{L} : アルファベット (Letter)。
//   Ll(小文字)、Lm(擬似文字)、Lo(その他の文字)、Lt(タイトル文字)、Lu(大文字アルファベット)を含む
// \p{M} : 記号 (Mark)
// \p{Nd} : 10 進数字 (Decimal number)
// \p{Pc} : 連結用句読記号 (Connector punctuation)
private const val w = TootAccount.reRubyWord
private const val a = TootAccount.reRubyAlpha
private const val s = "_\\u00B7\\u200c" // separators
/**
 * Builds the pattern used to find Mastodon hashtags in raw text.
 *
 * The tag name is assembled from the character-set fragments `w`, `a` and `s`
 * and wrapped so that capture group 1 holds the tag without the leading '#'.
 * The '#' must be at the start of the input or follow a character that is
 * not a word character, '/' or ')'.
 */
private fun generateMastodonTagPattern() : Pattern =
    """([_$w][$s$w]*[$s$a][$s$w]*[_$w])|([_$w]*[$a][_$w]*)"""
        .let { reMastodonTagName ->
            """(?:^|[^\w/)])#($reMastodonTagName)""".asciiPattern()
        }
// rubyの [:word:] 単語構成文字 (Letter | Mark | Decimal_Number | Connector_Punctuation)
const val w = """\p{L}\p{M}\p{Nd}\p{Pc}"""
// rubyの [:alpha:] : 英字 (Letter | Mark)
private const val a = """\p{L}\p{M}"""
// 2019/7/20 https://github.com/tootsuite/mastodon/pull/11363/files
private val reTagMastodon : Pattern =
Pattern.compile("""(?:^|[^\w)])#([_$w][·_$w]*[·_$a][·_$w]*[_$w]|[_$w]*[$a][_$w]*)""")
private val reMastodonTag = generateMastodonTagPattern()
// https://medium.com/@alice/some-article#.abcdef123 => タグにならない
// https://en.wikipedia.org/wiki/Ghostbusters_(song)#Lawsuit => タグにならない
@ -135,8 +130,8 @@ open class TootTag constructor(
// タグに使えない文字
// 入力補完用なのでやや緩め
private val reCharsNotTagMastodon = Pattern.compile("""[^·_$w$a]""")
private val reCharsNotTagMisskey = Pattern.compile("""[\s.,!?'${'"'}:/\[\]【】]""")
private val reCharsNotTagMastodon = """[^$s$w$a]""".asciiPattern()
private val reCharsNotTagMisskey = """[\s.,!?'${'"'}:/\[\]【】]""".asciiPattern()
// find hashtags in content text(raw)
// returns null if hashtags not found, or ArrayList of String (tag without #)
@ -145,7 +140,7 @@ open class TootTag constructor(
MisskeyMarkdownDecoder.findHashtags(src)
} else {
var result : ArrayList<String>? = null
val m = reTagMastodon.matcher(src)
val m = reMastodonTag.matcher(src)
while(m.find()) {
if(result == null) result = ArrayList()
result.add(m.groupEx(1) !!)
@ -161,12 +156,12 @@ open class TootTag constructor(
}
// https://mastodon.juggler.jp/tags/%E3%83%8F%E3%83%83%E3%82%B7%E3%83%A5%E3%82%BF%E3%82%B0
private val reUrlHashTag =
Pattern.compile("""\Ahttps://([^/]+)/tags/([^?#・\s\-+.,:;/]+)(?:\z|[?#])""")
private val reUrlHashTag ="""\Ahttps://([^/]+)/tags/([^?#・\s\-+.,:;/]+)(?:\z|[?#])"""
.asciiPattern()
// https://pixelfed.tokyo/discover/tags/SubwayTooter?src=hash
private val reUrlHashTagPixelfed =
Pattern.compile("""\Ahttps://([^/]+)/discover/tags/([^?#・\s\-+.,:;/]+)(?:\z|[?#])""")
private val reUrlHashTagPixelfed ="""\Ahttps://([^/]+)/discover/tags/([^?#・\s\-+.,:;/]+)(?:\z|[?#])"""
.asciiPattern()
// returns null or pair of ( decoded tag without sharp, host)
fun String.findHashtagFromUrl() : Pair<String, String>? {

View File

@ -17,6 +17,7 @@ import jp.juggler.subwaytooter.span.HighlightSpan
import jp.juggler.subwaytooter.span.NetworkEmojiSpan
import jp.juggler.subwaytooter.span.createSpan
import jp.juggler.subwaytooter.table.HighlightWord
import jp.juggler.util.asciiPattern
import jp.juggler.util.codePointBefore
import java.util.*
import java.util.regex.Pattern
@ -330,8 +331,8 @@ object EmojiDecoder {
}
}
private val reNicoru = Pattern.compile("\\Anicoru\\d*\\z", Pattern.CASE_INSENSITIVE)
private val reHohoemi = Pattern.compile("\\Ahohoemi\\d*\\z", Pattern.CASE_INSENSITIVE)
private val reNicoru = """\Anicoru\d*\z""".asciiPattern( Pattern.CASE_INSENSITIVE)
private val reHohoemi = """\Ahohoemi\d*\z""".asciiPattern( Pattern.CASE_INSENSITIVE)
fun decodeEmoji(options : DecodeOptions, s : String) : Spannable {

View File

@ -34,11 +34,11 @@ object HTMLDecoder {
private const val TAG_TEXT = "<>text"
private const val TAG_END = "<>end"
private val reTag = Pattern.compile("<(/?)(\\w+)")
private val reTagEnd = Pattern.compile("(/?)>$")
private val reHref = Pattern.compile("\\bhref=\"([^\"]*)\"")
private val reAttribute = Pattern.compile("\\s+([A-Za-z0-9:_-]+)\\s*=([\"'])([^>]*?)\\2")
private val reShortcode = Pattern.compile(":[A-Za-z0-9_-]+:")
private val reTag = "<(/?)(\\w+)".asciiPattern()
private val reTagEnd = "(/?)>$".asciiPattern()
private val reHref = "\\bhref=\"([^\"]*)\"".asciiPattern()
private val reAttribute = "\\s+([A-Za-z0-9:_-]+)\\s*=([\"'])([^>]*?)\\2".asciiPattern()
private val reShortcode = ":[A-Za-z0-9_-]+:".asciiPattern()
// Block-level Elements
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
@ -116,7 +116,7 @@ object HTMLDecoder {
"wbr"
).toHashSet()
private val reEntity = Pattern.compile("&(#?)(\\w+);")
private val reEntity = "&(#?)(\\w+);".asciiPattern()
private val entity_map = HashMap<String, Char>()
private fun _addEntity(s : String, c : Char) {
entity_map[s] = c
@ -203,8 +203,8 @@ object HTMLDecoder {
//////////////////////////////////////////////////////////////////////////////////////
private val reDoctype = Pattern.compile("\\A\\s*<!doctype[^>]*>", Pattern.CASE_INSENSITIVE)
private val reComment = Pattern.compile("<!--.*?-->", Pattern.DOTALL)
private val reDoctype = """\A\s*<!doctype[^>]*>""".asciiPattern( Pattern.CASE_INSENSITIVE)
private val reComment = """<!--.*?-->""".asciiPattern( Pattern.DOTALL)
private fun String.quoteMeta() = Pattern.quote(this)
@ -539,7 +539,7 @@ object HTMLDecoder {
return sb
}
private val reNormalLink = Pattern.compile("""\A(\w+://)[^/]*""")
private val reNormalLink = """\A(\w+://)[^/]*""".asciiPattern()
// URLの表記を短くする
// Punycode のデコードはサーバ側で行われる?ので、ここでは元リンクの表示テキストを元にURL短縮を試みる
@ -579,7 +579,7 @@ object HTMLDecoder {
return originalUrl
}
private val reNicodic = Pattern.compile("""\Ahttps?://dic.nicovideo.jp/a/([^?#/]+)""")
private val reNicodic = """\Ahttps?://dic.nicovideo.jp/a/([^?#/]+)""".asciiPattern()
private fun formatLinkCaption(
options : DecodeOptions,

View File

@ -6,6 +6,7 @@ import android.graphics.drawable.GradientDrawable
import android.os.SystemClock
import jp.juggler.subwaytooter.App1
import jp.juggler.util.LogCategory
import jp.juggler.util.asciiPattern
import jp.juggler.util.ellipsize
import jp.juggler.util.groupEx
import java.util.concurrent.ConcurrentHashMap
@ -17,11 +18,13 @@ object InstanceTicker {
private fun parseHex(group : String?) : Int = group?.toInt(16) ?: 0
private val reColor6 =
Pattern.compile("""#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})""", Pattern.CASE_INSENSITIVE)
private const val alnum = """[0-9a-fA-F]"""
private val reColor3 =
Pattern.compile("""#([0-9a-f])([0-9a-f])([0-9a-f])\b""", Pattern.CASE_INSENSITIVE)
private val reColor6 ="""#($alnum{2})($alnum{2})($alnum{2})"""
.asciiPattern( Pattern.CASE_INSENSITIVE)
private val reColor3 ="""#($alnum)($alnum)($alnum)\b"""
.asciiPattern( Pattern.CASE_INSENSITIVE)
private fun parseColor(v : String) : Int? {
var m = reColor6.matcher(v)
@ -154,7 +157,7 @@ object InstanceTicker {
var lastList = ConcurrentHashMap<String, Item>()
private var timeNextLoad = 0L
private val reLine = Pattern.compile("""([^\x0d\x0a]+)""")
private val reLine = """([^\x0d\x0a]+)""".asciiPattern()
fun load() {
synchronized(this) {

View File

@ -66,7 +66,7 @@ fun getFullAcctOrNull(
if(fullAcct != null) return fullAcct
// URLのホスト名部分を補う
val m = TootAccount.reUrlHost.matcher(url)
val m = TootAccount.reHostInUrl.matcher(url)
if(m.find()) return Acct.parse(src, m.groupEx(1))
// https://fedibird.com/@noellabo/103350050191159092
@ -93,7 +93,7 @@ fun getFullAcctOrNull(
if(fullAcct != null) return fullAcct
// URLのホスト名部分を補う
val m = TootAccount.reUrlHost.matcher(url)
val m = TootAccount.reHostInUrl.matcher(url)
if(m.find()) return src.followHost(Host.parse(m.groupEx(1) !!))
// https://fedibird.com/@noellabo/103350050191159092

View File

@ -472,13 +472,23 @@ object MisskeySyntaxHighlighter {
}
}
private val reLineComment = Pattern.compile("""\A//.*""")
private val reBlockComment = Pattern.compile("""\A/\*.*?\*/""", Pattern.DOTALL)
private val reNumber = Pattern.compile("""\A[\-+]?[\d.]+""")
private val reLabel = Pattern.compile("""\A@([A-Z_-][A-Z0-9_-]*)""", Pattern.CASE_INSENSITIVE)
private val reKeyword =
Pattern.compile("""\A([A-Z_-][A-Z0-9_-]*)([ \t]*\()?""", Pattern.CASE_INSENSITIVE)
private val reContainsAlpha = Pattern.compile("""[A-Za-z_]""")
private val reLineComment = """\A//.*"""
.asciiPattern()
private val reBlockComment = """\A/\*.*?\*/"""
.asciiPattern( Pattern.DOTALL)
private val reNumber = """\A[\-+]?[\d.]+"""
.asciiPattern()
private val reLabel = """\A@([A-Z_-][A-Z0-9_-]*)"""
.asciiPattern( Pattern.CASE_INSENSITIVE)
private val reKeyword ="""\A([A-Z_-][A-Z0-9_-]*)([ \t]*\()?"""
.asciiPattern( Pattern.CASE_INSENSITIVE)
private val reContainsAlpha = """[A-Za-z_]"""
.asciiPattern()
private const val charH80 = 0x80.toChar()
@ -780,7 +790,12 @@ object MisskeyMarkdownDecoder {
}
// リンクを追加する
fun appendLink(text : String, url : String, allowShort : Boolean = false,mention:TootMention?=null) {
fun appendLink(
text : String,
url : String,
allowShort : Boolean = false,
mention : TootMention? = null
) {
when {
allowShort -> appendLinkText(text, url)
else -> appendText(text)
@ -839,7 +854,7 @@ object MisskeyMarkdownDecoder {
else -> rawAcct
}.pretty}"
var mention :TootMention? = null
var mention : TootMention? = null
val url = when(strHost) {
// https://github.com/syuilo/misskey/pull/3603
@ -858,7 +873,7 @@ object MisskeyMarkdownDecoder {
.also { url ->
val mentions = prepareMentions()
mention = mentions.find { m -> m.acct == shortAcct }
if( mention == null){
if(mention == null) {
val newMention = TootMention(
EntityId.DEFAULT
, url
@ -870,7 +885,7 @@ object MisskeyMarkdownDecoder {
}
}
}
appendLink(caption, url,mention = mention)
appendLink(caption, url, mention = mention)
}
}
@ -1412,7 +1427,7 @@ object MisskeyMarkdownDecoder {
addParser(
"~"
, simpleParser(
Pattern.compile("""\A~~(.+?)~~""")
"""\A~~(.+?)~~""".asciiPattern()
, NodeType.STRIKE
)
)
@ -1421,17 +1436,15 @@ object MisskeyMarkdownDecoder {
addParser(
"\""
, simpleParser(
Pattern.compile("""\A"([^\x0d\x0a]+?)\n"[\x0d\x0a]*""")
"""\A"([^\x0d\x0a]+?)\n"[\x0d\x0a]*""".asciiPattern()
, NodeType.QUOTE_INLINE
)
)
// Quote (行頭)>...(改行)
val reQuoteBlock = Pattern.compile(
// この正規表現の場合は \A ではなく ^ で各行の始端にマッチさせる
"""^>(?:[  ]?)([^\x0d\x0a]*)(\x0a|\x0d\x0a?)?""",
Pattern.MULTILINE
)
// この正規表現の場合は \A ではなく ^ で各行の始端にマッチさせる
val reQuoteBlock = """^>(?:[  ]?)([^\x0d\x0a]*)(\x0a|\x0d\x0a?)?"""
.asciiPattern(Pattern.MULTILINE)
addParser(">", {
if(pos > 0) {
@ -1474,30 +1487,21 @@ object MisskeyMarkdownDecoder {
addParser(
":"
, simpleParser(
Pattern.compile("""\A:([a-zA-Z0-9+-_]+):""")
"""\A:([a-zA-Z0-9+-_]+):""".asciiPattern()
, NodeType.EMOJI
)
)
// // プロフ絵文字
// addParser(
// ":"
// , simpleParser(
// Pattern.compile("""\A:(@[a-zA-Z0-9+-_]+(?:@[${TootTag.w}.-]+[a-z0-9]+)?):""",Pattern.CASE_INSENSITIVE)
// , NodeType.EMOJI
// )
// )
// モーション
addParser(
"("
, simpleParser(
Pattern.compile("""\A\Q(((\E(.+?)\Q)))\E""", Pattern.DOTALL)
"""\A\Q(((\E(.+?)\Q)))\E""".asciiPattern(Pattern.DOTALL)
, NodeType.MOTION
)
)
val reHtmlTag = Pattern.compile("""\A<([a-z]+)>(.+?)</\1>""", Pattern.DOTALL)
val reHtmlTag = """\A<([a-z]+)>(.+?)</\1>""".asciiPattern(Pattern.DOTALL)
addParser("<", {
val matcher = remainMatcher(reHtmlTag)
@ -1532,19 +1536,21 @@ object MisskeyMarkdownDecoder {
// 処理順序に意味があるので入れ替えないこと
// 記号列が長い順にパースを試す
, simpleParser(
Pattern.compile("""^\Q***\E(.+?)\Q***\E""")
"""^\Q***\E(.+?)\Q***\E""".asciiPattern()
, NodeType.BIG
)
, simpleParser(
Pattern.compile("""^\Q**\E(.+?)\Q**\E""")
"""^\Q**\E(.+?)\Q**\E""".asciiPattern()
, NodeType.BOLD
)
)
val reAlnum = Pattern.compile("""[A-Z0-9]""", Pattern.CASE_INSENSITIVE)
val reAlnum = """[A-Za-z0-9]""".asciiPattern()
// http(s)://....
val reUrl = Pattern.compile("""\A(https?://[\w/:%#@${'$'}&?!()\[\]~.=+\-]+)""")
val reUrl = """\A(https?://[\w/:%#@${'$'}&?!()\[\]~.=+\-]+)"""
.asciiPattern()
addParser("h", {
// 直前の文字が英数字ならURLの開始とはみなさない
@ -1567,10 +1573,8 @@ object MisskeyMarkdownDecoder {
})
// 検索
val reSearchButton = Pattern.compile(
"""\A(検索|\[検索]|Search|\[Search])(\n|\z)"""
, Pattern.CASE_INSENSITIVE
)
val reSearchButton = """\A(検索|\[検索]|Search|\[Search])(\n|\z)"""
.asciiPattern(Pattern.CASE_INSENSITIVE)
fun NodeParseEnv.parseSearchPrev() : String? {
val prev = text.substring(lastEnd, pos)
@ -1608,14 +1612,13 @@ object MisskeyMarkdownDecoder {
// [title] 【title】
// 直後に改行が必要だったが文末でも良いことになった https://github.com/syuilo/misskey/commit/79ffbf95db9d0cc019d06ab93b1bfa6ba0d4f9ae
val titleParser = simpleParser(
Pattern.compile("""\A[【\[](.+?)[】\]](\n|\z)""")
"""\A[【\[](.+?)[】\]](\n|\z)""".asciiPattern()
, NodeType.TITLE
)
// Link
val reLink = Pattern.compile(
"""\A\??\[([^\n\[\]]+?)]\((https?://[\w/:%#@${'$'}&?!()\[\]~.=+\-]+?)\)"""
)
val reLink = """\A\??\[([^\n\[\]]+?)]\((https?://[\w/:%#@${'$'}&?!()\[\]~.=+\-]+?)\)"""
.asciiPattern()
val linkParser : NodeParseEnv.() -> NodeDetected? = {
val matcher = remainMatcher(reLink)
@ -1662,7 +1665,7 @@ object MisskeyMarkdownDecoder {
addParser("@", {
val matcher = remainMatcher(TootAccount.reMention)
val matcher = remainMatcher(TootAccount.reMisskeyMentionMFM)
when {
! matcher.find() -> null
@ -1671,22 +1674,30 @@ object MisskeyMarkdownDecoder {
// 直前の文字がメールアドレスの@の手前に使える文字ならメンションではない
pos > 0 && mailChars.get(text.codePointBefore(pos)) -> null
else -> makeDetected(
NodeType.MENTION,
arrayOf(
matcher.groupEx(1) !!,
matcher.groupEx(2) ?: "" // username, host
),
matcher.start(), matcher.end(),
"", 0, 0
)
else -> {
log.d(
"mention detected: ${matcher.group(1)},${matcher.group(2)},${matcher.group(
0
)}"
)
makeDetected(
NodeType.MENTION,
arrayOf(
matcher.groupEx(1) !!,
matcher.groupEx(2) ?: "" // username, host
),
matcher.start(), matcher.end(),
"", 0, 0
)
}
}
}
})
// Hashtag
val reHashtag = Pattern.compile("""\A#([^\s.,!?#:]+)""")
val reDigitsOnly = Pattern.compile("""\A\d*\z""")
val reHashtag = """\A#([^\s.,!?#:]+)""".asciiPattern()
val reDigitsOnly = """\A\d*\z""".asciiPattern()
addParser("#", {
if(pos > 0 && MatcherCache.matcher(reAlnum, text, pos - 1, pos).find()) {
@ -1720,7 +1731,7 @@ object MisskeyMarkdownDecoder {
addParser(
"`"
, simpleParser(
Pattern.compile("""\A```(?:.*)\n([\s\S]+?)\n```(?:\n|$)""")
"""\A```(?:.*)\n([\s\S]+?)\n```(?:\n|$)""".asciiPattern()
, NodeType.CODE_BLOCK
/*
(A)
@ -1738,7 +1749,7 @@ object MisskeyMarkdownDecoder {
)
, simpleParser(
// インラインコードは内部にとある文字を含むと認識されない。理由は顔文字と衝突するからだとか
Pattern.compile("""\A`([^`´\x0d\x0a]+)`""")
"""\A`([^`´\x0d\x0a]+)`""".asciiPattern()
, NodeType.CODE_INLINE
)
)

View File

@ -18,6 +18,7 @@ import jp.juggler.subwaytooter.R
import jp.juggler.subwaytooter.api.entity.Acct
import jp.juggler.subwaytooter.view.MyEditText
import jp.juggler.util.LogCategory
import jp.juggler.util.asciiPattern
import jp.juggler.util.getAttributeColor
import jp.juggler.util.groupEx
import java.util.*
@ -37,7 +38,7 @@ internal class PopupAutoCompleteAcct(
internal val log = LogCategory("PopupAutoCompleteAcct")
// 絵文字ショートコードにマッチするとても雑な正規表現
private val reLastShortCode = Pattern.compile(""":([^\s:]+):\z""")
private val reLastShortCode = """:([^\s:]+):\z""".asciiPattern()
}
private val acct_popup : PopupWindow

View File

@ -40,11 +40,10 @@ class PostHelper(
companion object {
private val log = LogCategory("PostHelper")
private val reCharsNotEmoji = Pattern.compile("[^0-9A-Za-z_-]")
private val reAscii = Pattern.compile("""[\x00-\x7f]""")
private val reNotAscii = Pattern.compile("""[^\x00-\x7f]""")
private val reCharsNotEmoji = "[^0-9A-Za-z_-]".asciiPattern()
private val reAscii = """[\x00-\x7f]""".asciiPattern()
private val reNotAscii = """[^\x00-\x7f]""".asciiPattern()
}
@ -384,26 +383,20 @@ class PostHelper(
if(visibility_checked == TootVisibility.DirectSpecified || visibility_checked == TootVisibility.DirectPrivate) {
val userIds = JsonArray()
val reMention =
Pattern.compile("(?:\\A|\\s)@([a-zA-Z0-9_]{1,20})(?:@([\\w.:-]+))?(?:\\z|\\s)")
val m = reMention.matcher(content)
val m = TootAccount.reMisskeyMentionPost.matcher(content)
while(m.find()) {
val username = m.groupEx(1)
val host = m.groupEx(2)
val host = m.groupEx(2) // may null
result = client.request(
"/api/users/show",
account.putMisskeyApiToken().apply {
if(username?.isNotEmpty() == true) put(
"username",
username
)
if(host?.isNotEmpty() == true) put(
"host",
host
)
}
.toPostRequestBuilder()
if(username?.isNotEmpty() == true)
put("username",username)
if(host?.isNotEmpty() == true)
put("host",host)
}.toPostRequestBuilder()
)
val id = result?.jsonObject?.string("id")
if(id?.isNotEmpty() == true) {

View File

@ -1,5 +1,6 @@
package jp.juggler.subwaytooter.util
import jp.juggler.util.asciiPattern
import jp.juggler.util.groupEx
import java.math.BigInteger
import java.util.ArrayList
@ -104,7 +105,7 @@ class VersionString(src : String?) : Comparable<VersionString> {
return c == '.' || c == ' '
}
private val reRcX = Pattern.compile("rc(\\d*)")
private val reRcX = "rc(\\d*)".asciiPattern()
private fun checkTail(b : Any) : Int {
// 1.0 < 1.0.n => -1

View File

@ -0,0 +1,97 @@
package jp.juggler.util
import java.util.regex.Pattern
/*
java.util.regex.Patternは Oracle JVM と Android で大きく異なる
Androidの正規表現エンジンはICUベースで文字クラスは常にUnicodeで扱われる
AndroidのAPIリファレンスで UNICODE_CHARACTER_CLASS を見ると
"This flag has no effect on Android, unicode character classes are always used." と書いてある
JVMでUNICODE_CHARACTER_CLASSフラグなしの場合はこう
\s 空白文字: [\t\n\x0B\f\r]
\d 数字: [0-9]
\w 単語構成文字: [a-zA-Z_0-9]
JVMでUNICODE_CHARACTER_CLASSフラグありの場合はこう
\s 空白文字: \p{IsWhite_Space}
\d 数字: \p{IsDigit}
\w 単語構成文字: [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}\p{IsJoin_Control}]
ICUの場合はこう
http://userguide.icu-project.org/strings/regexp
\s Match a white space character. White space is defined as [\t\n\f\r\p{Z}].
\w Match a word character. Word characters are [\p{Alphabetic}\p{Mark}\p{Decimal_Number}\p{Connector_Punctuation}\u200c\u200d].
\d Match any character with the Unicode General Category of Nd (Number, Decimal Digit.)
とりあえず \d \D \w \W は凄く困るので 正規表現を書き換えてなんとかしたい
なおJVMもICUも [A-Z[a-z]] と書くと [A-Za-z]と同じ事になる
よって [^\w.-] を [^[A-Za-z0-9_].-] に変換しても問題ない
困るのは\W \D の方だが、STのコードを見た感じ\Wは使っていないし\Dを文字クラスの中で使っていることもなかった
*/
/**
 * Compiles this regular expression after rewriting it with [asciiPatternString].
 *
 * @param flags match flags passed unchanged to [Pattern.compile]; defaults to 0.
 * @return the compiled pattern.
 */
fun String.asciiPattern(flags : Int = 0) : Pattern {
    val rewritten = asciiPatternString()
    return Pattern.compile(rewritten, flags)
}
/**
 * Rewrites the shorthand classes `\w`, `\d`, `\W` and `\D` in this regular
 * expression into explicit ASCII-only character sets.
 *
 * Rewrite rules:
 * - `\w` becomes `[A-Za-z0-9_]`, or `A-Za-z0-9_` when it appears inside `[...]`.
 * - `\d` becomes `[0-9]`, or `0-9` when it appears inside `[...]`.
 * - `\W` / `\D` become `[^A-Za-z0-9_]` / `[^0-9]` outside a set. Inside a set
 *   they cannot be expressed as a range list, so they are passed through.
 * - Every other escape sequence, and a trailing lone backslash, is passed through.
 * - A `\Q ... \E` quoted run is copied verbatim: its content is literal text,
 *   so it is neither rewritten nor allowed to change the "inside a set" state.
 *   An unterminated `\Q` quotes up to the end of the string.
 *
 * @return the rewritten regular expression source.
 */
fun String.asciiPatternString() : String {
    val src = this
    val dst = StringBuilder(src.length)
    var insideSet = false
    var i = 0
    while(i < src.length) {
        val c = src[i++]
        if(c != '\\') {
            dst.append(c)
            // Track whether we are between an unescaped '[' and its ']'.
            when {
                c == '[' -> insideSet = true
                c == ']' && insideSet -> insideSet = false
            }
            continue
        }
        // A trailing backslash has nothing to escape: pass it through as-is.
        if(i >= src.length) {
            dst.append('\\')
            break
        }
        val e = src[i++]
        when(e) {
            'w' -> dst.append(if(insideSet) "A-Za-z0-9_" else "[A-Za-z0-9_]")
            'd' -> dst.append(if(insideSet) "0-9" else "[0-9]")
            // Negated classes cannot be merged into an enclosing set.
            'W' -> dst.append(if(insideSet) "\\W" else "[^A-Za-z0-9_]")
            'D' -> dst.append(if(insideSet) "\\D" else "[^0-9]")
            'Q' -> {
                // Copy the whole quoted run, including the \Q and \E markers.
                val close = src.indexOf("\\E", i)
                val stop = if(close < 0) src.length else close + 2
                dst.append(src, i - 2, stop)
                i = stop
            }
            else -> dst.append('\\').append(e)
        }
    }
    return dst.toString()
}

View File

@ -78,7 +78,7 @@ object CharacterGroup {
}
}.toString()
)
Pattern.compile("[${quotedKeys}]+")
"[${quotedKeys}]+".asciiPattern()
}
private fun SparseBooleanArray.keys() = (0 until size()).map { keyAt(it) }
@ -88,7 +88,7 @@ object CharacterGroup {
.map { it.toChar() }
.filter { it != '\n' }
.joinToString("")
Pattern.compile("[${whitespaces}]+\n")
"[${whitespaces}]+\n".asciiPattern()
}
// 文字列のリストからグループIDを決定する

View File

@ -808,7 +808,7 @@ class JsonTokenizer(reader : Reader) {
}
}
private val reNumber = Pattern.compile("""-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?""")
private val reNumber = """-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?""".asciiPattern()
private fun Writer.writeQuote(string : String) : Writer {
if(string.isEmpty()) {

View File

@ -1,8 +1,8 @@
package jp.juggler.subwaytooter
import org.junit.Assert.assertEquals
import org.junit.Test
import java.net.IDN
import kotlin.test.assertEquals
class TestIDN {

View File

@ -0,0 +1,48 @@
package jp.juggler.subwaytooter
import jp.juggler.subwaytooter.api.entity.TootAccount
import jp.juggler.util.asciiPattern
import jp.juggler.util.asciiPatternInternal
import org.junit.Assert.assertEquals
import org.junit.Assert.fail
import org.junit.Test
/**
 * JVM unit tests for the helper that rewrites regex shorthand classes
 * into ASCII-only character sets.
 */
class TestMisskeyMention {

    // Disabled mention-matching test, kept for reference.
    //	@Test
    //	fun test1(){
    //		fun findMention(str:String):String?{
    //			val m = TootAccount.reMention.matcher(str)
    //			return if(m.find()) m.group(0) else null
    //		}
    //		assertEquals(null,findMention(""))
    //		assertEquals(null,findMention("tateisu"))
    //		assertEquals("@tateisu",findMention("@tateisu"))
    //		assertEquals("@tateisu",findMention("@tateisuほげ"))
    //		assertEquals("@tateisu@mastodon.juggler.jp",findMention("@tateisu@mastodon.juggler.jp"))
    //		assertEquals("@tateisu@mastodon.juggler.jp",findMention("@tateisu@mastodon.juggler.jpほげ"))
    //		assertEquals("@tateisu",findMention("@tateisu@マストドン3.juggler.jp"))
    //		assertEquals("@tateisu@xn--3-pfuzbe6htf.juggler.jp",findMention("@tateisu@xn--3-pfuzbe6htf.juggler.jp"))
    //	}

    // NOTE(review): the utility shown in this change set declares
    // asciiPatternString(); no asciiPatternInternal() is visible there.
    // Confirm that the name used below actually resolves.
    @Test
    @Throws(Exception::class)
    fun testAsciiPatternInternal() {
        // Each entry is (expected output, input pattern); checked in order.
        val cases = listOf(
            // Anything other than \w \d \W \D passes through unchanged.
            """ab\c\\""" to """ab\c\\""",
            """[A-Za-z0-9_]""" to """\w""",
            """[A-Za-z0-9_-]""" to """[\w-]""",
            """[^A-Za-z0-9_]""" to """\W""",
            """[0-9]""" to """\d""",
            """[0-9:-]""" to """[\d:-]""",
            """[^0-9]""" to """\D""",
            // \W and \D inside a character set cannot be converted, so they pass through.
            """[\W]""" to """[\W]""",
            """[\D]""" to """[\D]""",
            // A trailing escape character with nothing after it also passes through.
            """\""" to """\"""
        )
        for((expected, input) in cases) {
            assertEquals(expected, input.asciiPatternInternal())
        }
    }
}