diff --git a/_Emoji/emojiConverter/.idea/compiler.xml b/_Emoji/emojiConverter/.idea/compiler.xml
index 3e828077..245a82c8 100644
--- a/_Emoji/emojiConverter/.idea/compiler.xml
+++ b/_Emoji/emojiConverter/.idea/compiler.xml
@@ -1,6 +1,6 @@
-
+
\ No newline at end of file
diff --git a/_Emoji/emojiConverter/.idea/misc.xml b/_Emoji/emojiConverter/.idea/misc.xml
index f07d57a4..4f9b6d51 100644
--- a/_Emoji/emojiConverter/.idea/misc.xml
+++ b/_Emoji/emojiConverter/.idea/misc.xml
@@ -1,7 +1,7 @@
-
+
\ No newline at end of file
diff --git a/_Emoji/emojiConverter/build.gradle b/_Emoji/emojiConverter/build.gradle
index 36cceb9f..16f7cc49 100644
--- a/_Emoji/emojiConverter/build.gradle
+++ b/_Emoji/emojiConverter/build.gradle
@@ -1,6 +1,6 @@
plugins {
id 'java'
- id 'org.jetbrains.kotlin.jvm' version '1.5.10'
+ id 'org.jetbrains.kotlin.jvm' version '1.7.10'
}
group 'jp.juggler'
@@ -12,22 +12,22 @@ repositories {
dependencies {
implementation fileTree(include: ['*.jar'], dir: 'src/lib')
- implementation "com.google.guava:guava:28.1-jre"
+ implementation "com.google.guava:guava:31.1-jre"
implementation "org.jetbrains.kotlin:kotlin-stdlib"
- testImplementation 'org.junit.jupiter:junit-jupiter-api:5.7.0'
+ testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
- def ktorVersion="1.5.0"
+ def ktorVersion="2.0.3"
implementation "io.ktor:ktor-client-core:$ktorVersion"
implementation "io.ktor:ktor-client-cio:$ktorVersion"
- implementation "io.ktor:ktor-client-features:$ktorVersion"
+// implementation "io.ktor:ktor-client-features:$ktorVersion"
implementation "io.ktor:ktor-client-encoding:$ktorVersion"
// StringEscapeUtils.unescapeHtml4
implementation "org.apache.commons:commons-text:1.9"
// HTML5パーサ
- implementation "org.jsoup:jsoup:1.13.1"
+ implementation "org.jsoup:jsoup:1.14.3"
}
test {
diff --git a/_Emoji/emojiConverter/gradle/wrapper/gradle-wrapper.properties b/_Emoji/emojiConverter/gradle/wrapper/gradle-wrapper.properties
index be52383e..aa991fce 100644
--- a/_Emoji/emojiConverter/gradle/wrapper/gradle-wrapper.properties
+++ b/_Emoji/emojiConverter/gradle/wrapper/gradle-wrapper.properties
@@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.4.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Json.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Json.kt
index 019f8b40..7b22aa16 100644
--- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Json.kt
+++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Json.kt
@@ -860,7 +860,7 @@ private fun Writer.writeQuote(string: String): Writer {
in '\u0080' until '\u00a0',
in '\u2000' until '\u2100' -> {
write("\\u")
- val hexCode: String = Integer.toHexString(c.toInt())
+ val hexCode: String = Integer.toHexString(c.code)
write("0000", 0, 4 - hexCode.length)
write(hexCode)
}
@@ -1047,7 +1047,7 @@ fun Writer.writeJsonValue(
}
}
- value is Char -> writeJsonValue(indentFactor, indent, value.toInt())
+ value is Char -> writeJsonValue(indentFactor, indent, value.code)
value is String -> writeQuote(value)
value is Enum<*> -> writeQuote(value.name)
diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Main.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Main.kt
index 32ef06c2..3335eb1c 100644
--- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Main.kt
+++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Main.kt
@@ -1,7 +1,7 @@
package jp.juggler.subwaytooter.emoji
import io.ktor.client.*
-import io.ktor.client.features.*
+import io.ktor.client.plugins.HttpTimeout
import jp.juggler.subwaytooter.emoji.model.*
import kotlinx.coroutines.runBlocking
import org.apache.commons.text.StringEscapeUtils
@@ -11,7 +11,6 @@ import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.IOException
-
//pngフォルダにある画像ファイルを参照する
//emoji-data/emoji.json を参照する
//
@@ -26,16 +25,16 @@ const val pathCwebp = "C:/cygwin64/bin/cwebp.exe"
val emojiDataCodepointsVendors = arrayOf("docomo", "au", "softbank", "google")
fun copyFile(dst: File, src: File) {
- try {
- FileInputStream(src).use { streamIn ->
- FileOutputStream(dst).use { streamOut ->
- streamOut.write(streamIn.readAllBytes())
- }
- }
- } catch (ex: Throwable) {
- dst.delete()
- throw IOException("copyFile failed. src=$src dst=$dst", ex)
- }
+ try {
+ FileInputStream(src).use { streamIn ->
+ FileOutputStream(dst).use { streamOut ->
+ streamOut.write(streamIn.readBytes())
+ }
+ }
+ } catch (ex: Throwable) {
+ dst.delete()
+ throw IOException("copyFile failed. src=$src dst=$dst", ex)
+ }
}
//fun svgToVectorDrawable(dst: File, src: File) {
@@ -62,960 +61,963 @@ fun copyFile(dst: File, src: File) {
class App {
- companion object{
-
- const val fromCategoryHtml = "CategoryHtml"
-
-
- val reComment = """#.*""".toRegex()
-
- private val ignoreImagePath = arrayOf(
- "LICENSE",
-
- // fe82b (フリーダイアル) はnoto-emoji では ?旗 になっていて使えない
- "noto-emoji/png/128/emoji_ufe82b.png",
- "noto-emoji/svg/emoji_ufe82b.svg",
-
- // mastodonのフォルダにある余計なファイル
- "mastodon/public/emoji/sheet_10.png",
- "mastodon/public/emoji/sheet_13.png",
- )
-
- // emojipediaにあるデータのうち、次のショートネームを持つ絵文字は無視する
- val ignoreShortName = arrayOf(
- "flag_for_texas_ustx"
- )
-
- private val ignoreEmojiOneShortNames = setOf(
- "man_in_tuxedo",
- "man_in_tuxedo_tone1", "tuxedo_tone1",
- "man_in_tuxedo_tone2", "tuxedo_tone2",
- "man_in_tuxedo_tone3", "tuxedo_tone3",
- "man_in_tuxedo_tone4", "tuxedo_tone4",
- "man_in_tuxedo_tone5", "tuxedo_tone5",
- )
- }
-
- // 修正用データ
- private val fixCode = HashMap()
- private val fixName = HashMap()
- private val fixCategory = ArrayList>()
- private val fixUnified = HashMap()
-
- // 画像ファイルをスキャンして絵文字コードとファイルの対応表を作る
- // マップのキーはvariation selectorとZWJが除去される
- private val emojiMap = HashMap()
-
- private val emojipediaShortNames = HashMap()
-
- /////////////////////////////////////////////////
-
- private fun readFixData() {
- val fileFixData = "./fix_code.txt"
-
- File(fileFixData).forEachLine { lno, rawLine ->
- val line = rawLine
- .replace(reComment, "")
- .trim()
-
- val mr = """\A(\w+)\s*([\w._-]+)\s*(.*)""".toRegex().find(line)
- if (mr != null) {
- val type = mr.groupValues[1]
- val arg1 = mr.groupValues[2]
-
- if (type == "unified") {
- val code = arg1.toCodepointList("fixUnified")!!
- val key = code.toKey("fixUnified")!!
- fixUnified[key] = code
- return@forEachLine
- }
-
- val data = """([\w+-]+)""".toRegex().findAll(mr.groupValues[3]).map { it.groupValues[1] }.toList()
- if (data.size != 1) return@forEachLine
- when (type) {
- "code" -> fixCode[arg1.toCodepointList("fixCode")!!] = data.first()
- "name" -> fixName[arg1.toShortName("fixName")!!] = data.first()
- "category" -> Pair(arg1, data.first()).addTo(fixCategory)
- else -> error("$fileFixData $lno : bad fix_data type=$type")
- }
- }
- }
- }
+ companion object {
+
+ const val fromCategoryHtml = "CategoryHtml"
+
+
+ val reComment = """#.*""".toRegex()
+
+ private val ignoreImagePath = arrayOf(
+ "LICENSE",
+
+ // fe82b (フリーダイアル) はnoto-emoji では ?旗 になっていて使えない
+ "noto-emoji/png/128/emoji_ufe82b.png",
+ "noto-emoji/svg/emoji_ufe82b.svg",
+
+ // mastodonのフォルダにある余計なファイル
+ "mastodon/public/emoji/sheet_10.png",
+ "mastodon/public/emoji/sheet_13.png",
+ )
+
+ // emojipediaにあるデータのうち、次のショートネームを持つ絵文字は無視する
+ val ignoreShortName = arrayOf(
+ "flag_for_texas_ustx"
+ )
+
+ private val ignoreEmojiOneShortNames = setOf(
+ "man_in_tuxedo",
+ "man_in_tuxedo_tone1", "tuxedo_tone1",
+ "man_in_tuxedo_tone2", "tuxedo_tone2",
+ "man_in_tuxedo_tone3", "tuxedo_tone3",
+ "man_in_tuxedo_tone4", "tuxedo_tone4",
+ "man_in_tuxedo_tone5", "tuxedo_tone5",
+ )
+ }
+
+ // 修正用データ
+ private val fixCode = HashMap()
+ private val fixName = HashMap()
+ private val fixCategory = ArrayList>()
+ private val fixUnified = HashMap()
+
+ // 画像ファイルをスキャンして絵文字コードとファイルの対応表を作る
+ // マップのキーはvariation selectorとZWJが除去される
+ private val emojiMap = HashMap()
+
+ private val emojipediaShortNames = HashMap()
+
+ /////////////////////////////////////////////////
+
+ private fun readFixData() {
+ val fileFixData = "./fix_code.txt"
+
+ File(fileFixData).forEachLine { lno, rawLine ->
+ val line = rawLine
+ .replace(reComment, "")
+ .trim()
+
+ val mr = """\A(\w+)\s*([\w._-]+)\s*(.*)""".toRegex().find(line)
+ if (mr != null) {
+ val type = mr.groupValues[1]
+ val arg1 = mr.groupValues[2]
+
+ if (type == "unified") {
+ val code = arg1.toCodepointList("fixUnified")!!
+ val key = code.toKey("fixUnified")!!
+ fixUnified[key] = code
+ return@forEachLine
+ }
+
+ val data = """([\w+-]+)""".toRegex().findAll(mr.groupValues[3]).map { it.groupValues[1] }.toList()
+ if (data.size != 1) return@forEachLine
+ when (type) {
+ "code" -> fixCode[arg1.toCodepointList("fixCode")!!] = data.first()
+ "name" -> fixName[arg1.toShortName("fixName")!!] = data.first()
+ "category" -> Pair(arg1, data.first()).addTo(fixCategory)
+ else -> error("$fileFixData $lno : bad fix_data type=$type")
+ }
+ }
+ }
+ }
- // Emojipediaのバージョン別一覧とカテゴリ別一覧を読んでJSONに保存しておく
- // サイトにアクセスできなくなったら困るからな…
- @Suppress("FunctionName")
- private suspend fun readEmojipedia(client: HttpClient) :JsonObject {
+ // Emojipediaのバージョン別一覧とカテゴリ別一覧を読んでJSONに保存しておく
+ // サイトにアクセスできなくなったら困るからな…
+ @Suppress("FunctionName")
+ private suspend fun readEmojipedia(client: HttpClient): JsonObject {
- val fileEmojipedia = File("Emojipedia.json")
- if( fileEmojipedia.isFile) return fileEmojipedia.readAllBytes().decodeUtf8().decodeJsonObject()
+ val fileEmojipedia = File("Emojipedia.json")
+ if (fileEmojipedia.isFile) return fileEmojipedia.readAllBytes().decodeUtf8().decodeJsonObject()
- val dstRoot = JsonObject()
- val dstQualified = JsonArray().also{ dstRoot["qualifiedCode"] = it }
+ val dstRoot = JsonObject()
+ val dstQualified = JsonArray().also { dstRoot["qualifiedCode"] = it }
- for (url in arrayOf(
- "https://emojipedia.org/emoji-13.1/",
- "https://emojipedia.org/emoji-13.0/",
- "https://emojipedia.org/emoji-12.1/",
- "https://emojipedia.org/emoji-12.0/",
- "https://emojipedia.org/emoji-11.0/",
- "https://emojipedia.org/emoji-5.0/",
- "https://emojipedia.org/emoji-4.0/",
- "https://emojipedia.org/emoji-3.0/",
- "https://emojipedia.org/emoji-2.0/",
- "https://emojipedia.org/emoji-1.0/",
- )) {
- val root = client.cachedGetString(url, mapOf()).parseHtml(url)
-
- root.getElementsByClass("sidebar").forEach { it.remove() }
- root.getElementsByClass("categories").forEach { it.remove() }
-
- for (list in root.getElementsByTag("ul")) {
- for (li in list.getElementsByTag("li")) {
-
- val href = li.getElementsByTag("a")?.attr("href")
- .notEmpty() ?: continue
-
- val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text()
- ?.notEmpty() ?: continue
-
- dstQualified.add(jsonArray(spanText, href))
- }
- }
- }
-
- val dstCategory = JsonObject().also{ dstRoot["categories"]=it}
- categoryNames.forEach { category ->
- if (category.url == null) return@forEach
-
- val dstCategoryItems = JsonArray().also { dstCategory[category.name] = it }
-
- val root = client.cachedGetString(category.url, mapOf()).parseHtml(category.url)
- val list = root.getElementsByClass("emoji-list").first()
- for (li in list.getElementsByTag("li")) {
- val href = li.getElementsByTag("a").attr("href")
- .notEmpty() ?: continue
+ for (url in arrayOf(
+ "https://emojipedia.org/emoji-14.0/",
+ "https://emojipedia.org/emoji-13.1/",
+ "https://emojipedia.org/emoji-13.0/",
+ "https://emojipedia.org/emoji-12.1/",
+ "https://emojipedia.org/emoji-12.0/",
+ "https://emojipedia.org/emoji-11.0/",
+ "https://emojipedia.org/emoji-5.0/",
+ "https://emojipedia.org/emoji-4.0/",
+ "https://emojipedia.org/emoji-3.0/",
+ "https://emojipedia.org/emoji-2.0/",
+ "https://emojipedia.org/emoji-1.0/",
+ )) {
+ val root = client.cachedGetString(url, mapOf()).parseHtml(url)
+ ?: error("parseHtml returns null!")
+
+ root.getElementsByClass("sidebar").forEach { it.remove() }
+ root.getElementsByClass("categories").forEach { it.remove() }
+
+ for (list in root.getElementsByTag("ul")) {
+ for (li in list.getElementsByTag("li")) {
+
+ val href = li.getElementsByTag("a").attr("href")
+ .notEmpty() ?: continue
+
+ val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text()
+ ?.notEmpty() ?: continue
+
+ dstQualified.add(jsonArray(spanText, href))
+ }
+ }
+ }
+
+ val dstCategory = JsonObject().also { dstRoot["categories"] = it }
+ categoryNames.forEach { category ->
+ if (category.url == null) return@forEach
+
+ val dstCategoryItems = JsonArray().also { dstCategory[category.name] = it }
+
+ val root = client.cachedGetString(category.url, mapOf()).parseHtml(category.url)
+ ?: error("parseHtml returns null!")
+ val list = root.getElementsByClass("emoji-list").first()
+ ?: error("getElementsByClass(emoji-list) failed.")
+ for (li in list.getElementsByTag("li")) {
+ val href = li.getElementsByTag("a").attr("href")
+ .notEmpty() ?: continue
+
+ val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text()
+ ?.notEmpty() ?: continue
+
+ dstCategoryItems.add(jsonArray(spanText, href))
+ }
+ }
+
+ dstRoot.toString(2).encodeUtf8().saveTo(fileEmojipedia)
+ return dstRoot
+ }
+
+ ////////////////////////////////////////////////////////////////////////
+
+ // noto-emoji のファイル名はfe0fが欠けている
+ // あらかじめEmojipediaのデータを参照してqualified name の一覧を作っておく
+ private fun readEmojipediaQualified(root: JsonObject) {
+
+ val ignoreName2 = setOf(
+ "zero_width_joiner",
+ "variation_selector_16",
+ )
+
+ val cameFrom = "emojiQualified"
+
+ val hrefList = ArrayList>()
+
+ var countError = 0
+
+ for (cols in root.jsonArray("qualifiedCode")!!.filterIsInstance()) {
+ val spanText = cols[0] as String
+ var href = cols[1] as String
+
+ val code = spanText.listCodePoints().toCodepointList(cameFrom)
+ ?: error("can't get code from $spanText $href")
+
+
+
+ if (hrefList.any { it.first == href })
+ error("duplicate href: $href")
+
+ hrefList.add(Pair(href, code))
+
+ // https://emojipedia.org/80030/ Couple With Heart: Light Skin Tone
+ // ページ名が名前じゃないのを直す
+ if (href == "80030") href = "couple-with-heart-light-skin-tone"
+
+ val shortName = href.replace("/", "").toShortName(cameFrom)
+ ?: error("can't parse $href")
+
+ if (ignoreName2.contains(shortName.name)) {
+ log.w("skip ${shortName.name}")
+ continue
+ }
+
+ val key = code.toKey(cameFrom)
+ ?: error("can't get key from ${code.toHex()} ${shortName.name}")
+
+ if (!fixUnified.containsKey(key)) {
+ if (code.list.size == 1 && code.list.first() < 256) {
+ ++countError
+ log.e("bad unified code: $code")
+ } else {
+ fixUnified[key] = code
+ }
+ }
- val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text()
- ?.notEmpty() ?: continue
-
- dstCategoryItems.add(jsonArray(spanText, href))
- }
- }
-
- dstRoot.toString(2).encodeUtf8().saveTo(fileEmojipedia)
- return dstRoot
- }
-
- ////////////////////////////////////////////////////////////////////////
-
- // noto-emoji のファイル名はfe0fが欠けている
- // あらかじめEmojipediaのデータを参照してqualified name の一覧を作っておく
- private fun readEmojipediaQualified(root:JsonObject) {
-
- val ignoreName2 = setOf(
- "zero_width_joiner",
- "variation_selector_16",
- )
-
- val cameFrom = "emojiQualified"
-
- val hrefList = ArrayList>()
-
- var countError = 0
-
- for( cols in root.jsonArray("qualifiedCode")!!.filterIsInstance()) {
- val spanText = cols[0] as String
- var href = cols[1] as String
-
- val code = spanText.listCodePoints().toCodepointList(cameFrom)
- ?: error("can't get code from $spanText $href")
-
-
-
- if (hrefList.any { it.first == href })
- error("duplicate href: $href")
-
- hrefList.add(Pair(href, code))
-
- // https://emojipedia.org/80030/ Couple With Heart: Light Skin Tone
- // ページ名が名前じゃないのを直す
- if (href == "80030") href = "couple-with-heart-light-skin-tone"
-
- val shortName = href.replace("/", "").toShortName(cameFrom)
- ?: error("can't parse $href")
-
- if (ignoreName2.contains(shortName.name)) {
- log.w("skip ${shortName.name}")
- continue
- }
-
- val key = code.toKey(cameFrom)
- ?: error("can't get key from ${code.toHex()} ${shortName.name}")
-
- if (!fixUnified.containsKey(key)) {
- if (code.list.size == 1 && code.list.first() < 256) {
- ++countError
- log.e("bad unified code: $code")
- } else {
- fixUnified[key] = code
- }
- }
-
- if (ignoreShortName.any { it == shortName.name }) {
- log.w("skip shortname $shortName $code")
- continue
- }
-
- emojipediaShortNames[key] = shortName
- }
-
- // hrefList.sortedBy{ it.first }.forEach { log.d("href=${it.first} ${it.second}") }
-
- if(countError>0) error("please fix unified codes. countError=$countError")
- }
-
- private fun addEmojipediaShortnames() {
- for ((key, shortName) in emojipediaShortNames) {
- emojiMap[key]?.addShortName(shortName)
- }
- }
-
- // Emojipediaのデータを使ってカテゴリ別に絵文字一覧を用意する
- private fun readCategoryShortName(root:JsonObject) {
- for(category in categoryNames){
- val list = root.jsonObject("categories")?.jsonArray(category.name) ?: continue
- for( cols in list.filterIsInstance()){
- val spanText = cols[0] as String
- val href = cols[1] as String
-
- val shortName = href.replace("/", "").toShortName(fromCategoryHtml)
- ?: error("can't parse $href")
-
- if (ignoreShortName.any { shortName.name == it }) continue
-
- val code = spanText.listCodePoints().toCodepointList(fromCategoryHtml)
- ?: error("can't parse code from $spanText")
-
- val key = code.toKey(fromCategoryHtml)
- val emoji = emojiMap[key]
- ?: error("can't find emoji. category=${category.name}, href=$href, spanText=$spanText")
-
- category.addEmoji(emoji, allowDuplicate = true, addingName = shortName.toString())
- }
- }
- }
-
- // サブフォルダをスキャンして絵文字別に画像データを確定する
- private fun scanImageDir(
- cameFrom: String,
- dirPath: String,
- @Language("RegExp") codeSpec: String,
- unifiedQualifier: (CodepointList) -> CodepointList = { it }
- ) {
- val dir = File(dirPath)
- val reCodeSpec = codeSpec.toRegex()
- var countFound = 0
- var countCreate = 0
- var countError = 0
- val files = dir.listFiles() ?:error("listFiles returns null. $dir")
- for( imageFile in files){
- if (!imageFile.isFile) continue
- val unixPath = imageFile.path.replace("\\", "/")
- if (ignoreImagePath.any { unixPath.endsWith(it) }) continue
-
- val name = imageFile.name.replace("_border.", ".")
-
- val code = reCodeSpec.find(name)
- ?.groupValues
- ?.elementAtOrNull(1)
- ?.toCodepointList(cameFrom)
- ?: error("can't parse $name")
-
- ++countFound
-
- val key = code.toKey(cameFrom)!!
-
- var emoji = emojiMap[key]
- if (emoji == null) {
- val unified2 = fixUnified[key] ?: unifiedQualifier(code)
- if( unified2.list.size==1 && unified2.list.first()<256){
- ++countError
- log.e("bad unified code: $unified2 $unixPath")
- }
- emoji = Emoji(key, unified2)
-
- emojiMap[key] = emoji
- ++countCreate
- }
-
- emoji.imageFiles.add(Pair(imageFile, cameFrom))
- emoji.addCode(code)
- }
-
- log.d("scanImageDir: found=$countFound,create=$countCreate, dir=$dir")
- if(countError>0) error("please fix unified codes. countError=$countError")
- }
-
- // サブフォルダをスキャンして絵文字別に画像データを確定する
- private fun scanEmojiImages() {
-
- scanImageDir("override", "override", """([0-9A-Fa-f_-]+)\.""")
- scanImageDir("mastodonSVG", "mastodon/public/emoji", """([0-9A-Fa-f_-]+)\.""")
- scanImageDir("twemojiSvg", "twemoji/assets/svg/", """([0-9A-Fa-f_-]+)\.""")
- scanImageDir("notoSvg", "noto-emoji/svg", """emoji_u([0-9A-Fa-f_-]+)\.""") { code ->
- if (code.list.last() != 0xfe0f)
- "${code.toHex()}-fe0f".toCodepointList("notoSvgFix")!!
- else
- code
- }
- scanImageDir("notoPng", "noto-emoji/png/72", """emoji_u([0-9A-Fa-f_-]+)\.""")
- scanImageDir("emojiDataTw", "emoji-data/img-twitter-72", """([0-9A-Fa-f_-]+)\.""")
- scanImageDir("emojiDataGo", "emoji-data/img-google-136", """([0-9A-Fa-f_-]+)\.""")
- scanImageDir("emojiOne", "emojione/assets/svg", """([0-9A-Fa-f_-]+)\.""")
- }
-
- // 絵文字ごとにファイルをコピーする
- private fun copyImages() {
- var countSvg = 0
- var countPng = 0
- for (emoji in emojiMap.values) {
- val strResName = emoji.key.toResourceId()
- emoji.resName = strResName
- val (src, _) = emoji.imageFiles.first()
- if (src.name.endsWith("svg")) {
- ++countSvg
- val dst = File("assets/$strResName.svg")
- if (!dst.exists()) {
- //svgToVectorDrawable(dst, src)
- copyFile(dst, src)
- }
- } else {
- ++countPng
- val dst = File("drawable-nodpi/$strResName.webp")
- if (!dst.exists()) {
- val pb = ProcessBuilder(pathCwebp, src.path, "-quiet", "-o", dst.path)
- val rv = pb.start().waitFor()
- if (rv != 0) error("cwebp failed. dst=$dst src=$src")
- }
- }
- }
- log.d("copyImage: countSvg=$countSvg, countPng=$countPng")
- }
-
- // emojiDataのjsonを読んで変換コードポイントやショートネームを追加する
- private fun readEmojiData() {
- for( src in File("./emoji-data/emoji.json")
- .readAllBytes()
- .decodeUtf8()
- .decodeJsonArray()
- .objectList()
- ){
- // 絵文字のコードポイント一覧
- var unified = src.string("unified")?.toCodepointList("EmojiDataJsonUnified")!!
- var key = unified.toKey("EmojiDataJsonUnifiedKey")
- var emoji = emojiMap[key] ?: error("can't find emoji for $key")
-
- if (emoji.unified != unified) {
- log.d("readEmojiData: unified not match. emoji=${emoji.unified}, emojiData=${unified}")
- emoji.addCode(unified)
- }
-
- src.stringArrayList("variations")
- ?.mapNotNull { it.toCodepointList("EmojiDataJsonVariation") }
- ?.forEach { emoji.addCode(it) }
-
- for (k in emojiDataCodepointsVendors) {
- src.string(k)?.toCodepointList("EmojiDataJson($k)")
- ?.let { emoji.addCode(it) }
- }
-
- // short_name のリスト
- val shortNames = HashSet().also { dst ->
- src.string("short_name")?.addTo(dst)
- src.stringArrayList("short_names")?.forEach {
- it.addTo(dst)
- }
- }.mapNotNull { it.toShortName("EmojiDataJson") }
-
- if (shortNames.isEmpty())
- error("emojiData ${src.string("unified")} has no shortName")
- shortNames.forEach { emoji.addShortName(it) }
-
- val parentEmoji = emoji
-
- // スキントーン
- src.jsonObject("skin_variations")?.let { skinVariations ->
- val parentName = shortNames.first()
- val skinToneUsed = HashSet()
- for ((k, data) in skinVariations.entries ) {
- if (data !is JsonObject) continue
-
- // 再帰呼び出しあり
- fun handleCode(list: IntArray, idx: Int, parentSuffix: Array, suffixIndex: Int) {
- val code = list.elementAtOrNull(idx) ?: return
- val modifier = skinToneModifiers[code]
- ?: error("missing skinToneModifier u${list[idx].toString(16)} for $parentName")
- skinToneUsed.add(code)
- val lastSuffix = modifier.suffixList[suffixIndex]
- val suffix =
- if (parentSuffix.contains(lastSuffix))
- parentSuffix
- else
- arrayOf(*parentSuffix, lastSuffix)
- if (idx < list.size - 1) {
- handleCode(list, idx + 1, suffix, suffixIndex)
- } else {
- unified = data.string("unified")!!.toCodepointList("EmojiData(skinTone)")!!
- key = unified.toKey("EmojiData(skinTone)")
- emoji = emojiMap[key] ?: error("can't find emoji for $key")
-
- emoji.addCode(unified)
- shortNames
- .mapNotNull { (it.name + suffix.joinToString("")).toShortName("EmojiData(skinTone)") }
- .forEach { emoji.addShortName(it) }
-
- emoji.addToneParent(parentEmoji)
- }
- }
-
- val codeList = k.toCodepointList("toneSpec")!!.list
- for (suffixIndex in skinToneModifiers.values.first().suffixList.indices) {
- handleCode(codeList, 0, emptyArray(), suffixIndex)
- }
- }
- if (skinToneUsed.size != skinToneModifiers.size) {
- log.w("skin tone code not fully used: $parentName")
- }
- }
- }
- }
-
-
-
- private fun readEmojiOne() {
- val cameFrom = "EmojiOneJson"
- val root = File("./old-emojione.json")
- .readAllBytes()
- .decodeUtf8()
- .decodeJsonObject()
- for ((strCode, item) in root.entries) {
- if (item !is JsonObject) continue
-
- // コードを確認する
- val code = strCode.toCodepointList(cameFrom)
- ?: error("can't parse $strCode")
-
- val key = code.toKey(cameFrom)
- val emoji = emojiMap[key] ?: error("missing emoji for $key")
-
- val names = ArrayList()
- item.string("alpha code")?.let { names.add(it) }
- item.string("aliases")?.split("|")?.let { names.addAll(it) }
- names
- .mapNotNull { it.toShortName(cameFrom) }
- .filter { !ignoreEmojiOneShortNames.contains(it.name) }
- .forEach { emoji.addShortName(it) }
- }
- }
-
-
-
- private fun fixCategory() {
- val nameMap = HashMap().apply {
- for (emoji in emojiMap.values)
- for (shortName in emoji.shortNames)
- this[shortName] = emoji
- }
- for ((name, strShortName) in fixCategory) {
- val category = categoryNames.find { it.name == name }
- ?: error("fixCategory: missing category for $name")
- val shortName = strShortName.toShortName("fixCategory")
- ?: error("fixCategory: can't parse $strShortName")
- val emoji = nameMap[shortName]
- ?: error("fixCategory: missing emoji for $strShortName")
-
- category.addEmoji(emoji, addingName = shortName.toString())
- log.d("fixCategory $category ${emoji.resName} $shortName")
- }
- }
-
- private fun String.unescapeXml() = StringEscapeUtils.unescapeXml(this)
-
- private val vendorText = HashMap>()
- private val vendorUnicodeMap = HashMap>()
-
- private fun readVendorCode() {
- var error = false
- // まとまったxmlを読む
- // 優先順位の都合でベンダ別に読み直す
- val xml1 = File("emoji4unicode/data/emoji4unicode.xml")
- .readAllBytes()
- .decodeUtf8()
- for (vendor in arrayOf("docomo", "kddi", "softbank")) {
- """]+)""".toRegex().findAll(xml1).forEach { mr1 ->
- val attrs = HashMap()
- """(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 ->
- attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml()
- }
- val unicode = attrs["unicode"]?.toCodepointList("emoji4unicode") ?: return@forEach
- val strFrom = attrs[vendor] ?: return@forEach
- if (strFrom.indexOf(">") != -1) return@forEach
- val from = strFrom.toCodepointList("emoji4unicode") ?: return@forEach
- val text = "${attrs["name"]}/${attrs["text_fallback"]}"
- vendorText.prepare(from) { ArrayList() }.add(text)
- val old = vendorUnicodeMap[from]
- if (old != null) {
- if (old.second == "kddi" && vendor == "softbank") return@forEach
- error = true
- log.e("vendorUnicodeMap conflict. code=$from old=$old new=$unicode($vendor)")
- } else {
- vendorUnicodeMap[from] = Pair(unicode, vendor)
- }
- }
- }
-
- for (vendor in arrayOf("docomo", "kddi", "softbank")) {
-
- // ベンダ個別ファイルから説明文を読む
- val xml = File("emoji4unicode/data/${vendor}/carrier_data.xml")
- .readAllBytes()
- .decodeUtf8()
-
- """]+)""".toRegex().findAll(xml).forEach { mr1 ->
- val attrs = HashMap()
- """(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 ->
- attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml()
- }
-
- val code = attrs["unicode"]?.toCodepointList("emoji4unicode")
- ?: return@forEach
-
- attrs["name_ja"]?.let { vendorText.prepare(code) { ArrayList() }.add("$it($vendor)") }
- }
- }
-
- if (error) error("readVendorCode failed.")
- }
-
- private var hasConflict = false
-
- // コード=>画像の重複を調べる
- private fun checkCodeConflict() {
-
- val codeMap = HashMap>()
- for (emoji in emojiMap.values) {
- for (code in emoji.codes) {
- codeMap.prepare(code) { HashSet() }.add(emoji)
- }
- }
-
- for ((code, emojis) in codeMap.entries.sortedBy { it.key }) {
- if (emojis.size == 1) continue
-
- val fixResName = fixCode[code]
- if (fixResName != null) {
- var found = false
- for (emoji in emojis) {
- if (emoji.resName == fixResName) {
- found = true
- } else {
- emoji.codes.forEach {
- if (it == code) log.w("fixCode: delete(1) $it for ${emoji.resName}")
- }
- emoji.removeCodeByCode(code)
- }
- }
- if (!found) error("checkCodeConflict: missing emoji resName=$fixResName")
- continue
- }
-
- val onlyVendorCode = emojis.all { emoji ->
- when (emoji.codes.find { it == code }?.from) {
- "EmojiDataJson(au)", "EmojiDataJson(softbank)", "EmojiDataJson(docomo)" -> true
- else -> false
- }
- }
-
- if (onlyVendorCode) {
- val preferCode = vendorUnicodeMap[code]?.first
- if (preferCode != null) {
- val targetEmoji = emojis.find { emoji -> emoji.codes.any { it == preferCode } }
- if (targetEmoji != null) {
- emojis.forEach { emoji ->
- if (emoji != targetEmoji) {
- emoji.codes.forEach {
- if (it == code) log.w("fixCode: delete(2) $it for ${emoji.resName}")
- }
- emoji.removeCodeByCode(code)
- }
- }
- continue
- }
- log.e("checkCodeConflict: can't use vendorUnicodeMap. code=$code, preferCode=$preferCode")
- }
- }
-
- log.e("checkCodeConflict: code $code ${vendorText[code]} ${
- emojis.joinToString(" ") {
- "${it.resName}/${it.unified.toRawString()}"
- }
- }")
- hasConflict = true
- }
-
- // コードのない絵文字のチェック
- for (emoji in emojiMap.values) {
- if (emoji.codes.isNotEmpty()) continue
- val fixes = fixCode.entries.filter { it.value == emoji.resName }
- when (fixes.size) {
- 0 -> {
- log.e("checkCodeConflict: emoji has no code. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
- hasConflict = true
- }
- 1 -> {
- val fix = fixes.first()
- val code = fix.key
- emoji.addCode(code)
- log.i("fixCode code=$code resName=${emoji.resName}")
- }
- else -> {
- log.e("checkCodeConflict: multiple fix match for ${emoji.resName}")
- hasConflict = true
- }
- }
- }
- }
-
- private fun checkShortNameConflict() {
- val nameMap = HashMap>().apply {
- for (emoji in emojiMap.values) {
- for (name in emoji.shortNames) {
- prepare(name) { HashSet() }.add(emoji)
- }
- }
- }
-
- // cameFromCategory 以外のshortNameがあるなら、cameFromCategoryのshortNameは使わない
- for (emoji in emojiMap.values) {
- if (emoji.shortNames.any { it.cameFrom != fromCategoryHtml }) {
- emoji.removeShortNameByCameFrom(fromCategoryHtml)
- }
- }
-
- for ((name, emojis) in nameMap.entries.sortedBy { it.key }) {
- // shortNameからemojiを1意に解決できるなら正常
- if (emojis.size == 1) continue
-
- // fixNameで解決する
- val fixResName = fixName[name]
- if (fixResName != null) {
- var found = false
- for (emoji in emojis) {
- if (emoji.resName == fixResName) {
- found = true
- } else {
- emoji.removeShortName(name.name)
- }
- }
- if (!found) error("checkShortNameConflict: missing emoji resName=$fixResName")
- continue
- }
-
- // emoji,cameFrom のペアのリスト
- val froms = emojiMap.values
- .flatMap { emoji -> emoji.shortNames.map { Pair(emoji, it) } }
- .filter { it.second == name }
- .map { Pair(it.first, it.second.cameFrom) }
-
- // どこ由来のShortNameかで優先順位をつける
- val preferFrom = froms.find { it.second == "EmojiDataJson" }
- ?: froms.find { it.second == "EmojiSpec" }
- ?: froms.find { it.second == "EmojiOneJson" }
- ?: froms.find { it.second == fromCategoryHtml }
-
- if (preferFrom != null) {
- // 優先順位の低いemojiからshortNameを除去する
- var found = false
- for (emoji in emojis) {
- if (emoji == preferFrom.first) {
- found = true
- } else {
- emoji.removeShortName(name.name)
- }
- }
- if (!found) error("checkShortNameConflict: missing emoji ${preferFrom.first.key}")
- continue
- }
-
- // 解決できなかった
- log.e("checkShortNameConflict: name $name froms=${froms.joinToString(",") { "${it.first.resName}${it.second}" }}")
- hasConflict = true
- }
-
- // 名前のない絵文字のチェック
- for (emoji in emojiMap.values) {
- if (emoji.shortNames.isNotEmpty()) continue
- val fix = fixName.entries.filter { it.value == emoji.resName }
- if (fix.size > 1) error("checkShortNameConflict: multiple fix match for ${emoji.resName}")
- if (fix.size == 1) {
- emoji.addShortName(fix.first().key)
- continue
- }
- log.e("checkShortNameConflict: emoji has no shortName. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
- hasConflict = true
- }
- }
-
- private fun fixToneParent() {
- var hasError = false
- val nameMap = HashMap()
- for (emoji in emojiMap.values) {
- for (shortName in emoji.shortNames) {
- nameMap[shortName.name] = emoji
- }
- }
- val suffixList = skinToneModifiers.values
- .flatMap { it.suffixList.toList() }
- .sortedByDescending { it.length }
-
-
- for (emoji in emojiMap.values) {
- // トーンの絵文字の一部は内部に他のトーンの名前を含むので誤検出を回避する
- if (emoji.resName in arrayOf("emj_1f3fc", "emj_1f3fe")) continue
-
- fun String.removeToneSuffix(): String {
- var name = this
- when (name) {
- "kiss_light_skin_tone" -> return "couplekiss"
- "kiss_medium_light_skin_tone" -> return "couplekiss"
- "kiss_medium_skin_tone" -> return "couplekiss"
- "kiss_medium_dark_skin_tone" -> return "couplekiss"
- "kiss_dark_skin_tone" -> return "couplekiss"
- }
-
- suffixList.forEach { name = name.replace(it, "") }
- return when (name) {
- "couple_with_heart_person_person" -> "couple_with_heart"
- "kiss_person_person" -> "couplekiss"
- "kiss_woman_woman" -> "woman_kiss_woman"
- "kiss_woman_man" -> "woman_kiss_man"
- "kiss_man_man" -> "man_kiss_man"
- else -> name
- }
- }
-
- for (shortName in emoji.shortNames) {
- val parent = nameMap[shortName.name.removeToneSuffix()]
- if (parent == emoji) continue
- if (parent == null) {
- log.e("${emoji.resName} $shortName looks like tone variation,but can't find parent.")
- hasError = true
- continue
- }
- emoji.addToneParent(parent)
- }
- }
-
- for (emoji in emojiMap.values) {
- val parents = emoji.toneParents
- if (parents.isEmpty()) continue
- if (parents.size > 1) {
- log.e("${emoji.resName} has many parents. ${parents.joinToString(",")}")
- hasError = true
- continue
- }
- parents.forEach { parent ->
- val toneCode = emoji.key.getToneCode("makeToneMap")
- ?: error("emoji $emoji has parent, but has no toneCode.")
- when (val old = parent.toneChildren[toneCode]) {
- null -> parent.toneChildren[toneCode] = emoji
- emoji -> {
- }
- else -> error("conflict toneChildren. emoji ${parent.resName} has $old and $emoji.")
- }
- }
- }
-
- if (hasError) error("toneParent error.")
- }
-
- private fun writeData(){
- val outFile = "emoji_map.txt"
- UnixPrinter(File(outFile)).use { writer ->
-
- // 絵文字をskipするか事前に調べる
- for (emoji in emojiMap.values.sortedBy { it.key }) {
-
- val codeSet = emoji.codeSet.sorted()
- if (codeSet.isEmpty()) {
- log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes")
- emoji.skip = true
- } else if (emoji.unified.list.isAsciiEmoji()) {
- log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes")
- emoji.skip = true
- }
- }
-
- for (emoji in emojiMap.values.sortedBy { it.key }) {
- if (emoji.skip) continue
-
- // 画像リソースID
- val strResName = emoji.resName
- if (File("assets/$strResName.svg").isFile) {
- writer.println("svg:$strResName.svg//${emoji.imageFiles.first().second}")
- } else {
- writer.println("drawable:$strResName//${emoji.imageFiles.first().second}")
- }
-
- // unified
- writer.println("un:${emoji.unified.toRawString()}//${emoji.unified.from}")
-
- // Unicodeシーケンス
- val codeSet = emoji.codeSet.sorted()
- for (code in codeSet) {
- if (code == emoji.unified) continue
- val raw = code.toRawString()
- if (raw.isEmpty()) error("too short code! ${emoji.resName}")
- writer.println("u:$raw//${code.from}")
- }
-
- // 画像リソースIDとshortcodeの関連付けを出力する
- // 投稿時にshortcodeをユニコードに変換するため、shortcodeとUTF-16シーケンスの関連付けを出力する
- val nameList = emoji.nameList.notEmpty()
- ?: error("missing shortName. ${emoji.resName}")
- nameList.forEachIndexed { index, triple ->
- val (_, name, froms) = triple
- val header = if (index == 0) "sn" else "s"
- writer.println("${header}:$name//${froms.joinToString(",")}")
- }
- }
-
- fun Category.printCategory(list:List){
- writer.println("cn:${this.name}")
- for(emoji in list){
- writer.println("c:${emoji.unified.toRawString()}")
- emoji.usedInCategory = this
- }
- }
-
- categoryNames.forEach { category ->
- category.printCategory(category.emojis.filter { !it.skip })
- }
-
- run{
- val category = categoryNames.find{ it.name == "Others"}!!
- category.printCategory(
- emojiMap.values
- .filter { it.usedInCategory == null && it.toneParents.isEmpty() }
- .sortedBy { it.shortNames.first() }
- )
- }
-
- // スキントーン
- emojiMap.values
- .filter { it.toneChildren.isNotEmpty() }
- .sortedBy { it.key }
- .forEach { parent ->
- if( parent.usedInCategory==null){
- log.e("parent ${parent.resName} not used in any category!")
- }
- parent.toneChildren.entries
- .toList()
- .sortedBy { it.key }
- .forEach eachChild@{
- val child = it.value
- if (child.skip) return@eachChild
- writer.println("t:${parent.unified.toRawString()},${it.key.toRawString()},${child.unified.toRawString()}")
- }
- }
-
- // 複合トーン
- run{
- val category = categoryNames.find { it.name == "ComplexTones" }!!
- category.printCategory(
- emojiMap.values
- .filter { it.toneChildren.isNotEmpty() }
- .sortedBy { it.key }
- .flatMap { parent ->
- if( parent.usedInCategory==null){
- log.e("parent ${parent.resName} not used in any category!")
- }
- parent.toneChildren.entries
- .toList()
- .filter { it.key.list.size > 1 }
- .sortedBy { it.key }
- .map{ it.value}
- }
- )
- }
- }
-
- log.d("wrote $outFile")
- }
-
- suspend fun run() {
-
- // 修正用データを読む
- readFixData()
-
- // emojipediaからバージョン別一覧とカテゴリ別一覧を読む
- val emojipediaData = HttpClient {
- install(HttpTimeout) {
- val t = 30000L
- requestTimeoutMillis = t
- connectTimeoutMillis = t
- socketTimeoutMillis = t
- }
- }.use { client ->
- readEmojipedia(client)
- }
-
- // 画像をスキャンする前に絵文字のqualified codeを調べておく
- readEmojipediaQualified(emojipediaData)
-
- // サブフォルダから絵文字の画像を収集する
- scanEmojiImages()
- // 収集した画像をコピーする
- copyImages()
-
- addEmojipediaShortnames()
-
- readVendorCode()
- readEmojiData()
- readEmojiOne()
- readCategoryShortName(emojipediaData)
-
- checkCodeConflict()
- checkShortNameConflict()
-
- fixToneParent()
-
-
- fixCategory()
-
- if (hasConflict) error("please fix conflicts.")
-
- // shortcodeに含まれる文字の種類を列挙する
- val nameChars = HashSet()
- val nameMap = HashMap()
- for (emoji in emojiMap.values) {
- for (shortName in emoji.shortNames) {
- nameMap[shortName] = emoji
- for (c in shortName.name)
- nameChars.add(c)
- }
- }
- log.i("nameChars: [${nameChars.sorted().joinToString("")}]")
-
- writeData()
-
- log.d("codeCameFroms: ${Emoji.codeCameFroms.joinToString(",")}")
- log.d("nameCameFroms: ${Emoji.nameCameFroms.joinToString(",")}")
- }
+ if (ignoreShortName.any { it == shortName.name }) {
+ log.w("skip shortname $shortName $code")
+ continue
+ }
+
+ emojipediaShortNames[key] = shortName
+ }
+
+ // hrefList.sortedBy{ it.first }.forEach { log.d("href=${it.first} ${it.second}") }
+
+ if (countError > 0) error("please fix unified codes. countError=$countError")
+ }
+
+ private fun addEmojipediaShortnames() {
+ for ((key, shortName) in emojipediaShortNames) {
+ emojiMap[key]?.addShortName(shortName)
+ }
+ }
+
+ // Emojipediaのデータを使ってカテゴリ別に絵文字一覧を用意する
+ private fun readCategoryShortName(root: JsonObject) {
+ for (category in categoryNames) {
+ val list = root.jsonObject("categories")?.jsonArray(category.name) ?: continue
+ for (cols in list.filterIsInstance()) {
+ val spanText = cols[0] as String
+ val href = cols[1] as String
+
+ val shortName = href.replace("/", "").toShortName(fromCategoryHtml)
+ ?: error("can't parse $href")
+
+ if (ignoreShortName.any { shortName.name == it }) continue
+
+ val code = spanText.listCodePoints().toCodepointList(fromCategoryHtml)
+ ?: error("can't parse code from $spanText")
+
+ val key = code.toKey(fromCategoryHtml)
+ val emoji = emojiMap[key]
+ ?: error("can't find emoji. category=${category.name}, href=$href, spanText=$spanText")
+
+ category.addEmoji(emoji, allowDuplicate = true, addingName = shortName.toString())
+ }
+ }
+ }
+
+ // サブフォルダをスキャンして絵文字別に画像データを確定する
+ private fun scanImageDir(
+ cameFrom: String,
+ dirPath: String,
+ @Language("RegExp") codeSpec: String,
+ unifiedQualifier: (CodepointList) -> CodepointList = { it }
+ ) {
+ val dir = File(dirPath)
+ val reCodeSpec = codeSpec.toRegex()
+ var countFound = 0
+ var countCreate = 0
+ var countError = 0
+ val files = dir.listFiles() ?: error("listFiles returns null. $dir")
+ for (imageFile in files) {
+ if (!imageFile.isFile) continue
+ val unixPath = imageFile.path.replace("\\", "/")
+ if (ignoreImagePath.any { unixPath.endsWith(it) }) continue
+
+ val name = imageFile.name.replace("_border.", ".")
+
+ val code = reCodeSpec.find(name)
+ ?.groupValues
+ ?.elementAtOrNull(1)
+ ?.toCodepointList(cameFrom)
+ ?: error("can't parse $name")
+
+ ++countFound
+
+ val key = code.toKey(cameFrom)!!
+
+ var emoji = emojiMap[key]
+ if (emoji == null) {
+ val unified2 = fixUnified[key] ?: unifiedQualifier(code)
+ if (unified2.list.size == 1 && unified2.list.first() < 256) {
+ ++countError
+ log.e("bad unified code: $unified2 $unixPath")
+ }
+ emoji = Emoji(key, unified2)
+
+ emojiMap[key] = emoji
+ ++countCreate
+ }
+
+ emoji.imageFiles.add(Pair(imageFile, cameFrom))
+ emoji.addCode(code)
+ }
+
+ log.d("scanImageDir: found=$countFound,create=$countCreate, dir=$dir")
+ if (countError > 0) error("please fix unified codes. countError=$countError")
+ }
+
+ // サブフォルダをスキャンして絵文字別に画像データを確定する
+ @Suppress("RegExpSimplifiable")
+ private fun scanEmojiImages() {
+
+ scanImageDir("override", "override", """([0-9A-Fa-f_-]+)\.""")
+ scanImageDir("mastodonSVG", "mastodon/public/emoji", """([0-9A-Fa-f_-]+)\.""")
+ scanImageDir("twemojiSvg", "twemoji/assets/svg/", """([0-9A-Fa-f_-]+)\.""")
+ scanImageDir("notoSvg", "noto-emoji/svg", """emoji_u([0-9A-Fa-f_-]+)\.""") { code ->
+ if (code.list.last() != 0xfe0f)
+ "${code.toHex()}-fe0f".toCodepointList("notoSvgFix")!!
+ else
+ code
+ }
+ scanImageDir("notoPng", "noto-emoji/png/72", """emoji_u([0-9A-Fa-f_-]+)\.""")
+ scanImageDir("emojiDataTw", "emoji-data/img-twitter-72", """([0-9A-Fa-f_-]+)\.""")
+ scanImageDir("emojiDataGo", "emoji-data/img-google-136", """([0-9A-Fa-f_-]+)\.""")
+ scanImageDir("emojiOne", "emojione/assets/svg", """([0-9A-Fa-f_-]+)\.""")
+ }
+
+ // 絵文字ごとにファイルをコピーする
+ private fun copyImages() {
+ var countSvg = 0
+ var countPng = 0
+ for (emoji in emojiMap.values) {
+ val strResName = emoji.key.toResourceId()
+ emoji.resName = strResName
+ val (src, _) = emoji.imageFiles.first()
+ if (src.name.endsWith("svg")) {
+ ++countSvg
+ val dst = File("assets/$strResName.svg")
+ if (!dst.exists()) {
+ //svgToVectorDrawable(dst, src)
+ copyFile(dst, src)
+ }
+ } else {
+ ++countPng
+ val dst = File("drawable-nodpi/$strResName.webp")
+ if (!dst.exists()) {
+ val pb = ProcessBuilder(pathCwebp, src.path, "-quiet", "-o", dst.path)
+ val rv = pb.start().waitFor()
+ if (rv != 0) error("cwebp failed. dst=$dst src=$src")
+ }
+ }
+ }
+ log.d("copyImage: countSvg=$countSvg, countPng=$countPng")
+ }
+
+ // emojiDataのjsonを読んで変換コードポイントやショートネームを追加する
+ private fun readEmojiData() {
+ for (src in File("./emoji-data/emoji.json")
+ .readAllBytes()
+ .decodeUtf8()
+ .decodeJsonArray()
+ .objectList()
+ ) {
+ // 絵文字のコードポイント一覧
+ var unified = src.string("unified")?.toCodepointList("EmojiDataJsonUnified")!!
+ var key = unified.toKey("EmojiDataJsonUnifiedKey")
+ var emoji = emojiMap[key] ?: error("can't find emoji for $key")
+
+ if (emoji.unified != unified) {
+ log.d("readEmojiData: unified not match. emoji=${emoji.unified}, emojiData=${unified}")
+ emoji.addCode(unified)
+ }
+
+ src.stringArrayList("variations")
+ ?.mapNotNull { it.toCodepointList("EmojiDataJsonVariation") }
+ ?.forEach { emoji.addCode(it) }
+
+ for (k in emojiDataCodepointsVendors) {
+ src.string(k)?.toCodepointList("EmojiDataJson($k)")
+ ?.let { emoji.addCode(it) }
+ }
+
+ // short_name のリスト
+ val shortNames = HashSet().also { dst ->
+ src.string("short_name")?.addTo(dst)
+ src.stringArrayList("short_names")?.forEach {
+ it.addTo(dst)
+ }
+ }.mapNotNull { it.toShortName("EmojiDataJson") }
+
+ if (shortNames.isEmpty())
+ error("emojiData ${src.string("unified")} has no shortName")
+ shortNames.forEach { emoji.addShortName(it) }
+
+ val parentEmoji = emoji
+
+ // スキントーン
+ src.jsonObject("skin_variations")?.let { skinVariations ->
+ val parentName = shortNames.first()
+ val skinToneUsed = HashSet()
+ for ((k, data) in skinVariations.entries) {
+ if (data !is JsonObject) continue
+
+ // 再帰呼び出しあり
+ fun handleCode(list: IntArray, idx: Int, parentSuffix: Array, suffixIndex: Int) {
+ val code = list.elementAtOrNull(idx) ?: return
+ val modifier = skinToneModifiers[code]
+ ?: error("missing skinToneModifier u${list[idx].toString(16)} for $parentName")
+ skinToneUsed.add(code)
+ val lastSuffix = modifier.suffixList[suffixIndex]
+ val suffix =
+ if (parentSuffix.contains(lastSuffix))
+ parentSuffix
+ else
+ arrayOf(*parentSuffix, lastSuffix)
+ if (idx < list.size - 1) {
+ handleCode(list, idx + 1, suffix, suffixIndex)
+ } else {
+ unified = data.string("unified")!!.toCodepointList("EmojiData(skinTone)")!!
+ key = unified.toKey("EmojiData(skinTone)")
+ emoji = emojiMap[key] ?: error("can't find emoji for $key")
+
+ emoji.addCode(unified)
+ shortNames
+ .mapNotNull { (it.name + suffix.joinToString("")).toShortName("EmojiData(skinTone)") }
+ .forEach { emoji.addShortName(it) }
+
+ emoji.addToneParent(parentEmoji)
+ }
+ }
+
+ val codeList = k.toCodepointList("toneSpec")!!.list
+ for (suffixIndex in skinToneModifiers.values.first().suffixList.indices) {
+ handleCode(codeList, 0, emptyArray(), suffixIndex)
+ }
+ }
+ if (skinToneUsed.size != skinToneModifiers.size) {
+ log.w("skin tone code not fully used: $parentName")
+ }
+ }
+ }
+ }
+
+
+ private fun readEmojiOne() {
+ val cameFrom = "EmojiOneJson"
+ val root = File("./old-emojione.json")
+ .readAllBytes()
+ .decodeUtf8()
+ .decodeJsonObject()
+ for ((strCode, item) in root.entries) {
+ if (item !is JsonObject) continue
+
+ // コードを確認する
+ val code = strCode.toCodepointList(cameFrom)
+ ?: error("can't parse $strCode")
+
+ val key = code.toKey(cameFrom)
+ val emoji = emojiMap[key] ?: error("missing emoji for $key")
+
+ val names = ArrayList()
+ item.string("alpha code")?.let { names.add(it) }
+ item.string("aliases")?.split("|")?.let { names.addAll(it) }
+ names
+ .mapNotNull { it.toShortName(cameFrom) }
+ .filter { !ignoreEmojiOneShortNames.contains(it.name) }
+ .forEach { emoji.addShortName(it) }
+ }
+ }
+
+
+ private fun fixCategory() {
+ val nameMap = HashMap().apply {
+ for (emoji in emojiMap.values)
+ for (shortName in emoji.shortNames)
+ this[shortName] = emoji
+ }
+ for ((name, strShortName) in fixCategory) {
+ val category = categoryNames.find { it.name == name }
+ ?: error("fixCategory: missing category for $name")
+ val shortName = strShortName.toShortName("fixCategory")
+ ?: error("fixCategory: can't parse $strShortName")
+ val emoji = nameMap[shortName]
+ ?: error("fixCategory: missing emoji for $strShortName")
+
+ category.addEmoji(emoji, addingName = shortName.toString())
+ log.d("fixCategory $category ${emoji.resName} $shortName")
+ }
+ }
+
+ private fun String.unescapeXml() = StringEscapeUtils.unescapeXml(this)
+
+ private val vendorText = HashMap>()
+ private val vendorUnicodeMap = HashMap>()
+
+ private fun readVendorCode() {
+ var error = false
+ // まとまったxmlを読む
+ // 優先順位の都合でベンダ別に読み直す
+ val xml1 = File("emoji4unicode/data/emoji4unicode.xml")
+ .readAllBytes()
+ .decodeUtf8()
+ for (vendor in arrayOf("docomo", "kddi", "softbank")) {
+ """]+)""".toRegex().findAll(xml1).forEach { mr1 ->
+ val attrs = HashMap()
+ """(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 ->
+ attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml()
+ }
+ val unicode = attrs["unicode"]?.toCodepointList("emoji4unicode") ?: return@forEach
+ val strFrom = attrs[vendor] ?: return@forEach
+ if (strFrom.indexOf(">") != -1) return@forEach
+ val from = strFrom.toCodepointList("emoji4unicode") ?: return@forEach
+ val text = "${attrs["name"]}/${attrs["text_fallback"]}"
+ vendorText.prepare(from) { ArrayList() }.add(text)
+ val old = vendorUnicodeMap[from]
+ if (old != null) {
+ if (old.second == "kddi" && vendor == "softbank") return@forEach
+ error = true
+ log.e("vendorUnicodeMap conflict. code=$from old=$old new=$unicode($vendor)")
+ } else {
+ vendorUnicodeMap[from] = Pair(unicode, vendor)
+ }
+ }
+ }
+
+ for (vendor in arrayOf("docomo", "kddi", "softbank")) {
+
+ // ベンダ個別ファイルから説明文を読む
+ val xml = File("emoji4unicode/data/${vendor}/carrier_data.xml")
+ .readAllBytes()
+ .decodeUtf8()
+
+ """]+)""".toRegex().findAll(xml).forEach { mr1 ->
+ val attrs = HashMap()
+ """(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 ->
+ attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml()
+ }
+
+ val code = attrs["unicode"]?.toCodepointList("emoji4unicode")
+ ?: return@forEach
+
+ attrs["name_ja"]?.let { vendorText.prepare(code) { ArrayList() }.add("$it($vendor)") }
+ }
+ }
+
+ if (error) error("readVendorCode failed.")
+ }
+
+ private var hasConflict = false
+
+ // コード=>画像の重複を調べる
+ private fun checkCodeConflict() {
+
+ val codeMap = HashMap>()
+ for (emoji in emojiMap.values) {
+ for (code in emoji.codes) {
+ codeMap.prepare(code) { HashSet() }.add(emoji)
+ }
+ }
+
+ for ((code, emojis) in codeMap.entries.sortedBy { it.key }) {
+ if (emojis.size == 1) continue
+
+ val fixResName = fixCode[code]
+ if (fixResName != null) {
+ var found = false
+ for (emoji in emojis) {
+ if (emoji.resName == fixResName) {
+ found = true
+ } else {
+ emoji.codes.forEach {
+ if (it == code) log.w("fixCode: delete(1) $it for ${emoji.resName}")
+ }
+ emoji.removeCodeByCode(code)
+ }
+ }
+ if (!found) error("checkCodeConflict: missing emoji resName=$fixResName")
+ continue
+ }
+
+ val onlyVendorCode = emojis.all { emoji ->
+ when (emoji.codes.find { it == code }?.from) {
+ "EmojiDataJson(au)", "EmojiDataJson(softbank)", "EmojiDataJson(docomo)" -> true
+ else -> false
+ }
+ }
+
+ if (onlyVendorCode) {
+ val preferCode = vendorUnicodeMap[code]?.first
+ if (preferCode != null) {
+ val targetEmoji = emojis.find { emoji -> emoji.codes.any { it == preferCode } }
+ if (targetEmoji != null) {
+ emojis.forEach { emoji ->
+ if (emoji != targetEmoji) {
+ emoji.codes.forEach {
+ if (it == code) log.w("fixCode: delete(2) $it for ${emoji.resName}")
+ }
+ emoji.removeCodeByCode(code)
+ }
+ }
+ continue
+ }
+ log.e("checkCodeConflict: can't use vendorUnicodeMap. code=$code, preferCode=$preferCode")
+ }
+ }
+
+ log.e("checkCodeConflict: code $code ${vendorText[code]} ${
+ emojis.joinToString(" ") {
+ "${it.resName}/${it.unified.toRawString()}"
+ }
+ }")
+ hasConflict = true
+ }
+
+ // コードのない絵文字のチェック
+ for (emoji in emojiMap.values) {
+ if (emoji.codes.isNotEmpty()) continue
+ val fixes = fixCode.entries.filter { it.value == emoji.resName }
+ when (fixes.size) {
+ 0 -> {
+ log.e("checkCodeConflict: emoji has no code. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
+ hasConflict = true
+ }
+ 1 -> {
+ val fix = fixes.first()
+ val code = fix.key
+ emoji.addCode(code)
+ log.i("fixCode code=$code resName=${emoji.resName}")
+ }
+ else -> {
+ log.e("checkCodeConflict: multiple fix match for ${emoji.resName}")
+ hasConflict = true
+ }
+ }
+ }
+ }
+
+ private fun checkShortNameConflict() {
+ val nameMap = HashMap>().apply {
+ for (emoji in emojiMap.values) {
+ for (name in emoji.shortNames) {
+ prepare(name) { HashSet() }.add(emoji)
+ }
+ }
+ }
+
+ // cameFromCategory 以外のshortNameがあるなら、cameFromCategoryのshortNameは使わない
+ for (emoji in emojiMap.values) {
+ if (emoji.shortNames.any { it.cameFrom != fromCategoryHtml }) {
+ emoji.removeShortNameByCameFrom(fromCategoryHtml)
+ }
+ }
+
+ for ((name, emojis) in nameMap.entries.sortedBy { it.key }) {
+ // shortNameからemojiを1意に解決できるなら正常
+ if (emojis.size == 1) continue
+
+ // fixNameで解決する
+ val fixResName = fixName[name]
+ if (fixResName != null) {
+ var found = false
+ for (emoji in emojis) {
+ if (emoji.resName == fixResName) {
+ found = true
+ } else {
+ emoji.removeShortName(name.name)
+ }
+ }
+ if (!found) error("checkShortNameConflict: missing emoji resName=$fixResName")
+ continue
+ }
+
+ // emoji,cameFrom のペアのリスト
+ val froms = emojiMap.values
+ .flatMap { emoji -> emoji.shortNames.map { Pair(emoji, it) } }
+ .filter { it.second == name }
+ .map { Pair(it.first, it.second.cameFrom) }
+
+ // どこ由来のShortNameかで優先順位をつける
+ val preferFrom = froms.find { it.second == "EmojiDataJson" }
+ ?: froms.find { it.second == "EmojiSpec" }
+ ?: froms.find { it.second == "EmojiOneJson" }
+ ?: froms.find { it.second == fromCategoryHtml }
+
+ if (preferFrom != null) {
+ // 優先順位の低いemojiからshortNameを除去する
+ var found = false
+ for (emoji in emojis) {
+ if (emoji == preferFrom.first) {
+ found = true
+ } else {
+ emoji.removeShortName(name.name)
+ }
+ }
+ if (!found) error("checkShortNameConflict: missing emoji ${preferFrom.first.key}")
+ continue
+ }
+
+ // 解決できなかった
+ log.e("checkShortNameConflict: name $name froms=${froms.joinToString(",") { "${it.first.resName}${it.second}" }}")
+ hasConflict = true
+ }
+
+ // 名前のない絵文字のチェック
+ for (emoji in emojiMap.values) {
+ if (emoji.shortNames.isNotEmpty()) continue
+ val fix = fixName.entries.filter { it.value == emoji.resName }
+ if (fix.size > 1) error("checkShortNameConflict: multiple fix match for ${emoji.resName}")
+ if (fix.size == 1) {
+ emoji.addShortName(fix.first().key)
+ continue
+ }
+ log.e("checkShortNameConflict: emoji has no shortName. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
+ hasConflict = true
+ }
+ }
+
+ private fun fixToneParent() {
+ var hasError = false
+ val nameMap = HashMap()
+ for (emoji in emojiMap.values) {
+ for (shortName in emoji.shortNames) {
+ nameMap[shortName.name] = emoji
+ }
+ }
+ val suffixList = skinToneModifiers.values
+ .flatMap { it.suffixList.toList() }
+ .sortedByDescending { it.length }
+
+
+ for (emoji in emojiMap.values) {
+ // トーンの絵文字の一部は内部に他のトーンの名前を含むので誤検出を回避する
+ if (emoji.resName in arrayOf("emj_1f3fc", "emj_1f3fe")) continue
+
+ fun String.removeToneSuffix(): String {
+ var name = this
+ when (name) {
+ "kiss_light_skin_tone" -> return "couplekiss"
+ "kiss_medium_light_skin_tone" -> return "couplekiss"
+ "kiss_medium_skin_tone" -> return "couplekiss"
+ "kiss_medium_dark_skin_tone" -> return "couplekiss"
+ "kiss_dark_skin_tone" -> return "couplekiss"
+ }
+
+ suffixList.forEach { name = name.replace(it, "") }
+ return when (name) {
+ "couple_with_heart_person_person" -> "couple_with_heart"
+ "kiss_person_person" -> "couplekiss"
+ "kiss_woman_woman" -> "woman_kiss_woman"
+ "kiss_woman_man" -> "woman_kiss_man"
+ "kiss_man_man" -> "man_kiss_man"
+ else -> name
+ }
+ }
+
+ for (shortName in emoji.shortNames) {
+ val parent = nameMap[shortName.name.removeToneSuffix()]
+ if (parent == emoji) continue
+ if (parent == null) {
+ log.e("${emoji.resName} $shortName looks like tone variation,but can't find parent.")
+ hasError = true
+ continue
+ }
+ emoji.addToneParent(parent)
+ }
+ }
+
+ for (emoji in emojiMap.values) {
+ val parents = emoji.toneParents
+ if (parents.isEmpty()) continue
+ if (parents.size > 1) {
+ log.e("${emoji.resName} has many parents. ${parents.joinToString(",")}")
+ hasError = true
+ continue
+ }
+ parents.forEach { parent ->
+ val toneCode = emoji.key.getToneCode("makeToneMap")
+ ?: error("emoji $emoji has parent, but has no toneCode.")
+ when (val old = parent.toneChildren[toneCode]) {
+ null -> parent.toneChildren[toneCode] = emoji
+ emoji -> {
+ }
+ else -> error("conflict toneChildren. emoji ${parent.resName} has $old and $emoji.")
+ }
+ }
+ }
+
+ if (hasError) error("toneParent error.")
+ }
+
+ private fun writeData() {
+ val outFile = "emoji_map.txt"
+ UnixPrinter(File(outFile)).use { writer ->
+
+ // 絵文字をskipするか事前に調べる
+ for (emoji in emojiMap.values.sortedBy { it.key }) {
+
+ val codeSet = emoji.codeSet.sorted()
+ if (codeSet.isEmpty()) {
+ log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes")
+ emoji.skip = true
+ } else if (emoji.unified.list.isAsciiEmoji()) {
+ log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes")
+ emoji.skip = true
+ }
+ }
+
+ for (emoji in emojiMap.values.sortedBy { it.key }) {
+ if (emoji.skip) continue
+
+ // 画像リソースID
+ val strResName = emoji.resName
+ if (File("assets/$strResName.svg").isFile) {
+ writer.println("svg:$strResName.svg//${emoji.imageFiles.first().second}")
+ } else {
+ writer.println("drawable:$strResName//${emoji.imageFiles.first().second}")
+ }
+
+ // unified
+ writer.println("un:${emoji.unified.toRawString()}//${emoji.unified.from}")
+
+ // Unicodeシーケンス
+ val codeSet = emoji.codeSet.sorted()
+ for (code in codeSet) {
+ if (code == emoji.unified) continue
+ val raw = code.toRawString()
+ if (raw.isEmpty()) error("too short code! ${emoji.resName}")
+ writer.println("u:$raw//${code.from}")
+ }
+
+ // 画像リソースIDとshortcodeの関連付けを出力する
+ // 投稿時にshortcodeをユニコードに変換するため、shortcodeとUTF-16シーケンスの関連付けを出力する
+ val nameList = emoji.nameList.notEmpty()
+ ?: error("missing shortName. ${emoji.resName}")
+ nameList.forEachIndexed { index, triple ->
+ val (_, name, froms) = triple
+ val header = if (index == 0) "sn" else "s"
+ writer.println("${header}:$name//${froms.joinToString(",")}")
+ }
+ }
+
+ fun Category.printCategory(list: List) {
+ writer.println("cn:${this.name}")
+ for (emoji in list) {
+ writer.println("c:${emoji.unified.toRawString()}")
+ emoji.usedInCategory = this
+ }
+ }
+
+ categoryNames.forEach { category ->
+ category.printCategory(category.emojis.filter { !it.skip })
+ }
+
+ run {
+ val category = categoryNames.find { it.name == "Others" }!!
+ category.printCategory(
+ emojiMap.values
+ .filter { it.usedInCategory == null && it.toneParents.isEmpty() }
+ .sortedBy { it.shortNames.first() }
+ )
+ }
+
+ // スキントーン
+ emojiMap.values
+ .filter { it.toneChildren.isNotEmpty() }
+ .sortedBy { it.key }
+ .forEach { parent ->
+ if (parent.usedInCategory == null) {
+ log.e("parent ${parent.resName} not used in any category!")
+ }
+ parent.toneChildren.entries
+ .toList()
+ .sortedBy { it.key }
+ .forEach eachChild@{
+ val child = it.value
+ if (child.skip) return@eachChild
+ writer.println("t:${parent.unified.toRawString()},${it.key.toRawString()},${child.unified.toRawString()}")
+ }
+ }
+
+ // 複合トーン
+ run {
+ val category = categoryNames.find { it.name == "ComplexTones" }!!
+ category.printCategory(
+ emojiMap.values
+ .filter { it.toneChildren.isNotEmpty() }
+ .sortedBy { it.key }
+ .flatMap { parent ->
+ if (parent.usedInCategory == null) {
+ log.e("parent ${parent.resName} not used in any category!")
+ }
+ parent.toneChildren.entries
+ .toList()
+ .filter { it.key.list.size > 1 }
+ .sortedBy { it.key }
+ .map { it.value }
+ }
+ )
+ }
+ }
+
+ log.d("wrote $outFile")
+ }
+
+ suspend fun run() {
+
+ // 修正用データを読む
+ readFixData()
+
+ // emojipediaからバージョン別一覧とカテゴリ別一覧を読む
+ val emojipediaData = HttpClient {
+ install(HttpTimeout) {
+ val t = 30000L
+ requestTimeoutMillis = t
+ connectTimeoutMillis = t
+ socketTimeoutMillis = t
+ }
+ }.use { client ->
+ readEmojipedia(client)
+ }
+
+ // 画像をスキャンする前に絵文字のqualified codeを調べておく
+ readEmojipediaQualified(emojipediaData)
+
+ // サブフォルダから絵文字の画像を収集する
+ scanEmojiImages()
+ // 収集した画像をコピーする
+ copyImages()
+
+ addEmojipediaShortnames()
+
+ readVendorCode()
+ readEmojiData()
+ readEmojiOne()
+ readCategoryShortName(emojipediaData)
+
+ checkCodeConflict()
+ checkShortNameConflict()
+
+ fixToneParent()
+
+
+ fixCategory()
+
+ if (hasConflict) error("please fix conflicts.")
+
+ // shortcodeに含まれる文字の種類を列挙する
+ val nameChars = HashSet()
+ val nameMap = HashMap()
+ for (emoji in emojiMap.values) {
+ for (shortName in emoji.shortNames) {
+ nameMap[shortName] = emoji
+ for (c in shortName.name)
+ nameChars.add(c)
+ }
+ }
+ log.i("nameChars: [${nameChars.sorted().joinToString("")}]")
+
+ writeData()
+
+ log.d("codeCameFroms: ${Emoji.codeCameFroms.joinToString(",")}")
+ log.d("nameCameFroms: ${Emoji.nameCameFroms.joinToString(",")}")
+ }
}
fun main(args: Array) = runBlocking {
- log.d("args=${args.joinToString(",")}")
- App().run()
+ log.d("args=${args.joinToString(",")}")
+ App().run()
}
diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Utils.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Utils.kt
index 118029da..9a0f090b 100644
--- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Utils.kt
+++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Utils.kt
@@ -1,147 +1,141 @@
+@file:Suppress("unused")
+
package jp.juggler.subwaytooter.emoji
-import io.ktor.client.*
-import io.ktor.client.call.*
-import io.ktor.client.request.*
-import io.ktor.client.statement.*
-import io.ktor.http.*
+import io.ktor.client.HttpClient
+import io.ktor.client.request.get
+import io.ktor.client.request.header
+import io.ktor.client.statement.readBytes
+import io.ktor.http.HttpStatusCode
import org.jsoup.Jsoup
-import java.io.*
+import java.io.BufferedReader
+import java.io.File
+import java.io.FileInputStream
+import java.io.FileOutputStream
+import java.io.InputStreamReader
import java.nio.charset.Charset
import java.security.MessageDigest
import java.util.*
fun String.isTruth() = when {
- this == "" -> false
- this == "0" -> false
- this.startsWith("f", ignoreCase = true) -> false
- this.startsWith("t", ignoreCase = true) -> true
- this == "on" -> true
- else -> true
+ this == "" -> false
+ this == "0" -> false
+ this.startsWith("f", ignoreCase = true) -> false
+ this.startsWith("t", ignoreCase = true) -> true
+ this == "on" -> true
+ else -> true
}
// split CharSequence to Unicode codepoints
fun CharSequence.eachCodePoint(block: (Int) -> Unit) {
- val end = length
- var i = 0
- while (i < end) {
- val c1 = get(i++)
- if (Character.isHighSurrogate(c1) && i < length) {
- val c2 = get(i)
- if (Character.isLowSurrogate(c2)) {
- i++
- block(Character.toCodePoint(c1, c2))
- continue
- }
- }
- block(c1.toInt())
- }
+ val end = length
+ var i = 0
+ while (i < end) {
+ val c1 = get(i++)
+ if (Character.isHighSurrogate(c1) && i < end) {
+ val c2 = get(i)
+ if (Character.isLowSurrogate(c2)) {
+ ++i
+ block(Character.toCodePoint(c1, c2))
+ continue
+ }
+ }
+ block(c1.code)
+ }
}
+
// split CharSequence to Unicode codepoints
-fun CharSequence.listCodePoints() = ArrayList().also{ dst->
- val end = length
- var i = 0
- while (i < end) {
- val c1 = get(i++)
- if (Character.isHighSurrogate(c1) && i < length) {
- val c2 = get(i)
- if (Character.isLowSurrogate(c2)) {
- i++
- dst.add(Character.toCodePoint(c1, c2))
- continue
- }
- }
- dst.add(c1.toInt())
- }
+fun CharSequence.listCodePoints() = ArrayList().also { dst ->
+ eachCodePoint { dst.add(it) }
}.toIntArray()
// split codepoint to UTF-8 bytes
fun codePointToUtf8(cp: Int, block: (Int) -> Unit) {
- // incorrect codepoint
- if (cp < 0 || cp > 0x10FFFF) codePointToUtf8('?'.toInt(), block)
+ // incorrect codepoint
+ if (cp < 0 || cp > 0x10FFFF) codePointToUtf8('?'.code, block)
- if (cp >= 128) {
- if (cp >= 2048) {
- if (cp >= 65536) {
- block(0xF0.or(cp.shr(18)))
- block(0x80.or(cp.shr(12).and(0x3f)))
- } else {
- block(0xE0.or(cp.shr(12)))
- }
- block(0x80.or(cp.shr(6).and(0x3f)))
- } else {
- block(0xC0.or(cp.shr(6)))
- }
- block(0x80.or(cp.and(0x3f)))
- } else {
- block(cp)
- }
+ if (cp >= 128) {
+ if (cp >= 2048) {
+ if (cp >= 65536) {
+ block(0xF0.or(cp.shr(18)))
+ block(0x80.or(cp.shr(12).and(0x3f)))
+ } else {
+ block(0xE0.or(cp.shr(12)))
+ }
+ block(0x80.or(cp.shr(6).and(0x3f)))
+ } else {
+ block(0xC0.or(cp.shr(6)))
+ }
+ block(0x80.or(cp.and(0x3f)))
+ } else {
+ block(cp)
+ }
}
private const val hexString = "0123456789ABCDEF"
private val encodePercentSkipChars by lazy {
- HashSet().apply {
- ('0'..'9').forEach { add(it.toInt()) }
- ('A'..'Z').forEach { add(it.toInt()) }
- ('a'..'z').forEach { add(it.toInt()) }
- add('-'.toInt())
- add('_'.toInt())
- add('.'.toInt())
- }
+ HashSet().apply {
+ ('0'..'9').forEach { add(it.code) }
+ ('A'..'Z').forEach { add(it.code) }
+ ('a'..'z').forEach { add(it.code) }
+ add('-'.code)
+ add('_'.code)
+ add('.'.code)
+ }
}
fun String.encodePercent(): String =
- StringBuilder(length).also { sb ->
- eachCodePoint { cp ->
- if (encodePercentSkipChars.contains(cp)) {
- sb.append(cp.toChar())
- } else {
- codePointToUtf8(cp) { b ->
- sb.append('%')
- .append(hexString[b shr 4])
- .append(hexString[b and 15])
- }
- }
- }
- }.toString()
+ StringBuilder(length).also { sb ->
+ eachCodePoint { cp ->
+ if (encodePercentSkipChars.contains(cp)) {
+ sb.append(cp.toChar())
+ } else {
+ codePointToUtf8(cp) { b ->
+ sb.append('%')
+ .append(hexString[b shr 4])
+ .append(hexString[b and 15])
+ }
+ }
+ }
+ }.toString()
// same as x?.let{ dst.add(it) }
fun T.addTo(dst: ArrayList) = dst.add(this)
fun T.addTo(dst: HashSet) = dst.add(this)
fun > E?.notEmpty(): E? =
- if (this?.isNotEmpty() == true) this else null
+ if (this?.isNotEmpty() == true) this else null
fun > E?.notEmpty(): E? =
- if (this?.isNotEmpty() == true) this else null
+ if (this?.isNotEmpty() == true) this else null
fun T?.notEmpty(): T? =
- if (this?.isNotEmpty() == true) this else null
+ if (this?.isNotEmpty() == true) this else null
fun ByteArray.digestSha256() =
- MessageDigest.getInstance("SHA-256")?.let {
- it.update(this@digestSha256)
- it.digest()
- }!!
+ MessageDigest.getInstance("SHA-256")?.let {
+ it.update(this@digestSha256)
+ it.digest()
+ }!!
fun ByteArray.encodeBase64UrlSafe(): String {
- val bytes = Base64.getUrlEncoder().encode(this)
- return StringBuilder(bytes.size).apply {
- for (b in bytes) {
- val c = b.toChar()
- if (c != '=') append(c)
- }
- }.toString()
+ val bytes = Base64.getUrlEncoder().encode(this)
+ return StringBuilder(bytes.size).apply {
+ for (b in bytes) {
+ val c = b.toInt().toChar()
+ if (c != '=') append(c)
+ }
+ }.toString()
}
fun ByteArray.decodeUtf8() = toString(Charsets.UTF_8)
fun String.encodeUtf8() = toByteArray(Charsets.UTF_8)
-inline fun Any?.castOrThrow(name:String,block: T.() -> Unit){
- if (this !is T) error("type mismatch. $name is ${T::class.qualifiedName}")
- block()
+inline fun Any?.castOrThrow(name: String, block: T.() -> Unit) {
+ if (this !is T) error("type mismatch. $name is ${T::class.qualifiedName}")
+ block()
}
// 型推論できる文脈だと型名を書かずにすむ
@@ -155,87 +149,86 @@ fun > minComparable(a: T, b: T): T = if (a <= b) a else b
fun > maxComparable(a: T, b: T): T = if (a >= b) a else b
fun MutableCollection.removeFirst(check: (T) -> Boolean): T? {
- val it = iterator()
- while (it.hasNext()) {
- val item = it.next()
- if (check(item)) {
- it.remove()
- return item
- }
- }
- return null
+ val it = iterator()
+ while (it.hasNext()) {
+ val item = it.next()
+ if (check(item)) {
+ it.remove()
+ return item
+ }
+ }
+ return null
}
fun File.readAllBytes() =
- FileInputStream(this).use { it.readBytes() }
+ FileInputStream(this).use { it.readBytes() }
fun File.save(data: ByteArray) {
- val tmpFile = File("$absolutePath.tmp")
- FileOutputStream(tmpFile).use { it.write(data) }
- this.delete()
- if (!tmpFile.renameTo(this)) error("$this: rename failed.")
+ val tmpFile = File("$absolutePath.tmp")
+ FileOutputStream(tmpFile).use { it.write(data) }
+ this.delete()
+ if (!tmpFile.renameTo(this)) error("$this: rename failed.")
}
fun ByteArray.saveTo(file: File) = file.save(this)
-fun File.forEachLine(charset: Charset = Charsets.UTF_8, block:(Int, String)->Unit)=
- BufferedReader(InputStreamReader(FileInputStream(this),charset)).use { reader ->
- var lno = 0
- reader.forEachLine {
- block(++lno, it)
- }
- lno
- }
+fun File.forEachLine(charset: Charset = Charsets.UTF_8, block: (Int, String) -> Unit) =
+ BufferedReader(InputStreamReader(FileInputStream(this), charset)).use { reader ->
+ var lno = 0
+ reader.forEachLine {
+ block(++lno, it)
+ }
+ lno
+ }
-inline fun HashMap.prepare(key:K,creator:()->V):V{
- var value = get(key)
- if( value == null) {
- value = creator()
- put(key,value)
- }
- return value!!
+inline fun HashMap.prepare(key: K, creator: () -> V): V {
+ var value = get(key)
+ if (value == null) {
+ value = creator()
+ put(key, value)
+ }
+ return value!!
}
private val reFileNameBadChars = """[\\/:*?"<>|-]+""".toRegex()
-private val cacheDir by lazy{ File("./cache").apply { mkdirs() }}
+private val cacheDir by lazy { File("./cache").apply { mkdirs() } }
-fun clearCache(){
- cacheDir.list()?.forEach { name->
- File(cacheDir,name).takeIf { it.isFile }?.delete()
- }
+fun clearCache() {
+ cacheDir.list()?.forEach { name ->
+ File(cacheDir, name).takeIf { it.isFile }?.delete()
+ }
}
-private val cacheExpire by lazy{ 8 * 3600000L }
+private val cacheExpire by lazy { 8 * 3600000L }
suspend fun HttpClient.cachedGetBytes(url: String, headers: Map): ByteArray {
- val fName = reFileNameBadChars.replace(url, "-")
- val cacheFile = File(cacheDir, fName)
- if (System.currentTimeMillis() - cacheFile.lastModified() <= cacheExpire) {
- println("GET(cached) $url")
- return cacheFile.readAllBytes()
- }
- println("GET $url")
+ val fName = reFileNameBadChars.replace(url, "-")
+ val cacheFile = File(cacheDir, fName)
+ if (System.currentTimeMillis() - cacheFile.lastModified() <= cacheExpire) {
+ println("GET(cached) $url")
+ return cacheFile.readAllBytes()
+ }
+ println("GET $url")
- get(url) {
- headers.entries.forEach {
- header(it.key, it.value)
- }
- }.let { res ->
- return when (res.status) {
- HttpStatusCode.OK ->
- res.receive().also { it.saveTo(cacheFile) }
- else -> {
- cacheFile.delete()
- error("get failed. $url ${res.status}")
- }
- }
- }
+ get(url) {
+ headers.entries.forEach {
+ header(it.key, it.value)
+ }
+ }.let { res ->
+ return when (res.status) {
+ HttpStatusCode.OK ->
+ res.readBytes().also { it.saveTo(cacheFile) }
+ else -> {
+ cacheFile.delete()
+ error("get failed. $url ${res.status}")
+ }
+ }
+ }
}
suspend fun HttpClient.cachedGetString(url: String, headers: Map): String =
- cachedGetBytes(url,headers).decodeUtf8()
-
-fun String.parseHtml(baseUri: String) =
- Jsoup.parse(this, baseUri)
+ cachedGetBytes(url, headers).decodeUtf8()
+fun String.parseHtml(baseUri: String): org.jsoup.nodes.Document? =
+ Jsoup.parse(this, baseUri)
diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/CodepointList.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/CodepointList.kt
index 7b6475a7..2f6dfe87 100644
--- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/CodepointList.kt
+++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/CodepointList.kt
@@ -2,7 +2,6 @@ package jp.juggler.subwaytooter.emoji.model
import jp.juggler.subwaytooter.emoji.cast
import jp.juggler.subwaytooter.emoji.notEmpty
-import java.lang.StringBuilder
/*
絵文字はコードポイントのリストで表現される。
@@ -30,61 +29,61 @@ import java.lang.StringBuilder
// list of codepoints
class CodepointList(
- val from: String,
- val list: IntArray
+ val from: String,
+ val list: IntArray
) : Comparable {
- override fun equals(other: Any?): Boolean =
- list.contentEquals(other.cast()?.list)
+ override fun equals(other: Any?): Boolean =
+ list.contentEquals(other.cast()?.list)
- override fun hashCode(): Int {
- var code = 0
- for (v in list) code = code.shl(2).xor(v)
- return code
- }
+ override fun hashCode(): Int {
+ var code = 0
+ for (v in list) code = code.shl(2).xor(v)
+ return code
+ }
- override fun compareTo(other: CodepointList): Int {
- val la = this.list
- val lb = other.list
- var i = 0
- do {
- val a = la.elementAtOrNull(i)
- val b = lb.elementAtOrNull(i)
+ override fun compareTo(other: CodepointList): Int {
+ val la = this.list
+ val lb = other.list
+ var i = 0
+ do {
+ val a = la.elementAtOrNull(i)
+ val b = lb.elementAtOrNull(i)
- val r = if (a == null) {
- if (b == null) break
- -1
- } else if (b == null) {
- 1
- } else {
- a.compareTo(b)
- }
- if (r != 0) return r
- ++i
- } while (true)
- return 0
- }
+ val r = if (a == null) {
+ if (b == null) break
+ -1
+ } else if (b == null) {
+ 1
+ } else {
+ a.compareTo(b)
+ }
+ if (r != 0) return r
+ ++i
+ } while (true)
+ return 0
+ }
- // make string like as "uuuu-uuuu-uuuu-uuuu"
- // cp値の余分な0は除去される
- // 常に小文字である
- fun toHex() = StringBuilder(list.size * 5).also {
- list.forEachIndexed { i, v ->
- if (i > 0) it.append('-')
- it.append(String.format("%x", v).toLowerCase())
- }
- }.toString()
+ // make string like as "uuuu-uuuu-uuuu-uuuu"
+ // cp値の余分な0は除去される
+ // 常に小文字である
+ fun toHex() = StringBuilder(list.size * 5).also {
+ list.forEachIndexed { i, v ->
+ if (i > 0) it.append('-')
+ it.append("%x".format(v).lowercase())
+ }
+ }.toString()
- // make raw string
- fun toRawString() = StringBuilder(list.size + 10).also { sb ->
- for (cp in list) {
- sb.appendCodePoint(cp)
- }
- }.toString()
+ // make raw string
+ fun toRawString() = StringBuilder(list.size + 10).also { sb ->
+ for (cp in list) {
+ sb.appendCodePoint(cp)
+ }
+ }.toString()
- fun toResourceId() = "emj_${toHex().replace("-", "_")}"
+ fun toResourceId() = "emj_${toHex().replace("-", "_")}"
- override fun toString() = "${toHex()},$from"
+ override fun toString() = "${toHex()},$from"
// fun makeUtf16(): String {
// // java の文字列にする
@@ -111,28 +110,28 @@ class CodepointList(
// return sb.toString()
// }
- fun toKey(from: String) =
- list.filter { it != 0xfe0f && it != 0xfe0e && it != 0x200d }
- .toIntArray().toCodepointList(from)
+ fun toKey(from: String) =
+ list.filter { it != 0xfe0f && it != 0xfe0e && it != 0x200d }
+ .toIntArray().toCodepointList(from)
- fun getToneCode(from: String) :CodepointList? {
- val used = HashSet()
- return list
- .filter { skinToneModifiers.containsKey(it) }
- .mapNotNull {
- if (used.contains(it)) {
- null
- } else {
- used.add(it)
- it
- }
- }.toIntArray().toCodepointList(from)
- }
+ fun getToneCode(from: String): CodepointList? {
+ val used = HashSet()
+ return list
+ .filter { skinToneModifiers.containsKey(it) }
+ .mapNotNull {
+ if (used.contains(it)) {
+ null
+ } else {
+ used.add(it)
+ it
+ }
+ }.toIntArray().toCodepointList(from)
+ }
}
fun IntArray.isAsciiEmoji() =
- size == 1 && first() < 0xae
+ size == 1 && first() < 0xae
fun IntArray.toCodepointList(from: String) = if (isEmpty()) null else CodepointList(from, this)
@@ -140,8 +139,8 @@ private val reHex = """([0-9A-Fa-f]+)""".toRegex()
// cp-cp-cp-cp => CodepointList
fun String.toCodepointList(from: String) =
- reHex.findAll(this)
- .map { mr -> mr.groupValues[1].toInt(16) }
- .toList().notEmpty()
- ?.toIntArray()
- ?.toCodepointList(from)
+ reHex.findAll(this)
+ .map { mr -> mr.groupValues[1].toInt(16) }
+ .toList().notEmpty()
+ ?.toIntArray()
+ ?.toCodepointList(from)
diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/ShortName.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/ShortName.kt
index 9b244a47..f482b452 100644
--- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/ShortName.kt
+++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/ShortName.kt
@@ -4,18 +4,18 @@ import jp.juggler.subwaytooter.emoji.cast
import jp.juggler.subwaytooter.emoji.notEmpty
-class ShortName(val cameFrom:String,val name:String) :Comparable{
- override fun equals(other: Any?): Boolean =
- name == other.cast()?.name
+class ShortName(val cameFrom: String, val name: String) : Comparable {
+ override fun equals(other: Any?): Boolean =
+ name == other.cast()?.name
- override fun hashCode(): Int =
- name.hashCode()
+ override fun hashCode(): Int =
+ name.hashCode()
- override fun toString(): String =
- "SN($cameFrom)$name"
+ override fun toString(): String =
+ "SN($cameFrom)$name"
- override fun compareTo(other: ShortName): Int =
- name.compareTo(other.name)
+ override fun compareTo(other: ShortName): Int =
+ name.compareTo(other.name)
}
private val reColonHead = """\A:""".toRegex()
@@ -23,10 +23,10 @@ private val reColonTail = """:\z""".toRegex()
private val reNotCode = """[^\w\d+_]+""".toRegex()
private val reUnderTail = """_+\z""".toRegex()
-fun String.toShortName(cameFrom:String) =
- toLowerCase()
- .replace(reColonHead, "")
- .replace(reColonTail, "")
- .replace(reNotCode, "_")
- .replace(reUnderTail,"")
- .notEmpty()?.let{ ShortName(cameFrom=cameFrom,it) }
+fun String.toShortName(cameFrom: String) =
+ lowercase()
+ .replace(reColonHead, "")
+ .replace(reColonTail, "")
+ .replace(reNotCode, "_")
+ .replace(reUnderTail, "")
+ .notEmpty()?.let { ShortName(cameFrom = cameFrom, it) }