From f7564dcecfb0795d81ba91a22d7a74cf4ff3f6d7 Mon Sep 17 00:00:00 2001 From: tateisu Date: Thu, 14 Jul 2022 13:05:23 +0900 Subject: [PATCH] =?UTF-8?q?emojiConverter=E3=81=AE=E4=BE=9D=E5=AD=98?= =?UTF-8?q?=E9=96=A2=E4=BF=82=E3=82=92=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- _Emoji/emojiConverter/.idea/compiler.xml | 2 +- _Emoji/emojiConverter/.idea/misc.xml | 2 +- _Emoji/emojiConverter/build.gradle | 12 +- .../gradle/wrapper/gradle-wrapper.properties | 2 +- .../jp/juggler/subwaytooter/emoji/Json.kt | 4 +- .../jp/juggler/subwaytooter/emoji/Main.kt | 1922 +++++++++-------- .../jp/juggler/subwaytooter/emoji/Utils.kt | 307 ++- .../subwaytooter/emoji/model/CodepointList.kt | 137 +- .../subwaytooter/emoji/model/ShortName.kt | 32 +- 9 files changed, 1207 insertions(+), 1213 deletions(-) diff --git a/_Emoji/emojiConverter/.idea/compiler.xml b/_Emoji/emojiConverter/.idea/compiler.xml index 3e828077..245a82c8 100644 --- a/_Emoji/emojiConverter/.idea/compiler.xml +++ b/_Emoji/emojiConverter/.idea/compiler.xml @@ -1,6 +1,6 @@ - + \ No newline at end of file diff --git a/_Emoji/emojiConverter/.idea/misc.xml b/_Emoji/emojiConverter/.idea/misc.xml index f07d57a4..4f9b6d51 100644 --- a/_Emoji/emojiConverter/.idea/misc.xml +++ b/_Emoji/emojiConverter/.idea/misc.xml @@ -1,7 +1,7 @@ - + \ No newline at end of file diff --git a/_Emoji/emojiConverter/build.gradle b/_Emoji/emojiConverter/build.gradle index 36cceb9f..16f7cc49 100644 --- a/_Emoji/emojiConverter/build.gradle +++ b/_Emoji/emojiConverter/build.gradle @@ -1,6 +1,6 @@ plugins { id 'java' - id 'org.jetbrains.kotlin.jvm' version '1.5.10' + id 'org.jetbrains.kotlin.jvm' version '1.7.10' } group 'jp.juggler' @@ -12,22 +12,22 @@ repositories { dependencies { implementation fileTree(include: ['*.jar'], dir: 'src/lib') - implementation "com.google.guava:guava:28.1-jre" + implementation "com.google.guava:guava:31.1-jre" implementation "org.jetbrains.kotlin:kotlin-stdlib" - testImplementation 'org.junit.jupiter:junit-jupiter-api:5.7.0' + testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2' testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine' - def ktorVersion="1.5.0" + def ktorVersion="2.0.3" implementation "io.ktor:ktor-client-core:$ktorVersion" implementation "io.ktor:ktor-client-cio:$ktorVersion" - implementation "io.ktor:ktor-client-features:$ktorVersion" +// implementation "io.ktor:ktor-client-features:$ktorVersion" implementation "io.ktor:ktor-client-encoding:$ktorVersion" // StringEscapeUtils.unescapeHtml4 implementation "org.apache.commons:commons-text:1.9" // HTML5パーサ - implementation "org.jsoup:jsoup:1.13.1" + implementation "org.jsoup:jsoup:1.14.3" } test { diff --git a/_Emoji/emojiConverter/gradle/wrapper/gradle-wrapper.properties b/_Emoji/emojiConverter/gradle/wrapper/gradle-wrapper.properties index be52383e..aa991fce 100644 --- a/_Emoji/emojiConverter/gradle/wrapper/gradle-wrapper.properties +++ b/_Emoji/emojiConverter/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.4.2-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Json.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Json.kt index 019f8b40..7b22aa16 100644 --- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Json.kt +++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Json.kt @@ -860,7 +860,7 @@ private fun Writer.writeQuote(string: String): Writer { in '\u0080' until '\u00a0', in '\u2000' until '\u2100' -> { write("\\u") - val hexCode: String = Integer.toHexString(c.toInt()) + val hexCode: String = Integer.toHexString(c.code) write("0000", 0, 4 - hexCode.length) write(hexCode) } @@ -1047,7 +1047,7 @@ fun Writer.writeJsonValue( } } - value is Char -> writeJsonValue(indentFactor, indent, value.toInt()) + value is Char -> writeJsonValue(indentFactor, indent, value.code) value is String -> writeQuote(value) value is Enum<*> -> writeQuote(value.name) diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Main.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Main.kt index 32ef06c2..3335eb1c 100644 --- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Main.kt +++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Main.kt @@ -1,7 +1,7 @@ package jp.juggler.subwaytooter.emoji import io.ktor.client.* -import io.ktor.client.features.* +import io.ktor.client.plugins.HttpTimeout import jp.juggler.subwaytooter.emoji.model.* import kotlinx.coroutines.runBlocking import org.apache.commons.text.StringEscapeUtils @@ -11,7 +11,6 @@ import java.io.FileInputStream import java.io.FileOutputStream import java.io.IOException - //pngフォルダにある画像ファイルを参照する //emoji-data/emoji.json を参照する // @@ -26,16 +25,16 @@ const val pathCwebp = "C:/cygwin64/bin/cwebp.exe" val emojiDataCodepointsVendors = arrayOf("docomo", "au", "softbank", "google") fun copyFile(dst: File, src: File) { - try { - FileInputStream(src).use { streamIn -> - FileOutputStream(dst).use { streamOut -> - streamOut.write(streamIn.readAllBytes()) - } - } - } catch (ex: Throwable) { - dst.delete() - throw IOException("copyFile failed. src=$src dst=$dst", ex) - } + try { + FileInputStream(src).use { streamIn -> + FileOutputStream(dst).use { streamOut -> + streamOut.write(streamIn.readBytes()) + } + } + } catch (ex: Throwable) { + dst.delete() + throw IOException("copyFile failed. src=$src dst=$dst", ex) + } } //fun svgToVectorDrawable(dst: File, src: File) { @@ -62,960 +61,963 @@ fun copyFile(dst: File, src: File) { class App { - companion object{ - - const val fromCategoryHtml = "CategoryHtml" - - - val reComment = """#.*""".toRegex() - - private val ignoreImagePath = arrayOf( - "LICENSE", - - // fe82b (フリーダイアル) はnoto-emoji では ?旗 になっていて使えない - "noto-emoji/png/128/emoji_ufe82b.png", - "noto-emoji/svg/emoji_ufe82b.svg", - - // mastodonのフォルダにある余計なファイル - "mastodon/public/emoji/sheet_10.png", - "mastodon/public/emoji/sheet_13.png", - ) - - // emojipediaにあるデータのうち、次のショートネームを持つ絵文字は無視する - val ignoreShortName = arrayOf( - "flag_for_texas_ustx" - ) - - private val ignoreEmojiOneShortNames = setOf( - "man_in_tuxedo", - "man_in_tuxedo_tone1", "tuxedo_tone1", - "man_in_tuxedo_tone2", "tuxedo_tone2", - "man_in_tuxedo_tone3", "tuxedo_tone3", - "man_in_tuxedo_tone4", "tuxedo_tone4", - "man_in_tuxedo_tone5", "tuxedo_tone5", - ) - } - - // 修正用データ - private val fixCode = HashMap() - private val fixName = HashMap() - private val fixCategory = ArrayList>() - private val fixUnified = HashMap() - - // 画像ファイルをスキャンして絵文字コードとファイルの対応表を作る - // マップのキーはvariation selectorとZWJが除去される - private val emojiMap = HashMap() - - private val emojipediaShortNames = HashMap() - - ///////////////////////////////////////////////// - - private fun readFixData() { - val fileFixData = "./fix_code.txt" - - File(fileFixData).forEachLine { lno, rawLine -> - val line = rawLine - .replace(reComment, "") - .trim() - - val mr = """\A(\w+)\s*([\w._-]+)\s*(.*)""".toRegex().find(line) - if (mr != null) { - val type = mr.groupValues[1] - val arg1 = mr.groupValues[2] - - if (type == "unified") { - val code = arg1.toCodepointList("fixUnified")!! - val key = code.toKey("fixUnified")!! - fixUnified[key] = code - return@forEachLine - } - - val data = """([\w+-]+)""".toRegex().findAll(mr.groupValues[3]).map { it.groupValues[1] }.toList() - if (data.size != 1) return@forEachLine - when (type) { - "code" -> fixCode[arg1.toCodepointList("fixCode")!!] = data.first() - "name" -> fixName[arg1.toShortName("fixName")!!] = data.first() - "category" -> Pair(arg1, data.first()).addTo(fixCategory) - else -> error("$fileFixData $lno : bad fix_data type=$type") - } - } - } - } + companion object { + + const val fromCategoryHtml = "CategoryHtml" + + + val reComment = """#.*""".toRegex() + + private val ignoreImagePath = arrayOf( + "LICENSE", + + // fe82b (フリーダイアル) はnoto-emoji では ?旗 になっていて使えない + "noto-emoji/png/128/emoji_ufe82b.png", + "noto-emoji/svg/emoji_ufe82b.svg", + + // mastodonのフォルダにある余計なファイル + "mastodon/public/emoji/sheet_10.png", + "mastodon/public/emoji/sheet_13.png", + ) + + // emojipediaにあるデータのうち、次のショートネームを持つ絵文字は無視する + val ignoreShortName = arrayOf( + "flag_for_texas_ustx" + ) + + private val ignoreEmojiOneShortNames = setOf( + "man_in_tuxedo", + "man_in_tuxedo_tone1", "tuxedo_tone1", + "man_in_tuxedo_tone2", "tuxedo_tone2", + "man_in_tuxedo_tone3", "tuxedo_tone3", + "man_in_tuxedo_tone4", "tuxedo_tone4", + "man_in_tuxedo_tone5", "tuxedo_tone5", + ) + } + + // 修正用データ + private val fixCode = HashMap() + private val fixName = HashMap() + private val fixCategory = ArrayList>() + private val fixUnified = HashMap() + + // 画像ファイルをスキャンして絵文字コードとファイルの対応表を作る + // マップのキーはvariation selectorとZWJが除去される + private val emojiMap = HashMap() + + private val emojipediaShortNames = HashMap() + + ///////////////////////////////////////////////// + + private fun readFixData() { + val fileFixData = "./fix_code.txt" + + File(fileFixData).forEachLine { lno, rawLine -> + val line = rawLine + .replace(reComment, "") + .trim() + + val mr = """\A(\w+)\s*([\w._-]+)\s*(.*)""".toRegex().find(line) + if (mr != null) { + val type = mr.groupValues[1] + val arg1 = mr.groupValues[2] + + if (type == "unified") { + val code = arg1.toCodepointList("fixUnified")!! + val key = code.toKey("fixUnified")!! + fixUnified[key] = code + return@forEachLine + } + + val data = """([\w+-]+)""".toRegex().findAll(mr.groupValues[3]).map { it.groupValues[1] }.toList() + if (data.size != 1) return@forEachLine + when (type) { + "code" -> fixCode[arg1.toCodepointList("fixCode")!!] = data.first() + "name" -> fixName[arg1.toShortName("fixName")!!] = data.first() + "category" -> Pair(arg1, data.first()).addTo(fixCategory) + else -> error("$fileFixData $lno : bad fix_data type=$type") + } + } + } + } - // Emojipediaのバージョン別一覧とカテゴリ別一覧を読んでJSONに保存しておく - // サイトにアクセスできなくなったら困るからな… - @Suppress("FunctionName") - private suspend fun readEmojipedia(client: HttpClient) :JsonObject { + // Emojipediaのバージョン別一覧とカテゴリ別一覧を読んでJSONに保存しておく + // サイトにアクセスできなくなったら困るからな… + @Suppress("FunctionName") + private suspend fun readEmojipedia(client: HttpClient): JsonObject { - val fileEmojipedia = File("Emojipedia.json") - if( fileEmojipedia.isFile) return fileEmojipedia.readAllBytes().decodeUtf8().decodeJsonObject() + val fileEmojipedia = File("Emojipedia.json") + if (fileEmojipedia.isFile) return fileEmojipedia.readAllBytes().decodeUtf8().decodeJsonObject() - val dstRoot = JsonObject() - val dstQualified = JsonArray().also{ dstRoot["qualifiedCode"] = it } + val dstRoot = JsonObject() + val dstQualified = JsonArray().also { dstRoot["qualifiedCode"] = it } - for (url in arrayOf( - "https://emojipedia.org/emoji-13.1/", - "https://emojipedia.org/emoji-13.0/", - "https://emojipedia.org/emoji-12.1/", - "https://emojipedia.org/emoji-12.0/", - "https://emojipedia.org/emoji-11.0/", - "https://emojipedia.org/emoji-5.0/", - "https://emojipedia.org/emoji-4.0/", - "https://emojipedia.org/emoji-3.0/", - "https://emojipedia.org/emoji-2.0/", - "https://emojipedia.org/emoji-1.0/", - )) { - val root = client.cachedGetString(url, mapOf()).parseHtml(url) - - root.getElementsByClass("sidebar").forEach { it.remove() } - root.getElementsByClass("categories").forEach { it.remove() } - - for (list in root.getElementsByTag("ul")) { - for (li in list.getElementsByTag("li")) { - - val href = li.getElementsByTag("a")?.attr("href") - .notEmpty() ?: continue - - val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text() - ?.notEmpty() ?: continue - - dstQualified.add(jsonArray(spanText, href)) - } - } - } - - val dstCategory = JsonObject().also{ dstRoot["categories"]=it} - categoryNames.forEach { category -> - if (category.url == null) return@forEach - - val dstCategoryItems = JsonArray().also { dstCategory[category.name] = it } - - val root = client.cachedGetString(category.url, mapOf()).parseHtml(category.url) - val list = root.getElementsByClass("emoji-list").first() - for (li in list.getElementsByTag("li")) { - val href = li.getElementsByTag("a").attr("href") - .notEmpty() ?: continue + for (url in arrayOf( + "https://emojipedia.org/emoji-14.0/", + "https://emojipedia.org/emoji-13.1/", + "https://emojipedia.org/emoji-13.0/", + "https://emojipedia.org/emoji-12.1/", + "https://emojipedia.org/emoji-12.0/", + "https://emojipedia.org/emoji-11.0/", + "https://emojipedia.org/emoji-5.0/", + "https://emojipedia.org/emoji-4.0/", + "https://emojipedia.org/emoji-3.0/", + "https://emojipedia.org/emoji-2.0/", + "https://emojipedia.org/emoji-1.0/", + )) { + val root = client.cachedGetString(url, mapOf()).parseHtml(url) + ?: error("parseHtml returns null!") + + root.getElementsByClass("sidebar").forEach { it.remove() } + root.getElementsByClass("categories").forEach { it.remove() } + + for (list in root.getElementsByTag("ul")) { + for (li in list.getElementsByTag("li")) { + + val href = li.getElementsByTag("a").attr("href") + .notEmpty() ?: continue + + val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text() + ?.notEmpty() ?: continue + + dstQualified.add(jsonArray(spanText, href)) + } + } + } + + val dstCategory = JsonObject().also { dstRoot["categories"] = it } + categoryNames.forEach { category -> + if (category.url == null) return@forEach + + val dstCategoryItems = JsonArray().also { dstCategory[category.name] = it } + + val root = client.cachedGetString(category.url, mapOf()).parseHtml(category.url) + ?: error("parseHtml returns null!") + val list = root.getElementsByClass("emoji-list").first() + ?: error("getElementsByClass(emoji-list) failed.") + for (li in list.getElementsByTag("li")) { + val href = li.getElementsByTag("a").attr("href") + .notEmpty() ?: continue + + val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text() + ?.notEmpty() ?: continue + + dstCategoryItems.add(jsonArray(spanText, href)) + } + } + + dstRoot.toString(2).encodeUtf8().saveTo(fileEmojipedia) + return dstRoot + } + + //////////////////////////////////////////////////////////////////////// + + // noto-emoji のファイル名はfe0fが欠けている + // あらかじめEmojipediaのデータを参照してqualified name の一覧を作っておく + private fun readEmojipediaQualified(root: JsonObject) { + + val ignoreName2 = setOf( + "zero_width_joiner", + "variation_selector_16", + ) + + val cameFrom = "emojiQualified" + + val hrefList = ArrayList>() + + var countError = 0 + + for (cols in root.jsonArray("qualifiedCode")!!.filterIsInstance()) { + val spanText = cols[0] as String + var href = cols[1] as String + + val code = spanText.listCodePoints().toCodepointList(cameFrom) + ?: error("can't get code from $spanText $href") + + + + if (hrefList.any { it.first == href }) + error("duplicate href: $href") + + hrefList.add(Pair(href, code)) + + // https://emojipedia.org/80030/ Couple With Heart: Light Skin Tone + // ページ名が名前じゃないのを直す + if (href == "80030") href = "couple-with-heart-light-skin-tone" + + val shortName = href.replace("/", "").toShortName(cameFrom) + ?: error("can't parse $href") + + if (ignoreName2.contains(shortName.name)) { + log.w("skip ${shortName.name}") + continue + } + + val key = code.toKey(cameFrom) + ?: error("can't get key from ${code.toHex()} ${shortName.name}") + + if (!fixUnified.containsKey(key)) { + if (code.list.size == 1 && code.list.first() < 256) { + ++countError + log.e("bad unified code: $code") + } else { + fixUnified[key] = code + } + } - val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text() - ?.notEmpty() ?: continue - - dstCategoryItems.add(jsonArray(spanText, href)) - } - } - - dstRoot.toString(2).encodeUtf8().saveTo(fileEmojipedia) - return dstRoot - } - - //////////////////////////////////////////////////////////////////////// - - // noto-emoji のファイル名はfe0fが欠けている - // あらかじめEmojipediaのデータを参照してqualified name の一覧を作っておく - private fun readEmojipediaQualified(root:JsonObject) { - - val ignoreName2 = setOf( - "zero_width_joiner", - "variation_selector_16", - ) - - val cameFrom = "emojiQualified" - - val hrefList = ArrayList>() - - var countError = 0 - - for( cols in root.jsonArray("qualifiedCode")!!.filterIsInstance()) { - val spanText = cols[0] as String - var href = cols[1] as String - - val code = spanText.listCodePoints().toCodepointList(cameFrom) - ?: error("can't get code from $spanText $href") - - - - if (hrefList.any { it.first == href }) - error("duplicate href: $href") - - hrefList.add(Pair(href, code)) - - // https://emojipedia.org/80030/ Couple With Heart: Light Skin Tone - // ページ名が名前じゃないのを直す - if (href == "80030") href = "couple-with-heart-light-skin-tone" - - val shortName = href.replace("/", "").toShortName(cameFrom) - ?: error("can't parse $href") - - if (ignoreName2.contains(shortName.name)) { - log.w("skip ${shortName.name}") - continue - } - - val key = code.toKey(cameFrom) - ?: error("can't get key from ${code.toHex()} ${shortName.name}") - - if (!fixUnified.containsKey(key)) { - if (code.list.size == 1 && code.list.first() < 256) { - ++countError - log.e("bad unified code: $code") - } else { - fixUnified[key] = code - } - } - - if (ignoreShortName.any { it == shortName.name }) { - log.w("skip shortname $shortName $code") - continue - } - - emojipediaShortNames[key] = shortName - } - - // hrefList.sortedBy{ it.first }.forEach { log.d("href=${it.first} ${it.second}") } - - if(countError>0) error("please fix unified codes. countError=$countError") - } - - private fun addEmojipediaShortnames() { - for ((key, shortName) in emojipediaShortNames) { - emojiMap[key]?.addShortName(shortName) - } - } - - // Emojipediaのデータを使ってカテゴリ別に絵文字一覧を用意する - private fun readCategoryShortName(root:JsonObject) { - for(category in categoryNames){ - val list = root.jsonObject("categories")?.jsonArray(category.name) ?: continue - for( cols in list.filterIsInstance()){ - val spanText = cols[0] as String - val href = cols[1] as String - - val shortName = href.replace("/", "").toShortName(fromCategoryHtml) - ?: error("can't parse $href") - - if (ignoreShortName.any { shortName.name == it }) continue - - val code = spanText.listCodePoints().toCodepointList(fromCategoryHtml) - ?: error("can't parse code from $spanText") - - val key = code.toKey(fromCategoryHtml) - val emoji = emojiMap[key] - ?: error("can't find emoji. category=${category.name}, href=$href, spanText=$spanText") - - category.addEmoji(emoji, allowDuplicate = true, addingName = shortName.toString()) - } - } - } - - // サブフォルダをスキャンして絵文字別に画像データを確定する - private fun scanImageDir( - cameFrom: String, - dirPath: String, - @Language("RegExp") codeSpec: String, - unifiedQualifier: (CodepointList) -> CodepointList = { it } - ) { - val dir = File(dirPath) - val reCodeSpec = codeSpec.toRegex() - var countFound = 0 - var countCreate = 0 - var countError = 0 - val files = dir.listFiles() ?:error("listFiles returns null. $dir") - for( imageFile in files){ - if (!imageFile.isFile) continue - val unixPath = imageFile.path.replace("\\", "/") - if (ignoreImagePath.any { unixPath.endsWith(it) }) continue - - val name = imageFile.name.replace("_border.", ".") - - val code = reCodeSpec.find(name) - ?.groupValues - ?.elementAtOrNull(1) - ?.toCodepointList(cameFrom) - ?: error("can't parse $name") - - ++countFound - - val key = code.toKey(cameFrom)!! - - var emoji = emojiMap[key] - if (emoji == null) { - val unified2 = fixUnified[key] ?: unifiedQualifier(code) - if( unified2.list.size==1 && unified2.list.first()<256){ - ++countError - log.e("bad unified code: $unified2 $unixPath") - } - emoji = Emoji(key, unified2) - - emojiMap[key] = emoji - ++countCreate - } - - emoji.imageFiles.add(Pair(imageFile, cameFrom)) - emoji.addCode(code) - } - - log.d("scanImageDir: found=$countFound,create=$countCreate, dir=$dir") - if(countError>0) error("please fix unified codes. countError=$countError") - } - - // サブフォルダをスキャンして絵文字別に画像データを確定する - private fun scanEmojiImages() { - - scanImageDir("override", "override", """([0-9A-Fa-f_-]+)\.""") - scanImageDir("mastodonSVG", "mastodon/public/emoji", """([0-9A-Fa-f_-]+)\.""") - scanImageDir("twemojiSvg", "twemoji/assets/svg/", """([0-9A-Fa-f_-]+)\.""") - scanImageDir("notoSvg", "noto-emoji/svg", """emoji_u([0-9A-Fa-f_-]+)\.""") { code -> - if (code.list.last() != 0xfe0f) - "${code.toHex()}-fe0f".toCodepointList("notoSvgFix")!! - else - code - } - scanImageDir("notoPng", "noto-emoji/png/72", """emoji_u([0-9A-Fa-f_-]+)\.""") - scanImageDir("emojiDataTw", "emoji-data/img-twitter-72", """([0-9A-Fa-f_-]+)\.""") - scanImageDir("emojiDataGo", "emoji-data/img-google-136", """([0-9A-Fa-f_-]+)\.""") - scanImageDir("emojiOne", "emojione/assets/svg", """([0-9A-Fa-f_-]+)\.""") - } - - // 絵文字ごとにファイルをコピーする - private fun copyImages() { - var countSvg = 0 - var countPng = 0 - for (emoji in emojiMap.values) { - val strResName = emoji.key.toResourceId() - emoji.resName = strResName - val (src, _) = emoji.imageFiles.first() - if (src.name.endsWith("svg")) { - ++countSvg - val dst = File("assets/$strResName.svg") - if (!dst.exists()) { - //svgToVectorDrawable(dst, src) - copyFile(dst, src) - } - } else { - ++countPng - val dst = File("drawable-nodpi/$strResName.webp") - if (!dst.exists()) { - val pb = ProcessBuilder(pathCwebp, src.path, "-quiet", "-o", dst.path) - val rv = pb.start().waitFor() - if (rv != 0) error("cwebp failed. dst=$dst src=$src") - } - } - } - log.d("copyImage: countSvg=$countSvg, countPng=$countPng") - } - - // emojiDataのjsonを読んで変換コードポイントやショートネームを追加する - private fun readEmojiData() { - for( src in File("./emoji-data/emoji.json") - .readAllBytes() - .decodeUtf8() - .decodeJsonArray() - .objectList() - ){ - // 絵文字のコードポイント一覧 - var unified = src.string("unified")?.toCodepointList("EmojiDataJsonUnified")!! - var key = unified.toKey("EmojiDataJsonUnifiedKey") - var emoji = emojiMap[key] ?: error("can't find emoji for $key") - - if (emoji.unified != unified) { - log.d("readEmojiData: unified not match. emoji=${emoji.unified}, emojiData=${unified}") - emoji.addCode(unified) - } - - src.stringArrayList("variations") - ?.mapNotNull { it.toCodepointList("EmojiDataJsonVariation") } - ?.forEach { emoji.addCode(it) } - - for (k in emojiDataCodepointsVendors) { - src.string(k)?.toCodepointList("EmojiDataJson($k)") - ?.let { emoji.addCode(it) } - } - - // short_name のリスト - val shortNames = HashSet().also { dst -> - src.string("short_name")?.addTo(dst) - src.stringArrayList("short_names")?.forEach { - it.addTo(dst) - } - }.mapNotNull { it.toShortName("EmojiDataJson") } - - if (shortNames.isEmpty()) - error("emojiData ${src.string("unified")} has no shortName") - shortNames.forEach { emoji.addShortName(it) } - - val parentEmoji = emoji - - // スキントーン - src.jsonObject("skin_variations")?.let { skinVariations -> - val parentName = shortNames.first() - val skinToneUsed = HashSet() - for ((k, data) in skinVariations.entries ) { - if (data !is JsonObject) continue - - // 再帰呼び出しあり - fun handleCode(list: IntArray, idx: Int, parentSuffix: Array, suffixIndex: Int) { - val code = list.elementAtOrNull(idx) ?: return - val modifier = skinToneModifiers[code] - ?: error("missing skinToneModifier u${list[idx].toString(16)} for $parentName") - skinToneUsed.add(code) - val lastSuffix = modifier.suffixList[suffixIndex] - val suffix = - if (parentSuffix.contains(lastSuffix)) - parentSuffix - else - arrayOf(*parentSuffix, lastSuffix) - if (idx < list.size - 1) { - handleCode(list, idx + 1, suffix, suffixIndex) - } else { - unified = data.string("unified")!!.toCodepointList("EmojiData(skinTone)")!! - key = unified.toKey("EmojiData(skinTone)") - emoji = emojiMap[key] ?: error("can't find emoji for $key") - - emoji.addCode(unified) - shortNames - .mapNotNull { (it.name + suffix.joinToString("")).toShortName("EmojiData(skinTone)") } - .forEach { emoji.addShortName(it) } - - emoji.addToneParent(parentEmoji) - } - } - - val codeList = k.toCodepointList("toneSpec")!!.list - for (suffixIndex in skinToneModifiers.values.first().suffixList.indices) { - handleCode(codeList, 0, emptyArray(), suffixIndex) - } - } - if (skinToneUsed.size != skinToneModifiers.size) { - log.w("skin tone code not fully used: $parentName") - } - } - } - } - - - - private fun readEmojiOne() { - val cameFrom = "EmojiOneJson" - val root = File("./old-emojione.json") - .readAllBytes() - .decodeUtf8() - .decodeJsonObject() - for ((strCode, item) in root.entries) { - if (item !is JsonObject) continue - - // コードを確認する - val code = strCode.toCodepointList(cameFrom) - ?: error("can't parse $strCode") - - val key = code.toKey(cameFrom) - val emoji = emojiMap[key] ?: error("missing emoji for $key") - - val names = ArrayList() - item.string("alpha code")?.let { names.add(it) } - item.string("aliases")?.split("|")?.let { names.addAll(it) } - names - .mapNotNull { it.toShortName(cameFrom) } - .filter { !ignoreEmojiOneShortNames.contains(it.name) } - .forEach { emoji.addShortName(it) } - } - } - - - - private fun fixCategory() { - val nameMap = HashMap().apply { - for (emoji in emojiMap.values) - for (shortName in emoji.shortNames) - this[shortName] = emoji - } - for ((name, strShortName) in fixCategory) { - val category = categoryNames.find { it.name == name } - ?: error("fixCategory: missing category for $name") - val shortName = strShortName.toShortName("fixCategory") - ?: error("fixCategory: can't parse $strShortName") - val emoji = nameMap[shortName] - ?: error("fixCategory: missing emoji for $strShortName") - - category.addEmoji(emoji, addingName = shortName.toString()) - log.d("fixCategory $category ${emoji.resName} $shortName") - } - } - - private fun String.unescapeXml() = StringEscapeUtils.unescapeXml(this) - - private val vendorText = HashMap>() - private val vendorUnicodeMap = HashMap>() - - private fun readVendorCode() { - var error = false - // まとまったxmlを読む - // 優先順位の都合でベンダ別に読み直す - val xml1 = File("emoji4unicode/data/emoji4unicode.xml") - .readAllBytes() - .decodeUtf8() - for (vendor in arrayOf("docomo", "kddi", "softbank")) { - """]+)""".toRegex().findAll(xml1).forEach { mr1 -> - val attrs = HashMap() - """(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 -> - attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml() - } - val unicode = attrs["unicode"]?.toCodepointList("emoji4unicode") ?: return@forEach - val strFrom = attrs[vendor] ?: return@forEach - if (strFrom.indexOf(">") != -1) return@forEach - val from = strFrom.toCodepointList("emoji4unicode") ?: return@forEach - val text = "${attrs["name"]}/${attrs["text_fallback"]}" - vendorText.prepare(from) { ArrayList() }.add(text) - val old = vendorUnicodeMap[from] - if (old != null) { - if (old.second == "kddi" && vendor == "softbank") return@forEach - error = true - log.e("vendorUnicodeMap conflict. code=$from old=$old new=$unicode($vendor)") - } else { - vendorUnicodeMap[from] = Pair(unicode, vendor) - } - } - } - - for (vendor in arrayOf("docomo", "kddi", "softbank")) { - - // ベンダ個別ファイルから説明文を読む - val xml = File("emoji4unicode/data/${vendor}/carrier_data.xml") - .readAllBytes() - .decodeUtf8() - - """]+)""".toRegex().findAll(xml).forEach { mr1 -> - val attrs = HashMap() - """(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 -> - attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml() - } - - val code = attrs["unicode"]?.toCodepointList("emoji4unicode") - ?: return@forEach - - attrs["name_ja"]?.let { vendorText.prepare(code) { ArrayList() }.add("$it($vendor)") } - } - } - - if (error) error("readVendorCode failed.") - } - - private var hasConflict = false - - // コード=>画像の重複を調べる - private fun checkCodeConflict() { - - val codeMap = HashMap>() - for (emoji in emojiMap.values) { - for (code in emoji.codes) { - codeMap.prepare(code) { HashSet() }.add(emoji) - } - } - - for ((code, emojis) in codeMap.entries.sortedBy { it.key }) { - if (emojis.size == 1) continue - - val fixResName = fixCode[code] - if (fixResName != null) { - var found = false - for (emoji in emojis) { - if (emoji.resName == fixResName) { - found = true - } else { - emoji.codes.forEach { - if (it == code) log.w("fixCode: delete(1) $it for ${emoji.resName}") - } - emoji.removeCodeByCode(code) - } - } - if (!found) error("checkCodeConflict: missing emoji resName=$fixResName") - continue - } - - val onlyVendorCode = emojis.all { emoji -> - when (emoji.codes.find { it == code }?.from) { - "EmojiDataJson(au)", "EmojiDataJson(softbank)", "EmojiDataJson(docomo)" -> true - else -> false - } - } - - if (onlyVendorCode) { - val preferCode = vendorUnicodeMap[code]?.first - if (preferCode != null) { - val targetEmoji = emojis.find { emoji -> emoji.codes.any { it == preferCode } } - if (targetEmoji != null) { - emojis.forEach { emoji -> - if (emoji != targetEmoji) { - emoji.codes.forEach { - if (it == code) log.w("fixCode: delete(2) $it for ${emoji.resName}") - } - emoji.removeCodeByCode(code) - } - } - continue - } - log.e("checkCodeConflict: can't use vendorUnicodeMap. code=$code, preferCode=$preferCode") - } - } - - log.e("checkCodeConflict: code $code ${vendorText[code]} ${ - emojis.joinToString(" ") { - "${it.resName}/${it.unified.toRawString()}" - } - }") - hasConflict = true - } - - // コードのない絵文字のチェック - for (emoji in emojiMap.values) { - if (emoji.codes.isNotEmpty()) continue - val fixes = fixCode.entries.filter { it.value == emoji.resName } - when (fixes.size) { - 0 -> { - log.e("checkCodeConflict: emoji has no code. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}") - hasConflict = true - } - 1 -> { - val fix = fixes.first() - val code = fix.key - emoji.addCode(code) - log.i("fixCode code=$code resName=${emoji.resName}") - } - else -> { - log.e("checkCodeConflict: multiple fix match for ${emoji.resName}") - hasConflict = true - } - } - } - } - - private fun checkShortNameConflict() { - val nameMap = HashMap>().apply { - for (emoji in emojiMap.values) { - for (name in emoji.shortNames) { - prepare(name) { HashSet() }.add(emoji) - } - } - } - - // cameFromCategory 以外のshortNameがあるなら、cameFromCategoryのshortNameは使わない - for (emoji in emojiMap.values) { - if (emoji.shortNames.any { it.cameFrom != fromCategoryHtml }) { - emoji.removeShortNameByCameFrom(fromCategoryHtml) - } - } - - for ((name, emojis) in nameMap.entries.sortedBy { it.key }) { - // shortNameからemojiを1意に解決できるなら正常 - if (emojis.size == 1) continue - - // fixNameで解決する - val fixResName = fixName[name] - if (fixResName != null) { - var found = false - for (emoji in emojis) { - if (emoji.resName == fixResName) { - found = true - } else { - emoji.removeShortName(name.name) - } - } - if (!found) error("checkShortNameConflict: missing emoji resName=$fixResName") - continue - } - - // emoji,cameFrom のペアのリスト - val froms = emojiMap.values - .flatMap { emoji -> emoji.shortNames.map { Pair(emoji, it) } } - .filter { it.second == name } - .map { Pair(it.first, it.second.cameFrom) } - - // どこ由来のShortNameかで優先順位をつける - val preferFrom = froms.find { it.second == "EmojiDataJson" } - ?: froms.find { it.second == "EmojiSpec" } - ?: froms.find { it.second == "EmojiOneJson" } - ?: froms.find { it.second == fromCategoryHtml } - - if (preferFrom != null) { - // 優先順位の低いemojiからshortNameを除去する - var found = false - for (emoji in emojis) { - if (emoji == preferFrom.first) { - found = true - } else { - emoji.removeShortName(name.name) - } - } - if (!found) error("checkShortNameConflict: missing emoji ${preferFrom.first.key}") - continue - } - - // 解決できなかった - log.e("checkShortNameConflict: name $name froms=${froms.joinToString(",") { "${it.first.resName}${it.second}" }}") - hasConflict = true - } - - // 名前のない絵文字のチェック - for (emoji in emojiMap.values) { - if (emoji.shortNames.isNotEmpty()) continue - val fix = fixName.entries.filter { it.value == emoji.resName } - if (fix.size > 1) error("checkShortNameConflict: multiple fix match for ${emoji.resName}") - if (fix.size == 1) { - emoji.addShortName(fix.first().key) - continue - } - log.e("checkShortNameConflict: emoji has no shortName. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}") - hasConflict = true - } - } - - private fun fixToneParent() { - var hasError = false - val nameMap = HashMap() - for (emoji in emojiMap.values) { - for (shortName in emoji.shortNames) { - nameMap[shortName.name] = emoji - } - } - val suffixList = skinToneModifiers.values - .flatMap { it.suffixList.toList() } - .sortedByDescending { it.length } - - - for (emoji in emojiMap.values) { - // トーンの絵文字の一部は内部に他のトーンの名前を含むので誤検出を回避する - if (emoji.resName in arrayOf("emj_1f3fc", "emj_1f3fe")) continue - - fun String.removeToneSuffix(): String { - var name = this - when (name) { - "kiss_light_skin_tone" -> return "couplekiss" - "kiss_medium_light_skin_tone" -> return "couplekiss" - "kiss_medium_skin_tone" -> return "couplekiss" - "kiss_medium_dark_skin_tone" -> return "couplekiss" - "kiss_dark_skin_tone" -> return "couplekiss" - } - - suffixList.forEach { name = name.replace(it, "") } - return when (name) { - "couple_with_heart_person_person" -> "couple_with_heart" - "kiss_person_person" -> "couplekiss" - "kiss_woman_woman" -> "woman_kiss_woman" - "kiss_woman_man" -> "woman_kiss_man" - "kiss_man_man" -> "man_kiss_man" - else -> name - } - } - - for (shortName in emoji.shortNames) { - val parent = nameMap[shortName.name.removeToneSuffix()] - if (parent == emoji) continue - if (parent == null) { - log.e("${emoji.resName} $shortName looks like tone variation,but can't find parent.") - hasError = true - continue - } - emoji.addToneParent(parent) - } - } - - for (emoji in emojiMap.values) { - val parents = emoji.toneParents - if (parents.isEmpty()) continue - if (parents.size > 1) { - log.e("${emoji.resName} has many parents. ${parents.joinToString(",")}") - hasError = true - continue - } - parents.forEach { parent -> - val toneCode = emoji.key.getToneCode("makeToneMap") - ?: error("emoji $emoji has parent, but has no toneCode.") - when (val old = parent.toneChildren[toneCode]) { - null -> parent.toneChildren[toneCode] = emoji - emoji -> { - } - else -> error("conflict toneChildren. emoji ${parent.resName} has $old and $emoji.") - } - } - } - - if (hasError) error("toneParent error.") - } - - private fun writeData(){ - val outFile = "emoji_map.txt" - UnixPrinter(File(outFile)).use { writer -> - - // 絵文字をskipするか事前に調べる - for (emoji in emojiMap.values.sortedBy { it.key }) { - - val codeSet = emoji.codeSet.sorted() - if (codeSet.isEmpty()) { - log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes") - emoji.skip = true - } else if (emoji.unified.list.isAsciiEmoji()) { - log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes") - emoji.skip = true - } - } - - for (emoji in emojiMap.values.sortedBy { it.key }) { - if (emoji.skip) continue - - // 画像リソースID - val strResName = emoji.resName - if (File("assets/$strResName.svg").isFile) { - writer.println("svg:$strResName.svg//${emoji.imageFiles.first().second}") - } else { - writer.println("drawable:$strResName//${emoji.imageFiles.first().second}") - } - - // unified - writer.println("un:${emoji.unified.toRawString()}//${emoji.unified.from}") - - // Unicodeシーケンス - val codeSet = emoji.codeSet.sorted() - for (code in codeSet) { - if (code == emoji.unified) continue - val raw = code.toRawString() - if (raw.isEmpty()) error("too short code! ${emoji.resName}") - writer.println("u:$raw//${code.from}") - } - - // 画像リソースIDとshortcodeの関連付けを出力する - // 投稿時にshortcodeをユニコードに変換するため、shortcodeとUTF-16シーケンスの関連付けを出力する - val nameList = emoji.nameList.notEmpty() - ?: error("missing shortName. ${emoji.resName}") - nameList.forEachIndexed { index, triple -> - val (_, name, froms) = triple - val header = if (index == 0) "sn" else "s" - writer.println("${header}:$name//${froms.joinToString(",")}") - } - } - - fun Category.printCategory(list:List){ - writer.println("cn:${this.name}") - for(emoji in list){ - writer.println("c:${emoji.unified.toRawString()}") - emoji.usedInCategory = this - } - } - - categoryNames.forEach { category -> - category.printCategory(category.emojis.filter { !it.skip }) - } - - run{ - val category = categoryNames.find{ it.name == "Others"}!! - category.printCategory( - emojiMap.values - .filter { it.usedInCategory == null && it.toneParents.isEmpty() } - .sortedBy { it.shortNames.first() } - ) - } - - // スキントーン - emojiMap.values - .filter { it.toneChildren.isNotEmpty() } - .sortedBy { it.key } - .forEach { parent -> - if( parent.usedInCategory==null){ - log.e("parent ${parent.resName} not used in any category!") - } - parent.toneChildren.entries - .toList() - .sortedBy { it.key } - .forEach eachChild@{ - val child = it.value - if (child.skip) return@eachChild - writer.println("t:${parent.unified.toRawString()},${it.key.toRawString()},${child.unified.toRawString()}") - } - } - - // 複合トーン - run{ - val category = categoryNames.find { it.name == "ComplexTones" }!! - category.printCategory( - emojiMap.values - .filter { it.toneChildren.isNotEmpty() } - .sortedBy { it.key } - .flatMap { parent -> - if( parent.usedInCategory==null){ - log.e("parent ${parent.resName} not used in any category!") - } - parent.toneChildren.entries - .toList() - .filter { it.key.list.size > 1 } - .sortedBy { it.key } - .map{ it.value} - } - ) - } - } - - log.d("wrote $outFile") - } - - suspend fun run() { - - // 修正用データを読む - readFixData() - - // emojipediaからバージョン別一覧とカテゴリ別一覧を読む - val emojipediaData = HttpClient { - install(HttpTimeout) { - val t = 30000L - requestTimeoutMillis = t - connectTimeoutMillis = t - socketTimeoutMillis = t - } - }.use { client -> - readEmojipedia(client) - } - - // 画像をスキャンする前に絵文字のqualified codeを調べておく - readEmojipediaQualified(emojipediaData) - - // サブフォルダから絵文字の画像を収集する - scanEmojiImages() - // 収集した画像をコピーする - copyImages() - - addEmojipediaShortnames() - - readVendorCode() - readEmojiData() - readEmojiOne() - readCategoryShortName(emojipediaData) - - checkCodeConflict() - checkShortNameConflict() - - fixToneParent() - - - fixCategory() - - if (hasConflict) error("please fix conflicts.") - - // shortcodeに含まれる文字の種類を列挙する - val nameChars = HashSet() - val nameMap = HashMap() - for (emoji in emojiMap.values) { - for (shortName in emoji.shortNames) { - nameMap[shortName] = emoji - for (c in shortName.name) - nameChars.add(c) - } - } - log.i("nameChars: [${nameChars.sorted().joinToString("")}]") - - writeData() - - log.d("codeCameFroms: ${Emoji.codeCameFroms.joinToString(",")}") - log.d("nameCameFroms: ${Emoji.nameCameFroms.joinToString(",")}") - } + if (ignoreShortName.any { it == shortName.name }) { + log.w("skip shortname $shortName $code") + continue + } + + emojipediaShortNames[key] = shortName + } + + // hrefList.sortedBy{ it.first }.forEach { log.d("href=${it.first} ${it.second}") } + + if (countError > 0) error("please fix unified codes. countError=$countError") + } + + private fun addEmojipediaShortnames() { + for ((key, shortName) in emojipediaShortNames) { + emojiMap[key]?.addShortName(shortName) + } + } + + // Emojipediaのデータを使ってカテゴリ別に絵文字一覧を用意する + private fun readCategoryShortName(root: JsonObject) { + for (category in categoryNames) { + val list = root.jsonObject("categories")?.jsonArray(category.name) ?: continue + for (cols in list.filterIsInstance()) { + val spanText = cols[0] as String + val href = cols[1] as String + + val shortName = href.replace("/", "").toShortName(fromCategoryHtml) + ?: error("can't parse $href") + + if (ignoreShortName.any { shortName.name == it }) continue + + val code = spanText.listCodePoints().toCodepointList(fromCategoryHtml) + ?: error("can't parse code from $spanText") + + val key = code.toKey(fromCategoryHtml) + val emoji = emojiMap[key] + ?: error("can't find emoji. category=${category.name}, href=$href, spanText=$spanText") + + category.addEmoji(emoji, allowDuplicate = true, addingName = shortName.toString()) + } + } + } + + // サブフォルダをスキャンして絵文字別に画像データを確定する + private fun scanImageDir( + cameFrom: String, + dirPath: String, + @Language("RegExp") codeSpec: String, + unifiedQualifier: (CodepointList) -> CodepointList = { it } + ) { + val dir = File(dirPath) + val reCodeSpec = codeSpec.toRegex() + var countFound = 0 + var countCreate = 0 + var countError = 0 + val files = dir.listFiles() ?: error("listFiles returns null. $dir") + for (imageFile in files) { + if (!imageFile.isFile) continue + val unixPath = imageFile.path.replace("\\", "/") + if (ignoreImagePath.any { unixPath.endsWith(it) }) continue + + val name = imageFile.name.replace("_border.", ".") + + val code = reCodeSpec.find(name) + ?.groupValues + ?.elementAtOrNull(1) + ?.toCodepointList(cameFrom) + ?: error("can't parse $name") + + ++countFound + + val key = code.toKey(cameFrom)!! + + var emoji = emojiMap[key] + if (emoji == null) { + val unified2 = fixUnified[key] ?: unifiedQualifier(code) + if (unified2.list.size == 1 && unified2.list.first() < 256) { + ++countError + log.e("bad unified code: $unified2 $unixPath") + } + emoji = Emoji(key, unified2) + + emojiMap[key] = emoji + ++countCreate + } + + emoji.imageFiles.add(Pair(imageFile, cameFrom)) + emoji.addCode(code) + } + + log.d("scanImageDir: found=$countFound,create=$countCreate, dir=$dir") + if (countError > 0) error("please fix unified codes. countError=$countError") + } + + // サブフォルダをスキャンして絵文字別に画像データを確定する + @Suppress("RegExpSimplifiable") + private fun scanEmojiImages() { + + scanImageDir("override", "override", """([0-9A-Fa-f_-]+)\.""") + scanImageDir("mastodonSVG", "mastodon/public/emoji", """([0-9A-Fa-f_-]+)\.""") + scanImageDir("twemojiSvg", "twemoji/assets/svg/", """([0-9A-Fa-f_-]+)\.""") + scanImageDir("notoSvg", "noto-emoji/svg", """emoji_u([0-9A-Fa-f_-]+)\.""") { code -> + if (code.list.last() != 0xfe0f) + "${code.toHex()}-fe0f".toCodepointList("notoSvgFix")!! + else + code + } + scanImageDir("notoPng", "noto-emoji/png/72", """emoji_u([0-9A-Fa-f_-]+)\.""") + scanImageDir("emojiDataTw", "emoji-data/img-twitter-72", """([0-9A-Fa-f_-]+)\.""") + scanImageDir("emojiDataGo", "emoji-data/img-google-136", """([0-9A-Fa-f_-]+)\.""") + scanImageDir("emojiOne", "emojione/assets/svg", """([0-9A-Fa-f_-]+)\.""") + } + + // 絵文字ごとにファイルをコピーする + private fun copyImages() { + var countSvg = 0 + var countPng = 0 + for (emoji in emojiMap.values) { + val strResName = emoji.key.toResourceId() + emoji.resName = strResName + val (src, _) = emoji.imageFiles.first() + if (src.name.endsWith("svg")) { + ++countSvg + val dst = File("assets/$strResName.svg") + if (!dst.exists()) { + //svgToVectorDrawable(dst, src) + copyFile(dst, src) + } + } else { + ++countPng + val dst = File("drawable-nodpi/$strResName.webp") + if (!dst.exists()) { + val pb = ProcessBuilder(pathCwebp, src.path, "-quiet", "-o", dst.path) + val rv = pb.start().waitFor() + if (rv != 0) error("cwebp failed. dst=$dst src=$src") + } + } + } + log.d("copyImage: countSvg=$countSvg, countPng=$countPng") + } + + // emojiDataのjsonを読んで変換コードポイントやショートネームを追加する + private fun readEmojiData() { + for (src in File("./emoji-data/emoji.json") + .readAllBytes() + .decodeUtf8() + .decodeJsonArray() + .objectList() + ) { + // 絵文字のコードポイント一覧 + var unified = src.string("unified")?.toCodepointList("EmojiDataJsonUnified")!! + var key = unified.toKey("EmojiDataJsonUnifiedKey") + var emoji = emojiMap[key] ?: error("can't find emoji for $key") + + if (emoji.unified != unified) { + log.d("readEmojiData: unified not match. emoji=${emoji.unified}, emojiData=${unified}") + emoji.addCode(unified) + } + + src.stringArrayList("variations") + ?.mapNotNull { it.toCodepointList("EmojiDataJsonVariation") } + ?.forEach { emoji.addCode(it) } + + for (k in emojiDataCodepointsVendors) { + src.string(k)?.toCodepointList("EmojiDataJson($k)") + ?.let { emoji.addCode(it) } + } + + // short_name のリスト + val shortNames = HashSet().also { dst -> + src.string("short_name")?.addTo(dst) + src.stringArrayList("short_names")?.forEach { + it.addTo(dst) + } + }.mapNotNull { it.toShortName("EmojiDataJson") } + + if (shortNames.isEmpty()) + error("emojiData ${src.string("unified")} has no shortName") + shortNames.forEach { emoji.addShortName(it) } + + val parentEmoji = emoji + + // スキントーン + src.jsonObject("skin_variations")?.let { skinVariations -> + val parentName = shortNames.first() + val skinToneUsed = HashSet() + for ((k, data) in skinVariations.entries) { + if (data !is JsonObject) continue + + // 再帰呼び出しあり + fun handleCode(list: IntArray, idx: Int, parentSuffix: Array, suffixIndex: Int) { + val code = list.elementAtOrNull(idx) ?: return + val modifier = skinToneModifiers[code] + ?: error("missing skinToneModifier u${list[idx].toString(16)} for $parentName") + skinToneUsed.add(code) + val lastSuffix = modifier.suffixList[suffixIndex] + val suffix = + if (parentSuffix.contains(lastSuffix)) + parentSuffix + else + arrayOf(*parentSuffix, lastSuffix) + if (idx < list.size - 1) { + handleCode(list, idx + 1, suffix, suffixIndex) + } else { + unified = data.string("unified")!!.toCodepointList("EmojiData(skinTone)")!! + key = unified.toKey("EmojiData(skinTone)") + emoji = emojiMap[key] ?: error("can't find emoji for $key") + + emoji.addCode(unified) + shortNames + .mapNotNull { (it.name + suffix.joinToString("")).toShortName("EmojiData(skinTone)") } + .forEach { emoji.addShortName(it) } + + emoji.addToneParent(parentEmoji) + } + } + + val codeList = k.toCodepointList("toneSpec")!!.list + for (suffixIndex in skinToneModifiers.values.first().suffixList.indices) { + handleCode(codeList, 0, emptyArray(), suffixIndex) + } + } + if (skinToneUsed.size != skinToneModifiers.size) { + log.w("skin tone code not fully used: $parentName") + } + } + } + } + + + private fun readEmojiOne() { + val cameFrom = "EmojiOneJson" + val root = File("./old-emojione.json") + .readAllBytes() + .decodeUtf8() + .decodeJsonObject() + for ((strCode, item) in root.entries) { + if (item !is JsonObject) continue + + // コードを確認する + val code = strCode.toCodepointList(cameFrom) + ?: error("can't parse $strCode") + + val key = code.toKey(cameFrom) + val emoji = emojiMap[key] ?: error("missing emoji for $key") + + val names = ArrayList() + item.string("alpha code")?.let { names.add(it) } + item.string("aliases")?.split("|")?.let { names.addAll(it) } + names + .mapNotNull { it.toShortName(cameFrom) } + .filter { !ignoreEmojiOneShortNames.contains(it.name) } + .forEach { emoji.addShortName(it) } + } + } + + + private fun fixCategory() { + val nameMap = HashMap().apply { + for (emoji in emojiMap.values) + for (shortName in emoji.shortNames) + this[shortName] = emoji + } + for ((name, strShortName) in fixCategory) { + val category = categoryNames.find { it.name == name } + ?: error("fixCategory: missing category for $name") + val shortName = strShortName.toShortName("fixCategory") + ?: error("fixCategory: can't parse $strShortName") + val emoji = nameMap[shortName] + ?: error("fixCategory: missing emoji for $strShortName") + + category.addEmoji(emoji, addingName = shortName.toString()) + log.d("fixCategory $category ${emoji.resName} $shortName") + } + } + + private fun String.unescapeXml() = StringEscapeUtils.unescapeXml(this) + + private val vendorText = HashMap>() + private val vendorUnicodeMap = HashMap>() + + private fun readVendorCode() { + var error = false + // まとまったxmlを読む + // 優先順位の都合でベンダ別に読み直す + val xml1 = File("emoji4unicode/data/emoji4unicode.xml") + .readAllBytes() + .decodeUtf8() + for (vendor in arrayOf("docomo", "kddi", "softbank")) { + """]+)""".toRegex().findAll(xml1).forEach { mr1 -> + val attrs = HashMap() + """(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 -> + attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml() + } + val unicode = attrs["unicode"]?.toCodepointList("emoji4unicode") ?: return@forEach + val strFrom = attrs[vendor] ?: return@forEach + if (strFrom.indexOf(">") != -1) return@forEach + val from = strFrom.toCodepointList("emoji4unicode") ?: return@forEach + val text = "${attrs["name"]}/${attrs["text_fallback"]}" + vendorText.prepare(from) { ArrayList() }.add(text) + val old = vendorUnicodeMap[from] + if (old != null) { + if (old.second == "kddi" && vendor == "softbank") return@forEach + error = true + log.e("vendorUnicodeMap conflict. code=$from old=$old new=$unicode($vendor)") + } else { + vendorUnicodeMap[from] = Pair(unicode, vendor) + } + } + } + + for (vendor in arrayOf("docomo", "kddi", "softbank")) { + + // ベンダ個別ファイルから説明文を読む + val xml = File("emoji4unicode/data/${vendor}/carrier_data.xml") + .readAllBytes() + .decodeUtf8() + + """]+)""".toRegex().findAll(xml).forEach { mr1 -> + val attrs = HashMap() + """(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 -> + attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml() + } + + val code = attrs["unicode"]?.toCodepointList("emoji4unicode") + ?: return@forEach + + attrs["name_ja"]?.let { vendorText.prepare(code) { ArrayList() }.add("$it($vendor)") } + } + } + + if (error) error("readVendorCode failed.") + } + + private var hasConflict = false + + // コード=>画像の重複を調べる + private fun checkCodeConflict() { + + val codeMap = HashMap>() + for (emoji in emojiMap.values) { + for (code in emoji.codes) { + codeMap.prepare(code) { HashSet() }.add(emoji) + } + } + + for ((code, emojis) in codeMap.entries.sortedBy { it.key }) { + if (emojis.size == 1) continue + + val fixResName = fixCode[code] + if (fixResName != null) { + var found = false + for (emoji in emojis) { + if (emoji.resName == fixResName) { + found = true + } else { + emoji.codes.forEach { + if (it == code) log.w("fixCode: delete(1) $it for ${emoji.resName}") + } + emoji.removeCodeByCode(code) + } + } + if (!found) error("checkCodeConflict: missing emoji resName=$fixResName") + continue + } + + val onlyVendorCode = emojis.all { emoji -> + when (emoji.codes.find { it == code }?.from) { + "EmojiDataJson(au)", "EmojiDataJson(softbank)", "EmojiDataJson(docomo)" -> true + else -> false + } + } + + if (onlyVendorCode) { + val preferCode = vendorUnicodeMap[code]?.first + if (preferCode != null) { + val targetEmoji = emojis.find { emoji -> emoji.codes.any { it == preferCode } } + if (targetEmoji != null) { + emojis.forEach { emoji -> + if (emoji != targetEmoji) { + emoji.codes.forEach { + if (it == code) log.w("fixCode: delete(2) $it for ${emoji.resName}") + } + emoji.removeCodeByCode(code) + } + } + continue + } + log.e("checkCodeConflict: can't use vendorUnicodeMap. code=$code, preferCode=$preferCode") + } + } + + log.e("checkCodeConflict: code $code ${vendorText[code]} ${ + emojis.joinToString(" ") { + "${it.resName}/${it.unified.toRawString()}" + } + }") + hasConflict = true + } + + // コードのない絵文字のチェック + for (emoji in emojiMap.values) { + if (emoji.codes.isNotEmpty()) continue + val fixes = fixCode.entries.filter { it.value == emoji.resName } + when (fixes.size) { + 0 -> { + log.e("checkCodeConflict: emoji has no code. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}") + hasConflict = true + } + 1 -> { + val fix = fixes.first() + val code = fix.key + emoji.addCode(code) + log.i("fixCode code=$code resName=${emoji.resName}") + } + else -> { + log.e("checkCodeConflict: multiple fix match for ${emoji.resName}") + hasConflict = true + } + } + } + } + + private fun checkShortNameConflict() { + val nameMap = HashMap>().apply { + for (emoji in emojiMap.values) { + for (name in emoji.shortNames) { + prepare(name) { HashSet() }.add(emoji) + } + } + } + + // cameFromCategory 以外のshortNameがあるなら、cameFromCategoryのshortNameは使わない + for (emoji in emojiMap.values) { + if (emoji.shortNames.any { it.cameFrom != fromCategoryHtml }) { + emoji.removeShortNameByCameFrom(fromCategoryHtml) + } + } + + for ((name, emojis) in nameMap.entries.sortedBy { it.key }) { + // shortNameからemojiを1意に解決できるなら正常 + if (emojis.size == 1) continue + + // fixNameで解決する + val fixResName = fixName[name] + if (fixResName != null) { + var found = false + for (emoji in emojis) { + if (emoji.resName == fixResName) { + found = true + } else { + emoji.removeShortName(name.name) + } + } + if (!found) error("checkShortNameConflict: missing emoji resName=$fixResName") + continue + } + + // emoji,cameFrom のペアのリスト + val froms = emojiMap.values + .flatMap { emoji -> emoji.shortNames.map { Pair(emoji, it) } } + .filter { it.second == name } + .map { Pair(it.first, it.second.cameFrom) } + + // どこ由来のShortNameかで優先順位をつける + val preferFrom = froms.find { it.second == "EmojiDataJson" } + ?: froms.find { it.second == "EmojiSpec" } + ?: froms.find { it.second == "EmojiOneJson" } + ?: froms.find { it.second == fromCategoryHtml } + + if (preferFrom != null) { + // 優先順位の低いemojiからshortNameを除去する + var found = false + for (emoji in emojis) { + if (emoji == preferFrom.first) { + found = true + } else { + emoji.removeShortName(name.name) + } + } + if (!found) error("checkShortNameConflict: missing emoji ${preferFrom.first.key}") + continue + } + + // 解決できなかった + log.e("checkShortNameConflict: name $name froms=${froms.joinToString(",") { "${it.first.resName}${it.second}" }}") + hasConflict = true + } + + // 名前のない絵文字のチェック + for (emoji in emojiMap.values) { + if (emoji.shortNames.isNotEmpty()) continue + val fix = fixName.entries.filter { it.value == emoji.resName } + if (fix.size > 1) error("checkShortNameConflict: multiple fix match for ${emoji.resName}") + if (fix.size == 1) { + emoji.addShortName(fix.first().key) + continue + } + log.e("checkShortNameConflict: emoji has no shortName. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}") + hasConflict = true + } + } + + private fun fixToneParent() { + var hasError = false + val nameMap = HashMap() + for (emoji in emojiMap.values) { + for (shortName in emoji.shortNames) { + nameMap[shortName.name] = emoji + } + } + val suffixList = skinToneModifiers.values + .flatMap { it.suffixList.toList() } + .sortedByDescending { it.length } + + + for (emoji in emojiMap.values) { + // トーンの絵文字の一部は内部に他のトーンの名前を含むので誤検出を回避する + if (emoji.resName in arrayOf("emj_1f3fc", "emj_1f3fe")) continue + + fun String.removeToneSuffix(): String { + var name = this + when (name) { + "kiss_light_skin_tone" -> return "couplekiss" + "kiss_medium_light_skin_tone" -> return "couplekiss" + "kiss_medium_skin_tone" -> return "couplekiss" + "kiss_medium_dark_skin_tone" -> return "couplekiss" + "kiss_dark_skin_tone" -> return "couplekiss" + } + + suffixList.forEach { name = name.replace(it, "") } + return when (name) { + "couple_with_heart_person_person" -> "couple_with_heart" + "kiss_person_person" -> "couplekiss" + "kiss_woman_woman" -> "woman_kiss_woman" + "kiss_woman_man" -> "woman_kiss_man" + "kiss_man_man" -> "man_kiss_man" + else -> name + } + } + + for (shortName in emoji.shortNames) { + val parent = nameMap[shortName.name.removeToneSuffix()] + if (parent == emoji) continue + if (parent == null) { + log.e("${emoji.resName} $shortName looks like tone variation,but can't find parent.") + hasError = true + continue + } + emoji.addToneParent(parent) + } + } + + for (emoji in emojiMap.values) { + val parents = emoji.toneParents + if (parents.isEmpty()) continue + if (parents.size > 1) { + log.e("${emoji.resName} has many parents. ${parents.joinToString(",")}") + hasError = true + continue + } + parents.forEach { parent -> + val toneCode = emoji.key.getToneCode("makeToneMap") + ?: error("emoji $emoji has parent, but has no toneCode.") + when (val old = parent.toneChildren[toneCode]) { + null -> parent.toneChildren[toneCode] = emoji + emoji -> { + } + else -> error("conflict toneChildren. emoji ${parent.resName} has $old and $emoji.") + } + } + } + + if (hasError) error("toneParent error.") + } + + private fun writeData() { + val outFile = "emoji_map.txt" + UnixPrinter(File(outFile)).use { writer -> + + // 絵文字をskipするか事前に調べる + for (emoji in emojiMap.values.sortedBy { it.key }) { + + val codeSet = emoji.codeSet.sorted() + if (codeSet.isEmpty()) { + log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes") + emoji.skip = true + } else if (emoji.unified.list.isAsciiEmoji()) { + log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes") + emoji.skip = true + } + } + + for (emoji in emojiMap.values.sortedBy { it.key }) { + if (emoji.skip) continue + + // 画像リソースID + val strResName = emoji.resName + if (File("assets/$strResName.svg").isFile) { + writer.println("svg:$strResName.svg//${emoji.imageFiles.first().second}") + } else { + writer.println("drawable:$strResName//${emoji.imageFiles.first().second}") + } + + // unified + writer.println("un:${emoji.unified.toRawString()}//${emoji.unified.from}") + + // Unicodeシーケンス + val codeSet = emoji.codeSet.sorted() + for (code in codeSet) { + if (code == emoji.unified) continue + val raw = code.toRawString() + if (raw.isEmpty()) error("too short code! ${emoji.resName}") + writer.println("u:$raw//${code.from}") + } + + // 画像リソースIDとshortcodeの関連付けを出力する + // 投稿時にshortcodeをユニコードに変換するため、shortcodeとUTF-16シーケンスの関連付けを出力する + val nameList = emoji.nameList.notEmpty() + ?: error("missing shortName. ${emoji.resName}") + nameList.forEachIndexed { index, triple -> + val (_, name, froms) = triple + val header = if (index == 0) "sn" else "s" + writer.println("${header}:$name//${froms.joinToString(",")}") + } + } + + fun Category.printCategory(list: List) { + writer.println("cn:${this.name}") + for (emoji in list) { + writer.println("c:${emoji.unified.toRawString()}") + emoji.usedInCategory = this + } + } + + categoryNames.forEach { category -> + category.printCategory(category.emojis.filter { !it.skip }) + } + + run { + val category = categoryNames.find { it.name == "Others" }!! + category.printCategory( + emojiMap.values + .filter { it.usedInCategory == null && it.toneParents.isEmpty() } + .sortedBy { it.shortNames.first() } + ) + } + + // スキントーン + emojiMap.values + .filter { it.toneChildren.isNotEmpty() } + .sortedBy { it.key } + .forEach { parent -> + if (parent.usedInCategory == null) { + log.e("parent ${parent.resName} not used in any category!") + } + parent.toneChildren.entries + .toList() + .sortedBy { it.key } + .forEach eachChild@{ + val child = it.value + if (child.skip) return@eachChild + writer.println("t:${parent.unified.toRawString()},${it.key.toRawString()},${child.unified.toRawString()}") + } + } + + // 複合トーン + run { + val category = categoryNames.find { it.name == "ComplexTones" }!! + category.printCategory( + emojiMap.values + .filter { it.toneChildren.isNotEmpty() } + .sortedBy { it.key } + .flatMap { parent -> + if (parent.usedInCategory == null) { + log.e("parent ${parent.resName} not used in any category!") + } + parent.toneChildren.entries + .toList() + .filter { it.key.list.size > 1 } + .sortedBy { it.key } + .map { it.value } + } + ) + } + } + + log.d("wrote $outFile") + } + + suspend fun run() { + + // 修正用データを読む + readFixData() + + // emojipediaからバージョン別一覧とカテゴリ別一覧を読む + val emojipediaData = HttpClient { + install(HttpTimeout) { + val t = 30000L + requestTimeoutMillis = t + connectTimeoutMillis = t + socketTimeoutMillis = t + } + }.use { client -> + readEmojipedia(client) + } + + // 画像をスキャンする前に絵文字のqualified codeを調べておく + readEmojipediaQualified(emojipediaData) + + // サブフォルダから絵文字の画像を収集する + scanEmojiImages() + // 収集した画像をコピーする + copyImages() + + addEmojipediaShortnames() + + readVendorCode() + readEmojiData() + readEmojiOne() + readCategoryShortName(emojipediaData) + + checkCodeConflict() + checkShortNameConflict() + + fixToneParent() + + + fixCategory() + + if (hasConflict) error("please fix conflicts.") + + // shortcodeに含まれる文字の種類を列挙する + val nameChars = HashSet() + val nameMap = HashMap() + for (emoji in emojiMap.values) { + for (shortName in emoji.shortNames) { + nameMap[shortName] = emoji + for (c in shortName.name) + nameChars.add(c) + } + } + log.i("nameChars: [${nameChars.sorted().joinToString("")}]") + + writeData() + + log.d("codeCameFroms: ${Emoji.codeCameFroms.joinToString(",")}") + log.d("nameCameFroms: ${Emoji.nameCameFroms.joinToString(",")}") + } } fun main(args: Array) = runBlocking { - log.d("args=${args.joinToString(",")}") - App().run() + log.d("args=${args.joinToString(",")}") + App().run() } diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Utils.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Utils.kt index 118029da..9a0f090b 100644 --- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Utils.kt +++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/Utils.kt @@ -1,147 +1,141 @@ +@file:Suppress("unused") + package jp.juggler.subwaytooter.emoji -import io.ktor.client.* -import io.ktor.client.call.* -import io.ktor.client.request.* -import io.ktor.client.statement.* -import io.ktor.http.* +import io.ktor.client.HttpClient +import io.ktor.client.request.get +import io.ktor.client.request.header +import io.ktor.client.statement.readBytes +import io.ktor.http.HttpStatusCode import org.jsoup.Jsoup -import java.io.* +import java.io.BufferedReader +import java.io.File +import java.io.FileInputStream +import java.io.FileOutputStream +import java.io.InputStreamReader import java.nio.charset.Charset import java.security.MessageDigest import java.util.* fun String.isTruth() = when { - this == "" -> false - this == "0" -> false - this.startsWith("f", ignoreCase = true) -> false - this.startsWith("t", ignoreCase = true) -> true - this == "on" -> true - else -> true + this == "" -> false + this == "0" -> false + this.startsWith("f", ignoreCase = true) -> false + this.startsWith("t", ignoreCase = true) -> true + this == "on" -> true + else -> true } // split CharSequence to Unicode codepoints fun CharSequence.eachCodePoint(block: (Int) -> Unit) { - val end = length - var i = 0 - while (i < end) { - val c1 = get(i++) - if (Character.isHighSurrogate(c1) && i < length) { - val c2 = get(i) - if (Character.isLowSurrogate(c2)) { - i++ - block(Character.toCodePoint(c1, c2)) - continue - } - } - block(c1.toInt()) - } + val end = length + var i = 0 + while (i < end) { + val c1 = get(i++) + if (Character.isHighSurrogate(c1) && i < end) { + val c2 = get(i) + if (Character.isLowSurrogate(c2)) { + ++i + block(Character.toCodePoint(c1, c2)) + continue + } + } + block(c1.code) + } } + // split CharSequence to Unicode codepoints -fun CharSequence.listCodePoints() = ArrayList().also{ dst-> - val end = length - var i = 0 - while (i < end) { - val c1 = get(i++) - if (Character.isHighSurrogate(c1) && i < length) { - val c2 = get(i) - if (Character.isLowSurrogate(c2)) { - i++ - dst.add(Character.toCodePoint(c1, c2)) - continue - } - } - dst.add(c1.toInt()) - } +fun CharSequence.listCodePoints() = ArrayList().also { dst -> + eachCodePoint { dst.add(it) } }.toIntArray() // split codepoint to UTF-8 bytes fun codePointToUtf8(cp: Int, block: (Int) -> Unit) { - // incorrect codepoint - if (cp < 0 || cp > 0x10FFFF) codePointToUtf8('?'.toInt(), block) + // incorrect codepoint + if (cp < 0 || cp > 0x10FFFF) codePointToUtf8('?'.code, block) - if (cp >= 128) { - if (cp >= 2048) { - if (cp >= 65536) { - block(0xF0.or(cp.shr(18))) - block(0x80.or(cp.shr(12).and(0x3f))) - } else { - block(0xE0.or(cp.shr(12))) - } - block(0x80.or(cp.shr(6).and(0x3f))) - } else { - block(0xC0.or(cp.shr(6))) - } - block(0x80.or(cp.and(0x3f))) - } else { - block(cp) - } + if (cp >= 128) { + if (cp >= 2048) { + if (cp >= 65536) { + block(0xF0.or(cp.shr(18))) + block(0x80.or(cp.shr(12).and(0x3f))) + } else { + block(0xE0.or(cp.shr(12))) + } + block(0x80.or(cp.shr(6).and(0x3f))) + } else { + block(0xC0.or(cp.shr(6))) + } + block(0x80.or(cp.and(0x3f))) + } else { + block(cp) + } } private const val hexString = "0123456789ABCDEF" private val encodePercentSkipChars by lazy { - HashSet().apply { - ('0'..'9').forEach { add(it.toInt()) } - ('A'..'Z').forEach { add(it.toInt()) } - ('a'..'z').forEach { add(it.toInt()) } - add('-'.toInt()) - add('_'.toInt()) - add('.'.toInt()) - } + HashSet().apply { + ('0'..'9').forEach { add(it.code) } + ('A'..'Z').forEach { add(it.code) } + ('a'..'z').forEach { add(it.code) } + add('-'.code) + add('_'.code) + add('.'.code) + } } fun String.encodePercent(): String = - StringBuilder(length).also { sb -> - eachCodePoint { cp -> - if (encodePercentSkipChars.contains(cp)) { - sb.append(cp.toChar()) - } else { - codePointToUtf8(cp) { b -> - sb.append('%') - .append(hexString[b shr 4]) - .append(hexString[b and 15]) - } - } - } - }.toString() + StringBuilder(length).also { sb -> + eachCodePoint { cp -> + if (encodePercentSkipChars.contains(cp)) { + sb.append(cp.toChar()) + } else { + codePointToUtf8(cp) { b -> + sb.append('%') + .append(hexString[b shr 4]) + .append(hexString[b and 15]) + } + } + } + }.toString() // same as x?.let{ dst.add(it) } fun T.addTo(dst: ArrayList) = dst.add(this) fun T.addTo(dst: HashSet) = dst.add(this) fun > E?.notEmpty(): E? = - if (this?.isNotEmpty() == true) this else null + if (this?.isNotEmpty() == true) this else null fun > E?.notEmpty(): E? = - if (this?.isNotEmpty() == true) this else null + if (this?.isNotEmpty() == true) this else null fun T?.notEmpty(): T? = - if (this?.isNotEmpty() == true) this else null + if (this?.isNotEmpty() == true) this else null fun ByteArray.digestSha256() = - MessageDigest.getInstance("SHA-256")?.let { - it.update(this@digestSha256) - it.digest() - }!! + MessageDigest.getInstance("SHA-256")?.let { + it.update(this@digestSha256) + it.digest() + }!! fun ByteArray.encodeBase64UrlSafe(): String { - val bytes = Base64.getUrlEncoder().encode(this) - return StringBuilder(bytes.size).apply { - for (b in bytes) { - val c = b.toChar() - if (c != '=') append(c) - } - }.toString() + val bytes = Base64.getUrlEncoder().encode(this) + return StringBuilder(bytes.size).apply { + for (b in bytes) { + val c = b.toInt().toChar() + if (c != '=') append(c) + } + }.toString() } fun ByteArray.decodeUtf8() = toString(Charsets.UTF_8) fun String.encodeUtf8() = toByteArray(Charsets.UTF_8) -inline fun Any?.castOrThrow(name:String,block: T.() -> Unit){ - if (this !is T) error("type mismatch. $name is ${T::class.qualifiedName}") - block() +inline fun Any?.castOrThrow(name: String, block: T.() -> Unit) { + if (this !is T) error("type mismatch. $name is ${T::class.qualifiedName}") + block() } // 型推論できる文脈だと型名を書かずにすむ @@ -155,87 +149,86 @@ fun > minComparable(a: T, b: T): T = if (a <= b) a else b fun > maxComparable(a: T, b: T): T = if (a >= b) a else b fun MutableCollection.removeFirst(check: (T) -> Boolean): T? { - val it = iterator() - while (it.hasNext()) { - val item = it.next() - if (check(item)) { - it.remove() - return item - } - } - return null + val it = iterator() + while (it.hasNext()) { + val item = it.next() + if (check(item)) { + it.remove() + return item + } + } + return null } fun File.readAllBytes() = - FileInputStream(this).use { it.readBytes() } + FileInputStream(this).use { it.readBytes() } fun File.save(data: ByteArray) { - val tmpFile = File("$absolutePath.tmp") - FileOutputStream(tmpFile).use { it.write(data) } - this.delete() - if (!tmpFile.renameTo(this)) error("$this: rename failed.") + val tmpFile = File("$absolutePath.tmp") + FileOutputStream(tmpFile).use { it.write(data) } + this.delete() + if (!tmpFile.renameTo(this)) error("$this: rename failed.") } fun ByteArray.saveTo(file: File) = file.save(this) -fun File.forEachLine(charset: Charset = Charsets.UTF_8, block:(Int, String)->Unit)= - BufferedReader(InputStreamReader(FileInputStream(this),charset)).use { reader -> - var lno = 0 - reader.forEachLine { - block(++lno, it) - } - lno - } +fun File.forEachLine(charset: Charset = Charsets.UTF_8, block: (Int, String) -> Unit) = + BufferedReader(InputStreamReader(FileInputStream(this), charset)).use { reader -> + var lno = 0 + reader.forEachLine { + block(++lno, it) + } + lno + } -inline fun HashMap.prepare(key:K,creator:()->V):V{ - var value = get(key) - if( value == null) { - value = creator() - put(key,value) - } - return value!! +inline fun HashMap.prepare(key: K, creator: () -> V): V { + var value = get(key) + if (value == null) { + value = creator() + put(key, value) + } + return value!! } private val reFileNameBadChars = """[\\/:*?"<>|-]+""".toRegex() -private val cacheDir by lazy{ File("./cache").apply { mkdirs() }} +private val cacheDir by lazy { File("./cache").apply { mkdirs() } } -fun clearCache(){ - cacheDir.list()?.forEach { name-> - File(cacheDir,name).takeIf { it.isFile }?.delete() - } +fun clearCache() { + cacheDir.list()?.forEach { name -> + File(cacheDir, name).takeIf { it.isFile }?.delete() + } } -private val cacheExpire by lazy{ 8 * 3600000L } +private val cacheExpire by lazy { 8 * 3600000L } suspend fun HttpClient.cachedGetBytes(url: String, headers: Map): ByteArray { - val fName = reFileNameBadChars.replace(url, "-") - val cacheFile = File(cacheDir, fName) - if (System.currentTimeMillis() - cacheFile.lastModified() <= cacheExpire) { - println("GET(cached) $url") - return cacheFile.readAllBytes() - } - println("GET $url") + val fName = reFileNameBadChars.replace(url, "-") + val cacheFile = File(cacheDir, fName) + if (System.currentTimeMillis() - cacheFile.lastModified() <= cacheExpire) { + println("GET(cached) $url") + return cacheFile.readAllBytes() + } + println("GET $url") - get(url) { - headers.entries.forEach { - header(it.key, it.value) - } - }.let { res -> - return when (res.status) { - HttpStatusCode.OK -> - res.receive().also { it.saveTo(cacheFile) } - else -> { - cacheFile.delete() - error("get failed. $url ${res.status}") - } - } - } + get(url) { + headers.entries.forEach { + header(it.key, it.value) + } + }.let { res -> + return when (res.status) { + HttpStatusCode.OK -> + res.readBytes().also { it.saveTo(cacheFile) } + else -> { + cacheFile.delete() + error("get failed. $url ${res.status}") + } + } + } } suspend fun HttpClient.cachedGetString(url: String, headers: Map): String = - cachedGetBytes(url,headers).decodeUtf8() - -fun String.parseHtml(baseUri: String) = - Jsoup.parse(this, baseUri) + cachedGetBytes(url, headers).decodeUtf8() +fun String.parseHtml(baseUri: String): org.jsoup.nodes.Document? = + Jsoup.parse(this, baseUri) diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/CodepointList.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/CodepointList.kt index 7b6475a7..2f6dfe87 100644 --- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/CodepointList.kt +++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/CodepointList.kt @@ -2,7 +2,6 @@ package jp.juggler.subwaytooter.emoji.model import jp.juggler.subwaytooter.emoji.cast import jp.juggler.subwaytooter.emoji.notEmpty -import java.lang.StringBuilder /* 絵文字はコードポイントのリストで表現される。 @@ -30,61 +29,61 @@ import java.lang.StringBuilder // list of codepoints class CodepointList( - val from: String, - val list: IntArray + val from: String, + val list: IntArray ) : Comparable { - override fun equals(other: Any?): Boolean = - list.contentEquals(other.cast()?.list) + override fun equals(other: Any?): Boolean = + list.contentEquals(other.cast()?.list) - override fun hashCode(): Int { - var code = 0 - for (v in list) code = code.shl(2).xor(v) - return code - } + override fun hashCode(): Int { + var code = 0 + for (v in list) code = code.shl(2).xor(v) + return code + } - override fun compareTo(other: CodepointList): Int { - val la = this.list - val lb = other.list - var i = 0 - do { - val a = la.elementAtOrNull(i) - val b = lb.elementAtOrNull(i) + override fun compareTo(other: CodepointList): Int { + val la = this.list + val lb = other.list + var i = 0 + do { + val a = la.elementAtOrNull(i) + val b = lb.elementAtOrNull(i) - val r = if (a == null) { - if (b == null) break - -1 - } else if (b == null) { - 1 - } else { - a.compareTo(b) - } - if (r != 0) return r - ++i - } while (true) - return 0 - } + val r = if (a == null) { + if (b == null) break + -1 + } else if (b == null) { + 1 + } else { + a.compareTo(b) + } + if (r != 0) return r + ++i + } while (true) + return 0 + } - // make string like as "uuuu-uuuu-uuuu-uuuu" - // cp値の余分な0は除去される - // 常に小文字である - fun toHex() = StringBuilder(list.size * 5).also { - list.forEachIndexed { i, v -> - if (i > 0) it.append('-') - it.append(String.format("%x", v).toLowerCase()) - } - }.toString() + // make string like as "uuuu-uuuu-uuuu-uuuu" + // cp値の余分な0は除去される + // 常に小文字である + fun toHex() = StringBuilder(list.size * 5).also { + list.forEachIndexed { i, v -> + if (i > 0) it.append('-') + it.append("%x".format(v).lowercase()) + } + }.toString() - // make raw string - fun toRawString() = StringBuilder(list.size + 10).also { sb -> - for (cp in list) { - sb.appendCodePoint(cp) - } - }.toString() + // make raw string + fun toRawString() = StringBuilder(list.size + 10).also { sb -> + for (cp in list) { + sb.appendCodePoint(cp) + } + }.toString() - fun toResourceId() = "emj_${toHex().replace("-", "_")}" + fun toResourceId() = "emj_${toHex().replace("-", "_")}" - override fun toString() = "${toHex()},$from" + override fun toString() = "${toHex()},$from" // fun makeUtf16(): String { // // java の文字列にする @@ -111,28 +110,28 @@ class CodepointList( // return sb.toString() // } - fun toKey(from: String) = - list.filter { it != 0xfe0f && it != 0xfe0e && it != 0x200d } - .toIntArray().toCodepointList(from) + fun toKey(from: String) = + list.filter { it != 0xfe0f && it != 0xfe0e && it != 0x200d } + .toIntArray().toCodepointList(from) - fun getToneCode(from: String) :CodepointList? { - val used = HashSet() - return list - .filter { skinToneModifiers.containsKey(it) } - .mapNotNull { - if (used.contains(it)) { - null - } else { - used.add(it) - it - } - }.toIntArray().toCodepointList(from) - } + fun getToneCode(from: String): CodepointList? { + val used = HashSet() + return list + .filter { skinToneModifiers.containsKey(it) } + .mapNotNull { + if (used.contains(it)) { + null + } else { + used.add(it) + it + } + }.toIntArray().toCodepointList(from) + } } fun IntArray.isAsciiEmoji() = - size == 1 && first() < 0xae + size == 1 && first() < 0xae fun IntArray.toCodepointList(from: String) = if (isEmpty()) null else CodepointList(from, this) @@ -140,8 +139,8 @@ private val reHex = """([0-9A-Fa-f]+)""".toRegex() // cp-cp-cp-cp => CodepointList fun String.toCodepointList(from: String) = - reHex.findAll(this) - .map { mr -> mr.groupValues[1].toInt(16) } - .toList().notEmpty() - ?.toIntArray() - ?.toCodepointList(from) + reHex.findAll(this) + .map { mr -> mr.groupValues[1].toInt(16) } + .toList().notEmpty() + ?.toIntArray() + ?.toCodepointList(from) diff --git a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/ShortName.kt b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/ShortName.kt index 9b244a47..f482b452 100644 --- a/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/ShortName.kt +++ b/_Emoji/emojiConverter/src/main/kotlin/jp/juggler/subwaytooter/emoji/model/ShortName.kt @@ -4,18 +4,18 @@ import jp.juggler.subwaytooter.emoji.cast import jp.juggler.subwaytooter.emoji.notEmpty -class ShortName(val cameFrom:String,val name:String) :Comparable{ - override fun equals(other: Any?): Boolean = - name == other.cast()?.name +class ShortName(val cameFrom: String, val name: String) : Comparable { + override fun equals(other: Any?): Boolean = + name == other.cast()?.name - override fun hashCode(): Int = - name.hashCode() + override fun hashCode(): Int = + name.hashCode() - override fun toString(): String = - "SN($cameFrom)$name" + override fun toString(): String = + "SN($cameFrom)$name" - override fun compareTo(other: ShortName): Int = - name.compareTo(other.name) + override fun compareTo(other: ShortName): Int = + name.compareTo(other.name) } private val reColonHead = """\A:""".toRegex() @@ -23,10 +23,10 @@ private val reColonTail = """:\z""".toRegex() private val reNotCode = """[^\w\d+_]+""".toRegex() private val reUnderTail = """_+\z""".toRegex() -fun String.toShortName(cameFrom:String) = - toLowerCase() - .replace(reColonHead, "") - .replace(reColonTail, "") - .replace(reNotCode, "_") - .replace(reUnderTail,"") - .notEmpty()?.let{ ShortName(cameFrom=cameFrom,it) } +fun String.toShortName(cameFrom: String) = + lowercase() + .replace(reColonHead, "") + .replace(reColonTail, "") + .replace(reNotCode, "_") + .replace(reUnderTail, "") + .notEmpty()?.let { ShortName(cameFrom = cameFrom, it) }