2021-02-15 08:32:16 +01:00
package jp.juggler.subwaytooter.emoji
import io.ktor.client.*
import io.ktor.client.features.*
2021-02-19 02:18:58 +01:00
import io.ktor.http.*
import jp.juggler.subwaytooter.emoji.model.*
2021-02-15 08:32:16 +01:00
import kotlinx.coroutines.runBlocking
2021-02-19 02:18:58 +01:00
import org.apache.commons.text.StringEscapeUtils
import org.intellij.lang.annotations.Language
2021-02-15 08:32:16 +01:00
// Refers to emoji-data/emoji.json
// - Map of UTF-16 string => image resource ID. Several UTF-16 strings may be assigned to the same ID.
// - Map of shortcode => image resource ID. Several shortcodes may be assigned to the same ID.
// - The separator inside a shortcode may be a hyphen or an underscore. The output data normalizes it to an underscore.
// - The app must normalize the separators of the key string to underscores when it looks up a shortcode.
// Command given to ProcessBuilder in copyImages() for the webp conversion.
const val pathCwebp = "C:/cygwin64/bin/cwebp.exe"
// Vendor names iterated by readEmojiData().
val emojiDataCodepointsVendors = arrayOf("docomo", "au", "softbank", "google")
/**
 * Opens [src] through a FileInputStream and [dst] through a FileOutputStream,
 * both scoped with use.
 *
 * Any Throwable raised inside the try block is rethrown as an IOException whose
 * message contains both paths and whose cause is the original error.
 *
 * NOTE(review): the statements that move the bytes are not visible in this view.
 */
fun copyFile(dst: File, src: File) {
try {
FileInputStream(src).use { streamIn ->
FileOutputStream(dst).use { streamOut ->
} catch (ex: Throwable) {
throw IOException("copyFile failed. src=$src dst=$dst", ex)
2021-02-19 02:18:58 +01:00
//fun svgToVectorDrawable(dst: File, src: File) {
// val tmp = ByteArrayOutputStream()
// // Write all the error message during parsing into SvgTree. and return here as getErrorLog().
// // We will also log the exceptions here.
// try {
// val svgTree = Svg2Vector.parse(src)
// svgTree.mScaleFactor = 24 / max(svgTree.w, svgTree.h)
// if (svgTree.canConvertToVectorDrawable()) {
// Svg2Vector.writeFile(tmp, svgTree)
// }
// val errorLog = svgTree.errorLog
// if (errorLog.isNotEmpty()) println("$src $errorLog")
// FileOutputStream(dst).use { outStream ->
// outStream.write(tmp.toByteArray())
// }
// } catch (e: Exception) {
// println("svgToVectorDrawable: ${e.message} ${src.canonicalPath}")
// }
2021-02-15 08:32:16 +01:00
class App {
companion object {
2021-02-19 02:18:58 +01:00
// Source tag; checkShortNameConflict() compares the cameFrom of a short name against it.
const val cameFromCategory = "CategoryHtml"
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// Matches from a hash sign to the end of the line; used to strip comments from input lines.
val reComment = """#.*""".toRegex()
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// Short names consulted through any{} in readQualified13_1() and readCategoryShortName().
val ignoreShortName = arrayOf(
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// scanImageDir() skips every file whose path contains one of these strings.
private val ignoreImagePath = arrayOf(
// fe82b (free dial) is drawn as a ? flag in noto-emoji and cannot be used
// unneeded files that live in the mastodon folder
2021-02-15 08:32:16 +01:00
// Read the fix-up data
// code => value from the code lines of the fix file; the value is compared with emoji.resName in checkCodeConflict()
private val fixCode = HashMap<CodepointList, String>()
// short name => value from the name lines; the value is compared with emoji.resName in checkShortNameConflict()
private val fixName = HashMap<ShortName, String>()
// (first argument, value) pairs from the category lines; fixCategory() reads them as (enumId, short name)
private val fixCategory = ArrayList<Pair<String, String>>()
// key => code list; filled by readFixData() and readQualified13_1(), consulted by scanImageDir()
private val fixUnified = HashMap<CodepointList, CodepointList>()
/**
 * Loads ./fix_code.txt line by line into the fix maps.
 *
 * Text matched by reComment is removed from each line, and the rest is matched
 * as: type word, first argument, remainder. Lines that do not match are ignored.
 *
 * - type unified: the first argument is parsed as a code list and stored in
 *   fixUnified under its key.
 * - For the types handled by the when: the remainder must contain exactly one
 *   token, otherwise the line is skipped. code writes to fixCode, name writes
 *   to fixName, category appends a pair to fixCategory.
 * - Any other type raises an error that names the file and the line number.
 *
 * The !! operators make a first argument that cannot be parsed fail with a
 * NullPointerException.
 */
private fun readFixData() {
val fixFile = "./fix_code.txt"
File(fixFile).forEachLine { lno, rawLine ->
val line = rawLine
.replace(reComment, "")
val mr = """\A(\w+)\s*([\w._-]+)\s*(.*)""".toRegex().find(line)
if (mr != null) {
val type = mr.groupValues[1]
val arg1 = mr.groupValues[2]
if (type == "unified") {
val code = arg1.toCodepointList("fixUnified")!!
fixUnified[code.toKey("fixUnified")] = code
val data = """([\w+-]+)""".toRegex().findAll(mr.groupValues[3]).map { it.groupValues[1] }.toList()
if (data.size != 1) return@forEachLine
when (type) {
"code" -> fixCode[arg1.toCodepointList("fixCode")!!] = data.first()
"name" -> fixName[arg1.toShortName("fixName")!!] = data.first()
"category" -> Pair(arg1, data.first()).addTo(fixCategory)
else -> error("$fixFile $lno : bad fix_data type=$type")
// key => short name collected by readQualified13_1(); iterated by addEmojipediaShortnames()
val emojipediaShortNames = HashMap<CodepointList,ShortName>()
// noto-emoji file names lack fe0f,
// so the emoji 13.1 qualified names are fetched in advance
/**
 * For every URL in the array, fetches the page through client.cachedGetString,
 * parses it as HTML and walks the li elements of its ul lists.
 *
 * An li without a span of class emoji is skipped. Otherwise the code points of
 * the span text are stored in fixUnified under the key returned by toKey, and a
 * short name derived from the href of the link in the li is stored in
 * emojipediaShortNames under the same key, guarded by a check against
 * ignoreShortName.
 *
 * NOTE(review): the URL list and part of the short name derivation are not
 * visible in this view.
 */
private suspend fun readQualified13_1(client: HttpClient) {
val cameFrom = "emojiQualified"
for( url in arrayOf(
)) {
val root = client.cachedGetString(url, mapOf()).parseHtml(url)
for( node in root.getElementsByClass("sidebar") ){
for( node in root.getElementsByClass("categories") ){
for( list in root.getElementsByTag("ul")){
for( li in list.getElementsByTag("li")) {
val span = li.getElementsByTag("span").find { it.hasClass("emoji") }
?: continue
val code = span.text().listCodePoints().toCodepointList(cameFrom)!!
val key = code.toKey(cameFrom)
fixUnified[ key] = code
val href = li.getElementsByTag("a")!!.attr("href")
val shortName = href
.replace("/", "")
?: error("can't parse $href")
if( !ignoreShortName.any{ it ==}){
emojipediaShortNames[key] = shortName
/**
 * Iterates over every (key, short name) entry of emojipediaShortNames.
 *
 * NOTE(review): the loop body is not visible in this view; confirm what is
 * done with each entry.
 */
private fun addEmojipediaShortnames(){
for((key,shortName) in emojipediaShortNames){
2021-02-19 02:18:58 +01:00
// Scan the image files and build the table that relates emoji codes to files
// Variation selectors and ZWJ are removed from the map keys
private val emojiMap = HashMap<CodepointList, Emoji>()
/**
 * Fills emojiMap from the image directories listed at the end of the function.
 *
 * The local helper scanImageDir lists the files of one directory and skips
 * entries that are not files, paths containing an ignoreImagePath entry, and
 * the name LICENSE. It removes the text _border from the name and extracts the
 * code with the codeSpec regex; a name that cannot be parsed is an error.
 * The emoji is looked up by key. A missing one is created with fixUnified[key]
 * as its unified code when present, otherwise with unifiedQualifier(code).
 * The pair (image file, cameFrom) is appended to emoji.imageFiles.
 */
private fun scanEmojiImages() {
fun HashMap<CodepointList, Emoji>.scanImageDir(
cameFrom: String,
dirPath: String,
@Language("RegExp") codeSpec: String,
unifiedQualifier: (CodepointList) -> CodepointList = { it }
2021-02-19 02:18:58 +01:00
) {
val dir = File(dirPath)
val reCodeSpec = codeSpec.toRegex()
var countFound = 0
var countCreate = 0
dir.listFiles()!!.forEach { imageFile ->
if (!imageFile.isFile) return@forEach
val unixPath = imageFile.path.replace("\\", "/")
if (ignoreImagePath.any { unixPath.indexOf(it) != -1 }) return@forEach
var name =
if (name == "LICENSE") return@forEach
name = name.replace("_border", "")
2021-02-19 02:18:58 +01:00
val code = reCodeSpec.find(name)
2021-02-19 02:18:58 +01:00
?: error("can't parse $name")
// The code with variation selectors and ZWJ removed is used as the key
val key = code.toKey(cameFrom)
2021-02-19 02:18:58 +01:00
var emoji = get(key)
if (emoji == null) {
val unified2 = fixUnified[key]
emoji = Emoji(key, unified2 ?: unifiedQualifier(code))
2021-02-19 02:18:58 +01:00
put(key, emoji)
emoji.imageFiles.add(Pair(imageFile, cameFrom))
2021-02-19 02:18:58 +01:00
log.d("scanImageDir: found=$countFound,create=$countCreate, dir=$dir")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
emojiMap.scanImageDir("override", "override", """([0-9A-Fa-f_-]+)\.""")
emojiMap.scanImageDir("mastodonSVG", "mastodon/public/emoji", """([0-9A-Fa-f_-]+)\.""")
emojiMap.scanImageDir("twemojiSvg", "twemoji/assets/svg/", """([0-9A-Fa-f_-]+)\.""")
emojiMap.scanImageDir("notoSvg", "noto-emoji/svg", """emoji_u([0-9A-Fa-f_-]+)\.""") { code ->
if (code.list.last() != 0xfe0f)
2021-02-19 02:18:58 +01:00
emojiMap.scanImageDir("notoPng", "noto-emoji/png/72", """emoji_u([0-9A-Fa-f_-]+)\.""")
emojiMap.scanImageDir("emojiDataTw", "emoji-data/img-twitter-72", """([0-9A-Fa-f_-]+)\.""")
emojiMap.scanImageDir("emojiDataGo", "emoji-data/img-google-136", """([0-9A-Fa-f_-]+)\.""")
emojiMap.scanImageDir("emojiOne", "emojione/assets/svg", """([0-9A-Fa-f_-]+)\.""")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// // resName => Resource
// private val resNameMap = HashMap<ResName, Resource>()
// // map: code => resName => Resource
// private val codeMap = HashMap<CodepointList, HashMap<ResName, Resource>>()
// // map shortname => resName => Resource
// private val nameMap = HashMap<ShortName, HashMap<ResName, Resource>>()
2021-02-15 08:32:16 +01:00
/**
 * Produces one output image per emoji in emojiMap.
 *
 * The resource name is derived from the emoji key and stored in emoji.resName.
 * The first entry of emoji.imageFiles is the source file.
 * - svg branch: the source is copied to assets/(resName).svg when that file
 *   does not exist yet.
 * - else branch: the source is converted to drawable-nodpi/(resName).webp by
 *   running pathCwebp when that file does not exist yet; a non-zero exit code
 *   is an error.
 *
 * NOTE(review): the condition that selects the svg branch is garbled in this
 * view.
 */
private fun copyImages() {
2021-02-19 02:18:58 +01:00
var countSvg = 0
var countPng = 0
for (emoji in emojiMap.values) {
val strResName = emoji.key.toResourceId()
emoji.resName = strResName
val (src, _) = emoji.imageFiles.first()
if ("svg")) {
val dst = File("assets/$strResName.svg")
if (!dst.exists()) {
//svgToVectorDrawable(dst, src)
copyFile(dst, src)
} else {
val dst = File("drawable-nodpi/$strResName.webp")
if (!dst.exists()) {
val pb = ProcessBuilder(pathCwebp, src.path, "-quiet", "-o", dst.path)
val rv = pb.start().waitFor()
if (rv != 0) error("cwebp failed. dst=$dst src=$src")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
log.d("copyImage: countSvg=$countSvg, countPng=$countPng")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// private fun updateCodeMap() {
// codeMap.clear()
// resNameMap.values.forEach { res_info ->
// res_info.codePoints.forEach { cp ->
// codeMap.prepare(cp) { HashMap() }[res_info.res_name] = res_info
// codeMap.prepare(cp.removeZWJ()) { HashMap() }[res_info.res_name] = res_info
// }
// }
// }
// private fun updateNameMap() {
// nameMap.clear()
// resNameMap.values.forEach { res_info ->
// res_info.shortNames.forEach { name ->
// nameMap.prepare(name) { HashMap() }[res_info.res_name] = res_info
// }
// }
// }
2021-02-15 08:32:16 +01:00
/**
 * Merges emoji-data entries into the emoji already present in emojiMap.
 *
 * For each entry:
 * - the field unified is parsed and the emoji is looked up by its key; a
 *   missing emoji is an error, and a differing stored unified value is logged;
 * - further code lists are added through emoji.addCode;
 * - the parsed short_names are added through emoji.addShortName; an entry
 *   with no usable short name is an error.
 *
 * When skin_variations is present, every JsonObject variation is processed.
 * The local handleCode walks the code points of the variation key, resolving
 * each through skinToneModifiers (a missing modifier is an error) and picking
 * suffixList[suffixIndex]. At the last code point the variation emoji is
 * looked up by its own unified field, receives short names that end with the
 * joined suffixes, and is marked with toneParent and isToneVariation.
 * A warning is logged when skinToneUsed does not reach the size of
 * skinToneModifiers.
 *
 * NOTE(review): the expression that supplies the entries and several receiver
 * expressions are not visible in this view.
 */
private fun readEmojiData() {
2021-02-19 02:18:58 +01:00
.forEach { src ->
// List of the code points of the emoji
var unified = src.string("unified")?.toCodepointList("EmojiDataJsonUnified")!!
var key = unified.toKey("EmojiDataJsonUnifiedKey")
var emoji = emojiMap[key] ?: error("can't find emoji for $key")
2021-02-19 02:18:58 +01:00
if (emoji.unified != unified) {
log.d("readEmojiData: unified not match. emoji=${emoji.unified}, emojiData=${unified}")
2021-02-19 02:18:58 +01:00
?.mapNotNull { it.toCodepointList("EmojiDataJsonVariation") }
?.forEach { emoji.addCode(it) }
2021-02-19 02:18:58 +01:00
for (k in emojiDataCodepointsVendors) {
?.let { emoji.addCode(it) }
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
// List of short_name values
2021-02-19 02:18:58 +01:00
val shortNames = HashSet<String>().also { dst ->
src.stringArrayList("short_names")?.forEach {
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
}.mapNotNull { it.toShortName("EmojiDataJson") }
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
if (shortNames.isEmpty())
error("emojiData ${src.string("unified")} has no shortName")
shortNames.forEach { emoji.addShortName(it) }
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
val parentEmoji = emoji
2021-02-15 08:32:16 +01:00
// Skin tones
2021-02-19 02:18:58 +01:00
src.jsonObject("skin_variations")?.let { skinVariations ->
val parentName = shortNames.first()
val skinToneUsed = HashSet<Int>()
for ((k, data) in skinVariations.entries) {
if (data !is JsonObject) continue
fun handleCode(list: IntArray, idx: Int, parentSuffix: Array<String>, suffixIndex: Int) {
2021-02-19 02:18:58 +01:00
val code = list.elementAtOrNull(idx) ?: return
val modifier = skinToneModifiers[code]
?: error("missing skinToneModifier u${list[idx].toString(16)} for $parentName")
val lastSuffix = modifier.suffixList[suffixIndex]
val suffix =
if (parentSuffix.contains(lastSuffix))
2021-02-19 02:18:58 +01:00
arrayOf( *parentSuffix ,lastSuffix)
if (idx < list.size - 1) {
2021-02-19 02:18:58 +01:00
handleCode(list, idx + 1, suffix, suffixIndex)
} else {
unified = data.string("unified")!!.toCodepointList("EmojiData(skinTone)")!!
key = unified.toKey("EmojiData(skinTone)")
emoji = emojiMap[key] ?: error("can't find emoji for $key")
.mapNotNull { ( + suffix.joinToString("")).toShortName("EmojiData(skinTone)") }
.forEach { emoji.addShortName(it) }
2021-02-19 02:18:58 +01:00
emoji.toneParent = parentEmoji
emoji.isToneVariation = true
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
val codeList = k.toCodepointList("toneSpec")!!.list
2021-02-19 02:18:58 +01:00
for (suffixIndex in skinToneModifiers.values.first().suffixList.indices) {
handleCode(codeList, 0, emptyArray(), suffixIndex)
2021-02-19 02:18:58 +01:00
if (skinToneUsed.size != skinToneModifiers.size) {
log.w("skin tone code not fully used: $parentName")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
/**
 * Adds short names from ./old-emojione.json to the emoji in emojiMap.
 *
 * Entries whose value is not a JsonObject are skipped. The entry key is parsed
 * as a code list (error when it cannot be parsed) and the emoji is looked up
 * by the key of that code (error when missing). Candidate names are the field
 * alpha code plus the field aliases split on the vertical bar. Every candidate
 * that parses as a short name is added to the emoji; an entry with no usable
 * name is an error.
 *
 * NOTE(review): the step that turns the File into the iterated JSON object is
 * not visible in this view.
 */
private fun readEmojione() {
val cameFrom = "EmojiOneJson"
2021-02-15 08:32:16 +01:00
val root = File("./old-emojione.json")
2021-02-19 02:18:58 +01:00
for ((strCode, item) in root.entries) {
if (item !is JsonObject) continue
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// Check the code
val code = strCode.toCodepointList(cameFrom)
2021-02-19 02:18:58 +01:00
?: error("can't parse $strCode")
val key = code.toKey(cameFrom)
2021-02-19 02:18:58 +01:00
val emoji = emojiMap[key] ?: error("missing emoji for $key")
val names = ArrayList<String>()
item.string("alpha code")?.let { names.add(it) }
item.string("aliases")?.split("|")?.let { names.addAll(it) }
val shortNames = names.mapNotNull { it.toShortName(cameFrom) }
if (shortNames.isEmpty()) error("readEmojione: missing name for code $strCode")
shortNames.forEach { emoji.addShortName(it) }
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
/**
 * Derives short names from the text fetched for each URL in the array.
 *
 * Per line: text matched by reComment is removed and the line is trimmed;
 * empty lines are skipped. The line is split on semicolons into at most three
 * trimmed columns, and lines without three columns are skipped. Lines whose
 * first column contains two consecutive dots are skipped. The first column is
 * parsed as a code list and the emoji is looked up by its key (error when
 * missing). The third column is lower-cased, the skin tone phrases are
 * rewritten to their underscore forms, and every remaining run of non-word
 * characters becomes a single underscore. The result must parse as a short
 * name, otherwise an error is raised.
 *
 * NOTE(review): the URL list and what is done with the parsed short name are
 * not visible in this view.
 */
private suspend fun readEmojiSpec(client: HttpClient) {
// Pick up emoji short names from an external source
for (url in arrayOf(
)) {
client.cachedGetString(url, mapOf())
.forEach { rawLine ->
val line = rawLine.replace(reComment, "").trim()
if (line.isEmpty()) return@forEach
val cols = line.split(";", limit = 3).map { it.trim() }
if (cols.size != 3) return@forEach
val (strCode, _, descriptionSpec) = cols
if (strCode.indexOf("..") != -1) return@forEach
val code = strCode.toCodepointList("EmojiSpec")!!
val key = code.toKey("EmojiSpec")
val emoji = emojiMap[key] ?: error("can't find emoji for $key")
val strShortName = descriptionSpec.toLowerCase()
.replace("medium-light skin tone", "medium_light_skin_tone")
.replace("medium skin tone", "medium_skin_tone")
.replace("medium-dark skin tone", "medium_dark_skin_tone")
.replace("light skin tone", "light_skin_tone")
.replace("dark skin tone", "dark_skin_tone")
.replace("""[^\w\d]+""".toRegex(), "_")
val shortName = strShortName.toShortName("EmojiSpec")
?: error("can't parse $strShortName")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// Per category
// Pick up emoji short names from an external source
/**
 * Assigns emoji to categories from the HTML page of each category.
 *
 * Categories without a url are skipped. The page is fetched through
 * client.cachedGetString and parsed; the li elements of the first element
 * with class emoji-list are visited. For each li a short name is derived from
 * the href of its link (guarded by a check against ignoreShortName), and the
 * text of the elements with class emoji is converted to a code list. Empty or
 * unparsable text is an error, as is a key that has no emoji in emojiMap.
 * The emoji is then added to the category with allowDuplicate = true.
 */
private suspend fun readCategoryShortName(client: HttpClient) {
categoryNames.values.forEach { category ->
if (category.url == null) return@forEach
val root = client.cachedGetString(category.url, mapOf()).parseHtml(category.url)
val list = root.getElementsByClass("emoji-list").first()
list.getElementsByTag("li").forEach liLoop@{ node ->
val shortName = node.getElementsByTag("a")!!.attr("href")
.replace("/", "")
?: error("can't parse ${node.getElementsByTag("a")!!.attr("href")}")
if (ignoreShortName.any { == it }) return@liLoop
val text = node.getElementsByClass("emoji").text()
val code = text.listCodePoints()
.takeIf { it.isNotEmpty() }?.toCodepointList("CategoryHtml")
2021-02-19 02:18:58 +01:00
?: error("can't parse code from $text")
val key = code.toKey("CategoryHtml")
2021-02-19 02:18:58 +01:00
val emoji = emojiMap[key]
?: error("can't find emoji for ${category.url} $shortName $key $text")
category.addEmoji(emoji, allowDuplicate = true, addingName = shortName.toString())
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
/**
 * Applies the category entries collected in the fixCategory list.
 *
 * A map from short name to emoji is built from every emoji in emojiMap; when a
 * short name occurs on several emoji, the one visited last wins. For each
 * (enumId, short name) pair the category with that enumId and the emoji with
 * that short name are resolved, and the emoji is added to the category.
 * A missing category, an unparsable short name, or a missing emoji raises an
 * error.
 */
private fun fixCategory() {
val nameMap = HashMap<ShortName, Emoji>().apply {
for (emoji in emojiMap.values)
for (shortName in emoji.shortNames)
this[shortName] = emoji
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
for ((enumId, strShortName) in fixCategory) {
val category = categoryNames.values.find { it.enumId == enumId }
?: error("fixCategory: missing category for $enumId")
val shortName = strShortName.toShortName("fixCategory")
?: error("fixCategory: can't parse $strShortName")
val emoji = nameMap[shortName]
?: error("fixCategory: missing emoji for $strShortName")
category.addEmoji(emoji, addingName = shortName.toString())
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
/** Returns the receiver with its XML escapes decoded by StringEscapeUtils.unescapeXml. */
private fun String.unescapeXml(): String {
    return StringEscapeUtils.unescapeXml(this)
}
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// vendor code => descriptive texts collected by readVendorCode(); printed by checkCodeConflict()
private val vendorText = HashMap<CodepointList, ArrayList<String>>()
// vendor code => (unicode code, vendor name); consulted by checkCodeConflict() to pick the preferred emoji
private val vendorUnicodeMap = HashMap<CodepointList, Pair<CodepointList, String>>()
/**
 * Loads the carrier code tables from the emoji4unicode data files.
 *
 * First pass: for each of docomo, kddi and softbank, every e element of
 * emoji4unicode/data/emoji4unicode.xml is scanned and its attributes are
 * collected with XML unescaping. Elements without a parsable unicode attribute
 * or without an attribute for the current vendor are skipped, as are vendor
 * values that contain a greater-than sign. The name and text_fallback
 * attributes are appended to vendorText for the vendor code.
 * When vendorUnicodeMap has no entry for the vendor code, (unicode, vendor) is
 * stored. When it has one, an existing kddi entry combined with the current
 * vendor softbank is skipped without a report; every other clash is logged and
 * sets the local error flag.
 *
 * The function ends by raising an error when the flag was set.
 */
private fun readVendorCode() {
var error = false
// Read the combined xml
// It is re-read once per vendor because of the priority order
val xml1 = File("emoji4unicode/data/emoji4unicode.xml")
for (vendor in arrayOf("docomo", "kddi", "softbank")) {
"""<e([^>]+)""".toRegex().findAll(xml1).forEach { mr1 ->
val attrs = HashMap<String, String>()
"""(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 ->
attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml()
val unicode = attrs["unicode"]?.toCodepointList("emoji4unicode") ?: return@forEach
2021-02-19 02:18:58 +01:00
val strFrom = attrs[vendor] ?: return@forEach
if (strFrom.indexOf(">") != -1) return@forEach
val from = strFrom.toCodepointList("emoji4unicode") ?: return@forEach
2021-02-19 02:18:58 +01:00
val text = "${attrs["name"]}/${attrs["text_fallback"]}"
vendorText.prepare(from) { ArrayList() }.add(text)
val old = vendorUnicodeMap[from]
if (old != null) {
if (old.second == "kddi" && vendor == "softbank") return@forEach
error = true
log.e("vendorUnicodeMap conflict. code=$from old=$old new=$unicode($vendor)")
} else {
vendorUnicodeMap[from] = Pair(unicode, vendor)
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// Second pass of readVendorCode: one carrier_data.xml per vendor.
for (vendor in arrayOf("docomo", "kddi", "softbank")) {
2021-02-19 02:18:58 +01:00
// Read the description texts from the vendor specific file
val xml = File("emoji4unicode/data/${vendor}/carrier_data.xml")
2021-02-19 02:18:58 +01:00
"""<e([^>]+)""".toRegex().findAll(xml).forEach { mr1 ->
val attrs = HashMap<String, String>()
"""(\w+)="([^"]+)"""".toRegex().findAll(mr1.groupValues[1]).forEach { mr2 ->
attrs[mr2.groupValues[1].unescapeXml()] = mr2.groupValues[2].unescapeXml()
val code = attrs["unicode"]?.toCodepointList("emoji4unicode")
?: return@forEach
2021-02-19 02:18:58 +01:00
attrs["name_ja"]?.let { vendorText.prepare(code) { ArrayList() }.add("$it($vendor)") }
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
if (error) error("readVendorCode failed.")
2021-02-15 08:32:16 +01:00
// Set to true by checkCodeConflict() and checkShortNameConflict(); run() raises an error when it is set.
private var hasConflict = false
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// Check for duplicated code => image assignments
/**
 * Looks for codes that are claimed by more than one emoji.
 *
 * A map from code to the set of emoji that carry it is built from emojiMap.
 * Codes with a single emoji are fine. The others are visited in key order:
 * - When fixCode has a value for the code, one of the emoji must have that
 *   value as resName, otherwise an error is raised; for the other emoji the
 *   message fixCode: delete(1) is logged.
 * - When every emoji involved got the code from one of the vendor sources
 *   EmojiDataJson(au), EmojiDataJson(softbank) or EmojiDataJson(docomo), the
 *   code preferred by vendorUnicodeMap selects the target emoji and the
 *   message fixCode: delete(2) is logged for the others; a failure to select
 *   a target is logged as an error.
 * - A conflict that stays unresolved is logged together with vendorText for
 *   the code, and hasConflict is set.
 *
 * NOTE(review): the statements that perform the removals and the control flow
 * between the three cases are not visible in this view.
 */
private fun checkCodeConflict() {
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
val codeMap = HashMap<CodepointList, HashSet<Emoji>>()
for (emoji in emojiMap.values) {
for (code in {
2021-02-19 02:18:58 +01:00
codeMap.prepare(code) { HashSet() }.add(emoji)
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
for ((code, emojis) in codeMap.entries.sortedBy { it.key }) {
if (emojis.size == 1) continue
val fixResName = fixCode[code]
if (fixResName != null) {
2021-02-15 08:32:16 +01:00
var found = false
2021-02-19 02:18:58 +01:00
for (emoji in emojis) {
if (emoji.resName == fixResName) {
2021-02-15 08:32:16 +01:00
found = true
} else {
2021-02-19 02:18:58 +01:00 {
if (it == code) log.w("fixCode: delete(1) $it for ${emoji.resName}")
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
if (!found) error("checkCodeConflict: missing emoji resName=$fixResName")
2021-02-15 08:32:16 +01:00
val onlyVendorCode = emojis.all { emoji ->
when ( { it == code }?.from) {
2021-02-19 02:18:58 +01:00
"EmojiDataJson(au)", "EmojiDataJson(softbank)", "EmojiDataJson(docomo)" -> true
else -> false
2021-02-15 08:32:16 +01:00
if (onlyVendorCode) {
2021-02-19 02:18:58 +01:00
val preferCode = vendorUnicodeMap[code]?.first
if (preferCode != null) {
val targetEmoji = emojis.find { emoji -> { it == preferCode } }
2021-02-19 02:18:58 +01:00
if (targetEmoji != null) {
emojis.forEach { emoji ->
if (emoji != targetEmoji) { {
if (it == code) log.w("fixCode: delete(2) $it for ${emoji.resName}")
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
log.e("checkCodeConflict: can't use vendorUnicodeMap. code=$code, preferCode=$preferCode")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
log.e("checkCodeConflict: code $code ${vendorText[code]} ${
emojis.joinToString(" ") {
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
hasConflict = true
// Check for emoji that have no code
// The fixCode entries whose value equals the resName of the emoji decide the outcome:
// none or several => error log and hasConflict, exactly one => that code is logged.
for (emoji in emojiMap.values) {
if ( continue
val fixes = fixCode.entries.filter { it.value == emoji.resName }
when (fixes.size) {
0 -> {
log.e("checkCodeConflict: emoji has no code. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
hasConflict = true
1 -> {
val fix = fixes.first()
val code = fix.key
log.i("fixCode code=$code resName=${emoji.resName}")
else -> {
log.e("checkCodeConflict: multiple fix match for ${emoji.resName}")
hasConflict = true
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
/**
 * Looks for short names that are claimed by more than one emoji, and for
 * emoji that have no short name.
 *
 * A map from short name to the set of emoji that carry it is built from
 * emojiMap. Names with a single emoji are fine. The others are visited in key
 * order:
 * - When fixName has a value for the name, one of the emoji must have that
 *   value as resName, otherwise an error is raised.
 * - Otherwise the (emoji, cameFrom) pairs for the name are collected and the
 *   first pair found in the source order EmojiDataJson, EmojiSpec,
 *   EmojiOneJson, cameFromCategory is preferred; the preferred emoji must be
 *   among the conflicting ones, otherwise an error is raised.
 * - A conflict that stays unresolved is logged and sets hasConflict.
 *
 * Finally each emoji without short names is checked against fixName: several
 * matching entries raise an error, and an emoji left without a name is logged
 * and sets hasConflict.
 *
 * NOTE(review): the statements that remove or add short names are not visible
 * in this view.
 */
private fun checkShortNameConflict() {
val nameMap = HashMap<ShortName, HashSet<Emoji>>().apply {
for (emoji in emojiMap.values) {
for (name in emoji.shortNames) {
prepare(name) { HashSet() }.add(emoji)
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// When an emoji has a shortName from a source other than cameFromCategory, its cameFromCategory shortName is not used
for (emoji in emojiMap.values) {
if (emoji.shortNames.any { it.cameFrom != cameFromCategory }) {
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
for ((name, emojis) in nameMap.entries.sortedBy { it.key }) {
// Fine when the shortName resolves to exactly one emoji
if (emojis.size == 1) continue
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// Resolve with fixName
val fixResName = fixName[name]
if (fixResName != null) {
var found = false
for (emoji in emojis) {
if (emoji.resName == fixResName) {
found = true
} else {
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
if (!found) error("checkShortNameConflict: missing emoji resName=$fixResName")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// List of (emoji, cameFrom) pairs
val froms = emojiMap.values
.flatMap { emoji -> { Pair(emoji, it) } }
.filter { it.second == name }
.map { Pair(it.first, it.second.cameFrom) }
// Prioritize by the source the ShortName came from
val preferFrom = froms.find { it.second == "EmojiDataJson" }
?: froms.find { it.second == "EmojiSpec" }
?: froms.find { it.second == "EmojiOneJson" }
?: froms.find { it.second == cameFromCategory }
2021-02-19 02:18:58 +01:00
if (preferFrom != null) {
// Remove the shortName from the emoji with lower priority
2021-02-15 08:32:16 +01:00
var found = false
2021-02-19 02:18:58 +01:00
for (emoji in emojis) {
if (emoji == preferFrom.first) {
found = true
} else {
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
if (!found) error("checkShortNameConflict: missing emoji ${preferFrom.first.key}")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// Could not be resolved
log.e("checkShortNameConflict: name $name froms=${froms.joinToString(",") { "${it.first.resName}${it.second}" }}")
2021-02-19 02:18:58 +01:00
hasConflict = true
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
// Check for emoji that have no name
for (emoji in emojiMap.values) {
if (emoji.shortNames.isNotEmpty()) continue
val fix = fixName.entries.filter { it.value == emoji.resName }
if (fix.size > 1) error("checkShortNameConflict: multiple fix match for ${emoji.resName}")
if (fix.size == 1) {
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
log.e("checkShortNameConflict: emoji has no shortName. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
hasConflict = true
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
/**
 * Entry point of the generator.
 *
 * An HttpClient with the HttpTimeout feature is created (request, connect and
 * socket timeout all 30000 ms) and scoped with use. Inside that scope:
 * - an error is raised when hasConflict is set;
 * - a map from short name to emoji and the set of characters used in short
 *   names are collected, and the characters are logged;
 * - emoji_map.txt is written through a UnixPrinter. Emoji with an empty
 *   codeSet and emoji whose unified code is an ascii emoji are marked with
 *   skip = true and left out. The remaining emoji are visited sorted by key,
 *   first per code and then per short name, followed by the emoji of each
 *   category and finally, under CATEGORY_OTHER, the emoji that are neither
 *   used in a category nor tone variations, sorted by first short name;
 * - the output file name and Emoji.codeCameFroms are logged.
 *
 * NOTE(review): the calls that run the read and check steps, and the lines
 * that print to the writer, are not visible in this view.
 */
suspend fun run() {
HttpClient {
install(HttpTimeout) {
val t = 30000L
requestTimeoutMillis = t
connectTimeoutMillis = t
socketTimeoutMillis = t
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
}.use { client ->
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
if (hasConflict) error("please fix conflicts.")
// Enumerate the kinds of characters that occur in shortcodes
val nameChars = HashSet<Char>()
val nameMap = HashMap<ShortName, Emoji>()
for (emoji in emojiMap.values) {
for (shortName in emoji.shortNames) {
nameMap[shortName] = emoji
for (c in
log.w("nameChars: [${nameChars.sorted().joinToString("")}]")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
val outFile = "emoji_map.txt"
UnixPrinter(File(outFile)).use { writer ->
2021-02-19 02:18:58 +01:00
for (emoji in emojiMap.values.sortedBy { it.key }) {
val codeSet = emoji.codeSet.sorted()
// Emoji that consist only of ascii codes are not processed
if (codeSet.isEmpty()) {
log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes")
emoji.skip = true
} else if (emoji.unified.list.isAsciiEmoji()) {
// NOTE(review): this branch logs the same text as the empty codeSet branch
log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes")
emoji.skip = true
if (emoji.skip) continue
2021-02-19 02:18:58 +01:00
// Output the association between image resource IDs and Unicode sequences
val strResName = emoji.resName
if (File("assets/$strResName.svg").isFile) {
} else {
codeSet.forEach { code ->
val raw = code.toRawString()
if(raw.isEmpty()) error("too short code! ${emoji.resName}")
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
for (emoji in emojiMap.values.sortedBy { it.key }) {
if (emoji.skip) continue
2021-02-19 02:18:58 +01:00
// Unicode representation that a shortcode is converted to
val unified = emoji.unified
2021-02-19 02:18:58 +01:00
// Output the association between image resource IDs and shortcodes
// Output the association between shortcodes and UTF-16 sequences, because shortcodes are converted to unicode when posting
for (name in { }.toSet().sorted()) {
val froms = emoji.shortNames.filter { == name }.map { it.cameFrom }.sorted()
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
categoryNames.values.forEach { category ->
2021-02-19 02:18:58 +01:00
category.eachEmoji { emoji ->
if (emoji.skip) return@eachEmoji
2021-02-19 02:18:58 +01:00
val shortName = emoji.shortNames.first()
2021-02-15 08:32:16 +01:00
val enumId = "CATEGORY_OTHER"
2021-02-19 02:18:58 +01:00
.filter { it.usedInCategory == null && !it.isToneVariation }
2021-02-19 02:18:58 +01:00
.sortedBy { it.shortNames.first() }
.forEach { emoji ->
if (emoji.skip) return@forEach
2021-02-19 02:18:58 +01:00
val shortName = emoji.shortNames.first()
2021-02-19 02:18:58 +01:00
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
log.d("wrote $outFile")
2021-02-15 08:32:16 +01:00
log.d("codeCameFroms: ${Emoji.codeCameFroms.joinToString(",")}")
2021-02-15 08:32:16 +01:00
// shortname => unicode
2021-02-19 02:18:58 +01:00
// JsonArray()
// .also { dst ->
// for ((shortName, rh) in nameMap.entries.sortedBy { it.key }) {
// val resInfo = rh.values.first()
// dst.add(jsonObject("shortcode" to, "unicode" to resInfo.unified))
// }
// }
// .toString(2)
// .encodeUtf8()
// .saveTo(File("shortcode-emoji-data-and-old-emojione2.json"))
2021-02-15 08:32:16 +01:00
2021-02-19 02:18:58 +01:00
fun main(args: Array<String>) = runBlocking {
2021-02-15 08:32:16 +01:00