// SubwayTooter-Android-App/_Emoji/src/main/kotlin/jp/juggler/subwaytooter/emoji/Main.kt
//
// 1014 lines
// 32 KiB
// Kotlin

package jp.juggler.subwaytooter.emoji
import io.ktor.client.*
import io.ktor.client.features.*
import io.ktor.http.*
import jp.juggler.subwaytooter.emoji.model.*
import kotlinx.coroutines.runBlocking
import org.apache.commons.text.StringEscapeUtils
import org.intellij.lang.annotations.Language
import java.io.*
// Reads the image files in the png folder.
// Reads emoji-data/emoji.json.
//
// Generates the following java code:
// - A map of UTF-16 string => image resource ID. Several UTF-16 strings may be assigned to the same ID.
// - A map of shortcode => image resource ID. Several shortcodes may be assigned to the same ID.
// - The separator inside a shortcode may be a hyphen or an underscore. The output data normalizes it to an underscore.
// - When looking up a shortcode, the app must normalize the separators of the key string to underscores.

// Path of the cwebp executable that copyImages() launches to convert non-SVG images to .webp.
const val pathCwebp = "C:/cygwin64/bin/cwebp.exe"
// Keys of an emoji-data entry whose values readEmojiData() registers as additional codes of the emoji.
val emojiDataCodepointsVendors = arrayOf("docomo", "au", "softbank", "google")
/**
 * Copies the content of [src] into [dst], replacing whatever [dst] contained.
 *
 * The data is streamed through a buffer, so the file is never held in memory as a whole.
 *
 * @param dst file to create or overwrite.
 * @param src file to read.
 * @throws IOException when anything goes wrong. [dst] is deleted before the exception is
 * thrown so that no partially written file is left behind; the original failure is
 * attached as the cause.
 */
fun copyFile(dst: File, src: File) {
    try {
        FileInputStream(src).use { streamIn ->
            FileOutputStream(dst).use { streamOut ->
                streamIn.copyTo(streamOut)
            }
        }
    } catch (ex: Throwable) {
        // Throwable is caught on purpose: the failure is always rethrown, wrapped with the paths.
        dst.delete()
        throw IOException("copyFile failed. src=$src dst=$dst", ex)
    }
}
//fun svgToVectorDrawable(dst: File, src: File) {
//
// val tmp = ByteArrayOutputStream()
//
// // Write all the error message during parsing into SvgTree. and return here as getErrorLog().
// // We will also log the exceptions here.
// try {
// val svgTree = Svg2Vector.parse(src)
// svgTree.mScaleFactor = 24 / max(svgTree.w, svgTree.h)
// if (svgTree.canConvertToVectorDrawable()) {
// Svg2Vector.writeFile(tmp, svgTree)
// }
// val errorLog = svgTree.errorLog
// if (errorLog.isNotEmpty()) println("$src $errorLog")
// FileOutputStream(dst).use { outStream ->
// outStream.write(tmp.toByteArray())
// }
// } catch (e: Exception) {
// println("svgToVectorDrawable: ${e.message} ${src.canonicalPath}")
// }
//}
class App {
// Constants shared by the methods of App.
companion object{
// cameFrom tag given to short names and codes that are parsed from the Emojipedia category lists.
const val fromCategoryHtml = "CategoryHtml"
// Matches a '#' and everything after it on the line; readFixData() removes the match from each line.
val reComment = """#.*""".toRegex()
// scanImageDir() skips files whose path (with '/' separators) ends with one of these.
private val ignoreImagePath = arrayOf(
"LICENSE",
// fe82b (toll-free dial) is rendered as a "?" flag in noto-emoji and cannot be used
"noto-emoji/png/128/emoji_ufe82b.png",
"noto-emoji/svg/emoji_ufe82b.svg",
// superfluous file in the mastodon folder
"mastodon/public/emoji/sheet_10.png"
)
// Among the data found on emojipedia, emojis that have one of the following short names are ignored
val ignoreShortName = arrayOf(
"flag_for_texas_ustx"
)
// readEmojiOne() does not register these short names.
private val ignoreEmojiOneShortNames = setOf(
"man_in_tuxedo",
"man_in_tuxedo_tone1", "tuxedo_tone1",
"man_in_tuxedo_tone2", "tuxedo_tone2",
"man_in_tuxedo_tone3", "tuxedo_tone3",
"man_in_tuxedo_tone4", "tuxedo_tone4",
"man_in_tuxedo_tone5", "tuxedo_tone5",
)
}
// Correction data, filled by readFixData()
// code -> resource name of the emoji that keeps the code when several emojis claim it (see checkCodeConflict)
private val fixCode = HashMap<CodepointList, String>()
// short name -> resource name of the emoji that keeps the name when several emojis claim it (see checkShortNameConflict)
private val fixName = HashMap<ShortName, String>()
// (category name, short name) pairs applied by fixCategory()
private val fixCategory = ArrayList<Pair<String, String>>()
// key -> unified code to use for the emoji with that key
private val fixUnified = HashMap<CodepointList, CodepointList>()
// Image files are scanned to build the table that relates emoji codes to files.
// Variation selectors and ZWJ are removed from the keys of the map.
private val emojiMap = HashMap<CodepointList, Emoji>()
// key -> short name taken from Emojipedia, filled by readEmojipediaQualified()
private val emojipediaShortNames = HashMap<CodepointList, ShortName>()
/////////////////////////////////////////////////
/**
 * Loads the manual correction rules from `./fix_code.txt`.
 *
 * Everything from a `#` to the end of a line is dropped. A remaining line has the shape
 * `type arg1 [data]`:
 * - `unified CODE` stores CODE in [fixUnified] under its key.
 * - `code CODE RESNAME` stores RESNAME in [fixCode].
 * - `name SHORTNAME RESNAME` stores RESNAME in [fixName].
 * - `category CATEGORY SHORTNAME` appends the pair to [fixCategory].
 *
 * Lines that do not match, and non-`unified` lines that do not carry exactly one data token,
 * are ignored. An unknown type or an argument that cannot be parsed aborts with an error that
 * names the file and the line number.
 */
private fun readFixData() {
    val fileFixData = "./fix_code.txt"
    // Both patterns are loop-invariant, so they are compiled once instead of once per line.
    val reLine = """\A(\w+)\s*([\w._-]+)\s*(.*)""".toRegex()
    val reData = """([\w+-]+)""".toRegex()
    File(fileFixData).forEachLine { lno, rawLine ->
        val line = rawLine
            .replace(reComment, "")
            .trim()
        val mr = reLine.find(line)
        if (mr != null) {
            val type = mr.groupValues[1]
            val arg1 = mr.groupValues[2]
            if (type == "unified") {
                val code = arg1.toCodepointList("fixUnified")
                    ?: error("$fileFixData $lno : can't parse unified code $arg1")
                val key = code.toKey("fixUnified")
                    ?: error("$fileFixData $lno : can't get key from $arg1")
                fixUnified[key] = code
                return@forEachLine
            }
            val data = reData.findAll(mr.groupValues[3]).map { it.groupValues[1] }.toList()
            if (data.size != 1) return@forEachLine
            when (type) {
                "code" -> {
                    val code = arg1.toCodepointList("fixCode")
                        ?: error("$fileFixData $lno : can't parse code $arg1")
                    fixCode[code] = data.first()
                }
                "name" -> {
                    val name = arg1.toShortName("fixName")
                        ?: error("$fileFixData $lno : can't parse short name $arg1")
                    fixName[name] = data.first()
                }
                "category" -> Pair(arg1, data.first()).addTo(fixCategory)
                else -> error("$fileFixData $lno : bad fix_data type=$type")
            }
        }
    }
}
// Reads Emojipedia's per-version lists and per-category lists and saves them as JSON.
// It would be a problem if the site became unreachable...
//
// Returns the content of Emojipedia.json when that file already exists. Otherwise the pages are
// fetched through client.cachedGetString(), the result is written to Emojipedia.json and returned.
// The returned object has two entries:
// - "qualifiedCode": array of [emoji text, href] rows collected from the per-version pages
// - "categories": object that maps a category name to an array of [emoji text, href] rows
@Suppress("FunctionName")
private suspend fun readEmojipedia(client: HttpClient) :JsonObject {
val fileEmojipedia = File("Emojipedia.json")
// use the saved copy if there is one
if( fileEmojipedia.isFile) return fileEmojipedia.readAllBytes().decodeUtf8().decodeJsonObject()
val dstRoot = JsonObject()
val dstQualified = JsonArray().also{ dstRoot["qualifiedCode"] = it }
// per-version pages
for (url in arrayOf(
"https://emojipedia.org/emoji-13.1/",
"https://emojipedia.org/emoji-13.0/",
"https://emojipedia.org/emoji-12.1/",
"https://emojipedia.org/emoji-12.0/",
"https://emojipedia.org/emoji-11.0/",
"https://emojipedia.org/emoji-5.0/",
"https://emojipedia.org/emoji-4.0/",
"https://emojipedia.org/emoji-3.0/",
"https://emojipedia.org/emoji-2.0/",
"https://emojipedia.org/emoji-1.0/",
)) {
val root = client.cachedGetString(url, mapOf()).parseHtml(url)
// elements with class "sidebar" or "categories" are removed before the lists are scanned
root.getElementsByClass("sidebar").forEach { it.remove() }
root.getElementsByClass("categories").forEach { it.remove() }
for (list in root.getElementsByTag("ul")) {
for (li in list.getElementsByTag("li")) {
// an item without a link, or without a span of class "emoji" that has text, is skipped
val href = li.getElementsByTag("a")?.attr("href")
.notEmpty() ?: continue
val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text()
?.notEmpty() ?: continue
dstQualified.add(jsonArray(spanText, href))
}
}
}
// per-category pages; categories without a url are skipped
val dstCategory = JsonObject().also{ dstRoot["categories"]=it}
categoryNames.forEach { category ->
if (category.url == null) return@forEach
val dstCategoryItems = JsonArray().also { dstCategory[category.name] = it }
val root = client.cachedGetString(category.url, mapOf()).parseHtml(category.url)
// NOTE(review): presumably first() yields null when the page has no "emoji-list" element, which would fail on the next line - confirm
val list = root.getElementsByClass("emoji-list").first()
for (li in list.getElementsByTag("li")) {
val href = li.getElementsByTag("a").attr("href")
.notEmpty() ?: continue
val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text()
?.notEmpty() ?: continue
dstCategoryItems.add(jsonArray(spanText, href))
}
}
// save the result so that later runs do not need the site
dstRoot.toString(2).encodeUtf8().saveTo(fileEmojipedia)
return dstRoot
}
////////////////////////////////////////////////////////////////////////
// The file names of noto-emoji lack fe0f, so the list of qualified codes is prepared
// beforehand from the Emojipedia data.
//
// For every [emoji text, href] row under "qualifiedCode" in root:
// - the qualified code is stored in fixUnified unless an entry for its key already exists,
// - the short name derived from the href is stored in emojipediaShortNames.
// Aborts with error() on a row that cannot be parsed, on a duplicated href, and (after all
// rows were processed) when a qualified code was a single code point below 256.
private fun readEmojipediaQualified(root: JsonObject) {
    val ignoreName2 = setOf(
        "zero_width_joiner",
        "variation_selector_16",
    )
    val cameFrom = "emojiQualified"
    // A set keeps the duplicate check constant-time per row instead of rescanning a list.
    val seenHrefs = HashSet<String>()
    var countError = 0
    val rows = root.jsonArray("qualifiedCode")
        ?: error("missing qualifiedCode in the Emojipedia data")
    for (cols in rows.filterIsInstance<JsonArray>()) {
        val spanText = cols[0] as String
        val href = cols[1] as String
        val code = spanText.listCodePoints().toCodepointList(cameFrom)
            ?: error("can't get code from $spanText $href")
        if (!seenHrefs.add(href)) error("duplicate href: $href")
        // https://emojipedia.org/80030/ Couple With Heart: Light Skin Tone
        // The page name is not a name, so it is replaced. The slashes are stripped before the
        // comparison so that both "80030" and "/80030/" are recognised.
        val pageName = href.replace("/", "").let {
            if (it == "80030") "couple-with-heart-light-skin-tone" else it
        }
        val shortName = pageName.toShortName(cameFrom)
            ?: error("can't parse $href")
        if (ignoreName2.contains(shortName.name)) {
            log.w("skip ${shortName.name}")
            continue
        }
        val key = code.toKey(cameFrom)
            ?: error("can't get key from ${code.toHex()} ${shortName.name}")
        if (!fixUnified.containsKey(key)) {
            if (code.list.size == 1 && code.list.first() < 256) {
                ++countError
                log.e("bad unified code: $code")
            } else {
                fixUnified[key] = code
            }
        }
        if (ignoreShortName.any { it == shortName.name }) {
            log.w("skip shortname $shortName $code")
            continue
        }
        emojipediaShortNames[key] = shortName
    }
    // seenHrefs.sorted().forEach { log.d("href=$it") }
    if (countError > 0) error("please fix unified codes. countError=$countError")
}
// Gives every emoji that is present in emojiMap the short name recorded for its key in
// emojipediaShortNames. Keys without an emoji are ignored.
private fun addEmojipediaShortnames() {
    emojipediaShortNames.forEach { (codeKey, name) ->
        val target = emojiMap[codeKey] ?: return@forEach
        target.addShortName(name)
    }
}
// Prepares the per-category emoji lists from the Emojipedia data.
// Each [emoji text, href] row stored under "categories" in root is resolved to an emoji of
// emojiMap and added to its category. Rows whose short name is listed in ignoreShortName are
// skipped; a row that cannot be parsed or resolved aborts with error().
private fun readCategoryShortName(root: JsonObject) {
    categoryNames.forEach eachCategory@{ category ->
        val rows = root.jsonObject("categories")?.jsonArray(category.name)
            ?: return@eachCategory
        rows.filterIsInstance<JsonArray>().forEach eachRow@{ row ->
            val spanText = row[0] as String
            val href = row[1] as String

            val shortName = href.replace("/", "").toShortName(fromCategoryHtml)
            if (shortName == null) error("can't parse $href")
            if (ignoreShortName.any { shortName.name == it }) return@eachRow

            val code = spanText.listCodePoints().toCodepointList(fromCategoryHtml)
            if (code == null) error("can't parse code from $spanText")

            val emoji = emojiMap[code.toKey(fromCategoryHtml)]
            if (emoji == null) {
                error("can't find emoji. category=${category.name}, href=$href, spanText=$spanText")
            }
            category.addEmoji(emoji, allowDuplicate = true, addingName = shortName.toString())
        }
    }
}
// Scans one sub folder and settles the image data of each emoji.
//
// cameFrom         : tag that records where a code or image came from
// dirPath          : folder whose plain files are scanned
// codeSpec         : regex applied to the file name ("_border." is replaced by "." first);
//                    its first group is the code of the emoji
// unifiedQualifier : turns the code of a file into the unified code of a new emoji when
//                    fixUnified has no entry for its key
//
// Every file is appended to the imageFiles of its emoji and its code is added to the emoji.
// An emoji that is not in emojiMap yet is created. Aborts with error() when the folder cannot
// be listed, when a file name or key cannot be parsed, and (after the scan) when a new emoji
// got a unified code that is a single code point below 256.
private fun scanImageDir(
    cameFrom: String,
    dirPath: String,
    @Language("RegExp") codeSpec: String,
    unifiedQualifier: (CodepointList) -> CodepointList = { it }
) {
    val dir = File(dirPath)
    val reCodeSpec = codeSpec.toRegex()
    var countFound = 0
    var countCreate = 0
    var countError = 0
    // listFiles() returns null when dir is not a directory or cannot be read.
    // NOTE(review): the order of listFiles() is unspecified, and the first image registered
    // for an emoji is the one copyImages() uses - confirm that this is acceptable.
    val files = dir.listFiles()
        ?: error("scanImageDir: can't list files. dir=$dir, cameFrom=$cameFrom")
    for (imageFile in files) {
        if (!imageFile.isFile) continue
        val unixPath = imageFile.path.replace("\\", "/")
        if (ignoreImagePath.any { unixPath.endsWith(it) }) continue
        val name = imageFile.name.replace("_border.", ".")
        val code = reCodeSpec.find(name)
            ?.groupValues
            ?.elementAtOrNull(1)
            ?.toCodepointList(cameFrom)
            ?: error("can't parse $name")
        ++countFound
        val key = code.toKey(cameFrom)
            ?: error("scanImageDir: can't get key from $name. dir=$dir")
        var emoji = emojiMap[key]
        if (emoji == null) {
            val unified2 = fixUnified[key] ?: unifiedQualifier(code)
            if (unified2.list.size == 1 && unified2.list.first() < 256) {
                ++countError
                log.e("bad unified code: $unified2")
            }
            emoji = Emoji(key, unified2)
            emojiMap[key] = emoji
            ++countCreate
        }
        emoji.imageFiles.add(Pair(imageFile, cameFrom))
        emoji.addCode(code)
    }
    log.d("scanImageDir: found=$countFound,create=$countCreate, dir=$dir")
    if (countError > 0) error("please fix unified codes. countError=$countError")
}
// Scans all image folders. The order of the calls matters: an emoji keeps its images in the
// order in which the folders were scanned.
private fun scanEmojiImages() {
    // file name that starts with the hex code
    val plainCode = """([0-9A-Fa-f_-]+)\."""
    // noto-emoji file name: the hex code follows "emoji_u"
    val notoCode = """emoji_u([0-9A-Fa-f_-]+)\."""

    scanImageDir(cameFrom = "override", dirPath = "override", codeSpec = plainCode)
    scanImageDir(cameFrom = "mastodonSVG", dirPath = "mastodon/public/emoji", codeSpec = plainCode)
    scanImageDir(cameFrom = "twemojiSvg", dirPath = "twemoji/assets/svg/", codeSpec = plainCode)
    scanImageDir(
        cameFrom = "notoSvg",
        dirPath = "noto-emoji/svg",
        codeSpec = notoCode,
        unifiedQualifier = { code ->
            // append fe0f unless the code already ends with it
            if (code.list.last() == 0xfe0f) {
                code
            } else {
                "${code.toHex()}-fe0f".toCodepointList("notoSvgFix")!!
            }
        }
    )
    scanImageDir(cameFrom = "notoPng", dirPath = "noto-emoji/png/72", codeSpec = notoCode)
    scanImageDir(cameFrom = "emojiDataTw", dirPath = "emoji-data/img-twitter-72", codeSpec = plainCode)
    scanImageDir(cameFrom = "emojiDataGo", dirPath = "emoji-data/img-google-136", codeSpec = plainCode)
    scanImageDir(cameFrom = "emojiOne", dirPath = "emojione/assets/svg", codeSpec = plainCode)
}
// Copies the image file of each emoji.
//
// The resource name of every emoji is derived from its key and stored in emoji.resName.
// The first registered image of the emoji is used: a file whose name ends with "svg" is copied
// to assets/<resName>.svg, any other file is converted to drawable-nodpi/<resName>.webp by
// running cwebp. Destinations that already exist are left untouched.
// Aborts with error() when cwebp returns a non-zero exit code.
private fun copyImages() {
    var countSvg = 0
    var countPng = 0
    for (emoji in emojiMap.values) {
        val strResName = emoji.key.toResourceId()
        emoji.resName = strResName
        val (src, _) = emoji.imageFiles.first()
        if (src.name.endsWith("svg")) {
            ++countSvg
            val dst = File("assets/$strResName.svg")
            if (!dst.exists()) {
                //svgToVectorDrawable(dst, src)
                copyFile(dst, src)
            }
        } else {
            ++countPng
            val dst = File("drawable-nodpi/$strResName.webp")
            if (!dst.exists()) {
                // inheritIO() forwards the output of cwebp to this process. Without it the
                // output is piped but never read: diagnostics are lost, and a child that
                // writes more than the pipe buffer holds would block forever.
                val rv = ProcessBuilder(pathCwebp, src.path, "-quiet", "-o", dst.path)
                    .inheritIO()
                    .start()
                    .waitFor()
                if (rv != 0) {
                    // Do not keep a partial file: the next run would skip it as "existing".
                    dst.delete()
                    error("cwebp failed. dst=$dst src=$src")
                }
            }
        }
    }
    log.d("copyImage: countSvg=$countSvg, countPng=$countPng")
}
// Reads the emoji-data json and adds converted code points and short names.
//
// For every object in ./emoji-data/emoji.json the emoji is looked up in emojiMap by the key of
// its "unified" code (error() if it is missing). Codes from "unified", "variations" and the
// vendor keys listed in emojiDataCodepointsVendors are added, then the short names from
// "short_name" and "short_names". Entries of "skin_variations" are resolved to their own
// emojis, which receive suffixed short names and the base emoji as tone parent.
private fun readEmojiData() {
for( src in File("./emoji-data/emoji.json")
.readAllBytes()
.decodeUtf8()
.decodeJsonArray()
.objectList()
){
// list of the code points of the emoji
// NOTE: unified, key and emoji are vars because handleCode() below reassigns them
// while it processes the skin variations.
var unified = src.string("unified")?.toCodepointList("EmojiDataJsonUnified")!!
var key = unified.toKey("EmojiDataJsonUnifiedKey")
var emoji = emojiMap[key] ?: error("can't find emoji for $key")
if (emoji.unified != unified) {
log.d("readEmojiData: unified not match. emoji=${emoji.unified}, emojiData=${unified}")
emoji.addCode(unified)
}
src.stringArrayList("variations")
?.mapNotNull { it.toCodepointList("EmojiDataJsonVariation") }
?.forEach { emoji.addCode(it) }
for (k in emojiDataCodepointsVendors) {
src.string(k)?.toCodepointList("EmojiDataJson($k)")
?.let { emoji.addCode(it) }
}
// list of short_name
val shortNames = HashSet<String>().also { dst ->
src.string("short_name")?.addTo(dst)
src.stringArrayList("short_names")?.forEach {
it.addTo(dst)
}
}.mapNotNull { it.toShortName("EmojiDataJson") }
if (shortNames.isEmpty())
error("emojiData ${src.string("unified")} has no shortName")
shortNames.forEach { emoji.addShortName(it) }
// remember the base emoji before emoji is reassigned to a variation
val parentEmoji = emoji
// skin tones
src.jsonObject("skin_variations")?.let { skinVariations ->
val parentName = shortNames.first()
val skinToneUsed = HashSet<Int>()
for ((k, data) in skinVariations.entries ) {
if (data !is JsonObject) continue
// calls itself recursively
// Walks the tone code points in list from idx on and collects one name suffix per
// code point (a suffix that is already collected is not repeated). At the last code
// point the variation emoji is looked up and registered.
fun handleCode(list: IntArray, idx: Int, parentSuffix: Array<String>, suffixIndex: Int) {
val code = list.elementAtOrNull(idx) ?: return
val modifier = skinToneModifiers[code]
?: error("missing skinToneModifier u${list[idx].toString(16)} for $parentName")
skinToneUsed.add(code)
val lastSuffix = modifier.suffixList[suffixIndex]
val suffix =
if (parentSuffix.contains(lastSuffix))
parentSuffix
else
arrayOf(*parentSuffix, lastSuffix)
if (idx < list.size - 1) {
handleCode(list, idx + 1, suffix, suffixIndex)
} else {
unified = data.string("unified")!!.toCodepointList("EmojiData(skinTone)")!!
key = unified.toKey("EmojiData(skinTone)")
emoji = emojiMap[key] ?: error("can't find emoji for $key")
emoji.addCode(unified)
shortNames
.mapNotNull { (it.name + suffix.joinToString("")).toShortName("EmojiData(skinTone)") }
.forEach { emoji.addShortName(it) }
emoji.addToneParent(parentEmoji)
}
}
// the key of the entry is the tone code list; it is processed once per suffix index
val codeList = k.toCodepointList("toneSpec")!!.list
for (suffixIndex in skinToneModifiers.values.first().suffixList.indices) {
handleCode(codeList, 0, emptyArray(), suffixIndex)
}
}
// only a warning: not every emoji uses all tone modifiers
if (skinToneUsed.size != skinToneModifiers.size) {
log.w("skin tone code not fully used: $parentName")
}
}
}
}
// Adds the short names found in ./old-emojione.json.
// Each entry maps a code to an object; the names come from its "alpha code" value and from
// its "aliases" value split at '|'. Names listed in ignoreEmojiOneShortNames are not added.
// Aborts with error() when a code cannot be parsed or has no emoji in emojiMap.
private fun readEmojiOne() {
    val cameFrom = "EmojiOneJson"
    val json = File("./old-emojione.json")
        .readAllBytes()
        .decodeUtf8()
        .decodeJsonObject()
    for ((strCode, item) in json.entries) {
        if (item !is JsonObject) continue

        // verify the code
        val code = strCode.toCodepointList(cameFrom)
        if (code == null) error("can't parse $strCode")
        val key = code.toKey(cameFrom)
        val emoji = emojiMap[key]
        if (emoji == null) error("missing emoji for $key")

        val alphaCode = item.string("alpha code")
        val aliases = item.string("aliases")?.split("|")
        val candidates = ArrayList<String>()
        if (alphaCode != null) candidates.add(alphaCode)
        if (aliases != null) candidates.addAll(aliases)

        val parsedNames = candidates.mapNotNull { it.toShortName(cameFrom) }
        for (shortName in parsedNames) {
            if (ignoreEmojiOneShortNames.contains(shortName.name)) continue
            emoji.addShortName(shortName)
        }
    }
}
// Applies the (category name, short name) pairs collected by readFixData(): the emoji that
// owns the short name is added to the named category.
// Aborts with error() when the category, the short name or the emoji cannot be resolved.
private fun fixCategory() {
    // short name -> emoji that carries it
    val emojiByName = HashMap<ShortName, Emoji>()
    emojiMap.values.forEach { owner ->
        owner.shortNames.forEach { emojiByName[it] = owner }
    }

    fixCategory.forEach { (name, strShortName) ->
        val category = categoryNames.find { it.name == name }
        if (category == null) error("fixCategory: missing category for $name")

        val shortName = strShortName.toShortName("fixCategory")
        if (shortName == null) error("fixCategory: can't parse $strShortName")

        val emoji = emojiByName[shortName]
        if (emoji == null) error("fixCategory: missing emoji for $strShortName")

        category.addEmoji(emoji, addingName = shortName.toString())
    }
}
// Shorthand for StringEscapeUtils.unescapeXml() of commons-text; readVendorCode() applies it to attribute names and values.
private fun String.unescapeXml() = StringEscapeUtils.unescapeXml(this)
// code -> descriptive texts collected by readVendorCode(); printed by checkCodeConflict() when a conflict is reported
private val vendorText = HashMap<CodepointList, ArrayList<String>>()
// vendor code -> (unicode code, vendor name), filled by readVendorCode(); checkCodeConflict() uses it to pick the preferred emoji
private val vendorUnicodeMap = HashMap<CodepointList, Pair<CodepointList, String>>()
// Reads the carrier (docomo / kddi / softbank) code tables of emoji4unicode.
//
// 1. emoji4unicode/data/emoji4unicode.xml: for every <e ...> element that has a "unicode"
//    attribute and an attribute named after the vendor, vendorUnicodeMap receives
//    vendor code -> (unicode code, vendor) and vendorText receives "name/text_fallback".
//    Vendor values that contain '>' are skipped. A vendor code that is already mapped is a
//    conflict, except that a softbank entry silently yields to an existing kddi entry.
// 2. emoji4unicode/data/<vendor>/carrier_data.xml: the "name_ja" description of every element
//    is appended to vendorText.
// Aborts with error() after step 2 when step 1 found a conflict.
private fun readVendorCode() {
    // Compiled once; the attribute pattern used to be recompiled for every element.
    val reElement = """<e([^>]+)""".toRegex()
    val reAttr = """(\w+)="([^"]+)"""".toRegex()

    // Returns the attributes of one <e ...> match as an unescaped name -> value map.
    fun parseAttrs(element: MatchResult): Map<String, String> {
        val attrs = HashMap<String, String>()
        for (mr in reAttr.findAll(element.groupValues[1])) {
            attrs[mr.groupValues[1].unescapeXml()] = mr.groupValues[2].unescapeXml()
        }
        return attrs
    }

    val vendors = arrayOf("docomo", "kddi", "softbank")
    var hasError = false

    // Read the combined xml.
    // It is walked once per vendor because of the priority between the vendors, but it is
    // parsed only once.
    val xml1 = File("emoji4unicode/data/emoji4unicode.xml")
        .readAllBytes()
        .decodeUtf8()
    val elements = reElement.findAll(xml1).map { parseAttrs(it) }.toList()
    for (vendor in vendors) {
        for (attrs in elements) {
            val unicode = attrs["unicode"]?.toCodepointList("emoji4unicode") ?: continue
            val strFrom = attrs[vendor] ?: continue
            if (strFrom.indexOf(">") != -1) continue
            val from = strFrom.toCodepointList("emoji4unicode") ?: continue
            val text = "${attrs["name"]}/${attrs["text_fallback"]}"
            vendorText.prepare(from) { ArrayList() }.add(text)
            val old = vendorUnicodeMap[from]
            if (old != null) {
                if (old.second == "kddi" && vendor == "softbank") continue
                hasError = true
                log.e("vendorUnicodeMap conflict. code=$from old=$old new=$unicode($vendor)")
            } else {
                vendorUnicodeMap[from] = Pair(unicode, vendor)
            }
        }
    }
    for (vendor in vendors) {
        // read the descriptions from the per-vendor file
        val xml = File("emoji4unicode/data/${vendor}/carrier_data.xml")
            .readAllBytes()
            .decodeUtf8()
        for (element in reElement.findAll(xml)) {
            val attrs = parseAttrs(element)
            val code = attrs["unicode"]?.toCodepointList("emoji4unicode") ?: continue
            attrs["name_ja"]?.let { vendorText.prepare(code) { ArrayList() }.add("$it($vendor)") }
        }
    }
    if (hasError) error("readVendorCode failed.")
}
// Set when checkCodeConflict() or checkShortNameConflict() finds a problem it cannot resolve; run() aborts when it is true.
private var hasConflict = false
// Checks for duplicate code => image mappings.
//
// A code that belongs to more than one emoji is resolved, in this order:
// 1. by fixCode: only the emoji whose resName equals the fix keeps the code (error() if none matches),
// 2. by vendorUnicodeMap, when every emoji got the code from the au/softbank/docomo vendor data:
//    only the emoji that also has the preferred unicode code keeps it.
// A code that is still ambiguous is logged and hasConflict is set.
// Afterwards an emoji that has no code at all receives the single fixCode entry that names its
// resName; no entry or several entries are reported as a conflict.
private fun checkCodeConflict() {
// code -> all emojis that claim it
val codeMap = HashMap<CodepointList, HashSet<Emoji>>()
for (emoji in emojiMap.values) {
for (code in emoji.codes) {
codeMap.prepare(code) { HashSet() }.add(emoji)
}
}
for ((code, emojis) in codeMap.entries.sortedBy { it.key }) {
// a code that belongs to exactly one emoji is fine
if (emojis.size == 1) continue
// 1. resolve with fixCode
val fixResName = fixCode[code]
if (fixResName != null) {
var found = false
for (emoji in emojis) {
if (emoji.resName == fixResName) {
found = true
} else {
emoji.codes.forEach {
if (it == code) log.w("fixCode: delete(1) $it for ${emoji.resName}")
}
emoji.removeCodeByCode(code)
}
}
if (!found) error("checkCodeConflict: missing emoji resName=$fixResName")
continue
}
// 2. resolve with vendorUnicodeMap, but only if all emojis got the code from vendor data
val onlyVendorCode = emojis.all { emoji ->
when (emoji.codes.find { it == code }?.from) {
"EmojiDataJson(au)", "EmojiDataJson(softbank)", "EmojiDataJson(docomo)" -> true
else -> false
}
}
if (onlyVendorCode) {
val preferCode = vendorUnicodeMap[code]?.first
if (preferCode != null) {
val targetEmoji = emojis.find { emoji -> emoji.codes.any { it == preferCode } }
if (targetEmoji != null) {
emojis.forEach { emoji ->
if (emoji != targetEmoji) {
emoji.codes.forEach {
if (it == code) log.w("fixCode: delete(2) $it for ${emoji.resName}")
}
emoji.removeCodeByCode(code)
}
}
continue
}
log.e("checkCodeConflict: can't use vendorUnicodeMap. code=$code, preferCode=$preferCode")
}
}
// could not be resolved
log.e("checkCodeConflict: code $code ${vendorText[code]} ${
emojis.joinToString(" ") {
"${it.resName}/${it.unified.toRawString()}"
}
}")
hasConflict = true
}
// check for emojis that have no code
for (emoji in emojiMap.values) {
if (emoji.codes.isNotEmpty()) continue
val fixes = fixCode.entries.filter { it.value == emoji.resName }
when (fixes.size) {
0 -> {
log.e("checkCodeConflict: emoji has no code. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
hasConflict = true
}
1 -> {
val fix = fixes.first()
val code = fix.key
emoji.addCode(code)
log.i("fixCode code=$code resName=${emoji.resName}")
}
else -> {
log.e("checkCodeConflict: multiple fix match for ${emoji.resName}")
hasConflict = true
}
}
}
}
// Checks that every short name resolves to exactly one emoji and that every emoji has a name.
//
// A short name that belongs to more than one emoji is resolved, in this order:
// 1. by fixName: only the emoji whose resName equals the fix keeps the name (error() if none matches),
// 2. by origin: the first emoji found that carries the name from "EmojiDataJson", then
//    "EmojiSpec", then "EmojiOneJson", then the category HTML, keeps the name.
// A name that is still ambiguous is logged and hasConflict is set.
// Afterwards an emoji without any short name receives the single fixName entry that names its
// resName; several entries are an error(), no entry is reported as a conflict.
private fun checkShortNameConflict() {
// short name -> all emojis that claim it
// NOTE(review): this map is built before the category-HTML names are removed below, so it can
// still list emojis that no longer carry the name - confirm that this order is intended.
val nameMap = HashMap<ShortName, HashSet<Emoji>>().apply {
for (emoji in emojiMap.values) {
for (name in emoji.shortNames) {
prepare(name) { HashSet() }.add(emoji)
}
}
}
// if an emoji has a shortName that did not come from the category HTML, its shortNames from the category HTML are not used
for (emoji in emojiMap.values) {
if (emoji.shortNames.any { it.cameFrom != fromCategoryHtml }) {
emoji.removeShortNameByCameFrom(fromCategoryHtml)
}
}
for ((name, emojis) in nameMap.entries.sortedBy { it.key }) {
// it is fine if the shortName resolves to exactly one emoji
if (emojis.size == 1) continue
// resolve with fixName
val fixResName = fixName[name]
if (fixResName != null) {
var found = false
for (emoji in emojis) {
if (emoji.resName == fixResName) {
found = true
} else {
emoji.removeShortName(name.name)
}
}
if (!found) error("checkShortNameConflict: missing emoji resName=$fixResName")
continue
}
// list of (emoji, cameFrom) pairs, taken from the current short names of all emojis
val froms = emojiMap.values
.flatMap { emoji -> emoji.shortNames.map { Pair(emoji, it) } }
.filter { it.second == name }
.map { Pair(it.first, it.second.cameFrom) }
// prioritise by where the ShortName came from
val preferFrom = froms.find { it.second == "EmojiDataJson" }
?: froms.find { it.second == "EmojiSpec" }
?: froms.find { it.second == "EmojiOneJson" }
?: froms.find { it.second == fromCategoryHtml }
if (preferFrom != null) {
// remove the shortName from the emojis of lower priority
var found = false
for (emoji in emojis) {
if (emoji == preferFrom.first) {
found = true
} else {
emoji.removeShortName(name.name)
}
}
if (!found) error("checkShortNameConflict: missing emoji ${preferFrom.first.key}")
continue
}
// could not be resolved
log.e("checkShortNameConflict: name $name froms=${froms.joinToString(",") { "${it.first.resName}${it.second}" }}")
hasConflict = true
}
// check for emojis that have no name
for (emoji in emojiMap.values) {
if (emoji.shortNames.isNotEmpty()) continue
val fix = fixName.entries.filter { it.value == emoji.resName }
if (fix.size > 1) error("checkShortNameConflict: multiple fix match for ${emoji.resName}")
if (fix.size == 1) {
emoji.addShortName(fix.first().key)
continue
}
log.e("checkShortNameConflict: emoji has no shortName. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
hasConflict = true
}
}
// Links tone variations to their base emoji.
//
// Step 1: for each short name of each emoji the tone suffixes are stripped; when the stripped
//         name belongs to a different emoji, that emoji is registered as tone parent.
// Step 2: each emoji that has exactly one tone parent is stored in the toneChildren of the
//         parent under its tone code.
// Problems found on the way are logged and make the function abort with error() at the end;
// a missing tone code or a clash inside toneChildren aborts immediately.
private fun fixToneParent() {
    var hasError = false

    // short name text -> emoji that carries it
    val emojiByName = HashMap<String, Emoji>()
    emojiMap.values.forEach { owner ->
        owner.shortNames.forEach { emojiByName[it.name] = owner }
    }

    // all tone suffixes, longest first
    val toneSuffixes = skinToneModifiers.values
        .flatMap { it.suffixList.toList() }
        .sortedByDescending { it.length }

    // names that map to "couplekiss" before any suffix is stripped
    val kissToneNames = setOf(
        "kiss_light_skin_tone",
        "kiss_medium_light_skin_tone",
        "kiss_medium_skin_tone",
        "kiss_medium_dark_skin_tone",
        "kiss_dark_skin_tone",
    )
    // stripped names whose base emoji has a different name
    val baseNameAliases = mapOf(
        "couple_with_heart_person_person" to "couple_with_heart",
        "kiss_person_person" to "couplekiss",
        "kiss_woman_woman" to "woman_kiss_woman",
        "kiss_woman_man" to "woman_kiss_man",
        "kiss_man_man" to "man_kiss_man",
    )

    fun String.removeToneSuffix(): String {
        if (this in kissToneNames) return "couplekiss"
        val stripped = toneSuffixes.fold(this) { acc, suffix -> acc.replace(suffix, "") }
        return baseNameAliases[stripped] ?: stripped
    }

    // step 1: find the parents
    for (emoji in emojiMap.values) {
        // Some of the tone emojis contain the name of another tone inside their own name,
        // so they are excluded to avoid a false detection.
        val resName = emoji.resName
        if (resName == "emj_1f3fc" || resName == "emj_1f3fe") continue

        for (shortName in emoji.shortNames) {
            val parent = emojiByName[shortName.name.removeToneSuffix()]
            when {
                parent == null -> {
                    log.e("${emoji.resName} $shortName looks like tone variation,but can't find parent.")
                    hasError = true
                }
                parent != emoji -> emoji.addToneParent(parent)
            }
        }
    }

    // step 2: register the children
    for (emoji in emojiMap.values) {
        val parents = emoji.toneParents
        if (parents.isEmpty()) continue
        if (parents.size > 1) {
            log.e("${emoji.resName} has many parents. ${parents.joinToString(",")}")
            hasError = true
            continue
        }
        // exactly one parent is left at this point
        val parent = parents.single()
        val toneCode = emoji.key.getToneCode("makeToneMap")
        if (toneCode == null) error("emoji $emoji has parent, but has no toneCode.")
        val old = parent.toneChildren[toneCode]
        if (old == null) {
            parent.toneChildren[toneCode] = emoji
        } else if (old != emoji) {
            error("conflict toneChildren. emoji ${parent.resName} has $old and $emoji.")
        }
    }

    if (hasError) error("toneParent error.")
}
// Writes the collected data to emoji_map.txt. Each line is "<prefix>:<value>"; the emoji and
// short-name lines are followed by "//" and the origin of the value.
//   svg: / drawable:  image resource of an emoji (starts the record of that emoji)
//   un:               unified code of the emoji
//   u:                another code of the emoji
//   sn: / s:          first / further short name of the emoji
//   cn:               category name, followed by one "c:" line per emoji in the category
//   t:                parent unified code, tone code, child unified code
// Emojis that have no code, or whose unified code is an ASCII emoji, are marked with skip and
// are left out of every section of the file.
private fun writeData() {
    val outFile = "emoji_map.txt"
    UnixPrinter(File(outFile)).use { writer ->
        // decide beforehand which emojis are skipped
        for (emoji in emojiMap.values.sortedBy { it.key }) {
            val codeSet = emoji.codeSet.sorted()
            if (codeSet.isEmpty()) {
                log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes")
                emoji.skip = true
            } else if (emoji.unified.list.isAsciiEmoji()) {
                // this used to repeat the "no valid codes" message of the branch above
                log.w("skip emoji ${emoji.unified} ${emoji.resName} that is an ASCII emoji")
                emoji.skip = true
            }
        }
        for (emoji in emojiMap.values.sortedBy { it.key }) {
            if (emoji.skip) continue
            // image resource ID
            val strResName = emoji.resName
            if (File("assets/$strResName.svg").isFile) {
                writer.println("svg:$strResName.svg//${emoji.imageFiles.first().second}")
            } else {
                writer.println("drawable:$strResName//${emoji.imageFiles.first().second}")
            }
            // unified
            writer.println("un:${emoji.unified.toRawString()}//${emoji.unified.from}")
            // Unicode sequences
            val codeSet = emoji.codeSet.sorted()
            for (code in codeSet) {
                if (code == emoji.unified) continue
                val raw = code.toRawString()
                if (raw.isEmpty()) error("too short code! ${emoji.resName}")
                writer.println("u:$raw//${code.from}")
            }
            // Write the relation between the image resource ID and the shortcodes.
            // The relation between shortcode and UTF-16 sequence is written because shortcodes
            // are converted to Unicode when a post is sent.
            val nameList = emoji.nameList.notEmpty()
                ?: error("missing shortName. ${emoji.resName}")
            nameList.forEachIndexed { index, triple ->
                val (_, name, froms) = triple
                val header = if (index == 0) "sn" else "s"
                writer.println("${header}:$name//${froms.joinToString(",")}")
            }
        }

        // Writes the header of the category and one line per emoji, and records the category
        // in each emoji.
        fun Category.printCategory(list: List<Emoji>) {
            writer.println("cn:${this.name}")
            for (emoji in list) {
                writer.println("c:${emoji.unified.toRawString()}")
                emoji.usedInCategory = this
            }
        }
        categoryNames.forEach { category ->
            category.printCategory(category.emojis.filter { !it.skip })
        }
        run {
            val category = categoryNames.find { it.name == "Others" }!!
            category.printCategory(
                emojiMap.values
                    // Skipped emojis have no record in the file, so they must not be listed
                    // here either (the regular categories above filter them the same way).
                    .filter { !it.skip && it.usedInCategory == null && it.toneParents.isEmpty() }
                    .sortedBy { it.shortNames.first() }
            )
        }
        // skin tones
        emojiMap.values
            .filter { it.toneChildren.isNotEmpty() }
            .sortedBy { it.key }
            .forEach { parent ->
                if (parent.usedInCategory == null) {
                    log.e("parent ${parent.resName} not used in any category!")
                }
                parent.toneChildren.entries
                    .toList()
                    .sortedBy { it.key }
                    .forEach eachChild@{
                        val child = it.value
                        if (child.skip) return@eachChild
                        writer.println("t:${parent.unified.toRawString()},${it.key.toRawString()},${child.unified.toRawString()}")
                    }
            }
        // complex tones: children whose tone code has more than one code point
        run {
            val category = categoryNames.find { it.name == "ComplexTones" }!!
            category.printCategory(
                emojiMap.values
                    .filter { it.toneChildren.isNotEmpty() }
                    .sortedBy { it.key }
                    .flatMap { parent ->
                        if (parent.usedInCategory == null) {
                            log.e("parent ${parent.resName} not used in any category!")
                        }
                        parent.toneChildren.entries
                            .toList()
                            .filter { it.key.list.size > 1 }
                            .sortedBy { it.key }
                            .map { it.value }
                            // same rule as for the "t:" lines: skipped children are not written
                            .filter { !it.skip }
                    }
            )
        }
    }
    log.d("wrote $outFile")
}
/**
 * Runs the whole generation: reads the correction data and the Emojipedia lists, scans and
 * copies the images, merges codes and short names from the other data sources, resolves the
 * conflicts and finally writes the output file.
 *
 * Aborts with an error when a conflict is left unresolved, or when one of the steps fails.
 */
suspend fun run() {
    // read the correction data
    readFixData()
    // read the per-version lists and the per-category lists from emojipedia
    val emojipediaData = HttpClient {
        install(HttpTimeout) {
            val t = 30000L
            requestTimeoutMillis = t
            connectTimeoutMillis = t
            socketTimeoutMillis = t
        }
    }.use { client ->
        readEmojipedia(client)
    }
    // look up the qualified codes of the emojis before the images are scanned
    readEmojipediaQualified(emojipediaData)
    // collect the emoji images from the sub folders
    scanEmojiImages()
    // copy the collected images
    copyImages()
    addEmojipediaShortnames()
    readVendorCode()
    readEmojiData()
    readEmojiOne()
    readCategoryShortName(emojipediaData)
    checkCodeConflict()
    checkShortNameConflict()
    fixToneParent()
    fixCategory()
    if (hasConflict) error("please fix conflicts.")
    // enumerate the kinds of characters that occur in the shortcodes
    // (a short name -> emoji map used to be filled here as well, but it was never read)
    val nameChars = HashSet<Char>()
    for (emoji in emojiMap.values) {
        for (shortName in emoji.shortNames) {
            for (c in shortName.name)
                nameChars.add(c)
        }
    }
    log.w("nameChars: [${nameChars.sorted().joinToString("")}]")
    writeData()
    log.d("codeCameFroms: ${Emoji.codeCameFroms.joinToString(",")}")
    log.d("nameCameFroms: ${Emoji.nameCameFroms.joinToString(",")}")
}
}
/**
 * Program entry point: logs the command line arguments and runs [App] to completion on the
 * calling thread.
 */
fun main(args: Array<String>) {
    runBlocking {
        val joinedArgs = args.joinToString(",")
        log.d("args=$joinedArgs")
        App().run()
    }
}