860 lines
28 KiB
Kotlin
860 lines
28 KiB
Kotlin
package jp.juggler.subwaytooter.emoji
|
|
|
|
import io.ktor.client.*
|
|
import io.ktor.client.features.*
|
|
import io.ktor.http.*
|
|
import jp.juggler.subwaytooter.emoji.model.*
|
|
import kotlinx.coroutines.runBlocking
|
|
import org.apache.commons.text.StringEscapeUtils
|
|
import org.intellij.lang.annotations.Language
|
|
import java.io.*
|
|
|
|
|
|
//pngフォルダにある画像ファイルを参照する
|
|
//emoji-data/emoji.json を参照する
|
|
//
|
|
//以下のjavaコードを生成する
|
|
//- UTF-16文字列 => 画像リソースID のマップ。同一のIDに複数のUTF-16文字列が振られることがある。
|
|
//- shortcode => 画像リソースID のマップ。同一のIDに複数のshortcodeが振られることがある。
|
|
//- shortcode中の区切り文字はハイフンもアンダーバーもありうる。出力データではアンダーバーに寄せる
|
|
//- アプリはshortcodeの探索時にキー文字列の区切り文字をアンダーバーに正規化すること
|
|
|
|
// Location of the cwebp executable that copyImages() launches to convert raster images to WebP.
const val pathCwebp = "C:/cygwin64/bin/cwebp.exe"
|
|
|
|
// Keys of an emoji.json entry that readEmojiData() reads as additional code points for the emoji.
val emojiDataCodepointsVendors = arrayOf("docomo", "au", "softbank", "google")
|
|
|
|
// Copies the content of [src] into [dst], replacing whatever [dst] held before.
//
// The data is streamed through a fixed-size buffer, so the file is never held in memory
// as a whole and no Java 9+ API is required.
// On any failure the (possibly partial) destination file is deleted and the cause is
// rethrown wrapped in an IOException that names both paths.
fun copyFile(dst: File, src: File) {
    try {
        FileInputStream(src).use { streamIn ->
            FileOutputStream(dst).use { streamOut ->
                streamIn.copyTo(streamOut)
            }
        }
    } catch (ex: Throwable) {
        // do not leave a truncated file behind
        dst.delete()
        throw IOException("copyFile failed. src=$src dst=$dst", ex)
    }
}
|
|
|
|
//fun svgToVectorDrawable(dst: File, src: File) {
|
|
//
|
|
// val tmp = ByteArrayOutputStream()
|
|
//
|
|
// // Write all the error message during parsing into SvgTree. and return here as getErrorLog().
|
|
// // We will also log the exceptions here.
|
|
// try {
|
|
// val svgTree = Svg2Vector.parse(src)
|
|
// svgTree.mScaleFactor = 24 / max(svgTree.w, svgTree.h)
|
|
// if (svgTree.canConvertToVectorDrawable()) {
|
|
// Svg2Vector.writeFile(tmp, svgTree)
|
|
// }
|
|
// val errorLog = svgTree.errorLog
|
|
// if (errorLog.isNotEmpty()) println("$src $errorLog")
|
|
// FileOutputStream(dst).use { outStream ->
|
|
// outStream.write(tmp.toByteArray())
|
|
// }
|
|
// } catch (e: Exception) {
|
|
// println("svgToVectorDrawable: ${e.message} ${src.canonicalPath}")
|
|
// }
|
|
//}
|
|
|
|
|
|
class App {
|
|
companion object {
    // cameFrom tag attached to short names scraped from the category pages
    const val cameFromCategory = "CategoryHtml"

    // matches a '#' comment running to the end of the line
    val reComment = Regex("""#.*""")

    // short names that are skipped while scraping the emojipedia pages
    val ignoreShortName = arrayOf("flag_for_texas_ustx")

    // image files whose path contains one of these fragments are skipped by the image scan
    private val ignoreImagePath = arrayOf(
        "LICENSE",
        // fe82b (toll-free dial) is drawn as a "?" flag in noto-emoji and cannot be used
        "noto-emoji/png/128/emoji_ufe82b.png",
        "noto-emoji/svg/emoji_ufe82b.svg",
        // stray file inside the mastodon folder
        "mastodon/public/emoji/sheet_10.png"
    )
}
|
|
|
|
|
|
// 修正用データを読む
|
|
// "code" entries of fix_code.txt: code point list => resource name of the emoji that keeps that code.
private val fixCode = HashMap<CodepointList, String>()
|
|
// "name" entries of fix_code.txt: short name => resource name of the emoji that keeps that name.
private val fixName = HashMap<ShortName, String>()
|
|
// "category" entries of fix_code.txt as (category enumId, short name) pairs, applied by fixCategory().
private val fixCategory = ArrayList<Pair<String, String>>()
|
|
// key (result of toKey) => unified code point list; filled from "unified" entries of fix_code.txt
// and from the emojipedia pages, and consulted when the image scan creates a new Emoji.
private val fixUnified = HashMap<CodepointList, CodepointList>()
|
|
|
|
// Loads the manual correction table ./fix_code.txt.
//
// Each line is "<type> <arg1> <value>" after '#' comments are stripped:
//   unified <code>           registers <code> in fixUnified under its key
//   code <code> <resName>    registers fixCode
//   name <shortName> <res>   registers fixName
//   category <enumId> <name> appends to fixCategory
// Lines that do not match, or that do not carry exactly one value, are ignored.
// An unknown type with exactly one value, or an unparsable code/name, aborts with an
// error that names the file and the line number.
private fun readFixData() {
    val fixFile = "./fix_code.txt"

    // compiled once here instead of once per input line
    val reLine = """\A(\w+)\s*([\w._-]+)\s*(.*)""".toRegex()
    val reValue = """([\w+-]+)""".toRegex()

    File(fixFile).forEachLine { lno, rawLine ->
        val line = rawLine
            .replace(reComment, "")
            .trim()

        val mr = reLine.find(line) ?: return@forEachLine
        val type = mr.groupValues[1]
        val arg1 = mr.groupValues[2]

        if (type == "unified") {
            val code = arg1.toCodepointList("fixUnified")
                ?: error("$fixFile $lno : can't parse code $arg1")
            fixUnified[code.toKey("fixUnified")] = code
            return@forEachLine
        }

        val data = reValue.findAll(mr.groupValues[3]).map { it.groupValues[1] }.toList()
        if (data.size != 1) return@forEachLine
        val value = data.first()

        when (type) {
            "code" -> {
                val code = arg1.toCodepointList("fixCode")
                    ?: error("$fixFile $lno : can't parse code $arg1")
                fixCode[code] = value
            }
            "name" -> {
                val name = arg1.toShortName("fixName")
                    ?: error("$fixFile $lno : can't parse name $arg1")
                fixName[name] = value
            }
            "category" -> Pair(arg1, value).addTo(fixCategory)
            else -> error("$fixFile $lno : bad fix_data type=$type")
        }
    }
}
|
|
|
|
// key => short name derived from the emojipedia link; filled by readQualified13_1().
val emojipediaShortNames = HashMap<CodepointList,ShortName>()
|
|
|
|
// noto-emoji file names lack the fe0f selector, so the qualified sequences of
// emoji 13.1 / 13.0 are collected from emojipedia beforehand.
// For every list item that carries an emoji span, the sequence is stored in fixUnified
// under its key; the link target, when present, becomes a short name candidate
// in emojipediaShortNames unless it is listed in ignoreShortName.
@Suppress("FunctionName")
private suspend fun readQualified13_1(client: HttpClient) {
    val cameFrom = "emojiQualified"
    val urls = arrayOf(
        "https://emojipedia.org/emoji-13.1/",
        "https://emojipedia.org/emoji-13.0/",
    )
    for (url in urls) {
        val root = client.cachedGetString(url, mapOf()).parseHtml(url)

        // drop the page parts whose lists must not be scanned
        for (className in arrayOf("sidebar", "categories")) {
            root.getElementsByClass(className).forEach { it.remove() }
        }

        val items = root.getElementsByTag("ul").flatMap { it.getElementsByTag("li") }
        for (li in items) {
            val span = li.getElementsByTag("span").find { it.hasClass("emoji") }
                ?: continue
            val code = span.text().listCodePoints().toCodepointList(cameFrom)!!
            val key = code.toKey(cameFrom)
            fixUnified[key] = code

            val href = li.getElementsByTag("a")!!.attr("href").notEmpty()
                ?: continue
            val shortName = href.replace("/", "").toShortName(cameFrom)
                ?: error("can't parse $href")

            if (ignoreShortName.none { it == shortName.name }) {
                emojipediaShortNames[key] = shortName
            }
        }
    }
}
|
|
|
|
// Attaches every short name collected from emojipedia to the emoji registered
// under the same key; keys without a registered emoji are skipped.
private fun addEmojipediaShortnames() {
    emojipediaShortNames.entries.forEach { entry ->
        emojiMap[entry.key]?.addShortName(entry.value)
    }
}
|
|
|
|
// 画像ファイルをスキャンして絵文字コードとファイルの対応表を作る
|
|
// マップのキーはvariation selectorとZWJが除去される
|
|
// key (code point list after toKey) => Emoji; populated by scanEmojiImages().
private val emojiMap = HashMap<CodepointList, Emoji>()
|
|
|
|
|
|
// Scans the image directories and fills emojiMap.
//
// Directories are visited in priority order; the first image registered for an emoji
// is the one copyImages() later uses. The code point list is parsed from the file name
// with a per-directory regex whose first group holds the code.
private fun scanEmojiImages() {

    // Scans one directory.
    //   cameFrom         tag recorded with every image and code found here
    //   dirPath          directory to list (sub directories are not descended into)
    //   codeSpec         regex applied to the file name; group 1 is the code
    //   unifiedQualifier maps the parsed code to the unified form used when a new Emoji
    //                    is created and fixUnified has no entry for its key
    fun HashMap<CodepointList, Emoji>.scanImageDir(
        cameFrom: String,
        dirPath: String,
        @Language("RegExp") codeSpec: String,
        unifiedQualifier: (CodepointList) -> CodepointList = { it }
    ) {
        val dir = File(dirPath)
        val reCodeSpec = codeSpec.toRegex()
        var countFound = 0
        var countCreate = 0

        // listFiles() returns null for a missing or unreadable directory: fail with the path
        val files = dir.listFiles()
            ?: error("scanImageDir: can't list files in $dir")

        for (imageFile in files) {
            if (!imageFile.isFile) continue

            // ignoreImagePath also covers the LICENSE files
            val unixPath = imageFile.path.replace("\\", "/")
            if (ignoreImagePath.any { unixPath.contains(it) }) continue

            val name = imageFile.name.replace("_border", "")

            val code = reCodeSpec.find(name)
                ?.groupValues
                ?.elementAtOrNull(1)
                ?.toCodepointList(cameFrom)
                ?: error("can't parse $name")

            // the map key is the code with variation selectors and ZWJ removed
            val key = code.toKey(cameFrom)
            val emoji = getOrPut(key) {
                ++countCreate
                Emoji(key, fixUnified[key] ?: unifiedQualifier(code))
            }
            emoji.imageFiles.add(Pair(imageFile, cameFrom))
            emoji.addCode(code)
            ++countFound
        }
        log.d("scanImageDir: found=$countFound,create=$countCreate, dir=$dir")
    }

    emojiMap.scanImageDir("override", "override", """([0-9A-Fa-f_-]+)\.""")
    emojiMap.scanImageDir("mastodonSVG", "mastodon/public/emoji", """([0-9A-Fa-f_-]+)\.""")
    emojiMap.scanImageDir("twemojiSvg", "twemoji/assets/svg/", """([0-9A-Fa-f_-]+)\.""")
    emojiMap.scanImageDir("notoSvg", "noto-emoji/svg", """emoji_u([0-9A-Fa-f_-]+)\.""") { code ->
        // append fe0f when the parsed code does not already end with it
        if (code.list.last() != 0xfe0f)
            "${code.toHex()}-fe0f".toCodepointList("notoSvgFix")!!
        else
            code
    }
    emojiMap.scanImageDir("notoPng", "noto-emoji/png/72", """emoji_u([0-9A-Fa-f_-]+)\.""")
    emojiMap.scanImageDir("emojiDataTw", "emoji-data/img-twitter-72", """([0-9A-Fa-f_-]+)\.""")
    emojiMap.scanImageDir("emojiDataGo", "emoji-data/img-google-136", """([0-9A-Fa-f_-]+)\.""")
    emojiMap.scanImageDir("emojiOne", "emojione/assets/svg", """([0-9A-Fa-f_-]+)\.""")
}
|
|
|
|
// // resName => Resource
|
|
// private val resNameMap = HashMap<ResName, Resource>()
|
|
//
|
|
// // map: code => resName => Resource
|
|
// private val codeMap = HashMap<CodepointList, HashMap<ResName, Resource>>()
|
|
//
|
|
// // map shortname => resName => Resource
|
|
// private val nameMap = HashMap<ShortName, HashMap<ResName, Resource>>()
|
|
|
|
// Assigns a resource name to every emoji and materialises its first image:
// SVG sources are copied to assets/<resName>.svg, everything else is converted
// to drawable-nodpi/<resName>.webp with cwebp. Existing output files are left alone.
private fun copyImages() {
    var countSvg = 0
    var countPng = 0
    emojiMap.values.forEach { emoji ->
        val resName = emoji.key.toResourceId()
        emoji.resName = resName

        val src = emoji.imageFiles.first().first
        when {
            src.name.endsWith("svg") -> {
                ++countSvg
                val dst = File("assets/$resName.svg")
                if (!dst.exists()) {
                    //svgToVectorDrawable(dst, src)
                    copyFile(dst, src)
                }
            }
            else -> {
                ++countPng
                val dst = File("drawable-nodpi/$resName.webp")
                if (!dst.exists()) {
                    val exitCode = ProcessBuilder(pathCwebp, src.path, "-quiet", "-o", dst.path)
                        .start()
                        .waitFor()
                    if (exitCode != 0) error("cwebp failed. dst=$dst src=$src")
                }
            }
        }
    }
    log.d("copyImage: countSvg=$countSvg, countPng=$countPng")
}
|
|
|
|
// private fun updateCodeMap() {
|
|
// codeMap.clear()
|
|
// resNameMap.values.forEach { res_info ->
|
|
// res_info.codePoints.forEach { cp ->
|
|
// codeMap.prepare(cp) { HashMap() }[res_info.res_name] = res_info
|
|
// codeMap.prepare(cp.removeZWJ()) { HashMap() }[res_info.res_name] = res_info
|
|
// }
|
|
// }
|
|
// }
|
|
//
|
|
// private fun updateNameMap() {
|
|
// nameMap.clear()
|
|
// resNameMap.values.forEach { res_info ->
|
|
// res_info.shortNames.forEach { name ->
|
|
// nameMap.prepare(name) { HashMap() }[res_info.res_name] = res_info
|
|
// }
|
|
// }
|
|
// }
|
|
|
|
// Merges ./emoji-data/emoji.json into emojiMap.
//
// For every entry: the unified code, its variations and the vendor codes are added to the
// emoji found under the entry's key, and the short names are registered.
// For every skin_variations item the tone emoji is looked up by its own unified code and
// gets the parent's short names with a tone suffix, plus toneParent / isToneVariation.
// An entry (or tone variation) without a matching emoji in emojiMap is an error.
private fun readEmojiData() {
    File("./emoji-data/emoji.json")
        .readAllBytes()
        .decodeUtf8()
        .decodeJsonArray()
        .objectList()
        .forEach { src ->
            // code points of the emoji
            val strUnified = src.string("unified")
            val unified = strUnified?.toCodepointList("EmojiDataJsonUnified")
                ?: error("readEmojiData: can't parse unified code $strUnified")
            val key = unified.toKey("EmojiDataJsonUnifiedKey")
            val parentEmoji = emojiMap[key] ?: error("can't find emoji for $key")

            if (parentEmoji.unified != unified) {
                log.d("readEmojiData: unified not match. emoji=${parentEmoji.unified}, emojiData=${unified}")
                parentEmoji.addCode(unified)
            }

            src.stringArrayList("variations")
                ?.mapNotNull { it.toCodepointList("EmojiDataJsonVariation") }
                ?.forEach { parentEmoji.addCode(it) }

            for (k in emojiDataCodepointsVendors) {
                src.string(k)?.toCodepointList("EmojiDataJson($k)")
                    ?.let { parentEmoji.addCode(it) }
            }

            // list of short names
            val shortNames = HashSet<String>().also { dst ->
                src.string("short_name")?.addTo(dst)
                src.stringArrayList("short_names")?.forEach {
                    it.addTo(dst)
                }
            }.mapNotNull { it.toShortName("EmojiDataJson") }

            if (shortNames.isEmpty())
                error("emojiData ${src.string("unified")} has no shortName")
            shortNames.forEach { parentEmoji.addShortName(it) }

            // skin tones
            src.jsonObject("skin_variations")?.let { skinVariations ->
                val parentName = shortNames.first()
                val skinToneUsed = HashSet<Int>()

                // Concatenates the suffixes of all modifiers in toneCodes (a suffix that is
                // already contained is not appended again) and records each modifier as used.
                // Returns null for an empty sequence, which has nothing to register.
                fun buildToneSuffix(toneCodes: IntArray, suffixIndex: Int): String? {
                    if (toneCodes.isEmpty()) return null
                    var suffix = ""
                    for (code in toneCodes) {
                        val modifier = skinToneModifiers[code]
                            ?: error("missing skinToneModifier u${code.toString(16)} for $parentName")
                        skinToneUsed.add(code)
                        val lastSuffix = modifier.suffixList[suffixIndex]
                        if (!suffix.contains(lastSuffix)) suffix += lastSuffix
                    }
                    return suffix
                }

                for ((toneSpec, data) in skinVariations.entries) {
                    if (data !is JsonObject) continue

                    val toneCodes = toneSpec.toCodepointList("toneSpec")!!.list
                    for (suffixIndex in skinToneModifiers.values.first().suffixList.indices) {
                        // The previous recursive form tested "idx <= list.size - 1", which is
                        // always true for a valid index, so the registration below was never
                        // reached. It now runs once the whole tone sequence has been consumed.
                        val suffix = buildToneSuffix(toneCodes, suffixIndex) ?: continue

                        val strToneUnified = data.string("unified")
                        val toneUnified = strToneUnified?.toCodepointList("EmojiData(skinTone)")
                            ?: error("readEmojiData: can't parse skin tone unified code $strToneUnified for $parentName")
                        val toneKey = toneUnified.toKey("EmojiData(skinTone)")
                        val toneEmoji = emojiMap[toneKey] ?: error("can't find emoji for $toneKey")

                        toneEmoji.addCode(toneUnified)
                        shortNames
                            .mapNotNull { (it.name + suffix).toShortName("EmojiData(skinTone)") }
                            .forEach { toneEmoji.addShortName(it) }

                        toneEmoji.toneParent = parentEmoji
                        toneEmoji.isToneVariation = true
                    }
                }
                if (skinToneUsed.size != skinToneModifiers.size) {
                    log.w("skin tone code not fully used: $parentName")
                }
            }
        }
}
|
|
|
|
// Adds the short names found in ./old-emojione.json ("alpha code" plus the
// '|'-separated "aliases") to the emoji registered for each entry's code.
// An unparsable code, an unknown emoji or an entry without any usable name is an error.
private fun readEmojione() {
    val cameFrom = "EmojiOneJson"
    val root = File("./old-emojione.json")
        .readAllBytes()
        .decodeUtf8()
        .decodeJsonObject()

    for ((strCode, item) in root.entries) {
        if (item !is JsonObject) continue

        // resolve the emoji for this code
        val code = strCode.toCodepointList(cameFrom)
            ?: error("can't parse $strCode")
        val key = code.toKey(cameFrom)
        val emoji = emojiMap[key] ?: error("missing emoji for $key")

        // primary name first, then the aliases
        val names = ArrayList<String>()
        val alphaCode = item.string("alpha code")
        if (alphaCode != null) names.add(alphaCode)
        val aliases = item.string("aliases")
        if (aliases != null) names.addAll(aliases.split("|"))

        val shortNames = names.mapNotNull { it.toShortName(cameFrom) }
        if (shortNames.isEmpty()) error("readEmojione: missing name for code $strCode")
        for (shortName in shortNames) emoji.addShortName(shortName)
    }
}
|
|
|
|
// Picks up short names from the Unicode emoji 13.1 sequence files.
//
// Every data line is "<code> ; <type> ; <description>" ('#' comments are stripped).
// Lines describing a code range ("..") are skipped. The lower-cased description, with
// runs of non-word characters collapsed to '_', becomes a short name of the emoji
// registered for the code; a code without a registered emoji is an error.
private suspend fun readEmojiSpec(client: HttpClient) {
    // compiled once instead of once per input line
    val reLineBreak = """[\x0d\x0a]""".toRegex()
    val reNonWord = """[^\w\d]+""".toRegex()

    // applied in this order: the "medium-..." forms must be handled before the plain ones
    val toneNames = arrayOf(
        "medium-light skin tone" to "medium_light_skin_tone",
        "medium skin tone" to "medium_skin_tone",
        "medium-dark skin tone" to "medium_dark_skin_tone",
        "light skin tone" to "light_skin_tone",
        "dark skin tone" to "dark_skin_tone"
    )

    val urls = arrayOf(
        "https://unicode.org/Public/emoji/13.1/emoji-sequences.txt",
        "https://unicode.org/Public/emoji/13.1/emoji-zwj-sequences.txt"
    )
    for (url in urls) {
        for (rawLine in client.cachedGetString(url, mapOf()).split(reLineBreak)) {
            val line = rawLine.replace(reComment, "").trim()
            if (line.isEmpty()) continue

            val cols = line.split(";", limit = 3).map { it.trim() }
            if (cols.size != 3) continue

            val (strCode, _, descriptionSpec) = cols
            if (strCode.contains("..")) continue

            val code = strCode.toCodepointList("EmojiSpec")
                ?: error("readEmojiSpec: can't parse code $strCode in $url")
            val key = code.toKey("EmojiSpec")
            val emoji = emojiMap[key] ?: error("can't find emoji for $key")

            val strShortName = toneNames
                .fold(descriptionSpec.toLowerCase()) { acc, (from, to) -> acc.replace(from, to) }
                .replace(reNonWord, "_")

            val shortName = strShortName.toShortName("EmojiSpec")
                ?: error("can't parse $strShortName")

            emoji.addShortName(shortName)
        }
    }
}
|
|
|
|
|
|
// Per category: fetches the category page and registers every listed emoji with the
// category, using the short name derived from the item's link.
// Categories without a URL are skipped, as are short names listed in ignoreShortName.
// A page without an "emoji-list" element, an unparsable item or an emoji that is not
// in emojiMap is an error.
private suspend fun readCategoryShortName(client: HttpClient) {
    for (category in categoryNames.values) {
        val url = category.url ?: continue
        val root = client.cachedGetString(url, mapOf()).parseHtml(url)

        // first() yields null when the page has no such element: report the page instead of an NPE
        val list = root.getElementsByClass("emoji-list").first()
            ?: error("readCategoryShortName: missing emoji-list element in $url")

        for (node in list.getElementsByTag("li")) {
            val href = node.getElementsByTag("a").attr("href")
            val shortName = href
                .replace("/", "")
                .toShortName(cameFromCategory)
                ?: error("can't parse $href")

            if (ignoreShortName.any { shortName.name == it }) continue

            val text = node.getElementsByClass("emoji").text()
            val code = text.listCodePoints()
                .takeIf { it.isNotEmpty() }?.toCodepointList(cameFromCategory)
                ?: error("can't parse code from $text")

            val key = code.toKey(cameFromCategory)
            val emoji = emojiMap[key]
                ?: error("can't find emoji for $url $shortName $key $text")
            category.addEmoji(emoji, allowDuplicate = true, addingName = shortName.toString())
        }
    }
}
|
|
|
|
// Applies the "category" corrections: each (enumId, short name) pair adds the emoji that
// currently owns the short name to the category with that enumId.
// An unknown category, an unparsable name or a name without an emoji is an error.
private fun fixCategory() {
    // short name => emoji, over all registered emoji
    val nameMap = HashMap<ShortName, Emoji>()
    emojiMap.values.forEach { emoji ->
        emoji.shortNames.forEach { shortName -> nameMap[shortName] = emoji }
    }

    fixCategory.forEach { (enumId, strShortName) ->
        val category = categoryNames.values.find { it.enumId == enumId }
            ?: error("fixCategory: missing category for $enumId")
        val shortName = strShortName.toShortName("fixCategory")
            ?: error("fixCategory: can't parse $strShortName")
        val emoji = nameMap[shortName]
            ?: error("fixCategory: missing emoji for $strShortName")
        category.addEmoji(emoji, addingName = shortName.toString())
    }
}
|
|
|
|
// Shorthand for commons-text StringEscapeUtils.unescapeXml on the receiver string.
private fun String.unescapeXml() = StringEscapeUtils.unescapeXml(this)
|
|
|
|
// code => descriptive texts collected by readVendorCode(); only used in conflict log output.
private val vendorText = HashMap<CodepointList, ArrayList<String>>()
|
|
// vendor code => (unicode code, vendor name) taken from emoji4unicode.xml by readVendorCode().
private val vendorUnicodeMap = HashMap<CodepointList, Pair<CodepointList, String>>()
|
|
|
|
// Reads the emoji4unicode data.
//
// 1. emoji4unicode.xml: for each vendor (in priority order) the vendor code of every <e>
//    element is mapped to the element's unicode code in vendorUnicodeMap, and a
//    "name/text_fallback" text is recorded in vendorText. A vendor code that is already
//    mapped is a conflict, except for a softbank code already claimed by kddi.
// 2. <vendor>/carrier_data.xml: the Japanese names are recorded in vendorText.
// Any conflict makes the function fail after both passes.
private fun readVendorCode() {
    val vendors = arrayOf("docomo", "kddi", "softbank")

    // compiled once instead of once per vendor / per element
    val reElement = """<e([^>]+)""".toRegex()
    val reAttr = """(\w+)="([^"]+)"""".toRegex()

    // Returns the attribute map of every <e ...> element found in xml, in document order.
    fun parseElements(xml: String): List<Map<String, String>> =
        reElement.findAll(xml).map { element ->
            val attrs = HashMap<String, String>()
            reAttr.findAll(element.groupValues[1]).forEach { attr ->
                attrs[attr.groupValues[1].unescapeXml()] = attr.groupValues[2].unescapeXml()
            }
            attrs
        }.toList()

    var hasError = false

    // The merged xml is parsed once; it is walked once per vendor because of the priority order.
    val mergedElements = parseElements(
        File("emoji4unicode/data/emoji4unicode.xml")
            .readAllBytes()
            .decodeUtf8()
    )
    for (vendor in vendors) {
        for (attrs in mergedElements) {
            val unicode = attrs["unicode"]?.toCodepointList("emoji4unicode") ?: continue
            val strFrom = attrs[vendor] ?: continue
            if (strFrom.contains(">")) continue
            val from = strFrom.toCodepointList("emoji4unicode") ?: continue

            val text = "${attrs["name"]}/${attrs["text_fallback"]}"
            vendorText.prepare(from) { ArrayList() }.add(text)

            val old = vendorUnicodeMap[from]
            if (old == null) {
                vendorUnicodeMap[from] = Pair(unicode, vendor)
                continue
            }
            // a softbank code that kddi already claimed keeps the kddi mapping
            if (old.second == "kddi" && vendor == "softbank") continue
            hasError = true
            log.e("vendorUnicodeMap conflict. code=$from old=$old new=$unicode($vendor)")
        }
    }

    for (vendor in vendors) {
        // descriptions come from the per-vendor files
        val xml = File("emoji4unicode/data/${vendor}/carrier_data.xml")
            .readAllBytes()
            .decodeUtf8()

        for (attrs in parseElements(xml)) {
            val code = attrs["unicode"]?.toCodepointList("emoji4unicode")
                ?: continue
            attrs["name_ja"]?.let { vendorText.prepare(code) { ArrayList() }.add("$it($vendor)") }
        }
    }

    if (hasError) error("readVendorCode failed.")
}
|
|
|
|
// Set by checkCodeConflict() / checkShortNameConflict() when something could not be resolved;
// run() aborts before writing output when it is true.
private var hasConflict = false
|
|
|
|
// Looks for codes that are claimed by more than one emoji and resolves them:
//  1. a fixCode entry names the resource that keeps the code,
//  2. a code that every claimant got from a vendor column goes to the emoji that also
//     owns the unicode code vendorUnicodeMap prefers,
//  3. anything else is logged and flagged in hasConflict.
// Afterwards every emoji left without any code gets the single fixCode entry that names
// its resource, or is flagged as a conflict.
private fun checkCodeConflict() {

    // code => all emoji that carry it
    val codeMap = HashMap<CodepointList, HashSet<Emoji>>()
    emojiMap.values.forEach { emoji ->
        emoji.codes.forEach { code -> codeMap.prepare(code) { HashSet() }.add(emoji) }
    }

    for ((code, emojis) in codeMap.entries.sortedBy { it.key }) {
        if (emojis.size == 1) continue

        // 1. explicit fix
        val fixResName = fixCode[code]
        if (fixResName != null) {
            val (kept, dropped) = emojis.partition { it.resName == fixResName }
            for (emoji in dropped) {
                emoji.codes.forEach {
                    if (it == code) log.w("fixCode: delete(1) $it for ${emoji.resName}")
                }
                emoji.removeCodeByCode(code)
            }
            if (kept.isEmpty()) error("checkCodeConflict: missing emoji resName=$fixResName")
            continue
        }

        // 2. vendor-only codes
        val onlyVendorCode = emojis.all { emoji ->
            when (emoji.codes.find { it == code }?.from) {
                "EmojiDataJson(au)", "EmojiDataJson(softbank)", "EmojiDataJson(docomo)" -> true
                else -> false
            }
        }
        val preferCode = if (onlyVendorCode) vendorUnicodeMap[code]?.first else null
        if (preferCode != null) {
            val targetEmoji = emojis.find { emoji -> emoji.codes.any { it == preferCode } }
            if (targetEmoji != null) {
                for (emoji in emojis) {
                    if (emoji == targetEmoji) continue
                    emoji.codes.forEach {
                        if (it == code) log.w("fixCode: delete(2) $it for ${emoji.resName}")
                    }
                    emoji.removeCodeByCode(code)
                }
                continue
            }
            log.e("checkCodeConflict: can't use vendorUnicodeMap. code=$code, preferCode=$preferCode")
        }

        // 3. unresolved
        val claimants = emojis.joinToString(" ") {
            "${it.resName}/${it.unified.toRawString()}"
        }
        log.e("checkCodeConflict: code $code ${vendorText[code]} $claimants")
        hasConflict = true
    }

    // emoji that ended up without any code
    for (emoji in emojiMap.values) {
        if (emoji.codes.isNotEmpty()) continue
        val fixes = fixCode.entries.filter { it.value == emoji.resName }
        when (fixes.size) {
            0 -> {
                log.e("checkCodeConflict: emoji has no code. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
                hasConflict = true
            }
            1 -> {
                val code = fixes.first().key
                emoji.addCode(code)
                log.i("fixCode code=$code resName=${emoji.resName}")
            }
            else -> {
                log.e("checkCodeConflict: multiple fix match for ${emoji.resName}")
                hasConflict = true
            }
        }
    }
}
|
|
|
|
// Makes every short name resolve to exactly one emoji:
//  1. a fixName entry names the resource that keeps the short name,
//  2. otherwise the emoji whose short name came from the most trusted source keeps it,
//  3. anything else is logged and flagged in hasConflict.
// Afterwards every emoji left without a short name gets the single fixName entry that
// names its resource, or is flagged as a conflict.
private fun checkShortNameConflict() {
    // short name => all emoji that carry it (snapshot taken before the clean-up below)
    val nameMap = HashMap<ShortName, HashSet<Emoji>>()
    emojiMap.values.forEach { emoji ->
        emoji.shortNames.forEach { name -> nameMap.prepare(name) { HashSet() }.add(emoji) }
    }

    // category-page names are only kept for emoji that have no name from any other source
    emojiMap.values
        .filter { emoji -> emoji.shortNames.any { it.cameFrom != cameFromCategory } }
        .forEach { it.removeShortNameByCameFrom(cameFromCategory) }

    // sources of a short name, most trusted first
    val fromPriority = arrayOf("EmojiDataJson", "EmojiSpec", "EmojiOneJson", cameFromCategory)

    for ((name, emojis) in nameMap.entries.sortedBy { it.key }) {
        // fine when the short name already resolves to a single emoji
        if (emojis.size == 1) continue

        // 1. explicit fix
        val fixResName = fixName[name]
        if (fixResName != null) {
            val (kept, dropped) = emojis.partition { it.resName == fixResName }
            dropped.forEach { it.removeShortName(name.name) }
            if (kept.isEmpty()) error("checkShortNameConflict: missing emoji resName=$fixResName")
            continue
        }

        // (emoji, cameFrom) pairs of everyone currently carrying the name
        val froms = emojiMap.values.flatMap { emoji ->
            emoji.shortNames
                .filter { it == name }
                .map { Pair(emoji, it.cameFrom) }
        }

        // 2. prefer by origin of the short name
        val preferFrom = fromPriority
            .asSequence()
            .mapNotNull { from -> froms.find { it.second == from } }
            .firstOrNull()
        if (preferFrom != null) {
            // the name is removed from every emoji except the preferred one
            val (kept, dropped) = emojis.partition { it == preferFrom.first }
            dropped.forEach { it.removeShortName(name.name) }
            if (kept.isEmpty()) error("checkShortNameConflict: missing emoji ${preferFrom.first.key}")
            continue
        }

        // 3. unresolved
        log.e("checkShortNameConflict: name $name froms=${froms.joinToString(",")}")
        hasConflict = true
    }

    // emoji that ended up without any short name
    for (emoji in emojiMap.values) {
        if (emoji.shortNames.isNotEmpty()) continue
        val fix = fixName.entries.filter { it.value == emoji.resName }
        when (fix.size) {
            0 -> {
                log.e("checkShortNameConflict: emoji has no shortName. resName=${emoji.resName},cameFrom=${emoji.imageFiles.first().second}")
                hasConflict = true
            }
            1 -> emoji.addShortName(fix.first().key)
            else -> error("checkShortNameConflict: multiple fix match for ${emoji.resName}")
        }
    }
}
|
|
|
|
// Runs the whole conversion: loads the fix table and the qualified sequences, scans and
// copies the images, merges the name/code sources, resolves conflicts and finally writes
// EmojiMapInitializer.java. Aborts before writing when a conflict is left unresolved.
suspend fun run() {
    val timeoutMillis = 30000L
    HttpClient {
        install(HttpTimeout) {
            requestTimeoutMillis = timeoutMillis
            connectTimeoutMillis = timeoutMillis
            socketTimeoutMillis = timeoutMillis
        }
    }.use { client ->

        readFixData()

        readQualified13_1(client)

        scanEmojiImages()
        copyImages()

        readVendorCode()
        readEmojiData()
        readEmojione()
        readEmojiSpec(client)
        readCategoryShortName(client)

        checkCodeConflict()

        checkShortNameConflict()
        fixCategory()

        if (hasConflict) error("please fix conflicts.")

        // enumerate the kinds of characters that occur in short codes
        val nameChars = HashSet<Char>()
        val nameMap = HashMap<ShortName, Emoji>()
        emojiMap.values.forEach { emoji ->
            emoji.shortNames.forEach { shortName ->
                nameMap[shortName] = emoji
                shortName.name.forEach { c -> nameChars.add(c) }
            }
        }
        log.w("nameChars: [${nameChars.sorted().joinToString("")}]")

        // write the Java source
        val outFile = "EmojiMapInitializer.java"
        JavaCodeWriter(File(outFile)).use { jcw ->
            jcw.println(
                """
                package jp.juggler.emoji;

                public final class EmojiMapInitializer {
                """.trimIndent()
            )

            val sortedEmojis = emojiMap.values.sortedBy { it.key }

            for (emoji in sortedEmojis) {
                val codeSet = emoji.codeSet
                // emoji without codes, and emoji made only of ASCII codes, are not emitted
                if (codeSet.isEmpty() || emoji.unified.list.isAsciiEmoji()) {
                    log.w("skip emoji ${emoji.unified} ${emoji.resName} that has no valid codes")
                    emoji.skip = true
                }
                if (emoji.skip) continue

                // image resource <=> unicode sequence
                val strResName = emoji.resName
                for (code in codeSet) {
                    val javaChars = code.makeUtf16()

                    if (javaChars.isEmpty()) error("too short code! ${emoji.resName}")

                    val line = if (File("assets/$strResName.svg").isFile) {
                        "e.code(\"$javaChars\", \"$strResName.svg\"); // ${code.from} ${emoji.imageFiles.first().second}"
                    } else {
                        "e.code(\"$javaChars\", R.drawable.$strResName); // ${code.from} ${emoji.imageFiles.first().second}"
                    }
                    jcw.addCode(line)
                }
            }

            for (emoji in sortedEmojis) {
                if (emoji.skip) continue

                // unicode form a short code is converted to
                val unified = emoji.unified

                // short code <=> image resource; also short code <=> UTF-16 sequence,
                // which is used to turn short codes into unicode when posting
                val names = emoji.shortNames.map { it.name }.toSet().sorted()
                for (name in names) {
                    val froms = emoji.shortNames
                        .filter { it.name == name }
                        .map { it.cameFrom }
                        .sorted()
                    val javaChars = unified.makeUtf16()
                    jcw.addCode("e.name(\"${name}\", \"$javaChars\"); // ${froms.joinToString(",")}")
                }
            }

            categoryNames.values.forEach { category ->
                category.eachEmoji { emoji ->
                    if (emoji.skip) return@eachEmoji
                    val shortName = emoji.shortNames.first()
                    jcw.addCode("e.category( EmojiMap.${category.enumId}, \"${shortName.name}\"); // ${shortName.cameFrom}")
                }
            }

            // everything that is in no category and is not a tone variation
            val enumId = "CATEGORY_OTHER"
            emojiMap.values
                .filter { it.usedInCategory == null && !it.isToneVariation }
                .sortedBy { it.shortNames.first() }
                .filterNot { it.skip }
                .forEach { emoji ->
                    val shortName = emoji.shortNames.first()
                    jcw.addCode("e.category( EmojiMap.$enumId, \"${shortName.name}\"); // ${shortName.cameFrom}")
                }

            jcw.closeFunction()

            jcw.println("\tstatic void initAll(EmojiMap e){")
            jcw.println("\t\te.utf16MaxLength=$utf16_max_length;")
            for (i in 1..jcw.functionsCount) {
                jcw.println("\t\tinit$i(e);")
            }
            jcw.println("\t}")

            jcw.println("}")
        }

        log.d("wrote $outFile")

        log.d("codeCameFroms: ${Emoji.codeCameFroms.joinToString(",")}")

        // shortname => unicode
        // JsonArray()
        //     .also { dst ->
        //         for ((shortName, rh) in nameMap.entries.sortedBy { it.key }) {
        //             val resInfo = rh.values.first()
        //             dst.add(jsonObject("shortcode" to shortName.name, "unicode" to resInfo.unified))
        //         }
        //     }
        //     .toString(2)
        //     .encodeUtf8()
        //     .saveTo(File("shortcode-emoji-data-and-old-emojione2.json"))
    }
}
|
|
}
|
|
|
|
|
|
// Program entry point: logs the command line arguments and runs the converter to completion.
fun main(args: Array<String>) {
    runBlocking {
        log.d("args=${args.joinToString(",")}")
        App().run()
    }
}
|