emojiConverterの依存関係を更新

This commit is contained in:
tateisu 2022-07-14 13:05:23 +09:00
parent 6ac50da328
commit f7564dcecf
9 changed files with 1207 additions and 1213 deletions

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<bytecodeTargetLevel target="14" />
<bytecodeTargetLevel target="1.8" />
</component>
</project>

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_14" default="true" project-jdk-name="14" project-jdk-type="JavaSDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="14" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

View File

@ -1,6 +1,6 @@
plugins {
id 'java'
id 'org.jetbrains.kotlin.jvm' version '1.5.10'
id 'org.jetbrains.kotlin.jvm' version '1.7.10'
}
group 'jp.juggler'
@ -12,22 +12,22 @@ repositories {
dependencies {
implementation fileTree(include: ['*.jar'], dir: 'src/lib')
implementation "com.google.guava:guava:28.1-jre"
implementation "com.google.guava:guava:31.1-jre"
implementation "org.jetbrains.kotlin:kotlin-stdlib"
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.7.0'
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
def ktorVersion="1.5.0"
def ktorVersion="2.0.3"
implementation "io.ktor:ktor-client-core:$ktorVersion"
implementation "io.ktor:ktor-client-cio:$ktorVersion"
implementation "io.ktor:ktor-client-features:$ktorVersion"
// implementation "io.ktor:ktor-client-features:$ktorVersion"
implementation "io.ktor:ktor-client-encoding:$ktorVersion"
// StringEscapeUtils.unescapeHtml4
implementation "org.apache.commons:commons-text:1.9"
// HTML5パーサ
implementation "org.jsoup:jsoup:1.13.1"
implementation "org.jsoup:jsoup:1.14.3"
}
test {

View File

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

View File

@ -860,7 +860,7 @@ private fun Writer.writeQuote(string: String): Writer {
in '\u0080' until '\u00a0',
in '\u2000' until '\u2100' -> {
write("\\u")
val hexCode: String = Integer.toHexString(c.toInt())
val hexCode: String = Integer.toHexString(c.code)
write("0000", 0, 4 - hexCode.length)
write(hexCode)
}
@ -1047,7 +1047,7 @@ fun Writer.writeJsonValue(
}
}
value is Char -> writeJsonValue(indentFactor, indent, value.toInt())
value is Char -> writeJsonValue(indentFactor, indent, value.code)
value is String -> writeQuote(value)
value is Enum<*> -> writeQuote(value.name)

View File

@ -1,7 +1,7 @@
package jp.juggler.subwaytooter.emoji
import io.ktor.client.*
import io.ktor.client.features.*
import io.ktor.client.plugins.HttpTimeout
import jp.juggler.subwaytooter.emoji.model.*
import kotlinx.coroutines.runBlocking
import org.apache.commons.text.StringEscapeUtils
@ -11,7 +11,6 @@ import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.IOException
//pngフォルダにある画像ファイルを参照する
//emoji-data/emoji.json を参照する
//
@ -29,7 +28,7 @@ fun copyFile(dst: File, src: File) {
try {
FileInputStream(src).use { streamIn ->
FileOutputStream(dst).use { streamOut ->
streamOut.write(streamIn.readAllBytes())
streamOut.write(streamIn.readBytes())
}
}
} catch (ex: Throwable) {
@ -62,7 +61,7 @@ fun copyFile(dst: File, src: File) {
class App {
companion object{
companion object {
const val fromCategoryHtml = "CategoryHtml"
@ -145,15 +144,16 @@ class App {
// Emojipediaのバージョン別一覧とカテゴリ別一覧を読んでJSONに保存しておく
// サイトにアクセスできなくなったら困るからな…
@Suppress("FunctionName")
private suspend fun readEmojipedia(client: HttpClient) :JsonObject {
private suspend fun readEmojipedia(client: HttpClient): JsonObject {
val fileEmojipedia = File("Emojipedia.json")
if( fileEmojipedia.isFile) return fileEmojipedia.readAllBytes().decodeUtf8().decodeJsonObject()
if (fileEmojipedia.isFile) return fileEmojipedia.readAllBytes().decodeUtf8().decodeJsonObject()
val dstRoot = JsonObject()
val dstQualified = JsonArray().also{ dstRoot["qualifiedCode"] = it }
val dstQualified = JsonArray().also { dstRoot["qualifiedCode"] = it }
for (url in arrayOf(
"https://emojipedia.org/emoji-14.0/",
"https://emojipedia.org/emoji-13.1/",
"https://emojipedia.org/emoji-13.0/",
"https://emojipedia.org/emoji-12.1/",
@ -166,6 +166,7 @@ class App {
"https://emojipedia.org/emoji-1.0/",
)) {
val root = client.cachedGetString(url, mapOf()).parseHtml(url)
?: error("parseHtml returns null!")
root.getElementsByClass("sidebar").forEach { it.remove() }
root.getElementsByClass("categories").forEach { it.remove() }
@ -173,7 +174,7 @@ class App {
for (list in root.getElementsByTag("ul")) {
for (li in list.getElementsByTag("li")) {
val href = li.getElementsByTag("a")?.attr("href")
val href = li.getElementsByTag("a").attr("href")
.notEmpty() ?: continue
val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text()
@ -184,14 +185,16 @@ class App {
}
}
val dstCategory = JsonObject().also{ dstRoot["categories"]=it}
val dstCategory = JsonObject().also { dstRoot["categories"] = it }
categoryNames.forEach { category ->
if (category.url == null) return@forEach
val dstCategoryItems = JsonArray().also { dstCategory[category.name] = it }
val root = client.cachedGetString(category.url, mapOf()).parseHtml(category.url)
?: error("parseHtml returns null!")
val list = root.getElementsByClass("emoji-list").first()
?: error("getElementsByClass(emoji-list) failed.")
for (li in list.getElementsByTag("li")) {
val href = li.getElementsByTag("a").attr("href")
.notEmpty() ?: continue
@ -211,7 +214,7 @@ class App {
// noto-emoji のファイル名はfe0fが欠けている
// あらかじめEmojipediaのデータを参照してqualified name の一覧を作っておく
private fun readEmojipediaQualified(root:JsonObject) {
private fun readEmojipediaQualified(root: JsonObject) {
val ignoreName2 = setOf(
"zero_width_joiner",
@ -224,7 +227,7 @@ class App {
var countError = 0
for( cols in root.jsonArray("qualifiedCode")!!.filterIsInstance<JsonArray>()) {
for (cols in root.jsonArray("qualifiedCode")!!.filterIsInstance<JsonArray>()) {
val spanText = cols[0] as String
var href = cols[1] as String
@ -272,7 +275,7 @@ class App {
// hrefList.sortedBy{ it.first }.forEach { log.d("href=${it.first} ${it.second}") }
if(countError>0) error("please fix unified codes. countError=$countError")
if (countError > 0) error("please fix unified codes. countError=$countError")
}
private fun addEmojipediaShortnames() {
@ -282,10 +285,10 @@ class App {
}
// Emojipediaのデータを使ってカテゴリ別に絵文字一覧を用意する
private fun readCategoryShortName(root:JsonObject) {
for(category in categoryNames){
private fun readCategoryShortName(root: JsonObject) {
for (category in categoryNames) {
val list = root.jsonObject("categories")?.jsonArray(category.name) ?: continue
for( cols in list.filterIsInstance<JsonArray>()){
for (cols in list.filterIsInstance<JsonArray>()) {
val spanText = cols[0] as String
val href = cols[1] as String
@ -318,8 +321,8 @@ class App {
var countFound = 0
var countCreate = 0
var countError = 0
val files = dir.listFiles() ?:error("listFiles returns null. $dir")
for( imageFile in files){
val files = dir.listFiles() ?: error("listFiles returns null. $dir")
for (imageFile in files) {
if (!imageFile.isFile) continue
val unixPath = imageFile.path.replace("\\", "/")
if (ignoreImagePath.any { unixPath.endsWith(it) }) continue
@ -339,7 +342,7 @@ class App {
var emoji = emojiMap[key]
if (emoji == null) {
val unified2 = fixUnified[key] ?: unifiedQualifier(code)
if( unified2.list.size==1 && unified2.list.first()<256){
if (unified2.list.size == 1 && unified2.list.first() < 256) {
++countError
log.e("bad unified code: $unified2 $unixPath")
}
@ -354,10 +357,11 @@ class App {
}
log.d("scanImageDir: found=$countFound,create=$countCreate, dir=$dir")
if(countError>0) error("please fix unified codes. countError=$countError")
if (countError > 0) error("please fix unified codes. countError=$countError")
}
// サブフォルダをスキャンして絵文字別に画像データを確定する
@Suppress("RegExpSimplifiable")
private fun scanEmojiImages() {
scanImageDir("override", "override", """([0-9A-Fa-f_-]+)\.""")
@ -405,12 +409,12 @@ class App {
// emojiDataのjsonを読んで変換コードポイントやショートネームを追加する
private fun readEmojiData() {
for( src in File("./emoji-data/emoji.json")
for (src in File("./emoji-data/emoji.json")
.readAllBytes()
.decodeUtf8()
.decodeJsonArray()
.objectList()
){
) {
// 絵文字のコードポイント一覧
var unified = src.string("unified")?.toCodepointList("EmojiDataJsonUnified")!!
var key = unified.toKey("EmojiDataJsonUnifiedKey")
@ -448,7 +452,7 @@ class App {
src.jsonObject("skin_variations")?.let { skinVariations ->
val parentName = shortNames.first()
val skinToneUsed = HashSet<Int>()
for ((k, data) in skinVariations.entries ) {
for ((k, data) in skinVariations.entries) {
if (data !is JsonObject) continue
// 再帰呼び出しあり
@ -492,7 +496,6 @@ class App {
}
private fun readEmojiOne() {
val cameFrom = "EmojiOneJson"
val root = File("./old-emojione.json")
@ -520,7 +523,6 @@ class App {
}
private fun fixCategory() {
val nameMap = HashMap<ShortName, Emoji>().apply {
for (emoji in emojiMap.values)
@ -840,7 +842,7 @@ class App {
if (hasError) error("toneParent error.")
}
private fun writeData(){
private fun writeData() {
val outFile = "emoji_map.txt"
UnixPrinter(File(outFile)).use { writer ->
@ -891,9 +893,9 @@ class App {
}
}
fun Category.printCategory(list:List<Emoji>){
fun Category.printCategory(list: List<Emoji>) {
writer.println("cn:${this.name}")
for(emoji in list){
for (emoji in list) {
writer.println("c:${emoji.unified.toRawString()}")
emoji.usedInCategory = this
}
@ -903,8 +905,8 @@ class App {
category.printCategory(category.emojis.filter { !it.skip })
}
run{
val category = categoryNames.find{ it.name == "Others"}!!
run {
val category = categoryNames.find { it.name == "Others" }!!
category.printCategory(
emojiMap.values
.filter { it.usedInCategory == null && it.toneParents.isEmpty() }
@ -917,7 +919,7 @@ class App {
.filter { it.toneChildren.isNotEmpty() }
.sortedBy { it.key }
.forEach { parent ->
if( parent.usedInCategory==null){
if (parent.usedInCategory == null) {
log.e("parent ${parent.resName} not used in any category!")
}
parent.toneChildren.entries
@ -931,21 +933,21 @@ class App {
}
// 複合トーン
run{
run {
val category = categoryNames.find { it.name == "ComplexTones" }!!
category.printCategory(
emojiMap.values
.filter { it.toneChildren.isNotEmpty() }
.sortedBy { it.key }
.flatMap { parent ->
if( parent.usedInCategory==null){
if (parent.usedInCategory == null) {
log.e("parent ${parent.resName} not used in any category!")
}
parent.toneChildren.entries
.toList()
.filter { it.key.list.size > 1 }
.sortedBy { it.key }
.map{ it.value}
.map { it.value }
}
)
}

View File

@ -1,12 +1,18 @@
@file:Suppress("unused")
package jp.juggler.subwaytooter.emoji
import io.ktor.client.*
import io.ktor.client.call.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import io.ktor.http.*
import io.ktor.client.HttpClient
import io.ktor.client.request.get
import io.ktor.client.request.header
import io.ktor.client.statement.readBytes
import io.ktor.http.HttpStatusCode
import org.jsoup.Jsoup
import java.io.*
import java.io.BufferedReader
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.InputStreamReader
import java.nio.charset.Charset
import java.security.MessageDigest
import java.util.*
@ -26,39 +32,27 @@ fun CharSequence.eachCodePoint(block: (Int) -> Unit) {
var i = 0
while (i < end) {
val c1 = get(i++)
if (Character.isHighSurrogate(c1) && i < length) {
if (Character.isHighSurrogate(c1) && i < end) {
val c2 = get(i)
if (Character.isLowSurrogate(c2)) {
i++
++i
block(Character.toCodePoint(c1, c2))
continue
}
}
block(c1.toInt())
block(c1.code)
}
}
// Split a CharSequence into its Unicode code points.
// Iteration is delegated to eachCodePoint; every value it reports is
// collected in order and returned as an IntArray.
fun CharSequence.listCodePoints(): IntArray {
    val collected = ArrayList<Int>()
    eachCodePoint { codePoint -> collected.add(codePoint) }
    return collected.toIntArray()
}
// split codepoint to UTF-8 bytes
fun codePointToUtf8(cp: Int, block: (Int) -> Unit) {
// incorrect codepoint
if (cp < 0 || cp > 0x10FFFF) codePointToUtf8('?'.toInt(), block)
if (cp < 0 || cp > 0x10FFFF) codePointToUtf8('?'.code, block)
if (cp >= 128) {
if (cp >= 2048) {
@ -82,12 +76,12 @@ private const val hexString = "0123456789ABCDEF"
// Set of character codes for ASCII digits, ASCII letters, and '-', '_', '.'.
// Built once, on first access.
// NOTE(review): judging by the name these are the characters a percent-encoder
// passes through unescaped; the consumer is outside this view - confirm.
private val encodePercentSkipChars by lazy {
    val passThrough = ('0'..'9') + ('A'..'Z') + ('a'..'z') + listOf('-', '_', '.')
    passThrough.mapTo(HashSet<Int>()) { ch -> ch.code }
}
@ -129,7 +123,7 @@ fun ByteArray.encodeBase64UrlSafe(): String {
val bytes = Base64.getUrlEncoder().encode(this)
return StringBuilder(bytes.size).apply {
for (b in bytes) {
val c = b.toChar()
val c = b.toInt().toChar()
if (c != '=') append(c)
}
}.toString()
@ -139,7 +133,7 @@ fun ByteArray.decodeUtf8() = toString(Charsets.UTF_8)
fun String.encodeUtf8() = toByteArray(Charsets.UTF_8)
// Runs block with the receiver smart-cast to T.
//
// name  : label for the value; used only to build the failure message.
// block : invoked with the receiver as `this` once the type check has passed.
//
// Throws IllegalStateException (via error()) when the receiver is not a T.
// A null receiver fails the check unless T itself is nullable.
inline fun <reified T> Any?.castOrThrow(name: String, block: T.() -> Unit) {
    if (this !is T) {
        // Report both the expected and the actual type so the mismatch is unambiguous.
        val actual = this?.let { it::class.qualifiedName }
        error("type mismatch. $name is not ${T::class.qualifiedName} (actual: $actual)")
    }
    block()
}
@ -178,8 +172,8 @@ fun File.save(data: ByteArray) {
fun ByteArray.saveTo(file: File) = file.save(this)
fun File.forEachLine(charset: Charset = Charsets.UTF_8, block:(Int, String)->Unit)=
BufferedReader(InputStreamReader(FileInputStream(this),charset)).use { reader ->
fun File.forEachLine(charset: Charset = Charsets.UTF_8, block: (Int, String) -> Unit) =
BufferedReader(InputStreamReader(FileInputStream(this), charset)).use { reader ->
var lno = 0
reader.forEachLine {
block(++lno, it)
@ -187,26 +181,26 @@ fun File.forEachLine(charset: Charset = Charsets.UTF_8, block:(Int, String)->Uni
lno
}
// Returns the value stored under key. When the key is absent (or mapped to
// null), creator is invoked, its result is stored under key, and that result
// is returned.
//
// Delegates to the standard library's getOrPut, which has the same
// get / create / put sequence without a non-null assertion on the result.
inline fun <K, V> HashMap<K, V>.prepare(key: K, creator: () -> V): V =
    getOrPut(key, creator)
private val reFileNameBadChars = """[\\/:*?"<>|-]+""".toRegex()
// The "./cache" directory. On first access the directory (and any missing
// parents) is created via mkdirs().
private val cacheDir: File by lazy {
    val dir = File("./cache")
    dir.mkdirs()
    dir
}
// Deletes every regular file located directly inside cacheDir.
// Subdirectories are left alone, and nothing happens when the directory
// cannot be listed.
fun clearCache() {
    val entries = cacheDir.listFiles() ?: return
    for (entry in entries) {
        if (entry.isFile) entry.delete()
    }
}
// Cache lifetime: 8 * 3,600,000 = 28,800,000.
// NOTE(review): presumably milliseconds (i.e. 8 hours); the code that compares
// against this value is outside this view - confirm.
// A compile-time constant needs no lazy delegate, so it is declared const.
private const val cacheExpire = 8 * 3600000L
suspend fun HttpClient.cachedGetBytes(url: String, headers: Map<String, String>): ByteArray {
val fName = reFileNameBadChars.replace(url, "-")
@ -217,14 +211,14 @@ suspend fun HttpClient.cachedGetBytes(url: String, headers: Map<String, String>)
}
println("GET $url")
get<HttpResponse>(url) {
get(url) {
headers.entries.forEach {
header(it.key, it.value)
}
}.let { res ->
return when (res.status) {
HttpStatusCode.OK ->
res.receive<ByteArray>().also { it.saveTo(cacheFile) }
res.readBytes().also { it.saveTo(cacheFile) }
else -> {
cacheFile.delete()
error("get failed. $url ${res.status}")
@ -234,8 +228,7 @@ suspend fun HttpClient.cachedGetBytes(url: String, headers: Map<String, String>)
}
// Fetches url through cachedGetBytes (same url and headers) and returns the
// resulting bytes decoded as UTF-8 text.
suspend fun HttpClient.cachedGetString(url: String, headers: Map<String, String>): String {
    val body = cachedGetBytes(url, headers)
    return body.decodeUtf8()
}
// Parses the receiver as HTML with Jsoup.parse, passing baseUri through as the
// second argument. The return type is declared nullable, so call sites must
// deal with a null result themselves.
fun String.parseHtml(baseUri: String): org.jsoup.nodes.Document? {
    val html = this
    return Jsoup.parse(html, baseUri)
}

View File

@ -2,7 +2,6 @@ package jp.juggler.subwaytooter.emoji.model
import jp.juggler.subwaytooter.emoji.cast
import jp.juggler.subwaytooter.emoji.notEmpty
import java.lang.StringBuilder
/*
絵文字はコードポイントのリストで表現される
@ -71,7 +70,7 @@ class CodepointList(
// Renders every element of list as lowercase hexadecimal and joins the pieces
// with '-'.
fun toHex() = list.joinToString(separator = "-") { codePoint ->
    "%x".format(codePoint).lowercase()
}
@ -116,7 +115,7 @@ class CodepointList(
.toIntArray().toCodepointList(from)
fun getToneCode(from: String) :CodepointList? {
fun getToneCode(from: String): CodepointList? {
val used = HashSet<Int>()
return list
.filter { skinToneModifiers.containsKey(it) }

View File

@ -4,7 +4,7 @@ import jp.juggler.subwaytooter.emoji.cast
import jp.juggler.subwaytooter.emoji.notEmpty
class ShortName(val cameFrom:String,val name:String) :Comparable<ShortName>{
class ShortName(val cameFrom: String, val name: String) : Comparable<ShortName> {
override fun equals(other: Any?): Boolean =
name == other.cast<ShortName>()?.name
@ -23,10 +23,10 @@ private val reColonTail = """:\z""".toRegex()
private val reNotCode = """[^\w\d+_]+""".toRegex()
private val reUnderTail = """_+\z""".toRegex()
// Normalizes the receiver and wraps it in a ShortName tagged with cameFrom.
// Steps, in order: lowercase; remove reColonHead and reColonTail matches;
// replace each reNotCode match with "_"; remove the reUnderTail match.
// Yields null when notEmpty() returns null for the normalized text.
fun String.toShortName(cameFrom: String) = run {
    val lowered = lowercase()
    val withoutColons = lowered.replace(reColonHead, "").replace(reColonTail, "")
    val underscored = withoutColons.replace(reNotCode, "_")
    val trimmed = underscored.replace(reUnderTail, "")
    trimmed.notEmpty()?.let { normalized -> ShortName(cameFrom = cameFrom, name = normalized) }
}