emojiConverterの依存関係を更新

This commit is contained in:
tateisu 2022-07-14 13:05:23 +09:00
parent 6ac50da328
commit f7564dcecf
9 changed files with 1207 additions and 1213 deletions

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<bytecodeTargetLevel target="14" />
<bytecodeTargetLevel target="1.8" />
</component>
</project>

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_14" default="true" project-jdk-name="14" project-jdk-type="JavaSDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="14" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

View File

@ -1,6 +1,6 @@
plugins {
id 'java'
id 'org.jetbrains.kotlin.jvm' version '1.5.10'
id 'org.jetbrains.kotlin.jvm' version '1.7.10'
}
group 'jp.juggler'
@ -12,22 +12,22 @@ repositories {
dependencies {
implementation fileTree(include: ['*.jar'], dir: 'src/lib')
implementation "com.google.guava:guava:28.1-jre"
implementation "com.google.guava:guava:31.1-jre"
implementation "org.jetbrains.kotlin:kotlin-stdlib"
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.7.0'
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
def ktorVersion="1.5.0"
def ktorVersion="2.0.3"
implementation "io.ktor:ktor-client-core:$ktorVersion"
implementation "io.ktor:ktor-client-cio:$ktorVersion"
implementation "io.ktor:ktor-client-features:$ktorVersion"
// implementation "io.ktor:ktor-client-features:$ktorVersion"
implementation "io.ktor:ktor-client-encoding:$ktorVersion"
// StringEscapeUtils.unescapeHtml4
implementation "org.apache.commons:commons-text:1.9"
// HTML5パーサ
implementation "org.jsoup:jsoup:1.13.1"
implementation "org.jsoup:jsoup:1.14.3"
}
test {

View File

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

View File

@ -860,7 +860,7 @@ private fun Writer.writeQuote(string: String): Writer {
in '\u0080' until '\u00a0',
in '\u2000' until '\u2100' -> {
write("\\u")
val hexCode: String = Integer.toHexString(c.toInt())
val hexCode: String = Integer.toHexString(c.code)
write("0000", 0, 4 - hexCode.length)
write(hexCode)
}
@ -1047,7 +1047,7 @@ fun Writer.writeJsonValue(
}
}
value is Char -> writeJsonValue(indentFactor, indent, value.toInt())
value is Char -> writeJsonValue(indentFactor, indent, value.code)
value is String -> writeQuote(value)
value is Enum<*> -> writeQuote(value.name)

View File

@ -1,7 +1,7 @@
package jp.juggler.subwaytooter.emoji
import io.ktor.client.*
import io.ktor.client.features.*
import io.ktor.client.plugins.HttpTimeout
import jp.juggler.subwaytooter.emoji.model.*
import kotlinx.coroutines.runBlocking
import org.apache.commons.text.StringEscapeUtils
@ -11,7 +11,6 @@ import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.IOException
//pngフォルダにある画像ファイルを参照する
//emoji-data/emoji.json を参照する
//
@ -29,7 +28,7 @@ fun copyFile(dst: File, src: File) {
try {
FileInputStream(src).use { streamIn ->
FileOutputStream(dst).use { streamOut ->
streamOut.write(streamIn.readAllBytes())
streamOut.write(streamIn.readBytes())
}
}
} catch (ex: Throwable) {
@ -154,6 +153,7 @@ class App {
val dstQualified = JsonArray().also { dstRoot["qualifiedCode"] = it }
for (url in arrayOf(
"https://emojipedia.org/emoji-14.0/",
"https://emojipedia.org/emoji-13.1/",
"https://emojipedia.org/emoji-13.0/",
"https://emojipedia.org/emoji-12.1/",
@ -166,6 +166,7 @@ class App {
"https://emojipedia.org/emoji-1.0/",
)) {
val root = client.cachedGetString(url, mapOf()).parseHtml(url)
?: error("parseHtml returns null!")
root.getElementsByClass("sidebar").forEach { it.remove() }
root.getElementsByClass("categories").forEach { it.remove() }
@ -173,7 +174,7 @@ class App {
for (list in root.getElementsByTag("ul")) {
for (li in list.getElementsByTag("li")) {
val href = li.getElementsByTag("a")?.attr("href")
val href = li.getElementsByTag("a").attr("href")
.notEmpty() ?: continue
val spanText = li.getElementsByTag("span").find { it.hasClass("emoji") }?.text()
@ -191,7 +192,9 @@ class App {
val dstCategoryItems = JsonArray().also { dstCategory[category.name] = it }
val root = client.cachedGetString(category.url, mapOf()).parseHtml(category.url)
?: error("parseHtml returns null!")
val list = root.getElementsByClass("emoji-list").first()
?: error("getElementsByClass(emoji-list) failed.")
for (li in list.getElementsByTag("li")) {
val href = li.getElementsByTag("a").attr("href")
.notEmpty() ?: continue
@ -358,6 +361,7 @@ class App {
}
// サブフォルダをスキャンして絵文字別に画像データを確定する
@Suppress("RegExpSimplifiable")
private fun scanEmojiImages() {
scanImageDir("override", "override", """([0-9A-Fa-f_-]+)\.""")
@ -492,7 +496,6 @@ class App {
}
private fun readEmojiOne() {
val cameFrom = "EmojiOneJson"
val root = File("./old-emojione.json")
@ -520,7 +523,6 @@ class App {
}
private fun fixCategory() {
val nameMap = HashMap<ShortName, Emoji>().apply {
for (emoji in emojiMap.values)

View File

@ -1,12 +1,18 @@
@file:Suppress("unused")
package jp.juggler.subwaytooter.emoji
import io.ktor.client.*
import io.ktor.client.call.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import io.ktor.http.*
import io.ktor.client.HttpClient
import io.ktor.client.request.get
import io.ktor.client.request.header
import io.ktor.client.statement.readBytes
import io.ktor.http.HttpStatusCode
import org.jsoup.Jsoup
import java.io.*
import java.io.BufferedReader
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.InputStreamReader
import java.nio.charset.Charset
import java.security.MessageDigest
import java.util.*
@ -26,39 +32,27 @@ fun CharSequence.eachCodePoint(block: (Int) -> Unit) {
var i = 0
while (i < end) {
val c1 = get(i++)
if (Character.isHighSurrogate(c1) && i < length) {
if (Character.isHighSurrogate(c1) && i < end) {
val c2 = get(i)
if (Character.isLowSurrogate(c2)) {
i++
++i
block(Character.toCodePoint(c1, c2))
continue
}
}
block(c1.toInt())
block(c1.code)
}
}
/**
 * Splits this [CharSequence] into its Unicode code points.
 *
 * A high surrogate immediately followed by a low surrogate is combined into one
 * supplementary code point. Any other UTF-16 unit, including an unpaired
 * surrogate, is returned as its own value.
 *
 * @return the code points in order of appearance; empty for an empty receiver.
 */
fun CharSequence.listCodePoints(): IntArray {
    val end = length
    // A sequence never contains more code points than UTF-16 units.
    val buffer = IntArray(end)
    var count = 0
    var i = 0
    while (i < end) {
        // codePointAt only pairs surrogates that are both inside the sequence.
        val cp = Character.codePointAt(this, i)
        buffer[count++] = cp
        // Advance by 2 for a supplementary code point, otherwise by 1.
        i += Character.charCount(cp)
    }
    return buffer.copyOf(count)
}
// split codepoint to UTF-8 bytes
fun codePointToUtf8(cp: Int, block: (Int) -> Unit) {
// incorrect codepoint
if (cp < 0 || cp > 0x10FFFF) codePointToUtf8('?'.toInt(), block)
if (cp < 0 || cp > 0x10FFFF) codePointToUtf8('?'.code, block)
if (cp >= 128) {
if (cp >= 2048) {
@ -82,12 +76,12 @@ private const val hexString = "0123456789ABCDEF"
/**
 * Code points of the ASCII digits, ASCII upper- and lower-case letters, and the
 * characters '-', '_' and '.'. Built lazily on first access.
 *
 * NOTE(review): the name suggests these are the characters the percent-encoder
 * emits unescaped; the consumer is outside this view, so confirm there.
 */
private val encodePercentSkipChars by lazy {
    HashSet<Int>().apply {
        // Char.code replaces the deprecated Char.toInt(); each character is added once.
        for (range in arrayOf('0'..'9', 'A'..'Z', 'a'..'z')) {
            range.forEach { add(it.code) }
        }
        "-_.".forEach { add(it.code) }
    }
}
@ -129,7 +123,7 @@ fun ByteArray.encodeBase64UrlSafe(): String {
val bytes = Base64.getUrlEncoder().encode(this)
return StringBuilder(bytes.size).apply {
for (b in bytes) {
val c = b.toChar()
val c = b.toInt().toChar()
if (c != '=') append(c)
}
}.toString()
@ -217,14 +211,14 @@ suspend fun HttpClient.cachedGetBytes(url: String, headers: Map<String, String>)
}
println("GET $url")
get<HttpResponse>(url) {
get(url) {
headers.entries.forEach {
header(it.key, it.value)
}
}.let { res ->
return when (res.status) {
HttpStatusCode.OK ->
res.receive<ByteArray>().also { it.saveTo(cacheFile) }
res.readBytes().also { it.saveTo(cacheFile) }
else -> {
cacheFile.delete()
error("get failed. $url ${res.status}")
@ -236,6 +230,5 @@ suspend fun HttpClient.cachedGetBytes(url: String, headers: Map<String, String>)
/**
 * Text variant of [cachedGetBytes]: obtains the bytes for [url] using the given
 * request [headers] and converts them to a [String] with `decodeUtf8()`.
 *
 * Any failure raised by [cachedGetBytes] propagates to the caller unchanged.
 */
suspend fun HttpClient.cachedGetString(url: String, headers: Map<String, String>): String {
    val body = cachedGetBytes(url, headers)
    return body.decodeUtf8()
}
/**
 * Parses this string as HTML with Jsoup.
 *
 * @param baseUri passed to `Jsoup.parse` as the document's base URI.
 * @return the parsed document. The type is declared nullable, and the callers
 *   visible in this project guard the result with `?: error(...)`.
 */
fun String.parseHtml(baseUri: String): org.jsoup.nodes.Document? =
    Jsoup.parse(this, baseUri)

View File

@ -2,7 +2,6 @@ package jp.juggler.subwaytooter.emoji.model
import jp.juggler.subwaytooter.emoji.cast
import jp.juggler.subwaytooter.emoji.notEmpty
import java.lang.StringBuilder
/*
絵文字はコードポイントのリストで表現される
@ -71,7 +70,7 @@ class CodepointList(
/**
 * Formats every element of `list` as hexadecimal and joins the results with '-',
 * for example `1f468-200d-1f469`.
 *
 * The `%x` conversion already yields lowercase digits, so no further case
 * conversion is needed. Each element is emitted exactly once.
 *
 * @return the joined hex string; empty when `list` is empty.
 */
fun toHex() = list.joinToString("-") { "%x".format(it) }

View File

@ -24,7 +24,7 @@ private val reNotCode = """[^\w\d+_]+""".toRegex()
private val reUnderTail = """_+\z""".toRegex()
fun String.toShortName(cameFrom: String) =
toLowerCase()
lowercase()
.replace(reColonHead, "")
.replace(reColonTail, "")
.replace(reNotCode, "_")