emojiConverterの依存関係を更新

This commit is contained in:
tateisu 2022-07-14 13:05:23 +09:00
parent 6ac50da328
commit f7564dcecf
9 changed files with 1207 additions and 1213 deletions

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<bytecodeTargetLevel target="14" />
<bytecodeTargetLevel target="1.8" />
</component>
</project>

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_14" default="true" project-jdk-name="14" project-jdk-type="JavaSDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="14" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

View File

@ -1,6 +1,6 @@
plugins {
id 'java'
id 'org.jetbrains.kotlin.jvm' version '1.5.10'
id 'org.jetbrains.kotlin.jvm' version '1.7.10'
}
group 'jp.juggler'
@ -12,22 +12,22 @@ repositories {
dependencies {
implementation fileTree(include: ['*.jar'], dir: 'src/lib')
implementation "com.google.guava:guava:28.1-jre"
implementation "com.google.guava:guava:31.1-jre"
implementation "org.jetbrains.kotlin:kotlin-stdlib"
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.7.0'
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
def ktorVersion="1.5.0"
def ktorVersion="2.0.3"
implementation "io.ktor:ktor-client-core:$ktorVersion"
implementation "io.ktor:ktor-client-cio:$ktorVersion"
implementation "io.ktor:ktor-client-features:$ktorVersion"
// implementation "io.ktor:ktor-client-features:$ktorVersion"
implementation "io.ktor:ktor-client-encoding:$ktorVersion"
// StringEscapeUtils.unescapeHtml4
implementation "org.apache.commons:commons-text:1.9"
// HTML5パーサ
implementation "org.jsoup:jsoup:1.13.1"
implementation "org.jsoup:jsoup:1.14.3"
}
test {

View File

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

View File

@ -860,7 +860,7 @@ private fun Writer.writeQuote(string: String): Writer {
in '\u0080' until '\u00a0',
in '\u2000' until '\u2100' -> {
write("\\u")
val hexCode: String = Integer.toHexString(c.toInt())
val hexCode: String = Integer.toHexString(c.code)
write("0000", 0, 4 - hexCode.length)
write(hexCode)
}
@ -1047,7 +1047,7 @@ fun Writer.writeJsonValue(
}
}
value is Char -> writeJsonValue(indentFactor, indent, value.toInt())
value is Char -> writeJsonValue(indentFactor, indent, value.code)
value is String -> writeQuote(value)
value is Enum<*> -> writeQuote(value.name)

View File

@ -1,147 +1,141 @@
@file:Suppress("unused")
package jp.juggler.subwaytooter.emoji
import io.ktor.client.*
import io.ktor.client.call.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import io.ktor.http.*
import io.ktor.client.HttpClient
import io.ktor.client.request.get
import io.ktor.client.request.header
import io.ktor.client.statement.readBytes
import io.ktor.http.HttpStatusCode
import org.jsoup.Jsoup
import java.io.*
import java.io.BufferedReader
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.InputStreamReader
import java.nio.charset.Charset
import java.security.MessageDigest
import java.util.*
/**
 * Interprets this string as a boolean flag.
 *
 * The empty string, "0", and any string starting with "f" (ignoring case) are false.
 * Every other string is true; the "t" prefix and "on" branches only spell out
 * cases that the fallback branch would accept as well.
 */
fun String.isTruth() = when {
this == "" -> false
this == "0" -> false
this.startsWith("f", ignoreCase = true) -> false
this.startsWith("t", ignoreCase = true) -> true
this == "on" -> true
else -> true
this == "" -> false
this == "0" -> false
this.startsWith("f", ignoreCase = true) -> false
this.startsWith("t", ignoreCase = true) -> true
this == "on" -> true
else -> true
}
/**
 * Splits this sequence into Unicode code points and passes each one to [block].
 *
 * A high surrogate directly followed by a low surrogate is combined into a single
 * supplementary code point. Any other char, including an unpaired surrogate,
 * is reported as its own UTF-16 code unit value.
 */
fun CharSequence.eachCodePoint(block: (Int) -> Unit) {
val end = length
var i = 0
while (i < end) {
val c1 = get(i++)
if (Character.isHighSurrogate(c1) && i < length) {
val c2 = get(i)
if (Character.isLowSurrogate(c2)) {
i++
block(Character.toCodePoint(c1, c2))
continue
}
}
block(c1.toInt())
}
val end = length
var i = 0
while (i < end) {
val c1 = get(i++)
if (Character.isHighSurrogate(c1) && i < end) {
val c2 = get(i)
if (Character.isLowSurrogate(c2)) {
++i
block(Character.toCodePoint(c1, c2))
continue
}
}
block(c1.code)
}
}
/**
 * Collects the Unicode code points of this sequence, in order, into an IntArray.
 *
 * Surrogate pairs become one element each; other chars are stored as their
 * UTF-16 code unit value.
 */
fun CharSequence.listCodePoints() = ArrayList<Int>().also{ dst->
val end = length
var i = 0
while (i < end) {
val c1 = get(i++)
if (Character.isHighSurrogate(c1) && i < length) {
val c2 = get(i)
if (Character.isLowSurrogate(c2)) {
i++
dst.add(Character.toCodePoint(c1, c2))
continue
}
}
dst.add(c1.toInt())
}
fun CharSequence.listCodePoints() = ArrayList<Int>().also { dst ->
eachCodePoint { dst.add(it) }
}.toIntArray()
/**
 * Encodes one Unicode code point as UTF-8 and hands each resulting byte to [block].
 *
 * Bytes are passed in encoding order as Int values in the range 0..255.
 * A value outside 0..0x10FFFF is not a code point; a single '?' byte is
 * emitted in its place.
 *
 * @param cp code point to encode
 * @param block receives the encoded bytes one at a time
 */
fun codePointToUtf8(cp: Int, block: (Int) -> Unit) {
    if (cp < 0 || cp > 0x10FFFF) {
        // Stop after the substitute: falling through would additionally emit
        // a bogus multi-byte sequence computed from the invalid value.
        block('?'.code)
        return
    }
    when {
        // 1 byte: 0xxxxxxx
        cp < 0x80 -> block(cp)
        // 2 bytes: 110xxxxx 10xxxxxx
        cp < 0x800 -> {
            block(0xC0 or (cp shr 6))
            block(0x80 or (cp and 0x3f))
        }
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        cp < 0x10000 -> {
            block(0xE0 or (cp shr 12))
            block(0x80 or ((cp shr 6) and 0x3f))
            block(0x80 or (cp and 0x3f))
        }
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        else -> {
            block(0xF0 or (cp shr 18))
            block(0x80 or ((cp shr 12) and 0x3f))
            block(0x80 or ((cp shr 6) and 0x3f))
            block(0x80 or (cp and 0x3f))
        }
    }
}
// Digits used to write each 4-bit half of a percent-encoded byte (uppercase hex).
private const val hexString = "0123456789ABCDEF"
// Code points that are copied unchanged when percent-encoding:
// ASCII digits, ASCII letters, '-', '_' and '.'. Built on first use.
private val encodePercentSkipChars by lazy {
HashSet<Int>().apply {
('0'..'9').forEach { add(it.toInt()) }
('A'..'Z').forEach { add(it.toInt()) }
('a'..'z').forEach { add(it.toInt()) }
add('-'.toInt())
add('_'.toInt())
add('.'.toInt())
}
HashSet<Int>().apply {
('0'..'9').forEach { add(it.code) }
('A'..'Z').forEach { add(it.code) }
('a'..'z').forEach { add(it.code) }
add('-'.code)
add('_'.code)
add('.'.code)
}
}
/**
 * Percent-encodes this string.
 *
 * Code points contained in encodePercentSkipChars are appended as they are.
 * Every other code point is converted to UTF-8 with codePointToUtf8, and each
 * byte is written as '%' followed by two hex digits taken from hexString.
 */
fun String.encodePercent(): String =
StringBuilder(length).also { sb ->
eachCodePoint { cp ->
if (encodePercentSkipChars.contains(cp)) {
sb.append(cp.toChar())
} else {
codePointToUtf8(cp) { b ->
sb.append('%')
.append(hexString[b shr 4])
.append(hexString[b and 15])
}
}
}
}.toString()
StringBuilder(length).also { sb ->
eachCodePoint { cp ->
if (encodePercentSkipChars.contains(cp)) {
sb.append(cp.toChar())
} else {
codePointToUtf8(cp) { b ->
sb.append('%')
.append(hexString[b shr 4])
.append(hexString[b and 15])
}
}
}
}.toString()
// Adds this value to [dst] and returns the result of dst.add().
// Lets a nullable value be added with `x?.addTo(dst)` instead of `x?.let { dst.add(it) }`.
fun <T> T.addTo(dst: ArrayList<T>) = dst.add(this)
fun <T> T.addTo(dst: HashSet<T>) = dst.add(this)
// notEmpty(): returns the receiver when it is non-null and has at least one
// element (or character), otherwise null. Overloads cover List, Map and CharSequence.
fun <E : List<*>> E?.notEmpty(): E? =
if (this?.isNotEmpty() == true) this else null
if (this?.isNotEmpty() == true) this else null
fun <E : Map<*, *>> E?.notEmpty(): E? =
if (this?.isNotEmpty() == true) this else null
if (this?.isNotEmpty() == true) this else null
fun <T : CharSequence> T?.notEmpty(): T? =
if (this?.isNotEmpty() == true) this else null
if (this?.isNotEmpty() == true) this else null
/**
 * Computes the SHA-256 digest of this byte array.
 *
 * @return the 32-byte hash value
 * @throws java.security.NoSuchAlgorithmException if the runtime offers no SHA-256 provider
 */
fun ByteArray.digestSha256(): ByteArray =
    // getInstance() either returns a digest object or throws, so neither a
    // safe call nor a not-null assertion is needed around it.
    MessageDigest.getInstance("SHA-256").digest(this)
/**
 * Encodes this byte array as URL-safe Base64 ('-' and '_' alphabet) without
 * trailing '=' padding.
 *
 * @return the encoded text; empty when the array is empty
 */
fun ByteArray.encodeBase64UrlSafe(): String =
    // withoutPadding() drops the '=' characters in the encoder itself, which
    // replaces the manual per-byte filtering of the encoded output.
    Base64.getUrlEncoder().withoutPadding().encodeToString(this)
/** Decodes this byte array as UTF-8 text. */
fun ByteArray.decodeUtf8(): String = String(this, Charsets.UTF_8)
/** Encodes this string as UTF-8 bytes. */
fun String.encodeUtf8(): ByteArray {
    val utf8 = Charsets.UTF_8
    return this.toByteArray(utf8)
}
/**
 * Runs [block] with this value as its receiver after verifying that the value is a [T].
 *
 * @param name label for the value, used in the failure message
 * @param block action executed with the value smart-cast to [T]
 * @throws IllegalStateException if this value is not an instance of [T]
 */
inline fun <reified T> Any?.castOrThrow(name: String, block: T.() -> Unit) {
    // This branch runs only when the type check failed, so the message has to
    // say that the value is NOT the expected type.
    if (this !is T) error("type mismatch. $name is not ${T::class.qualifiedName}")
    block()
}
// 型推論できる文脈だと型名を書かずにすむ
@ -155,87 +149,86 @@ fun <T : Comparable<T>> minComparable(a: T, b: T): T = if (a <= b) a else b
/** Returns the greater of [a] and [b]; [a] is returned when they compare as equal. */
fun <T : Comparable<T>> maxComparable(a: T, b: T): T = when {
    a < b -> b
    else -> a
}
/**
 * Removes the first element accepted by [check] and returns it.
 *
 * Elements are visited in iterator order and removal goes through the iterator.
 * Returns null, leaving the collection unchanged, when no element matches.
 */
fun <T : Any> MutableCollection<T>.removeFirst(check: (T) -> Boolean): T? {
val it = iterator()
while (it.hasNext()) {
val item = it.next()
if (check(item)) {
it.remove()
return item
}
}
return null
val it = iterator()
while (it.hasNext()) {
val item = it.next()
if (check(item)) {
it.remove()
return item
}
}
return null
}
// Reads the whole file into a byte array; the input stream is closed by use().
fun File.readAllBytes() =
FileInputStream(this).use { it.readBytes() }
FileInputStream(this).use { it.readBytes() }
/**
 * Writes [data] to this file by way of a temporary file.
 *
 * The bytes go to "<absolute path>.tmp" first; then this file is deleted and
 * the temporary file is renamed onto it. Throws IllegalStateException (via
 * error()) when the rename reports failure.
 */
fun File.save(data: ByteArray) {
val tmpFile = File("$absolutePath.tmp")
FileOutputStream(tmpFile).use { it.write(data) }
this.delete()
if (!tmpFile.renameTo(this)) error("$this: rename failed.")
val tmpFile = File("$absolutePath.tmp")
FileOutputStream(tmpFile).use { it.write(data) }
this.delete()
if (!tmpFile.renameTo(this)) error("$this: rename failed.")
}
// Writes this byte array to [file]; same as file.save(this) with receiver and argument swapped.
fun ByteArray.saveTo(file: File) = file.save(this)
/**
 * Reads this file line by line using [charset] and calls [block] for each line.
 *
 * [block] receives the line number, starting at 1, and the line text.
 * Returns the number of lines read. The reader is closed by use().
 */
fun File.forEachLine(charset: Charset = Charsets.UTF_8, block:(Int, String)->Unit)=
BufferedReader(InputStreamReader(FileInputStream(this),charset)).use { reader ->
var lno = 0
reader.forEachLine {
block(++lno, it)
}
lno
}
fun File.forEachLine(charset: Charset = Charsets.UTF_8, block: (Int, String) -> Unit) =
BufferedReader(InputStreamReader(FileInputStream(this), charset)).use { reader ->
var lno = 0
reader.forEachLine {
block(++lno, it)
}
lno
}
/**
 * Returns the value stored for [key], creating and storing it first when absent.
 *
 * [creator] is invoked only when the map has no non-null value for [key];
 * its result is put into the map and returned.
 *
 * @param key key to look up
 * @param creator produces the value to store when the key has none
 * @return the existing or newly created value
 */
inline fun <K, V> HashMap<K, V>.prepare(key: K, creator: () -> V): V =
    // getOrPut performs the same lookup-then-insert without the unchecked `!!`
    // that the manual version needed on its nullable local.
    getOrPut(key, creator)
// Runs of these characters (\ / : * ? " < > | -) are replaced when a URL is turned into a cache file name.
private val reFileNameBadChars = """[\\/:*?"<>|-]+""".toRegex()
// Directory "./cache"; mkdirs() is called when the property is first read.
private val cacheDir by lazy{ File("./cache").apply { mkdirs() }}
private val cacheDir by lazy { File("./cache").apply { mkdirs() } }
// Deletes every regular file directly inside the cache directory.
// Entries that are not regular files are left alone; a null listing is ignored.
fun clearCache(){
cacheDir.list()?.forEach { name->
File(cacheDir,name).takeIf { it.isFile }?.delete()
}
fun clearCache() {
cacheDir.list()?.forEach { name ->
File(cacheDir, name).takeIf { it.isFile }?.delete()
}
}
// Maximum age of a cache file in milliseconds: 8 * 3600000 ms = 8 hours.
private val cacheExpire by lazy{ 8 * 3600000L }
private val cacheExpire by lazy { 8 * 3600000L }
/**
 * Downloads [url] with GET and returns the response body, backed by a file cache.
 *
 * The cache file lives in cacheDir and is named after the URL, with each run of
 * characters matching reFileNameBadChars replaced by "-". When that file was
 * modified no longer than cacheExpire milliseconds ago, its content is returned
 * and no request is sent. Otherwise the request is sent with [headers]:
 * on status 200 the body is saved to the cache file and returned; on any other
 * status the cache file is deleted and IllegalStateException is thrown (via error()).
 *
 * NOTE(review): distinct URLs that differ only in replaced characters share one cache file.
 */
suspend fun HttpClient.cachedGetBytes(url: String, headers: Map<String, String>): ByteArray {
val fName = reFileNameBadChars.replace(url, "-")
val cacheFile = File(cacheDir, fName)
if (System.currentTimeMillis() - cacheFile.lastModified() <= cacheExpire) {
println("GET(cached) $url")
return cacheFile.readAllBytes()
}
println("GET $url")
val fName = reFileNameBadChars.replace(url, "-")
val cacheFile = File(cacheDir, fName)
if (System.currentTimeMillis() - cacheFile.lastModified() <= cacheExpire) {
println("GET(cached) $url")
return cacheFile.readAllBytes()
}
println("GET $url")
get<HttpResponse>(url) {
headers.entries.forEach {
header(it.key, it.value)
}
}.let { res ->
return when (res.status) {
HttpStatusCode.OK ->
res.receive<ByteArray>().also { it.saveTo(cacheFile) }
else -> {
cacheFile.delete()
error("get failed. $url ${res.status}")
}
}
}
get(url) {
headers.entries.forEach {
header(it.key, it.value)
}
}.let { res ->
return when (res.status) {
HttpStatusCode.OK ->
res.readBytes().also { it.saveTo(cacheFile) }
else -> {
cacheFile.delete()
error("get failed. $url ${res.status}")
}
}
}
}
// cachedGetString: same as cachedGetBytes(), with the returned bytes decoded as UTF-8.
// parseHtml: parses this string with Jsoup.parse, passing baseUri through.
suspend fun HttpClient.cachedGetString(url: String, headers: Map<String, String>): String =
cachedGetBytes(url,headers).decodeUtf8()
fun String.parseHtml(baseUri: String) =
Jsoup.parse(this, baseUri)
cachedGetBytes(url, headers).decodeUtf8()
fun String.parseHtml(baseUri: String): org.jsoup.nodes.Document? =
Jsoup.parse(this, baseUri)

View File

@ -2,7 +2,6 @@ package jp.juggler.subwaytooter.emoji.model
import jp.juggler.subwaytooter.emoji.cast
import jp.juggler.subwaytooter.emoji.notEmpty
import java.lang.StringBuilder
/*
絵文字はコードポイントのリストで表現される
@ -30,61 +29,61 @@ import java.lang.StringBuilder
// list of codepoints
class CodepointList(
val from: String,
val list: IntArray
val from: String,
val list: IntArray
) : Comparable<CodepointList> {
// Two lists are equal when their code point arrays have the same content; `from` is not compared.
// presumably cast() yields null for a value of another type, making the result false — confirm.
override fun equals(other: Any?): Boolean =
list.contentEquals(other.cast<CodepointList>()?.list)
override fun equals(other: Any?): Boolean =
list.contentEquals(other.cast<CodepointList>()?.list)
// Hash over the code points only: for each value the accumulator is shifted
// left by 2 bits and XOR-ed with the value. `from` does not take part.
override fun hashCode(): Int {
var code = 0
for (v in list) code = code.shl(2).xor(v)
return code
}
override fun hashCode(): Int {
var code = 0
for (v in list) code = code.shl(2).xor(v)
return code
}
// Orders lists element by element: the first position with different code
// points decides. When one list ends first it sorts before the longer one.
// Returns 0 when both lists end at the same position without a difference.
override fun compareTo(other: CodepointList): Int {
val la = this.list
val lb = other.list
var i = 0
do {
val a = la.elementAtOrNull(i)
val b = lb.elementAtOrNull(i)
override fun compareTo(other: CodepointList): Int {
val la = this.list
val lb = other.list
var i = 0
do {
val a = la.elementAtOrNull(i)
val b = lb.elementAtOrNull(i)
val r = if (a == null) {
if (b == null) break
-1
} else if (b == null) {
1
} else {
a.compareTo(b)
}
if (r != 0) return r
++i
} while (true)
return 0
}
val r = if (a == null) {
if (b == null) break
-1
} else if (b == null) {
1
} else {
a.compareTo(b)
}
if (r != 0) return r
++i
} while (true)
return 0
}
// Builds a string of the form "uuuu-uuuu-uuuu-uuuu": each code point in hex, joined by '-'.
// Code points are written without leading zeros.
// The result is always lowercase.
fun toHex() = StringBuilder(list.size * 5).also {
list.forEachIndexed { i, v ->
if (i > 0) it.append('-')
it.append(String.format("%x", v).toLowerCase())
}
}.toString()
// Builds a string of the form "uuuu-uuuu-uuuu-uuuu": each code point in hex, joined by '-'.
// Code points are written without leading zeros.
// The result is always lowercase.
fun toHex() = StringBuilder(list.size * 5).also {
list.forEachIndexed { i, v ->
if (i > 0) it.append('-')
it.append("%x".format(v).lowercase())
}
}.toString()
// Builds the text itself by appending every code point of the list in order.
fun toRawString() = StringBuilder(list.size + 10).also { sb ->
for (cp in list) {
sb.appendCodePoint(cp)
}
}.toString()
// Builds the text itself by appending every code point of the list in order.
fun toRawString() = StringBuilder(list.size + 10).also { sb ->
for (cp in list) {
sb.appendCodePoint(cp)
}
}.toString()
// Resource name for this list: "emj_" followed by toHex() with every '-' replaced by '_'.
fun toResourceId() = "emj_${toHex().replace("-", "_")}"
fun toResourceId() = "emj_${toHex().replace("-", "_")}"
// Text form: the hex representation, a comma, then the `from` label.
override fun toString() = "${toHex()},$from"
override fun toString() = "${toHex()},$from"
// fun makeUtf16(): String {
// // java の文字列にする
@ -111,28 +110,28 @@ class CodepointList(
// return sb.toString()
// }
// Returns a copy of this list without U+FE0F, U+FE0E (variation selectors 16 and 15)
// and U+200D (zero width joiner), labelled with [from]; null when nothing remains.
fun toKey(from: String) =
list.filter { it != 0xfe0f && it != 0xfe0e && it != 0x200d }
.toIntArray().toCodepointList(from)
fun toKey(from: String) =
list.filter { it != 0xfe0f && it != 0xfe0e && it != 0x200d }
.toIntArray().toCodepointList(from)
/**
 * Extracts the skin tone modifiers contained in this list.
 *
 * Keeps the code points that are keys of skinToneModifiers, in order of first
 * appearance and with repeated values dropped.
 *
 * @param from label stored in the returned list
 * @return the modifiers as a [CodepointList], or null when the list has none
 */
fun getToneCode(from: String): CodepointList? =
    list.filter { skinToneModifiers.containsKey(it) }
        // distinct() keeps the first occurrence of each value in order,
        // replacing the mapNotNull step that tracked seen values in a side set.
        .distinct()
        .toIntArray()
        .toCodepointList(from)
}
// True when the array holds exactly one code point and that code point is below 0xAE.
fun IntArray.isAsciiEmoji() =
size == 1 && first() < 0xae
size == 1 && first() < 0xae
// Wraps this array in a CodepointList labelled with [from]; an empty array gives null.
fun IntArray.toCodepointList(from: String) =
    takeIf { it.isNotEmpty() }?.let { codePoints -> CodepointList(from, codePoints) }
@ -140,8 +139,8 @@ private val reHex = """([0-9A-Fa-f]+)""".toRegex()
// Parses text such as "cp-cp-cp-cp" into a CodepointList labelled with [from].
// Every run of hex digits matched by reHex becomes one code point (parsed in base 16);
// whatever separates the runs is ignored. Returns null when no run is found.
fun String.toCodepointList(from: String) =
reHex.findAll(this)
.map { mr -> mr.groupValues[1].toInt(16) }
.toList().notEmpty()
?.toIntArray()
?.toCodepointList(from)
reHex.findAll(this)
.map { mr -> mr.groupValues[1].toInt(16) }
.toList().notEmpty()
?.toIntArray()
?.toCodepointList(from)

View File

@ -4,18 +4,18 @@ import jp.juggler.subwaytooter.emoji.cast
import jp.juggler.subwaytooter.emoji.notEmpty
/**
 * A short name together with a label saying where it came from.
 *
 * Equality, hash code and ordering use [name] only. [cameFrom] takes no part
 * in comparisons and shows up only in toString(), as "SN(<cameFrom>)<name>".
 */
class ShortName(val cameFrom:String,val name:String) :Comparable<ShortName>{
override fun equals(other: Any?): Boolean =
name == other.cast<ShortName>()?.name
class ShortName(val cameFrom: String, val name: String) : Comparable<ShortName> {
override fun equals(other: Any?): Boolean =
name == other.cast<ShortName>()?.name
override fun hashCode(): Int =
name.hashCode()
override fun hashCode(): Int =
name.hashCode()
override fun toString(): String =
"SN($cameFrom)$name"
override fun toString(): String =
"SN($cameFrom)$name"
override fun compareTo(other: ShortName): Int =
name.compareTo(other.name)
override fun compareTo(other: ShortName): Int =
name.compareTo(other.name)
}
private val reColonHead = """\A:""".toRegex()
@ -23,10 +23,10 @@ private val reColonTail = """:\z""".toRegex()
// Matches a run of characters that are not word characters, digits, '+' or '_'.
private val reNotCode = """[^\w\d+_]+""".toRegex()
// Matches one or more underscores at the very end of the input.
private val reUnderTail = """_+\z""".toRegex()
/**
 * Normalizes this string into a [ShortName] labelled with [cameFrom].
 *
 * Steps, in order: lowercase the text, drop what reColonHead and reColonTail
 * match, replace each match of reNotCode with "_", and remove what reUnderTail
 * matches. Returns null when the result is empty.
 */
fun String.toShortName(cameFrom:String) =
toLowerCase()
.replace(reColonHead, "")
.replace(reColonTail, "")
.replace(reNotCode, "_")
.replace(reUnderTail,"")
.notEmpty()?.let{ ShortName(cameFrom=cameFrom,it) }
fun String.toShortName(cameFrom: String) =
lowercase()
.replace(reColonHead, "")
.replace(reColonTail, "")
.replace(reNotCode, "_")
.replace(reUnderTail, "")
.notEmpty()?.let { ShortName(cameFrom = cameFrom, it) }