1
0
mirror of https://github.com/tateisu/SubwayTooter synced 2025-02-05 13:17:43 +01:00

support some html tags

This commit is contained in:
tateisu 2021-01-29 21:59:27 +09:00
parent 832fa23698
commit 6676b9b41f
2 changed files with 1106 additions and 808 deletions

View File

@ -1,9 +1,11 @@
package jp.juggler.subwaytooter.util package jp.juggler.subwaytooter.util
import android.graphics.Typeface
import android.text.Spannable import android.text.Spannable
import android.text.SpannableString import android.text.SpannableString
import android.text.SpannableStringBuilder import android.text.SpannableStringBuilder
import android.text.Spanned import android.text.Spanned
import android.text.style.*
import jp.juggler.subwaytooter.App1 import jp.juggler.subwaytooter.App1
import jp.juggler.subwaytooter.Pref import jp.juggler.subwaytooter.Pref
import jp.juggler.subwaytooter.R import jp.juggler.subwaytooter.R
@ -14,6 +16,8 @@ import jp.juggler.subwaytooter.table.HighlightWord
import jp.juggler.util.* import jp.juggler.util.*
import java.util.* import java.util.*
import java.util.regex.Pattern import java.util.regex.Pattern
import kotlin.math.max
import kotlin.math.min
object HTMLDecoder { object HTMLDecoder {
@ -35,6 +39,8 @@ object HTMLDecoder {
private val reHref = "\\bhref=\"([^\"]*)\"".asciiPattern() private val reHref = "\\bhref=\"([^\"]*)\"".asciiPattern()
private val reAttribute = "\\s+([A-Za-z0-9:_-]+)\\s*=([\"'])([^>]*?)\\2".asciiPattern() private val reAttribute = "\\s+([A-Za-z0-9:_-]+)\\s*=([\"'])([^>]*?)\\2".asciiPattern()
private val reShortcode = ":[A-Za-z0-9_-]+:".asciiPattern() private val reShortcode = ":[A-Za-z0-9_-]+:".asciiPattern()
private val reNotestockEmojiAlt = """\A:[^:]+:\z""".toRegex()
private val reUrlStart = """\Ahttps?://""".toRegex()
// Block-level Elements // Block-level Elements
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
@ -79,11 +85,10 @@ object HTMLDecoder {
"pre", "pre",
"progress", "progress",
"section", "section",
"textarea",
"table", "table",
"tbody", "tbody",
"textarea",
"tfoot", "tfoot",
"th",
"thead", "thead",
"tr", "tr",
"ul", "ul",
@ -274,6 +279,128 @@ object HTMLDecoder {
} }
} }
/**
 * Counts the line breaks contained in the trailing whitespace of this builder.
 *
 * The text is scanned backwards from the last character. Every '\n' increments
 * the count, other whitespace is skipped, and the scan stops at the first
 * non-whitespace character.
 *
 * The scan includes index 0, so a buffer consisting only of whitespace has
 * every one of its newlines counted (the first character used to be skipped).
 *
 * @return number of '\n' characters found in the trailing whitespace run.
 */
private fun SpannableStringBuilder.lastBrCount(): Int {
    var count = 0
    // An empty builder yields an empty range, so the loop body is skipped.
    for (pos in length - 1 downTo 0) {
        val c = this[pos]
        // '\n' is itself whitespace, so it has to be tested first.
        if (c == '\n') {
            ++count
        } else if (!Character.isWhitespace(c)) {
            break
        }
    }
    return count
}
// Bullet strings for unordered list items. ListContext.increment() selects one with
// nestLevelUnordered % listMarkers.size and appends a single space after it.
// NOTE(review): the first and last entries are empty strings here; they may be
// glyphs that were lost in transcription. Confirm against the upstream file.
private val listMarkers = arrayOf("", "-", "*", "")
// Kind of list-like container that a ListContext was created for.
private enum class ListType {
// Not inside any list or quote.
None,
// Numbered list; ListContext.increment() produces "1. ", "2. ", ...
Ordered,
// Bulleted list; ListContext.increment() produces a marker from listMarkers.
Unordered,
// Definition list; ListContext.increment() produces an empty prefix.
Definition,
// Quotation; ListContext.increment() produces an empty prefix.
Quote
}
/**
 * Nesting state handed down while rendering list-like and quote elements.
 *
 * @property type kind of the innermost container this context was created for.
 * @property nestLevelOrdered how many ordered lists enclose the current position.
 * @property nestLevelUnordered how many unordered lists enclose the current position.
 * @property nestLevelDefinition how many definition lists enclose the current position.
 * @property nestLevelQuote how many quotes enclose the current position.
 * @property order counter of the last number handed out by [increment] for an
 *   ordered list; every derived context starts again from 0.
 */
private class ListContext(
    val type: ListType,
    val nestLevelOrdered: Int,
    val nestLevelUnordered: Int,
    val nestLevelDefinition: Int,
    val nestLevelQuote: Int,
    var order: Int = 0
) {

    // Combined depth of the three list kinds; the quote depth is not part of it.
    private val listDepth: Int
        get() = nestLevelOrdered + nestLevelUnordered + nestLevelDefinition

    // Builds a child context of the given type; levels that are not passed are inherited.
    private fun nested(
        newType: ListType,
        ordered: Int = nestLevelOrdered,
        unordered: Int = nestLevelUnordered,
        definition: Int = nestLevelDefinition,
        quote: Int = nestLevelQuote
    ) = ListContext(newType, ordered, unordered, definition, quote)

    /** Context for the content of an ordered list nested one level deeper. */
    fun subOrdered() = nested(ListType.Ordered, ordered = nestLevelOrdered + 1)

    /** Context for the content of an unordered list nested one level deeper. */
    fun subUnordered() = nested(ListType.Unordered, unordered = nestLevelUnordered + 1)

    /** Context for the content of a definition list nested one level deeper. */
    fun subDefinition() = nested(ListType.Definition, definition = nestLevelDefinition + 1)

    /** Context for the content of a quote nested one level deeper. */
    fun subQuote() = nested(ListType.Quote, quote = nestLevelQuote + 1)

    /** Two spaces for every list level beyond the first; empty at depth 0 and 1. */
    val indent: String
        get() {
            val extraLevels = max(0, listDepth - 1)
            return " ".repeat(2 * extraLevels)
        }

    /**
     * Returns the prefix for the next item of this list.
     *
     * Ordered lists advance [order] and return "<n>. "; unordered lists return
     * the marker chosen by nesting level followed by a space; every other type
     * returns an empty string.
     */
    fun increment(): String = when (type) {
        ListType.Ordered -> {
            order += 1
            "$order. "
        }
        ListType.Unordered -> {
            val marker = listMarkers[nestLevelUnordered % listMarkers.size]
            "$marker "
        }
        ListType.Definition, ListType.None, ListType.Quote -> ""
    }

    /** True when at least one ordered, unordered or definition list encloses the position. */
    fun inList() = listDepth > 0

    /** Colour for the current quote depth, cycling through MisskeyMarkdownDecoder.quoteNestColors. */
    fun quoteColor(): Int {
        val palette = MisskeyMarkdownDecoder.quoteNestColors
        val slot = nestLevelQuote % palette.size
        return palette[slot]
    }
}
/**
 * Splits this builder into lines.
 *
 * - Trailing whitespace of the whole input is ignored.
 * - Each emitted line keeps its terminating '\n'; the final line has none.
 * - Lines that end at or before the first non-whitespace character of the
 *   input (leading blank lines) are dropped.
 * - Whitespace at the start of an emitted line is kept as it is.
 * - The result is never empty: when nothing is emitted, a single empty
 *   builder is returned, because a block element always has at least one line.
 *
 * @return the lines, each cut from this builder with subSequence.
 */
fun SpannableStringBuilder.splitLines(): ArrayList<SpannableStringBuilder> {
    val src = this

    // Exclusive end of the content once trailing whitespace is ignored.
    var contentEnd = src.length
    while (contentEnd > 0 && CharacterGroup.isWhitespace(src[contentEnd - 1].toInt())) {
        contentEnd -= 1
    }

    // Index of the first non-whitespace character (== contentEnd when there is none).
    var firstVisible = 0
    while (firstVisible < contentEnd && CharacterGroup.isWhitespace(src[firstVisible].toInt())) {
        firstVisible += 1
    }

    val lines = ArrayList<SpannableStringBuilder>()
    var cursor = 0
    while (cursor < contentEnd) {
        val lineStart = cursor

        // Advance to the next newline, or to the end of the content.
        var newlineAt = lineStart
        while (newlineAt < contentEnd && src[newlineAt] != '\n') {
            newlineAt += 1
        }

        // The newline itself belongs to the line it terminates.
        val lineEnd = if (newlineAt < contentEnd) newlineAt + 1 else contentEnd
        cursor = newlineAt + 1

        // Only lines reaching past the first non-whitespace character are emitted.
        if (lineEnd > firstVisible) {
            lines.add(src.subSequence(lineStart, lineEnd) as SpannableStringBuilder)
        }
    }

    if (lines.isEmpty()) {
        lines.add(SpannableStringBuilder())
    }
    return lines
}
// Matches the final line of a text. The match is anchored at the end of input (\z)
// and begins either at the start of input (\A) or at the last '\n'.
// Capture group 1 holds the line's characters without that leading newline;
// group 0 (the whole match) includes the newline when there is one.
private val reLastLine = """(?:\A|\n)([^\n]*)\z""".toRegex()
private class Node { private class Node {
val child_nodes = ArrayList<Node>() val child_nodes = ArrayList<Node>()
@ -337,9 +464,12 @@ object HTMLDecoder {
fun encodeSpan( fun encodeSpan(
options: DecodeOptions, options: DecodeOptions,
sb : SpannableStringBuilder sb: SpannableStringBuilder,
listContext: ListContext
) { ) {
if(TAG_TEXT == tag) { val isBlock = blockLevelElements.contains(tag)
when(tag){
TAG_TEXT->{
if (options.context != null && options.decodeEmoji) { if (options.context != null && options.decodeEmoji) {
sb.append(options.decodeEmoji(decodeEntity(text))) sb.append(options.decodeEmoji(decodeEntity(text)))
} else { } else {
@ -347,17 +477,10 @@ object HTMLDecoder {
} }
return return
} }
"img"->{
val sb_tmp = when(tag) {
"a", "style", "script" -> SpannableStringBuilder()
else -> sb
}
if("img" == tag) {
var replaced = false
val reNotestockEmojiAlt= """\A:[^:]+:\z""".toRegex()
if(options.unwrapEmojiImageTag) {
val attrs = parseAttributes(text) val attrs = parseAttributes(text)
if (options.unwrapEmojiImageTag) {
val cssClass = attrs["class"] val cssClass = attrs["class"]
val title = attrs["title"] val title = attrs["title"]
val url = attrs["src"] val url = attrs["src"]
@ -367,12 +490,11 @@ object HTMLDecoder {
&& cssClass.contains("emojione") && cssClass.contains("emojione")
&& reShortcode.matcher(title).find() && reShortcode.matcher(title).find()
) { ) {
replaced = true sb.append(options.decodeEmoji(title))
sb_tmp.append(options.decodeEmoji(title)) return
} else if (cssClass == "emoji" && url != null && alt != null && reNotestockEmojiAlt.matches(alt)) { } else if (cssClass == "emoji" && url != null && alt != null && reNotestockEmojiAlt.matches(alt)) {
// notestock custom emoji // notestock custom emoji
replaced = true sb.run {
sb_tmp.run{
val start = length val start = length
append(alt) append(alt)
val end = length val end = length
@ -383,21 +505,99 @@ object HTMLDecoder {
Spanned.SPAN_EXCLUSIVE_EXCLUSIVE Spanned.SPAN_EXCLUSIVE_EXCLUSIVE
) )
} }
return
} }
} }
if(! replaced) { sb.append("<img ")
sb_tmp.append("<img/>") val url = attrs["src"] ?: ""
val caption = attrs["alt"] ?: ""
if (caption.isNotEmpty() || url.isNotEmpty()) {
val start = sb.length
sb.append(caption.notEmpty() ?: url)
if (reUrlStart.find(url) != null) {
val span = MyClickableSpan(LinkInfo(url = url, ac = null, tag = null, caption = caption, mention = null))
sb.setSpan(span, start, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
} }
} else { sb.append(" ")
for(child in child_nodes) {
child.encodeSpan(options, sb_tmp)
} }
// sb_tmpを作成したa 以外のタグ(style,script)は読み捨てる sb.append("/>")
return
} }
if("a" == tag) { "script","style" -> return
"th", "td" -> sb.append("|")
else -> if( isBlock && tag !="script" && tag != "style" ){
val lastLine = reLastLine.find(sb)?.groupValues?.firstOrNull() ?: ""
if(CharacterGroup.reNotWhitespace.matcher(lastLine).find()){
sb.append("\n")
}
}
}
var spanStart = 0
val tmpFlusherOriginal: (SpannableStringBuilder) -> Unit = {
when (tag) {
"s", "strike", "del" -> {
sb.setSpan(StrikethroughSpan(), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"em" -> {
sb.setSpan(fontSpan(Typeface.defaultFromStyle(Typeface.ITALIC)), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"strong" -> {
sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"tr" -> {
sb.append("|")
}
"style", "script" -> {
// sb_tmpにレンダリングした分は読み捨てる
}
"h1" -> {
sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.setSpan(RelativeSizeSpan(1.8f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"h2" -> {
sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.setSpan(RelativeSizeSpan(1.6f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"h3" -> {
sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.setSpan(RelativeSizeSpan(1.4f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"h4" -> {
sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.setSpan(RelativeSizeSpan(1.2f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"h5" -> {
sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.setSpan(RelativeSizeSpan(1.0f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"h6" -> {
sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.setSpan(RelativeSizeSpan(0.8f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"pre" -> {
sb.setSpan(BackgroundColorSpan(0x40808080), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.setSpan(RelativeSizeSpan(0.7f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.setSpan(fontSpan(Typeface.MONOSPACE), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"code" ->{
sb.setSpan(BackgroundColorSpan(0x40808080), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.setSpan(fontSpan(Typeface.MONOSPACE), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
"hr" -> sb.append("----------")
}
}
val tmpFlusher = when (tag) {
"a" -> {
{ sb_tmp ->
val linkInfo = formatLinkCaption(options, sb_tmp, href ?: "") val linkInfo = formatLinkCaption(options, sb_tmp, href ?: "")
val caption = linkInfo.caption val caption = linkInfo.caption
if (caption.isNotEmpty()) { if (caption.isNotEmpty()) {
@ -434,35 +634,121 @@ object HTMLDecoder {
} }
} }
} }
when {
// 空のテキストには改行を追加しない
sb.isEmpty() -> {
} }
// 改行タグ "style", "script" -> {
"br" == tag -> sb.append('\n') {
// 読み捨てる
// 最適化によりtmpFlusherOriginalとこのラムダが同一オブジェクトにならないようにする
}
}
"blockquote" -> {
{ sb_tmp ->
val bg_color = listContext.quoteColor()
// TextView の文字装飾では「ブロック要素の入れ子」を表現できない
// 内容の各行の始端に何か追加するというのがまずキツい
// しかし各行の頭に引用マークをつけないと引用のネストで意味が通じなくなってしまう
val startItalic = sb.length
sb_tmp.splitLines().forEach { line ->
val lineStart = sb.length
sb.append("> ")
sb.setSpan(BackgroundColorSpan(bg_color), lineStart, lineStart + 1, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
sb.append(line)
}
sb.setSpan(fontSpan(Typeface.defaultFromStyle(Typeface.ITALIC)), startItalic, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
}
"li" -> {
{ sb_tmp ->
val lineHeader1 = listContext.increment()
val lineHeader2 = " ".repeat(lineHeader1.length)
sb_tmp.splitLines().forEachIndexed { i, line ->
sb.append(if (i == 0) lineHeader1 else lineHeader2)
sb.append(line)
}
}
}
"dt" -> {
{ sb_tmp ->
val prefix = listContext.increment()
val startBold = sb.length
sb_tmp.splitLines().forEachIndexed { i, line ->
sb.append(prefix)
sb.append(line)
}
sb.setSpan(fontSpan(Typeface.defaultFromStyle(Typeface.BOLD)), startBold, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
}
}
"dd" -> {
{ sb_tmp ->
val prefix = listContext.increment() + " "
sb_tmp.splitLines().forEachIndexed { i, line ->
sb.append(prefix)
sb.append(line)
}
}
}
else -> tmpFlusherOriginal
}
val sb_tmp = if(tmpFlusher == tmpFlusherOriginal) {
sb
}else {
SpannableStringBuilder()
}
spanStart = sb_tmp.length
val childListContext = when (tag) {
"ol" -> listContext.subOrdered()
"ul" -> listContext.subUnordered()
"dl" -> listContext.subDefinition()
"blockquote" -> listContext.subQuote()
else -> listContext
}
fun String.tagsCanRemoveNearSpaces() = when(this){
"li","ol","ul","dl","dt","dd","blockquote","h1","h2","h3","h4","h5","h6",
"table","tbody","thead","tfoot","tr","td","th" ->true
else->false
}
val childLast = child_nodes.size-1
child_nodes.forEachIndexed{ i,child->
if(child.tag == TAG_TEXT && child.text.isBlank() && isBlock){
val preNode = child_nodes.elementAtOrNull(i-1)
val nextNode = child_nodes.elementAtOrNull(i+1)
if(preNode?.tag?.tagsCanRemoveNearSpaces()== true ||
nextNode?.tag?.tagsCanRemoveNearSpaces()==true ||
((i==0 || i==childLast) && tag.tagsCanRemoveNearSpaces())
){
return@forEachIndexed
}
}
child.encodeSpan(options, sb_tmp, childListContext)
}
tmpFlusher(sb_tmp)
if (isBlock) {
// ブロック要素 // ブロック要素
blockLevelElements.contains(tag) -> {
// 末尾の改行を数える
var last_br_count = 0
var last = sb.length - 1
loop@ while(last > 0) {
val c = sb[last --]
when {
c == '\n' -> {
++ last_br_count
continue@loop
}
Character.isWhitespace(c) -> continue@loop
else -> break@loop
}
}
// 末尾の改行が2文字未満なら改行を追加する // 末尾の改行が2文字未満なら改行を追加する
while(last_br_count ++ < 2) sb.append('\n') var appendCount = 2 - sb.lastBrCount()
if( listContext.inList()) appendCount = min(1,appendCount)
when(tag){
"tr" -> appendCount = min(1,appendCount)
"thead","tfoot","tbody" -> appendCount = 0
} }
repeat(appendCount){ sb.append( "\n" )}
} else {
// インライン要素で改行タグでテキストがカラでないなら、改行を追加する
if ("br" == tag && sb.isNotEmpty()) sb.append('\n')
} }
} }
} }
@ -497,7 +783,7 @@ object HTMLDecoder {
} }
// encode to SpannableStringBuilder // encode to SpannableStringBuilder
rootNode.encodeSpan(options, sb) rootNode.encodeSpan(options, sb, ListContext(type = ListType.None, 0, 0, 0,0))
// 末尾の空白を取り除く // 末尾の空白を取り除く
sb.removeEndWhitespaces() sb.removeEndWhitespaces()
@ -514,7 +800,7 @@ object HTMLDecoder {
status: TootStatus status: TootStatus
): Spannable? { ): Spannable? {
val mentionList: List<TootMention>? = status.mentions val mentionList: List<TootMention>? = status.mentions
val link_tag : Any? = status val link_tag: Any = status
if (mentionList == null || mentionList.isEmpty()) return null if (mentionList == null || mentionList.isEmpty()) return null

View File

@ -80,6 +80,18 @@ object CharacterGroup {
) )
"[${quotedKeys}]+".asciiPattern() "[${quotedKeys}]+".asciiPattern()
} }
// Lazily built pattern matching one or more consecutive characters that are
// NOT among the keys of mapWhitespace. The keys are collected into one string,
// quoted as a literal, and placed inside a negated character class.
internal val reNotWhitespace by lazy {
    val keyCount = mapWhitespace.size()
    val whitespaceChars = buildString(keyCount) {
        repeat(keyCount) { index ->
            append(mapWhitespace.keyAt(index).toChar())
        }
    }
    val quotedKeys = Pattern.quote(whitespaceChars)
    "[^${quotedKeys}]+".asciiPattern()
}
private fun SparseBooleanArray.keys() = (0 until size()).map { keyAt(it) } private fun SparseBooleanArray.keys() = (0 until size()).map { keyAt(it) }