From 6676b9b41fc32663ed0209782bd0889f1e95db79 Mon Sep 17 00:00:00 2001 From: tateisu Date: Fri, 29 Jan 2021 21:59:27 +0900 Subject: [PATCH] support some html tags --- .../juggler/subwaytooter/util/HTMLDecoder.kt | 1900 ++++++++++------- .../java/jp/juggler/util/CharacterGroup.kt | 14 +- 2 files changed, 1106 insertions(+), 808 deletions(-) diff --git a/app/src/main/java/jp/juggler/subwaytooter/util/HTMLDecoder.kt b/app/src/main/java/jp/juggler/subwaytooter/util/HTMLDecoder.kt index f1f65ad6..0ee0bcdc 100644 --- a/app/src/main/java/jp/juggler/subwaytooter/util/HTMLDecoder.kt +++ b/app/src/main/java/jp/juggler/subwaytooter/util/HTMLDecoder.kt @@ -1,9 +1,11 @@ package jp.juggler.subwaytooter.util +import android.graphics.Typeface import android.text.Spannable import android.text.SpannableString import android.text.SpannableStringBuilder import android.text.Spanned +import android.text.style.* import jp.juggler.subwaytooter.App1 import jp.juggler.subwaytooter.Pref import jp.juggler.subwaytooter.R @@ -14,32 +16,36 @@ import jp.juggler.subwaytooter.table.HighlightWord import jp.juggler.util.* import java.util.* import java.util.regex.Pattern +import kotlin.math.max +import kotlin.math.min object HTMLDecoder { - - private val log = LogCategory("HTMLDecoder") - - private const val DEBUG_HTML_PARSER = false - - private enum class OpenType { - Open, - Close, - OpenClose, - } - - private const val TAG_TEXT = "<>text" - private const val TAG_END = "<>end" - - private val reTag = "<(/?)(\\w+)".asciiPattern() - private val reTagEnd = "(/?)>$".asciiPattern() - private val reHref = "\\bhref=\"([^\"]*)\"".asciiPattern() - private val reAttribute = "\\s+([A-Za-z0-9:_-]+)\\s*=([\"'])([^>]*?)\\2".asciiPattern() - private val reShortcode = ":[A-Za-z0-9_-]+:".asciiPattern() - - // Block-level Elements - // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements - // https://www.w3schools.com/html/html_blocks.asp - private val blockLevelElements = arrayOf( + + private val log = LogCategory("HTMLDecoder") + + private const val DEBUG_HTML_PARSER = false + + private enum class OpenType { + Open, + Close, + OpenClose, + } + + private const val TAG_TEXT = "<>text" + private const val TAG_END = "<>end" + + private val reTag = "<(/?)(\\w+)".asciiPattern() + private val reTagEnd = "(/?)>$".asciiPattern() + private val reHref = "\\bhref=\"([^\"]*)\"".asciiPattern() + private val reAttribute = "\\s+([A-Za-z0-9:_-]+)\\s*=([\"'])([^>]*?)\\2".asciiPattern() + private val reShortcode = ":[A-Za-z0-9_-]+:".asciiPattern() + private val reNotestockEmojiAlt = """\A:[^:]+:\z""".toRegex() + private val reUrlStart = """\Ahttps?://""".toRegex() + + // Block-level Elements + // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements + // https://www.w3schools.com/html/html_blocks.asp + private val blockLevelElements = arrayOf( "address", "article", "aside", @@ -79,22 +85,21 @@ object HTMLDecoder { "pre", "progress", "section", + "textarea", "table", "tbody", - "textarea", "tfoot", - "th", "thead", "tr", "ul", "video" ).toHashSet() - - // Empty element - // https://developer.mozilla.org/en-US/docs/Glossary/Empty_element - // elements that cannot have any child nodes (i.e., nested elements or text nodes). - // In HTML, using a closing tag on an empty element is usually invalid. - private val emptyElements = arrayOf( + + // Empty element + // https://developer.mozilla.org/en-US/docs/Glossary/Empty_element + // elements that cannot have any child nodes (i.e., nested elements or text nodes). + // In HTML, using a closing tag on an empty element is usually invalid. + private val emptyElements = arrayOf( "area", "base", "br", @@ -111,519 +116,800 @@ object HTMLDecoder { "track", "wbr" ).toHashSet() - - private val reEntity = "&(#?)(\\w+);".asciiPattern() - private val entity_map = HashMap() - private fun _addEntity(s : String, c : Char) { - entity_map[s] = c - } - - private fun chr(num : Int) : Char { - return num.toChar() - } - - fun decodeEntity(src : String?) : String { - src ?: return "" - var sb : StringBuilder? = null - val m = reEntity.matcher(src) - var last_end = 0 - while(m.find()) { - if(sb == null) sb = StringBuilder() - val start = m.start() - val end = m.end() - try { - if(start > last_end) { - sb.append(src.substring(last_end, start)) - } - val is_numeric = m.groupEx(1) !!.isNotEmpty() - val part = m.groupEx(2) !! - if(! is_numeric) { - val c = entity_map[part] - if(c != null) { - sb.append(c) - continue - } - } else { - try { - val cp = when { - part[0] == 'x' -> part.substring(1).toInt(16) - else -> part.toInt(10) - } - when { - Character.isBmpCodePoint(cp) -> sb.append(cp.toChar()) - - Character.isValidCodePoint(cp) -> { - sb.append(Character.highSurrogate(cp)) - sb.append(Character.lowSurrogate(cp)) - } - - else -> sb.append('?') - } - continue - } catch(ex : Throwable) { - log.trace(ex) - } - } - sb.append(src.substring(start, end)) - } finally { - last_end = end - } - } - - // 全くマッチしなかった - sb ?: return src - - val end = src.length - if(end > last_end) { - sb.append(src.substring(last_end, end)) - } - return sb.toString() - } - - fun encodeEntity(src : String) : String { - val size = src.length - val sb = StringBuilder() - sb.ensureCapacity(size) - for(i in 0 until size) { - when(val c = src[i]) { + + private val reEntity = "&(#?)(\\w+);".asciiPattern() + private val entity_map = HashMap() + private fun _addEntity(s: String, c: Char) { + entity_map[s] = c + } + + private fun chr(num: Int): Char { + return num.toChar() + } + + fun decodeEntity(src: String?): String { + src ?: return "" + var sb: StringBuilder? = null + val m = reEntity.matcher(src) + var last_end = 0 + while (m.find()) { + if (sb == null) sb = StringBuilder() + val start = m.start() + val end = m.end() + try { + if (start > last_end) { + sb.append(src.substring(last_end, start)) + } + val is_numeric = m.groupEx(1)!!.isNotEmpty() + val part = m.groupEx(2)!! + if (!is_numeric) { + val c = entity_map[part] + if (c != null) { + sb.append(c) + continue + } + } else { + try { + val cp = when { + part[0] == 'x' -> part.substring(1).toInt(16) + else -> part.toInt(10) + } + when { + Character.isBmpCodePoint(cp) -> sb.append(cp.toChar()) + + Character.isValidCodePoint(cp) -> { + sb.append(Character.highSurrogate(cp)) + sb.append(Character.lowSurrogate(cp)) + } + + else -> sb.append('?') + } + continue + } catch (ex: Throwable) { + log.trace(ex) + } + } + sb.append(src.substring(start, end)) + } finally { + last_end = end + } + } + + // 全くマッチしなかった + sb ?: return src + + val end = src.length + if (end > last_end) { + sb.append(src.substring(last_end, end)) + } + return sb.toString() + } + + fun encodeEntity(src: String): String { + val size = src.length + val sb = StringBuilder() + sb.ensureCapacity(size) + for (i in 0 until size) { + when (val c = src[i]) { '<' -> sb.append("<") '>' -> sb.append(">") '"' -> sb.append(""") '\'' -> sb.append("'") '&' -> sb.append("&") - else -> sb.append(c) - } - } - return sb.toString() - } - - ////////////////////////////////////////////////////////////////////////////////////// - - private val reDoctype = """\A\s*]*>""".asciiPattern(Pattern.CASE_INSENSITIVE) - private val reComment = """""".asciiPattern(Pattern.DOTALL) - - private fun String.quoteMeta() = Pattern.quote(this) - - private class TokenParser(srcArg : String) { - - val src : String - var next : Int = 0 - - var open_type = OpenType.OpenClose - var tag = "" - var text = "" - - init { - this.src = srcArg - .replaceFirst(reDoctype, "") - .replaceAll(reComment, " ") - eat() - } - - fun eat() { - // end? - if(next >= src.length) { - tag = TAG_END - open_type = OpenType.OpenClose - return - } - - // text ? - var end = src.indexOf('<', next) - if(end == - 1) end = src.length - if(end > next) { - this.text = src.substring(next, end) - this.tag = TAG_TEXT - this.open_type = OpenType.OpenClose - next = end - return - } - - // tag ? - end = src.indexOf('>', next) - if(end == - 1) { - end = src.length - } else { - ++ end - } - text = src.substring(next, end) - - next = end - - val m = reTag.matcher(text) - if(m.find()) { - val is_close = m.groupEx(1) !!.isNotEmpty() - tag = m.groupEx(2) !!.toLowerCase(Locale.JAPAN) - - val m2 = reTagEnd.matcher(text) - val is_openclose = when { - m2.find() -> m2.groupEx(1) !!.isNotEmpty() - else -> false - } - - open_type = when { - is_close -> OpenType.Close - is_openclose || emptyElements.contains(tag) -> OpenType.OpenClose - else -> OpenType.Open - } - - } else { - tag = TAG_TEXT - this.open_type = OpenType.OpenClose - } + else -> sb.append(c) + } + } + return sb.toString() + } + + ////////////////////////////////////////////////////////////////////////////////////// + + private val reDoctype = """\A\s*]*>""".asciiPattern(Pattern.CASE_INSENSITIVE) + private val reComment = """""".asciiPattern(Pattern.DOTALL) + + private fun String.quoteMeta() = Pattern.quote(this) + + private class TokenParser(srcArg: String) { + + val src: String + var next: Int = 0 + + var open_type = OpenType.OpenClose + var tag = "" + var text = "" + + init { + this.src = srcArg + .replaceFirst(reDoctype, "") + .replaceAll(reComment, " ") + eat() + } + + fun eat() { + // end? + if (next >= src.length) { + tag = TAG_END + open_type = OpenType.OpenClose + return + } + + // text ? + var end = src.indexOf('<', next) + if (end == -1) end = src.length + if (end > next) { + this.text = src.substring(next, end) + this.tag = TAG_TEXT + this.open_type = OpenType.OpenClose + next = end + return + } + + // tag ? + end = src.indexOf('>', next) + if (end == -1) { + end = src.length + } else { + ++end + } + text = src.substring(next, end) + + next = end + + val m = reTag.matcher(text) + if (m.find()) { + val is_close = m.groupEx(1)!!.isNotEmpty() + tag = m.groupEx(2)!!.toLowerCase(Locale.JAPAN) + + val m2 = reTagEnd.matcher(text) + val is_openclose = when { + m2.find() -> m2.groupEx(1)!!.isNotEmpty() + else -> false + } + + open_type = when { + is_close -> OpenType.Close + is_openclose || emptyElements.contains(tag) -> OpenType.OpenClose + else -> OpenType.Open + } + + } else { + tag = TAG_TEXT + this.open_type = OpenType.OpenClose + } + } + } + + // 末尾の改行を数える + private fun SpannableStringBuilder.lastBrCount(): Int { + var count = 0 + var pos = length - 1 + while (pos > 0) { + val c = this[pos--] + when { + c == '\n' -> { + ++count + continue + } + Character.isWhitespace(c) -> continue + else -> break + } + } + return count + } + + private val listMarkers = arrayOf("●", "-", "*", "・") + + private enum class ListType { + None, + Ordered, + Unordered, + Definition, + Quote + } + + private class ListContext( + val type: ListType, + val nestLevelOrdered: Int, + val nestLevelUnordered: Int, + val nestLevelDefinition: Int, + val nestLevelQuote: Int, + var order: Int = 0 + ) { + fun subOrdered() = ListContext( + type = ListType.Ordered, + nestLevelOrdered + 1, + nestLevelUnordered, + nestLevelDefinition, + nestLevelQuote + ) + fun subUnordered() = ListContext( + type = ListType.Unordered, + nestLevelOrdered, + nestLevelUnordered + 1, + nestLevelDefinition, + nestLevelQuote + ) + fun subDefinition() = ListContext( + type = ListType.Definition, + nestLevelOrdered, + nestLevelUnordered, + nestLevelDefinition + 1, + nestLevelQuote + ) + fun subQuote() = ListContext( + type = ListType.Quote, + nestLevelOrdered, + nestLevelUnordered, + nestLevelDefinition, + nestLevelQuote + 1 + ) + + val indent: String + get() = " ".repeat(2 * max(0, nestLevelOrdered + nestLevelUnordered + nestLevelDefinition - 1)) + + fun increment() = when (type) { + ListType.Ordered -> "${++order}. " + ListType.Unordered -> "${listMarkers[nestLevelUnordered % listMarkers.size]} " + ListType.Definition -> "" + else -> "" + } + + fun inList() = nestLevelOrdered + nestLevelUnordered + nestLevelDefinition > 0 + + fun quoteColor(): Int { + val quoteNestColors = MisskeyMarkdownDecoder.quoteNestColors + return quoteNestColors[nestLevelQuote % quoteNestColors.size] } } - + + // SpannableStringBuilderを行ごとに分解する + // 行末の改行文字は各行の末尾に残る + // 最終行の長さが0(改行文字もなし)だった場合は出力されない + fun SpannableStringBuilder.splitLines() = + ArrayList().also { dst -> + // 入力の末尾のtrim + var end = this.length + while (end > 0 && CharacterGroup.isWhitespace(this[end - 1].toInt())) --end + + // 入力の最初の非空白文字の位置を調べておく + var firstNonSpace = 0 + while (firstNonSpace = end) end else i + 1 + ++i + + // 行頭の空白を削る +// while (lineStart < lineEnd && +// this[lineStart] != '\n' && +// CharacterGroup.isWhitespace(this[lineStart].toInt()) +// ) ++lineStart + + // 最初の非空白文字以降の行を出力する + if(lineEnd > firstNonSpace) { + dst.add(this.subSequence(lineStart, lineEnd) as SpannableStringBuilder) + } + } + if(dst.isEmpty()){ + // ブロック要素は最低1行は存在するので、1行だけの要素を作る + dst.add(SpannableStringBuilder()) + } + } + + private val reLastLine = """(?:\A|\n)([^\n]*)\z""".toRegex() + private class Node { - - val child_nodes = ArrayList() - - val tag : String - val text : String - - private val href : String? - get() { - val m = reHref.matcher(text) - if(m.find()) { - val href = decodeEntity(m.groupEx(1)) - if(href.isNotEmpty()) { - return href - } - } - return null - } - - constructor() { - tag = "<>root" - text = "" - } - - constructor(t : TokenParser) { - this.tag = t.tag - this.text = t.text - } - - fun addChild(t : TokenParser, indent : String) { - if(DEBUG_HTML_PARSER) log.d("addChild: $indent($tag") - while(t.tag != TAG_END) { - - // 閉じるタグ - if(t.open_type == OpenType.Close) { - if(t.tag != this.tag) { - // 閉じるタグが現在の階層とマッチしないなら無視する - log.w("unexpected close tag! ${t.tag}") - t.eat() - continue - } - // この階層の終端 - t.eat() - break - } - - val open_type = t.open_type - val child = Node(t) - child_nodes.add(child) - t.eat() - - if(DEBUG_HTML_PARSER) - log.d("addChild: $indent|${child.tag} $open_type [${child.text.quoteMeta()}]") - - if(open_type == OpenType.Open) { - child.addChild(t, "$indent--") - } - } - if(DEBUG_HTML_PARSER) log.d("addChild: $indent)$tag") - } - - fun encodeSpan( - options : DecodeOptions, - sb : SpannableStringBuilder + + val child_nodes = ArrayList() + + val tag: String + val text: String + + private val href: String? + get() { + val m = reHref.matcher(text) + if (m.find()) { + val href = decodeEntity(m.groupEx(1)) + if (href.isNotEmpty()) { + return href + } + } + return null + } + + constructor() { + tag = "<>root" + text = "" + } + + constructor(t: TokenParser) { + this.tag = t.tag + this.text = t.text + } + + fun addChild(t: TokenParser, indent: String) { + if (DEBUG_HTML_PARSER) log.d("addChild: $indent($tag") + while (t.tag != TAG_END) { + + // 閉じるタグ + if (t.open_type == OpenType.Close) { + if (t.tag != this.tag) { + // 閉じるタグが現在の階層とマッチしないなら無視する + log.w("unexpected close tag! ${t.tag}") + t.eat() + continue + } + // この階層の終端 + t.eat() + break + } + + val open_type = t.open_type + val child = Node(t) + child_nodes.add(child) + t.eat() + + if (DEBUG_HTML_PARSER) + log.d("addChild: $indent|${child.tag} $open_type [${child.text.quoteMeta()}]") + + if (open_type == OpenType.Open) { + child.addChild(t, "$indent--") + } + } + if (DEBUG_HTML_PARSER) log.d("addChild: $indent)$tag") + } + + fun encodeSpan( + options: DecodeOptions, + sb: SpannableStringBuilder, + listContext: ListContext ) { - if(TAG_TEXT == tag) { - if(options.context != null && options.decodeEmoji) { - sb.append(options.decodeEmoji(decodeEntity(text))) - } else { - sb.append(decodeEntity(text)) + val isBlock = blockLevelElements.contains(tag) + when(tag){ + TAG_TEXT->{ + if (options.context != null && options.decodeEmoji) { + sb.append(options.decodeEmoji(decodeEntity(text))) + } else { + sb.append(decodeEntity(text)) + } + return } - return - } - - val sb_tmp = when(tag) { - "a", "style", "script" -> SpannableStringBuilder() - else -> sb - } - - if("img" == tag) { - var replaced = false - val reNotestockEmojiAlt= """\A:[^:]+:\z""".toRegex() - if(options.unwrapEmojiImageTag) { + "img"->{ val attrs = parseAttributes(text) - val cssClass = attrs["class"] - val title = attrs["title"] - val url = attrs["src"] - val alt = attrs["alt"] - if(cssClass != null - && title != null - && cssClass.contains("emojione") - && reShortcode.matcher(title).find() - ) { - replaced = true - sb_tmp.append(options.decodeEmoji(title)) - }else if( cssClass == "emoji" && url!=null && alt !=null && reNotestockEmojiAlt.matches(alt) ){ - // notestock custom emoji - replaced = true - sb_tmp.run{ - val start = length - append(alt) - val end = length - setSpan( - NetworkEmojiSpan(url, scale = options.enlargeCustomEmoji), - start, - end, - Spanned.SPAN_EXCLUSIVE_EXCLUSIVE - ) + + if (options.unwrapEmojiImageTag) { + val cssClass = attrs["class"] + val title = attrs["title"] + val url = attrs["src"] + val alt = attrs["alt"] + if (cssClass != null + && title != null + && cssClass.contains("emojione") + && reShortcode.matcher(title).find() + ) { + sb.append(options.decodeEmoji(title)) + return + } else if (cssClass == "emoji" && url != null && alt != null && reNotestockEmojiAlt.matches(alt)) { + // notestock custom emoji + sb.run { + val start = length + append(alt) + val end = length + setSpan( + NetworkEmojiSpan(url, scale = options.enlargeCustomEmoji), + start, + end, + Spanned.SPAN_EXCLUSIVE_EXCLUSIVE + ) + } + return } } - } - - if(! replaced) { - sb_tmp.append("") - } - } else { - for(child in child_nodes) { - child.encodeSpan(options, sb_tmp) - } - // sb_tmpを作成したa 以外のタグ(style,script)は読み捨てる - } - - if("a" == tag) { - - val linkInfo = formatLinkCaption(options, sb_tmp, href ?: "") - val caption = linkInfo.caption - if(caption.isNotEmpty()) { - val start = sb.length - sb.append(linkInfo.caption) - val end = sb.length - if(linkInfo.url.isNotEmpty()) { - val span = MyClickableSpan(linkInfo) - sb.setSpan(span, start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) - } - - // リンクスパンを設定した後に色をつける - val list = options.highlightTrie?.matchList(sb, start, end) - if(list != null) { - for(range in list) { - val word = HighlightWord.load(range.word) ?: continue - sb.setSpan( - HighlightSpan(word.color_fg, word.color_bg), - range.start, - range.end, - Spanned.SPAN_EXCLUSIVE_EXCLUSIVE - ) - - if(word.sound_type != HighlightWord.SOUND_TYPE_NONE) { - if(options.highlightSound == null) options.highlightSound = word - } - - if(word.speech != 0) { - if(options.highlightSpeech == null) options.highlightSpeech = word - } - - if(options.highlightAny == null) options.highlightAny = word + + sb.append("") + return + } + + "script","style" -> return + + "th", "td" -> sb.append("|") + + else -> if( isBlock && tag !="script" && tag != "style" ){ + val lastLine = reLastLine.find(sb)?.groupValues?.firstOrNull() ?: "" + if(CharacterGroup.reNotWhitespace.matcher(lastLine).find()){ + sb.append("\n") } } } - - when { - // 空のテキストには改行を追加しない - sb.isEmpty() -> { + + var spanStart = 0 + + val tmpFlusherOriginal: (SpannableStringBuilder) -> Unit = { + + when (tag) { + "s", "strike", "del" -> { + sb.setSpan(StrikethroughSpan(), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "em" -> { + sb.setSpan(fontSpan(Typeface.defaultFromStyle(Typeface.ITALIC)), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "strong" -> { + sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "tr" -> { + sb.append("|") + } + + "style", "script" -> { + // sb_tmpにレンダリングした分は読み捨てる + } + "h1" -> { + sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.setSpan(RelativeSizeSpan(1.8f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "h2" -> { + sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.setSpan(RelativeSizeSpan(1.6f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "h3" -> { + sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.setSpan(RelativeSizeSpan(1.4f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "h4" -> { + sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.setSpan(RelativeSizeSpan(1.2f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "h5" -> { + sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.setSpan(RelativeSizeSpan(1.0f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "h6" -> { + sb.setSpan(StyleSpan(Typeface.BOLD), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.setSpan(RelativeSizeSpan(0.8f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "pre" -> { + sb.setSpan(BackgroundColorSpan(0x40808080), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.setSpan(RelativeSizeSpan(0.7f), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.setSpan(fontSpan(Typeface.MONOSPACE), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "code" ->{ + sb.setSpan(BackgroundColorSpan(0x40808080), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.setSpan(fontSpan(Typeface.MONOSPACE), spanStart, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + "hr" -> sb.append("----------") } - - // 改行タグ - "br" == tag -> sb.append('\n') - + } + + val tmpFlusher = when (tag) { + "a" -> { + { sb_tmp -> + val linkInfo = formatLinkCaption(options, sb_tmp, href ?: "") + val caption = linkInfo.caption + if (caption.isNotEmpty()) { + val start = sb.length + sb.append(linkInfo.caption) + val end = sb.length + if (linkInfo.url.isNotEmpty()) { + val span = MyClickableSpan(linkInfo) + sb.setSpan(span, start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + + // リンクスパンを設定した後に色をつける + val list = options.highlightTrie?.matchList(sb, start, end) + if (list != null) { + for (range in list) { + val word = HighlightWord.load(range.word) ?: continue + sb.setSpan( + HighlightSpan(word.color_fg, word.color_bg), + range.start, + range.end, + Spanned.SPAN_EXCLUSIVE_EXCLUSIVE + ) + + if (word.sound_type != HighlightWord.SOUND_TYPE_NONE) { + if (options.highlightSound == null) options.highlightSound = word + } + + if (word.speech != 0) { + if (options.highlightSpeech == null) options.highlightSpeech = word + } + + if (options.highlightAny == null) options.highlightAny = word + } + } + } + } + } + + "style", "script" -> { + { + // 読み捨てる + // 最適化によりtmpFlusherOriginalとこのラムダが同一オブジェクトにならないようにする + } + } + + "blockquote" -> { + { sb_tmp -> + val bg_color = listContext.quoteColor() + + // TextView の文字装飾では「ブロック要素の入れ子」を表現できない + // 内容の各行の始端に何か追加するというのがまずキツい + // しかし各行の頭に引用マークをつけないと引用のネストで意味が通じなくなってしまう + + + val startItalic = sb.length + sb_tmp.splitLines().forEach { line -> + val lineStart = sb.length + sb.append("> ") + sb.setSpan(BackgroundColorSpan(bg_color), lineStart, lineStart + 1, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + sb.append(line) + } + sb.setSpan(fontSpan(Typeface.defaultFromStyle(Typeface.ITALIC)), startItalic, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + } + + "li" -> { + { sb_tmp -> + val lineHeader1 = listContext.increment() + val lineHeader2 = " ".repeat(lineHeader1.length) + sb_tmp.splitLines().forEachIndexed { i, line -> + sb.append(if (i == 0) lineHeader1 else lineHeader2) + sb.append(line) + } + } + } + + "dt" -> { + { sb_tmp -> + val prefix = listContext.increment() + val startBold = sb.length + sb_tmp.splitLines().forEachIndexed { i, line -> + sb.append(prefix) + sb.append(line) + } + sb.setSpan(fontSpan(Typeface.defaultFromStyle(Typeface.BOLD)), startBold, sb.length, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE) + } + } + + "dd" -> { + { sb_tmp -> + val prefix = listContext.increment() + " " + sb_tmp.splitLines().forEachIndexed { i, line -> + sb.append(prefix) + sb.append(line) + } + } + } + + else -> tmpFlusherOriginal + } + + val sb_tmp = if(tmpFlusher == tmpFlusherOriginal) { + sb + }else { + SpannableStringBuilder() + } + + spanStart = sb_tmp.length + + val childListContext = when (tag) { + "ol" -> listContext.subOrdered() + "ul" -> listContext.subUnordered() + "dl" -> listContext.subDefinition() + "blockquote" -> listContext.subQuote() + else -> listContext + } + fun String.tagsCanRemoveNearSpaces() = when(this){ + "li","ol","ul","dl","dt","dd","blockquote","h1","h2","h3","h4","h5","h6", + "table","tbody","thead","tfoot","tr","td","th" ->true + else->false + } + val childLast = child_nodes.size-1 + child_nodes.forEachIndexed{ i,child-> + if(child.tag == TAG_TEXT && child.text.isBlank() && isBlock){ + val preNode = child_nodes.elementAtOrNull(i-1) + val nextNode = child_nodes.elementAtOrNull(i+1) + if(preNode?.tag?.tagsCanRemoveNearSpaces()== true || + nextNode?.tag?.tagsCanRemoveNearSpaces()==true || + ((i==0 || i==childLast) && tag.tagsCanRemoveNearSpaces()) + ){ + return@forEachIndexed + } + } + child.encodeSpan(options, sb_tmp, childListContext) + } + + tmpFlusher(sb_tmp) + + if (isBlock) { // ブロック要素 - blockLevelElements.contains(tag) -> { - // 末尾の改行を数える - var last_br_count = 0 - var last = sb.length - 1 - loop@ while(last > 0) { - val c = sb[last --] - when { - c == '\n' -> { - ++ last_br_count - continue@loop - } - - Character.isWhitespace(c) -> continue@loop - else -> break@loop - } - } - // 末尾の改行が2文字未満なら改行を追加する - while(last_br_count ++ < 2) sb.append('\n') + // 末尾の改行が2文字未満なら改行を追加する + var appendCount = 2 - sb.lastBrCount() + if( listContext.inList()) appendCount = min(1,appendCount) + when(tag){ + "tr" -> appendCount = min(1,appendCount) + "thead","tfoot","tbody" -> appendCount = 0 } + repeat(appendCount){ sb.append( "\n" )} + } else { + // インライン要素で改行タグでテキストがカラでないなら、改行を追加する + if ("br" == tag && sb.isNotEmpty()) sb.append('\n') } - } - } - - // split attributes - private fun parseAttributes(text : String) : HashMap { - val dst = HashMap() - val m = reAttribute.matcher(text) - while(m.find()) { - val name = m.groupEx(1) !!.toLowerCase(Locale.JAPAN) - val value = decodeEntity(m.groupEx(3)) - dst[name] = value - } - return dst - } - - fun decodeHTML(options : DecodeOptions, src : String?) : SpannableStringBuilder { - - if(options.linkHelper?.isMisskey == true && ! options.forceHtml) { - return MisskeyMarkdownDecoder.decodeMarkdown(options, src) - } - - val sb = SpannableStringBuilder() - - try { - if(src != null) { - // parse HTML node tree - val tracker = TokenParser(src) - val rootNode = Node() - while(TAG_END != tracker.tag) { - rootNode.addChild(tracker, "") - } - - // encode to SpannableStringBuilder - rootNode.encodeSpan(options, sb) - - // 末尾の空白を取り除く - sb.removeEndWhitespaces() - } - } catch(ex : Throwable) { - log.trace(ex) - } - - return sb - } - - fun decodeMentions( - linkHelper : LinkHelper, - status:TootStatus - ) : Spannable? { - val mentionList : List? = status.mentions - val link_tag : Any? = status + } + } - if(mentionList == null || mentionList.isEmpty()) return null + // split attributes + private fun parseAttributes(text: String): HashMap { + val dst = HashMap() + val m = reAttribute.matcher(text) + while (m.find()) { + val name = m.groupEx(1)!!.toLowerCase(Locale.JAPAN) + val value = decodeEntity(m.groupEx(3)) + dst[name] = value + } + return dst + } - val sb = SpannableStringBuilder() - for(item in mentionList) { - if(sb.isNotEmpty()) sb.append(" ") - - val fullAcct = getFullAcctOrNull( + fun decodeHTML(options: DecodeOptions, src: String?): SpannableStringBuilder { + + if (options.linkHelper?.isMisskey == true && !options.forceHtml) { + return MisskeyMarkdownDecoder.decodeMarkdown(options, src) + } + + val sb = SpannableStringBuilder() + + try { + if (src != null) { + // parse HTML node tree + val tracker = TokenParser(src) + val rootNode = Node() + while (TAG_END != tracker.tag) { + rootNode.addChild(tracker, "") + } + + // encode to SpannableStringBuilder + rootNode.encodeSpan(options, sb, ListContext(type = ListType.None, 0, 0, 0,0)) + + // 末尾の空白を取り除く + sb.removeEndWhitespaces() + } + } catch (ex: Throwable) { + log.trace(ex) + } + + return sb + } + + fun decodeMentions( + linkHelper: LinkHelper, + status: TootStatus + ): Spannable? { + val mentionList: List? = status.mentions + val link_tag: Any = status + + if (mentionList == null || mentionList.isEmpty()) return null + + val sb = SpannableStringBuilder() + for (item in mentionList) { + if (sb.isNotEmpty()) sb.append(" ") + + val fullAcct = getFullAcctOrNull( item.acct, item.url, linkHelper, status.account ) - - val linkInfo = if(fullAcct != null) { - LinkInfo( + + val linkInfo = if (fullAcct != null) { + LinkInfo( url = item.url, - caption = "@${(if(Pref.bpMentionFullAcct(App1.pref)) fullAcct else item.acct).pretty}", + caption = "@${(if (Pref.bpMentionFullAcct(App1.pref)) fullAcct else item.acct).pretty}", ac = AcctColor.load(fullAcct), mention = item, tag = link_tag ) - } else { - LinkInfo( + } else { + LinkInfo( url = item.url, caption = "@${item.acct.pretty}", ac = null, mention = item, tag = link_tag ) - } - - val start = sb.length - sb.append(linkInfo.caption) - val end = sb.length - if(end > start) - sb.setSpan( + } + + val start = sb.length + sb.append(linkInfo.caption) + val end = sb.length + if (end > start) + sb.setSpan( MyClickableSpan(linkInfo), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE ) - } - return sb - } - - private val reNormalLink = """\A(\w+://)[^/]*""".asciiPattern() - - // URLの表記を短くする - // Punycode のデコードはサーバ側で行われる?ので、ここでは元リンクの表示テキストを元にURL短縮を試みる - fun shortenUrl(originalUrl : CharSequence) : CharSequence { - try { - - val m = reNormalLink.matcher(originalUrl) - if(m.find()) return SpannableStringBuilder().apply { - // 文字装飾をそのまま残したいのでsubSequenceを返す - - // WebUIでは非表示スパンに隠れているが、 - // 通常のリンクなら スキーマ名 + :// が必ず出現する - val schema = m.groupEx(1) - val start = if(schema?.startsWith("http") == true) { - // http,https の場合はスキーマ表記を省略する - schema.length - } else { - // その他のスキーマもMastodonでは許容される - // この場合はスキーマ名を省略しない - // https://github.com/tootsuite/mastodon/pull/7810 - 0 - } - - val length = originalUrl.length - val limit = m.end() + 10 // 10 characters for ( path + query + fragment ) - if(length > limit) { - append(originalUrl.subSequence(start, limit)) - append('…') - } else { - append(originalUrl.subSequence(start, length)) - } - } - } catch(ex : Throwable) { - log.trace(ex) - } - - return originalUrl - } - - private val reNicodic = """\Ahttps?://dic.nicovideo.jp/a/([^?#/]+)""".asciiPattern() - - private fun formatLinkCaption( - options : DecodeOptions, - originalCaption : CharSequence, - href : String + } + return sb + } + + private val reNormalLink = """\A(\w+://)[^/]*""".asciiPattern() + + // URLの表記を短くする + // Punycode のデコードはサーバ側で行われる?ので、ここでは元リンクの表示テキストを元にURL短縮を試みる + fun shortenUrl(originalUrl: CharSequence): CharSequence { + try { + + val m = reNormalLink.matcher(originalUrl) + if (m.find()) return SpannableStringBuilder().apply { + // 文字装飾をそのまま残したいのでsubSequenceを返す + + // WebUIでは非表示スパンに隠れているが、 + // 通常のリンクなら スキーマ名 + :// が必ず出現する + val schema = m.groupEx(1) + val start = if (schema?.startsWith("http") == true) { + // http,https の場合はスキーマ表記を省略する + schema.length + } else { + // その他のスキーマもMastodonでは許容される + // この場合はスキーマ名を省略しない + // https://github.com/tootsuite/mastodon/pull/7810 + 0 + } + + val length = originalUrl.length + val limit = m.end() + 10 // 10 characters for ( path + query + fragment ) + if (length > limit) { + append(originalUrl.subSequence(start, limit)) + append('…') + } else { + append(originalUrl.subSequence(start, length)) + } + } + } catch (ex: Throwable) { + log.trace(ex) + } + + return originalUrl + } + + private val reNicodic = """\Ahttps?://dic.nicovideo.jp/a/([^?#/]+)""".asciiPattern() + + private fun formatLinkCaption( + options: DecodeOptions, + originalCaption: CharSequence, + href: String ) = LinkInfo( caption = originalCaption, url = href, tag = options.linkTag ).also { linkInfo -> - when(originalCaption.firstOrNull()) { - - // #hashtag は変更しない + when (originalCaption.firstOrNull()) { + + // #hashtag は変更しない '#' -> { } - - // @mention + + // @mention '@' -> { - - fun afterFullAcctResolved(fullAcct:Acct){ + + fun afterFullAcctResolved(fullAcct: Acct) { linkInfo.ac = AcctColor.load(fullAcct) - if(options.mentionFullAcct || Pref.bpMentionFullAcct(App1.pref)) { + if (options.mentionFullAcct || Pref.bpMentionFullAcct(App1.pref)) { linkInfo.caption = "@${fullAcct.pretty}" } } @@ -631,19 +917,19 @@ object HTMLDecoder { // https://github.com/tateisu/SubwayTooter/issues/108 // check mentions to skip getAcctFromUrl val mention = options.mentions?.find { it.url == href } - if( mention != null ){ + if (mention != null) { getFullAcctOrNull( mention.acct, href, options.linkHelper, options.mentionDefaultHostDomain - )?.let{afterFullAcctResolved(it)} - }else{ + )?.let { afterFullAcctResolved(it) } + } else { // case A // Account.note does not have mentions metadata. // fallback to resolve acct by mention URL. - + // case B // https://mastodon.juggler.jp/@tateisu/104897039191509317 // リモートのMisskeyからMastodonに流れてきた投稿をSTで見ると @@ -653,14 +939,14 @@ object HTMLDecoder { // メンション情報がない場合がありうる。 // acctのドメイン部分を補う際、閲覧者のドメインや投稿者のドメインへの変換を試みる - + val rawAcct = Acct.parse(originalCaption.toString().substring(1)) getFullAcctOrNull( rawAcct, href, options.linkHelper, options.mentionDefaultHostDomain - )?.let{ fullAcct -> + )?.let { fullAcct -> // mentionメタデータを捏造する linkInfo.mention = TootMention( @@ -674,326 +960,326 @@ object HTMLDecoder { } } } - - else -> { - - val context = options.context - - when { - - context == null || ! options.short || href.isEmpty() -> { - } - - options.isMediaAttachment(href) -> { - // 添付メディアのURLなら絵文字に変換する - linkInfo.caption = SpannableString(href).apply { - setSpan( + + else -> { + + val context = options.context + + when { + + context == null || !options.short || href.isEmpty() -> { + } + + options.isMediaAttachment(href) -> { + // 添付メディアのURLなら絵文字に変換する + linkInfo.caption = SpannableString(href).apply { + setSpan( EmojiImageSpan(context, R.drawable.emj_1f5bc_fe0f), 0, length, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE ) - } - return@also - } - - else -> { - // ニコニコ大百科のURLを変える - val m = reNicodic.matcher(href) - when { - m.find() -> { - linkInfo.caption = - SpannableString( + } + return@also + } + + else -> { + // ニコニコ大百科のURLを変える + val m = reNicodic.matcher(href) + when { + m.find() -> { + linkInfo.caption = + SpannableString( "${ - m.groupEx(1) !!.decodePercent() + m.groupEx(1)!!.decodePercent() }:nicodic:" ).apply { - setSpan( + setSpan( EmojiImageSpan(context, R.drawable.nicodic), length - 9, length, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE ) - } - return@also - } - - else -> linkInfo.caption = shortenUrl(originalCaption) - } - } - } - } - } - } - - private fun init1() { - _addEntity("amp", '&') // ampersand - _addEntity("gt", '>') // greater than - _addEntity("lt", '<') // less than - _addEntity("quot", '"') // double quote - _addEntity("apos", '\'') // single quote - _addEntity("AElig", chr(198)) // capital AE diphthong (ligature) - _addEntity("Aacute", chr(193)) // capital A, acute accent - _addEntity("Acirc", chr(194)) // capital A, circumflex accent - _addEntity("Agrave", chr(192)) // capital A, grave accent - _addEntity("Aring", chr(197)) // capital A, ring - _addEntity("Atilde", chr(195)) // capital A, tilde - _addEntity("Auml", chr(196)) // capital A, dieresis or umlaut mark - _addEntity("Ccedil", chr(199)) // capital C, cedilla - _addEntity("ETH", chr(208)) // capital Eth, Icelandic - _addEntity("Eacute", chr(201)) // capital E, acute accent - _addEntity("Ecirc", chr(202)) // capital E, circumflex accent - _addEntity("Egrave", chr(200)) // capital E, grave accent - _addEntity("Euml", chr(203)) // capital E, dieresis or umlaut mark - _addEntity("Iacute", chr(205)) // capital I, acute accent - _addEntity("Icirc", chr(206)) // capital I, circumflex accent - _addEntity("Igrave", chr(204)) // capital I, grave accent - _addEntity("Iuml", chr(207)) // capital I, dieresis or umlaut mark - _addEntity("Ntilde", chr(209)) // capital N, tilde - _addEntity("Oacute", chr(211)) // capital O, acute accent - _addEntity("Ocirc", chr(212)) // capital O, circumflex accent - _addEntity("Ograve", chr(210)) // capital O, grave accent - _addEntity("Oslash", chr(216)) // capital O, slash - _addEntity("Otilde", chr(213)) // capital O, tilde - _addEntity("Ouml", chr(214)) // capital O, dieresis or umlaut mark - _addEntity("THORN", chr(222)) // capital THORN, Icelandic - _addEntity("Uacute", chr(218)) // capital U, acute accent - _addEntity("Ucirc", chr(219)) // capital U, circumflex accent - _addEntity("Ugrave", chr(217)) // capital U, grave accent - _addEntity("Uuml", chr(220)) // capital U, dieresis or umlaut mark - _addEntity("Yacute", chr(221)) // capital Y, acute accent - _addEntity("aacute", chr(225)) // small a, acute accent - _addEntity("acirc", chr(226)) // small a, circumflex accent - _addEntity("aelig", chr(230)) // small ae diphthong (ligature) - _addEntity("agrave", chr(224)) // small a, grave accent - _addEntity("aring", chr(229)) // small a, ring - _addEntity("atilde", chr(227)) // small a, tilde - _addEntity("auml", chr(228)) // small a, dieresis or umlaut mark - _addEntity("ccedil", chr(231)) // small c, cedilla - _addEntity("eacute", chr(233)) // small e, acute accent - _addEntity("ecirc", chr(234)) // small e, circumflex accent - _addEntity("egrave", chr(232)) // small e, grave accent - _addEntity("eth", chr(240)) // small eth, Icelandic - _addEntity("euml", chr(235)) // small e, dieresis or umlaut mark - _addEntity("iacute", chr(237)) // small i, acute accent - _addEntity("icirc", chr(238)) // small i, circumflex accent - _addEntity("igrave", chr(236)) // small i, grave accent - _addEntity("iuml", chr(239)) // small i, dieresis or umlaut mark - _addEntity("ntilde", chr(241)) // small n, tilde - _addEntity("oacute", chr(243)) // small o, acute accent - _addEntity("ocirc", chr(244)) // small o, circumflex accent - _addEntity("ograve", chr(242)) // small o, grave accent - _addEntity("oslash", chr(248)) // small o, slash - _addEntity("otilde", chr(245)) // small o, tilde - _addEntity("ouml", chr(246)) // small o, dieresis or umlaut mark - _addEntity("szlig", chr(223)) // small sharp s, German (sz ligature) - _addEntity("thorn", chr(254)) // small thorn, Icelandic - _addEntity("uacute", chr(250)) // small u, acute accent - _addEntity("ucirc", chr(251)) // small u, circumflex accent - _addEntity("ugrave", chr(249)) // small u, grave accent - _addEntity("uuml", chr(252)) // small u, dieresis or umlaut mark - _addEntity("yacute", chr(253)) // small y, acute accent - _addEntity("yuml", chr(255)) // small y, dieresis or umlaut mark - _addEntity("copy", chr(169)) // copyright sign - _addEntity("reg", chr(174)) // registered sign - _addEntity("nbsp", chr(160)) // non breaking space - _addEntity("iexcl", chr(161)) - _addEntity("cent", chr(162)) - _addEntity("pound", chr(163)) - _addEntity("curren", chr(164)) - _addEntity("yen", chr(165)) - _addEntity("brvbar", chr(166)) - _addEntity("sect", chr(167)) - _addEntity("uml", chr(168)) - _addEntity("ordf", chr(170)) - _addEntity("laquo", chr(171)) - _addEntity("not", chr(172)) - _addEntity("shy", chr(173)) - _addEntity("macr", chr(175)) - _addEntity("deg", chr(176)) - _addEntity("plusmn", chr(177)) - _addEntity("sup1", chr(185)) - _addEntity("sup2", chr(178)) - _addEntity("sup3", chr(179)) - _addEntity("acute", chr(180)) - _addEntity("micro", chr(181)) - _addEntity("para", chr(182)) - _addEntity("middot", chr(183)) - _addEntity("cedil", chr(184)) - _addEntity("ordm", chr(186)) - _addEntity("raquo", chr(187)) - _addEntity("frac14", chr(188)) - _addEntity("frac12", chr(189)) - _addEntity("frac34", chr(190)) - _addEntity("iquest", chr(191)) - _addEntity("times", chr(215)) - - } - - private fun init2() { - _addEntity("divide", chr(247)) - _addEntity("OElig", chr(338)) - _addEntity("oelig", chr(339)) - _addEntity("Scaron", chr(352)) - _addEntity("scaron", chr(353)) - _addEntity("Yuml", chr(376)) - _addEntity("fnof", chr(402)) - _addEntity("circ", chr(710)) - _addEntity("tilde", chr(732)) - _addEntity("Alpha", chr(913)) - _addEntity("Beta", chr(914)) - _addEntity("Gamma", chr(915)) - _addEntity("Delta", chr(916)) - _addEntity("Epsilon", chr(917)) - _addEntity("Zeta", chr(918)) - _addEntity("Eta", chr(919)) - _addEntity("Theta", chr(920)) - _addEntity("Iota", chr(921)) - _addEntity("Kappa", chr(922)) - _addEntity("Lambda", chr(923)) - _addEntity("Mu", chr(924)) - _addEntity("Nu", chr(925)) - _addEntity("Xi", chr(926)) - _addEntity("Omicron", chr(927)) - _addEntity("Pi", chr(928)) - _addEntity("Rho", chr(929)) - _addEntity("Sigma", chr(931)) - _addEntity("Tau", chr(932)) - _addEntity("Upsilon", chr(933)) - _addEntity("Phi", chr(934)) - _addEntity("Chi", chr(935)) - _addEntity("Psi", chr(936)) - _addEntity("Omega", chr(937)) - _addEntity("alpha", chr(945)) - _addEntity("beta", chr(946)) - _addEntity("gamma", chr(947)) - _addEntity("delta", chr(948)) - _addEntity("epsilon", chr(949)) - _addEntity("zeta", chr(950)) - _addEntity("eta", chr(951)) - _addEntity("theta", chr(952)) - _addEntity("iota", chr(953)) - _addEntity("kappa", chr(954)) - _addEntity("lambda", chr(955)) - _addEntity("mu", chr(956)) - _addEntity("nu", chr(957)) - _addEntity("xi", chr(958)) - _addEntity("omicron", chr(959)) - _addEntity("pi", chr(960)) - _addEntity("rho", chr(961)) - _addEntity("sigmaf", chr(962)) - _addEntity("sigma", chr(963)) - _addEntity("tau", chr(964)) - _addEntity("upsilon", chr(965)) - _addEntity("phi", chr(966)) - _addEntity("chi", chr(967)) - _addEntity("psi", chr(968)) - _addEntity("omega", chr(969)) - _addEntity("thetasym", chr(977)) - _addEntity("upsih", chr(978)) - _addEntity("piv", chr(982)) - _addEntity("ensp", chr(8194)) - _addEntity("emsp", chr(8195)) - _addEntity("thinsp", chr(8201)) - _addEntity("zwnj", chr(8204)) - _addEntity("zwj", chr(8205)) - _addEntity("lrm", chr(8206)) - _addEntity("rlm", chr(8207)) - _addEntity("ndash", chr(8211)) - _addEntity("mdash", chr(8212)) - _addEntity("lsquo", chr(8216)) - _addEntity("rsquo", chr(8217)) - _addEntity("sbquo", chr(8218)) - _addEntity("ldquo", chr(8220)) - _addEntity("rdquo", chr(8221)) - _addEntity("bdquo", chr(8222)) - _addEntity("dagger", chr(8224)) - _addEntity("Dagger", chr(8225)) - _addEntity("bull", chr(8226)) - _addEntity("hellip", chr(8230)) - _addEntity("permil", chr(8240)) - _addEntity("prime", chr(8242)) - _addEntity("Prime", chr(8243)) - _addEntity("lsaquo", chr(8249)) - _addEntity("rsaquo", chr(8250)) - _addEntity("oline", chr(8254)) - _addEntity("frasl", chr(8260)) - _addEntity("euro", chr(8364)) - _addEntity("image", chr(8465)) - _addEntity("weierp", chr(8472)) - _addEntity("real", chr(8476)) - _addEntity("trade", chr(8482)) - _addEntity("alefsym", chr(8501)) - _addEntity("larr", chr(8592)) - _addEntity("uarr", chr(8593)) - _addEntity("rarr", chr(8594)) - _addEntity("darr", chr(8595)) - _addEntity("harr", chr(8596)) - _addEntity("crarr", chr(8629)) - _addEntity("lArr", chr(8656)) - - } - - private fun init3() { - _addEntity("uArr", chr(8657)) - _addEntity("rArr", chr(8658)) - _addEntity("dArr", chr(8659)) - _addEntity("hArr", chr(8660)) - _addEntity("forall", chr(8704)) - _addEntity("part", chr(8706)) - _addEntity("exist", chr(8707)) - _addEntity("empty", chr(8709)) - _addEntity("nabla", chr(8711)) - _addEntity("isin", chr(8712)) - _addEntity("notin", chr(8713)) - _addEntity("ni", chr(8715)) - _addEntity("prod", chr(8719)) - _addEntity("sum", chr(8721)) - _addEntity("minus", chr(8722)) - _addEntity("lowast", chr(8727)) - _addEntity("radic", chr(8730)) - _addEntity("prop", chr(8733)) - _addEntity("infin", chr(8734)) - _addEntity("ang", chr(8736)) - _addEntity("and", chr(8743)) - _addEntity("or", chr(8744)) - _addEntity("cap", chr(8745)) - _addEntity("cup", chr(8746)) - _addEntity("int", chr(8747)) - _addEntity("there4", chr(8756)) - _addEntity("sim", chr(8764)) - _addEntity("cong", chr(8773)) - _addEntity("asymp", chr(8776)) - _addEntity("ne", chr(8800)) - _addEntity("equiv", chr(8801)) - _addEntity("le", chr(8804)) - _addEntity("ge", chr(8805)) - _addEntity("sub", chr(8834)) - _addEntity("sup", chr(8835)) - _addEntity("nsub", chr(8836)) - _addEntity("sube", chr(8838)) - _addEntity("supe", chr(8839)) - _addEntity("oplus", chr(8853)) - _addEntity("otimes", chr(8855)) - _addEntity("perp", chr(8869)) - _addEntity("sdot", chr(8901)) - _addEntity("lceil", chr(8968)) - _addEntity("rceil", chr(8969)) - _addEntity("lfloor", chr(8970)) - _addEntity("rfloor", chr(8971)) - _addEntity("lang", chr(9001)) - _addEntity("rang", chr(9002)) - _addEntity("loz", chr(9674)) - _addEntity("spades", chr(9824)) - _addEntity("clubs", chr(9827)) - _addEntity("hearts", chr(9829)) - _addEntity("diams", chr(9830)) - - } - - init { - init1() - init2() - init3() - } + } + return@also + } + + else -> linkInfo.caption = shortenUrl(originalCaption) + } + } + } + } + } + } + + private fun init1() { + _addEntity("amp", '&') // ampersand + _addEntity("gt", '>') // greater than + _addEntity("lt", '<') // less than + _addEntity("quot", '"') // double quote + _addEntity("apos", '\'') // single quote + _addEntity("AElig", chr(198)) // capital AE diphthong (ligature) + _addEntity("Aacute", chr(193)) // capital A, acute accent + _addEntity("Acirc", chr(194)) // capital A, circumflex accent + _addEntity("Agrave", chr(192)) // capital A, grave accent + _addEntity("Aring", chr(197)) // capital A, ring + _addEntity("Atilde", chr(195)) // capital A, tilde + _addEntity("Auml", chr(196)) // capital A, dieresis or umlaut mark + _addEntity("Ccedil", chr(199)) // capital C, cedilla + _addEntity("ETH", chr(208)) // capital Eth, Icelandic + _addEntity("Eacute", chr(201)) // capital E, acute accent + _addEntity("Ecirc", chr(202)) // capital E, circumflex accent + _addEntity("Egrave", chr(200)) // capital E, grave accent + _addEntity("Euml", chr(203)) // capital E, dieresis or umlaut mark + _addEntity("Iacute", chr(205)) // capital I, acute accent + _addEntity("Icirc", chr(206)) // capital I, circumflex accent + _addEntity("Igrave", chr(204)) // capital I, grave accent + _addEntity("Iuml", chr(207)) // capital I, dieresis or umlaut mark + _addEntity("Ntilde", chr(209)) // capital N, tilde + _addEntity("Oacute", chr(211)) // capital O, acute accent + _addEntity("Ocirc", chr(212)) // capital O, circumflex accent + _addEntity("Ograve", chr(210)) // capital O, grave accent + _addEntity("Oslash", chr(216)) // capital O, slash + _addEntity("Otilde", chr(213)) // capital O, tilde + _addEntity("Ouml", chr(214)) // capital O, dieresis or umlaut mark + _addEntity("THORN", chr(222)) // capital THORN, Icelandic + _addEntity("Uacute", chr(218)) // capital U, acute accent + _addEntity("Ucirc", chr(219)) // capital U, circumflex accent + _addEntity("Ugrave", chr(217)) // capital U, grave accent + _addEntity("Uuml", chr(220)) // capital U, dieresis or umlaut mark + _addEntity("Yacute", chr(221)) // capital Y, acute accent + _addEntity("aacute", chr(225)) // small a, acute accent + _addEntity("acirc", chr(226)) // small a, circumflex accent + _addEntity("aelig", chr(230)) // small ae diphthong (ligature) + _addEntity("agrave", chr(224)) // small a, grave accent + _addEntity("aring", chr(229)) // small a, ring + _addEntity("atilde", chr(227)) // small a, tilde + _addEntity("auml", chr(228)) // small a, dieresis or umlaut mark + _addEntity("ccedil", chr(231)) // small c, cedilla + _addEntity("eacute", chr(233)) // small e, acute accent + _addEntity("ecirc", chr(234)) // small e, circumflex accent + _addEntity("egrave", chr(232)) // small e, grave accent + _addEntity("eth", chr(240)) // small eth, Icelandic + _addEntity("euml", chr(235)) // small e, dieresis or umlaut mark + _addEntity("iacute", chr(237)) // small i, acute accent + _addEntity("icirc", chr(238)) // small i, circumflex accent + _addEntity("igrave", chr(236)) // small i, grave accent + _addEntity("iuml", chr(239)) // small i, dieresis or umlaut mark + _addEntity("ntilde", chr(241)) // small n, tilde + _addEntity("oacute", chr(243)) // small o, acute accent + _addEntity("ocirc", chr(244)) // small o, circumflex accent + _addEntity("ograve", chr(242)) // small o, grave accent + _addEntity("oslash", chr(248)) // small o, slash + _addEntity("otilde", chr(245)) // small o, tilde + _addEntity("ouml", chr(246)) // small o, dieresis or umlaut mark + _addEntity("szlig", chr(223)) // small sharp s, German (sz ligature) + _addEntity("thorn", chr(254)) // small thorn, Icelandic + _addEntity("uacute", chr(250)) // small u, acute accent + _addEntity("ucirc", chr(251)) // small u, circumflex accent + _addEntity("ugrave", chr(249)) // small u, grave accent + _addEntity("uuml", chr(252)) // small u, dieresis or umlaut mark + _addEntity("yacute", chr(253)) // small y, acute accent + _addEntity("yuml", chr(255)) // small y, dieresis or umlaut mark + _addEntity("copy", chr(169)) // copyright sign + _addEntity("reg", chr(174)) // registered sign + _addEntity("nbsp", chr(160)) // non breaking space + _addEntity("iexcl", chr(161)) + _addEntity("cent", chr(162)) + _addEntity("pound", chr(163)) + _addEntity("curren", chr(164)) + _addEntity("yen", chr(165)) + _addEntity("brvbar", chr(166)) + _addEntity("sect", chr(167)) + _addEntity("uml", chr(168)) + _addEntity("ordf", chr(170)) + _addEntity("laquo", chr(171)) + _addEntity("not", chr(172)) + _addEntity("shy", chr(173)) + _addEntity("macr", chr(175)) + _addEntity("deg", chr(176)) + _addEntity("plusmn", chr(177)) + _addEntity("sup1", chr(185)) + _addEntity("sup2", chr(178)) + _addEntity("sup3", chr(179)) + _addEntity("acute", chr(180)) + _addEntity("micro", chr(181)) + _addEntity("para", chr(182)) + _addEntity("middot", chr(183)) + _addEntity("cedil", chr(184)) + _addEntity("ordm", chr(186)) + _addEntity("raquo", chr(187)) + _addEntity("frac14", chr(188)) + _addEntity("frac12", chr(189)) + _addEntity("frac34", chr(190)) + _addEntity("iquest", chr(191)) + _addEntity("times", chr(215)) + + } + + private fun init2() { + _addEntity("divide", chr(247)) + _addEntity("OElig", chr(338)) + _addEntity("oelig", chr(339)) + _addEntity("Scaron", chr(352)) + _addEntity("scaron", chr(353)) + _addEntity("Yuml", chr(376)) + _addEntity("fnof", chr(402)) + _addEntity("circ", chr(710)) + _addEntity("tilde", chr(732)) + _addEntity("Alpha", chr(913)) + _addEntity("Beta", chr(914)) + _addEntity("Gamma", chr(915)) + _addEntity("Delta", chr(916)) + _addEntity("Epsilon", chr(917)) + _addEntity("Zeta", chr(918)) + _addEntity("Eta", chr(919)) + _addEntity("Theta", chr(920)) + _addEntity("Iota", chr(921)) + _addEntity("Kappa", chr(922)) + _addEntity("Lambda", chr(923)) + _addEntity("Mu", chr(924)) + _addEntity("Nu", chr(925)) + _addEntity("Xi", chr(926)) + _addEntity("Omicron", chr(927)) + _addEntity("Pi", chr(928)) + _addEntity("Rho", chr(929)) + _addEntity("Sigma", chr(931)) + _addEntity("Tau", chr(932)) + _addEntity("Upsilon", chr(933)) + _addEntity("Phi", chr(934)) + _addEntity("Chi", chr(935)) + _addEntity("Psi", chr(936)) + _addEntity("Omega", chr(937)) + _addEntity("alpha", chr(945)) + _addEntity("beta", chr(946)) + _addEntity("gamma", chr(947)) + _addEntity("delta", chr(948)) + _addEntity("epsilon", chr(949)) + _addEntity("zeta", chr(950)) + _addEntity("eta", chr(951)) + _addEntity("theta", chr(952)) + _addEntity("iota", chr(953)) + _addEntity("kappa", chr(954)) + _addEntity("lambda", chr(955)) + _addEntity("mu", chr(956)) + _addEntity("nu", chr(957)) + _addEntity("xi", chr(958)) + _addEntity("omicron", chr(959)) + _addEntity("pi", chr(960)) + _addEntity("rho", chr(961)) + _addEntity("sigmaf", chr(962)) + _addEntity("sigma", chr(963)) + _addEntity("tau", chr(964)) + _addEntity("upsilon", chr(965)) + _addEntity("phi", chr(966)) + _addEntity("chi", chr(967)) + _addEntity("psi", chr(968)) + _addEntity("omega", chr(969)) + _addEntity("thetasym", chr(977)) + _addEntity("upsih", chr(978)) + _addEntity("piv", chr(982)) + _addEntity("ensp", chr(8194)) + _addEntity("emsp", chr(8195)) + _addEntity("thinsp", chr(8201)) + _addEntity("zwnj", chr(8204)) + _addEntity("zwj", chr(8205)) + _addEntity("lrm", chr(8206)) + _addEntity("rlm", chr(8207)) + _addEntity("ndash", chr(8211)) + _addEntity("mdash", chr(8212)) + _addEntity("lsquo", chr(8216)) + _addEntity("rsquo", chr(8217)) + _addEntity("sbquo", chr(8218)) + _addEntity("ldquo", chr(8220)) + _addEntity("rdquo", chr(8221)) + _addEntity("bdquo", chr(8222)) + _addEntity("dagger", chr(8224)) + _addEntity("Dagger", chr(8225)) + _addEntity("bull", chr(8226)) + _addEntity("hellip", chr(8230)) + _addEntity("permil", chr(8240)) + _addEntity("prime", chr(8242)) + _addEntity("Prime", chr(8243)) + _addEntity("lsaquo", chr(8249)) + _addEntity("rsaquo", chr(8250)) + _addEntity("oline", chr(8254)) + _addEntity("frasl", chr(8260)) + _addEntity("euro", chr(8364)) + _addEntity("image", chr(8465)) + _addEntity("weierp", chr(8472)) + _addEntity("real", chr(8476)) + _addEntity("trade", chr(8482)) + _addEntity("alefsym", chr(8501)) + _addEntity("larr", chr(8592)) + _addEntity("uarr", chr(8593)) + _addEntity("rarr", chr(8594)) + _addEntity("darr", chr(8595)) + _addEntity("harr", chr(8596)) + _addEntity("crarr", chr(8629)) + _addEntity("lArr", chr(8656)) + + } + + private fun init3() { + _addEntity("uArr", chr(8657)) + _addEntity("rArr", chr(8658)) + _addEntity("dArr", chr(8659)) + _addEntity("hArr", chr(8660)) + _addEntity("forall", chr(8704)) + _addEntity("part", chr(8706)) + _addEntity("exist", chr(8707)) + _addEntity("empty", chr(8709)) + _addEntity("nabla", chr(8711)) + _addEntity("isin", chr(8712)) + _addEntity("notin", chr(8713)) + _addEntity("ni", chr(8715)) + _addEntity("prod", chr(8719)) + _addEntity("sum", chr(8721)) + _addEntity("minus", chr(8722)) + _addEntity("lowast", chr(8727)) + _addEntity("radic", chr(8730)) + _addEntity("prop", chr(8733)) + _addEntity("infin", chr(8734)) + _addEntity("ang", chr(8736)) + _addEntity("and", chr(8743)) + _addEntity("or", chr(8744)) + _addEntity("cap", chr(8745)) + _addEntity("cup", chr(8746)) + _addEntity("int", chr(8747)) + _addEntity("there4", chr(8756)) + _addEntity("sim", chr(8764)) + _addEntity("cong", chr(8773)) + _addEntity("asymp", chr(8776)) + _addEntity("ne", chr(8800)) + _addEntity("equiv", chr(8801)) + _addEntity("le", chr(8804)) + _addEntity("ge", chr(8805)) + _addEntity("sub", chr(8834)) + _addEntity("sup", chr(8835)) + _addEntity("nsub", chr(8836)) + _addEntity("sube", chr(8838)) + _addEntity("supe", chr(8839)) + _addEntity("oplus", chr(8853)) + _addEntity("otimes", chr(8855)) + _addEntity("perp", chr(8869)) + _addEntity("sdot", chr(8901)) + _addEntity("lceil", chr(8968)) + _addEntity("rceil", chr(8969)) + _addEntity("lfloor", chr(8970)) + _addEntity("rfloor", chr(8971)) + _addEntity("lang", chr(9001)) + _addEntity("rang", chr(9002)) + _addEntity("loz", chr(9674)) + _addEntity("spades", chr(9824)) + _addEntity("clubs", chr(9827)) + _addEntity("hearts", chr(9829)) + _addEntity("diams", chr(9830)) + + } + + init { + init1() + init2() + init3() + } } diff --git a/app/src/main/java/jp/juggler/util/CharacterGroup.kt b/app/src/main/java/jp/juggler/util/CharacterGroup.kt index 6e35a30b..6ad52e7f 100644 --- a/app/src/main/java/jp/juggler/util/CharacterGroup.kt +++ b/app/src/main/java/jp/juggler/util/CharacterGroup.kt @@ -80,7 +80,19 @@ object CharacterGroup { ) "[${quotedKeys}]+".asciiPattern() } - + internal val reNotWhitespace by lazy { + val quotedKeys = Pattern.quote( + StringBuilder().apply { + val size = mapWhitespace.size() + ensureCapacity(size) + for(i in 0 until size) { + append(mapWhitespace.keyAt(i).toChar()) + } + }.toString() + ) + "[^${quotedKeys}]+".asciiPattern() + } + private fun SparseBooleanArray.keys() = (0 until size()).map { keyAt(it) } internal val reWhitespaceBeforeLineFeed by lazy {