package org.joinmastodon.android.ui.text;

import android.content.Context;
import android.text.SpannableStringBuilder;
import android.text.Spanned;
import android.text.TextUtils;
import android.text.style.BackgroundColorSpan;
import android.text.style.ForegroundColorSpan;
import android.widget.TextView;

import com.twitter.twittertext.Regex;

import org.joinmastodon.android.R;
import org.joinmastodon.android.model.Emoji;
import org.joinmastodon.android.model.FilterResult;
import org.joinmastodon.android.model.Hashtag;
import org.joinmastodon.android.model.Mention;
import org.joinmastodon.android.ui.utils.UiUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Safelist;
import org.jsoup.select.NodeVisitor;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import androidx.annotation.NonNull;

/**
 * Static helpers that turn post HTML and plain text into styled {@link SpannableStringBuilder}s:
 * link/mention/hashtag spans, custom emoji spans, URL detection and filter keyword highlighting.
 * The class is not instantiable.
 */
public class HtmlParser{
	private static final String TAG="HtmlParser";

	// NOTE(review): group numbers after $4 assume the Regex.* fragments contain no capturing
	// groups of their own. Only $3 and $4 are read by this class.
	private static final String VALID_URL_PATTERN_STRING =
			"(" +                                                       // $1 total match
				"(" + Regex.URL_VALID_PRECEDING_CHARS + ")" +           // $2 Preceding character
				"(" +                                                   // $3 URL
					"(https?://)" +                                     // $4 Protocol (mandatory in this pattern)
					"(" + Regex.URL_VALID_DOMAIN + ")" +                // $5 Domain(s)
					"(?::(" + Regex.URL_VALID_PORT_NUMBER + "))?" +     // $6 Port number (optional)
					"(/" +
						Regex.URL_VALID_PATH + "*+" +
					")?" +                                              // $7 URL Path and anchor
					"(\\?" + Regex.URL_VALID_URL_QUERY_CHARS + "*" +    // $8 Query String
						Regex.URL_VALID_URL_QUERY_ENDING_CHARS + ")?" +
				")" +
			")";
	public static final Pattern URL_PATTERN=Pattern.compile(VALID_URL_PATTERN_STRING, Pattern.CASE_INSENSITIVE);
	private static final Pattern INVITE_LINK_PATH=Pattern.compile("/invite/[a-z\\d]+$", Pattern.CASE_INSENSITIVE);
	private static final Pattern EMOJI_CODE_PATTERN=Pattern.compile(":([\\w]+):");

	private HtmlParser(){}

	/**
	 * Parse HTML and custom emoji into a spanned string for display.
	 * Supported tags:<ul>
	 * <li>&lt;a&gt;, optionally with class "hashtag" or "mention"</li>
	 * <li>&lt;span&gt; with class "invisible" or "ellipsis"</li>
	 * <li>&lt;br&gt;</li>
	 * <li>&lt;p&gt;</li>
	 * </ul>
	 * Text inside any other element is appended without extra styling.
	 *
	 * @param source Source HTML
	 * @param emojis Custom emojis that are present in source as :code:
	 * @param mentions Mentions used to resolve "mention" links (matched by URL) to account IDs
	 * @param tags Hashtags used to resolve "hashtag" links (matched by case-insensitive name)
	 * @param accountID Passed through to every created {@link LinkSpan}
	 * @param parentObject Passed through to every created {@link LinkSpan}
	 * @return a spanned string
	 */
	public static SpannableStringBuilder parse(String source, List<Emoji> emojis, List<Mention> mentions, List<Hashtag> tags, String accountID, Object parentObject){
		class SpanInfo{
			public Object span;
			public int start;
			public Element element;

			public SpanInfo(Object span, int start, Element element){
				this.span=span;
				this.start=start;
				this.element=element;
			}
		}

		// Duplicate keys keep the first entry instead of throwing IllegalStateException, so a post
		// that repeats a mention or lists the same tag in different letter case still renders.
		Map<String, String> idsByUrl=mentions.stream()
				.collect(Collectors.toMap(m->m.url, m->m.id, (first, second)->first));
		// Hashtags in remote posts have remote URLs, these have local URLs so they don't match.
		// Map<String, String> tagsByUrl=tags.stream().collect(Collectors.toMap(t->t.url, t->t.name));
		// Locale.ROOT keeps the key independent of the device locale (e.g. Turkish dotless i).
		Map<String, Hashtag> tagsByTag=tags.stream()
				.collect(Collectors.toMap(t->t.name.toLowerCase(Locale.ROOT), Function.identity(), (first, second)->first));
		Map<String, Mention> mentionsByID=mentions.stream()
				.collect(Collectors.toMap(m->m.id, Function.identity(), (first, second)->first));

		final SpannableStringBuilder ssb=new SpannableStringBuilder();
		Jsoup.parseBodyFragment(source).body().traverse(new NodeVisitor(){
			// Stack of spans whose element has been entered but not yet left.
			private final ArrayList<SpanInfo> openSpans=new ArrayList<>();

			@Override
			public void head(@NonNull Node node, int depth){
				if(node instanceof TextNode textNode){
					ssb.append(textNode.text());
				}else if(node instanceof Element el){
					switch(el.nodeName()){
						case "a" -> {
							Object linkObject=null;
							String href=el.attr("href");
							LinkSpan.Type linkType;
							if(el.hasClass("hashtag")){
								String text=el.text();
								if(text.startsWith("#")){
									String tagName=text.substring(1);
									linkType=LinkSpan.Type.HASHTAG;
									href=tagName;
									linkObject=tagsByTag.get(tagName.toLowerCase(Locale.ROOT));
								}else{
									linkType=LinkSpan.Type.URL;
								}
							}else if(el.hasClass("mention")){
								String id=idsByUrl.get(href);
								if(id!=null){
									linkType=LinkSpan.Type.MENTION;
									href=id;
									linkObject=mentionsByID.get(id);
								}else{
									// Unknown mention URL: fall back to an ordinary link.
									linkType=LinkSpan.Type.URL;
								}
							}else{
								linkType=LinkSpan.Type.URL;
							}
							openSpans.add(new SpanInfo(new LinkSpan(href, null, linkType, accountID, linkObject, parentObject), ssb.length(), el));
						}
						case "br" -> ssb.append('\n');
						case "span" -> {
							if(el.hasClass("invisible")){
								openSpans.add(new SpanInfo(new InvisibleSpan(), ssb.length(), el));
							}
						}
					}
				}
			}

			@Override
			public void tail(@NonNull Node node, int depth){
				if(node instanceof Element el){
					// Close the span opened for this element first, independently of the checks
					// below. Otherwise a span carrying both "invisible" and "ellipsis" would stay
					// on the stack forever and block every enclosing span from being applied.
					if(!openSpans.isEmpty()){
						SpanInfo si=openSpans.get(openSpans.size()-1);
						if(si.element==el){
							ssb.setSpan(si.span, si.start, ssb.length(), Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
							openSpans.remove(openSpans.size()-1);
						}
					}
					if("span".equals(el.nodeName()) && el.hasClass("ellipsis")){
						ssb.append("…", new DeleteWhenCopiedSpan(), Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
					}else if("p".equals(el.nodeName())){
						// Blank line between paragraphs, but not after the last one.
						if(node.nextSibling()!=null)
							ssb.append("\n\n");
					}
				}
			}
		});
		if(!emojis.isEmpty())
			parseCustomEmoji(ssb, emojis);
		return ssb;
	}

	/**
	 * Attaches a {@link CustomEmojiSpan} to every <code>:shortcode:</code> occurrence in
	 * <code>ssb</code> whose shortcode is present in <code>emojis</code>. If the text consists of
	 * exactly one emoji and nothing else, a trailing space is appended.
	 *
	 * @param ssb text to modify in place
	 * @param emojis known custom emojis; when shortcodes repeat, the first one wins
	 */
	public static void parseCustomEmoji(SpannableStringBuilder ssb, List<Emoji> emojis){
		Map<String, Emoji> emojiByCode = emojis.stream()
				.collect(
						Collectors.toMap(e->e.shortcode, Function.identity(), (emoji1, emoji2) -> {
							// Ignore duplicate shortcodes and just take the first, it will be
							// the same emoji anyway
							return emoji1;
						})
				);

		Matcher matcher=EMOJI_CODE_PATTERN.matcher(ssb);
		int spanCount=0;
		CustomEmojiSpan lastSpan=null;
		while(matcher.find()){
			Emoji emoji=emojiByCode.get(matcher.group(1));
			if(emoji==null)
				continue;
			ssb.setSpan(lastSpan=new CustomEmojiSpan(emoji), matcher.start(), matcher.end(), Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
			spanCount++;
		}
		if(spanCount==1 && ssb.getSpanStart(lastSpan)==0 && ssb.getSpanEnd(lastSpan)==ssb.length()){
			ssb.append(' '); // To fix line height
		}
	}

	/**
	 * Wraps <code>text</code> in a new builder and applies
	 * {@link #parseCustomEmoji(SpannableStringBuilder, List)} to it.
	 *
	 * @param text plain text that may contain <code>:shortcode:</code> sequences
	 * @param emojis known custom emojis
	 * @return the new builder with emoji spans attached
	 */
	public static SpannableStringBuilder parseCustomEmoji(String text, List<Emoji> emojis){
		SpannableStringBuilder ssb=new SpannableStringBuilder(text);
		parseCustomEmoji(ssb, emojis);
		return ssb;
	}

	/**
	 * Sets <code>text</code> on <code>view</code>. The emoji parsing and
	 * {@link UiUtils#loadCustomEmojiInTextView} call are skipped when the text contains nothing
	 * that looks like a <code>:shortcode:</code>.
	 *
	 * @param view target view
	 * @param text plain text to show
	 * @param emojis known custom emojis
	 */
	public static void setTextWithCustomEmoji(TextView view, String text, List<Emoji> emojis){
		if(!EMOJI_CODE_PATTERN.matcher(text).find()){
			view.setText(text);
			return;
		}
		view.setText(parseCustomEmoji(text, emojis));
		UiUtils.loadCustomEmojiInTextView(view);
	}

	/**
	 * Runs <code>html</code> through jsoup's cleaner with an empty safelist.
	 *
	 * @param html source HTML
	 * @return the result of {@link Jsoup#clean(String, Safelist)} with {@link Safelist#none()}
	 */
	public static String strip(String html){
		return Jsoup.clean(html, Safelist.none());
	}

	/**
	 * Converts HTML to plain text, dropping <code>span.invisible</code> elements entirely,
	 * turning &lt;br&gt; into a newline and separating consecutive &lt;p&gt; elements with a
	 * blank line.
	 *
	 * @param html source HTML
	 * @return plain text
	 */
	public static String stripAndRemoveInvisibleSpans(String html){
		Document doc=Jsoup.parseBodyFragment(html);
		doc.body().select("span.invisible").remove();
		// Keep only the two tags that influence line breaks; the visitor below handles them.
		Cleaner cleaner=new Cleaner(Safelist.none().addTags("br", "p"));
		StringBuilder sb=new StringBuilder();
		cleaner.clean(doc).body().traverse(new NodeVisitor(){
			@Override
			public void head(Node node, int depth){
				if(node instanceof TextNode tn){
					sb.append(tn.text());
				}else if(node instanceof Element el){
					if("br".equals(el.tagName())){
						sb.append('\n');
					}
				}
			}

			@Override
			public void tail(Node node, int depth){
				if(node instanceof Element el && "p".equals(el.tagName()) && el.nextSibling()!=null){
					sb.append("\n\n");
				}
			}
		});
		return sb.toString();
	}

	/**
	 * Finds URLs in plain text using {@link #URL_PATTERN} and attaches a URL-type
	 * {@link LinkSpan} to each one.
	 *
	 * @param text plain text
	 * @return <code>text</code> itself when it contains no URL, otherwise a new spanned copy
	 */
	public static CharSequence parseLinks(String text){
		Matcher matcher=URL_PATTERN.matcher(text);
		if(!matcher.find()) // Return the original string if there are no URLs
			return text;
		SpannableStringBuilder ssb=new SpannableStringBuilder(text);
		do{
			String url=matcher.group(3);
			if(TextUtils.isEmpty(matcher.group(4)))
				url="http://"+url;
			ssb.setSpan(new LinkSpan(url, null, LinkSpan.Type.URL, null, null, null), matcher.start(3), matcher.end(3), 0);
		}while(matcher.find()); // Find more URLs
		return ssb;
	}

	/**
	 * Highlights every whole-word, case-insensitive occurrence of the matched keywords of all
	 * active filters, using the theme's error colors as foreground and background.
	 *
	 * @param context used to resolve theme colors
	 * @param text text to modify in place
	 * @param filters filter results whose <code>keywordMatches</code> should be highlighted
	 */
	public static void applyFilterHighlights(Context context, SpannableStringBuilder text, List<FilterResult> filters){
		int fgColor=UiUtils.getThemeColor(context, R.attr.colorM3Error);
		int bgColor=UiUtils.getThemeColor(context, R.attr.colorM3ErrorContainer);
		for(FilterResult filter:filters){
			if(!filter.filter.isActive())
				continue;
			for(String word:filter.keywordMatches){
				Matcher matcher=Pattern.compile("\\b"+Pattern.quote(word)+"\\b", Pattern.CASE_INSENSITIVE).matcher(text);
				while(matcher.find()){
					// A span object can be attached only once, so create a fresh pair per match.
					ForegroundColorSpan fg=new ForegroundColorSpan(fgColor);
					BackgroundColorSpan bg=new BackgroundColorSpan(bgColor);
					text.setSpan(bg, matcher.start(), matcher.end(), 0);
					text.setSpan(fg, matcher.start(), matcher.end(), 0);
				}
			}
		}
	}

	/**
	 * Checks whether <code>url</code> starts with <code>https://</code> and ends with
	 * <code>/invite/</code> followed by one or more letters or digits.
	 *
	 * @param url URL to check
	 * @return true if the URL has the shape of an invite link
	 */
	public static boolean isValidInviteUrl(String url){
		return url.startsWith("https://") && INVITE_LINK_PATH.matcher(url).find();
	}

	/**
	 * Removes a leading <code>www.</code> from a domain name.
	 *
	 * @param domain domain name
	 * @return the domain without the <code>www.</code> prefix, or unchanged if it has none
	 */
	public static String normalizeDomain(String domain){
		if(domain.startsWith("www."))
			domain=domain.substring(4);
		return domain;
	}
}