Merge pull request #2998 from oogm/develop

Update reactions to Unicode 13.1, improve backwards compatibility
2021-03-29 14:11:45 +02:00 · 2021-03-29 14:11:45 +02:00 · 3e8370cdc7
commit 3e8370cdc7
parent 0e71dfa8e1 86e23a5300
5 changed files with 167 additions and 16 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -11,6 +11,7 @@ Improvements 🙌:
 - Improve message with Emoji only detection (#3017)
 - Picture preview when replying. Also add the image preview in the message detail bottomsheet (#2916)
 - Api interceptor to allow app developers peek responses (#2986)
+ - Update reactions to Unicode 13.1 (#2998)
 - Be more robust when parsing some enums

 Bugfix 🐛:
--- a/tools/import_emojis.py
+++ b/tools/import_emojis.py
@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+from collections import OrderedDict
+
+import requests
+import json
+import re
+import os
+from bs4 import BeautifulSoup
+
+# A list of words to not capitalize in emoji-names
+capitalization_exclude = {'with', 'a', 'at', 'of', 'for', 'and', 'over', 'the', 'off', 'on', 'out', 'in', 'but', 'or'}
+
+# Create skeleton of the final json file as a python dictionary:
+emoji_picker_datasource = {
+    "compressed": True,
+    "categories": [],
+    "emojis": {},
+    "aliases": {}
+}
+emoji_picker_datasource_categories = emoji_picker_datasource["categories"]
+emoji_picker_datasource_emojis = emoji_picker_datasource["emojis"]
+
+
+# Get official emoji list from unicode.org (Emoji List, v13.1 at time of writing)
+print("Fetching emoji list from Unicode.org...",)
+req = requests.get("https://unicode.org/emoji/charts/emoji-list.html")
+soup = BeautifulSoup(req.content, 'html.parser')
+
+# Navigate to table
+table = soup.body.table
+
+# Go over all rows
+print("Extracting emojis...")
+for row in table.find_all('tr'):
+    # Add "bighead" rows (category headers) to categories
+    if 'bighead' in next(row.children)['class']:
+        relevant_element = row.find('a')
+        category_id = relevant_element['name']
+        category_name = relevant_element.text
+        emoji_picker_datasource_categories.append({
+            "id": category_id,
+            "name": category_name,
+            "emojis": []
+        })
+
+    # Add information in "rchars" rows to the last encountered category and emojis
+    if row.find('td', class_='code'):
+        # Get columns
+        cols = row.find_all('td')
+        no_element = cols[0]
+        code_element = cols[1]
+        sample_element = cols[2]
+        cldr_element = cols[3]
+        keywords_element = cols[4]
+
+        # Extract information from columns
+        # Extract name and id
+        # => Lowercase, drop everything except ASCII letters, digits and spaces, then turn spaces into hyphens
+        emoji_name = cldr_element.text
+        emoji_id = cldr_element.text.lower()
+        emoji_id = re.sub(r'[^A-Za-z0-9 ]+', '', emoji_id, flags=re.UNICODE)  # Only keep alphanumeric, space characters
+        emoji_id = emoji_id.strip()  # Remove leading/trailing whitespaces
+        emoji_id = emoji_id.replace(' ', '-')
+
+        # Capitalize name according to the same rules as the previous emoji_picker_datasource.json
+        # - Words are separated by any non-word character (\W), e.g. space, comma, parentheses, dots, etc.
+        # - Words are capitalized if they are either at the beginning of the name OR not in capitalization_exclude (extracted from the previous datasource, too)
+        emoji_name_cap = "".join([w.capitalize() if i == 0 or w not in capitalization_exclude else w for i, w in enumerate(re.split('(\W)', emoji_name))])
+
+        # Extract emoji unicode-codepoint
+        emoji_code_raw = code_element.text
+        emoji_code_list = emoji_code_raw.split(" ")
+        emoji_code_list = [e[2:] for e in emoji_code_list]
+        emoji_code = "-".join(emoji_code_list)
+
+        # Extract keywords
+        emoji_keywords = keywords_element.text.split(" | ")
+
+        # Add the emoji-id to the last entry in "categories"
+        emoji_picker_datasource_categories[-1]["emojis"].append(emoji_id)
+
+        # Add the emoji itself to the "emojis" dict
+        emoji_picker_datasource_emojis[emoji_id] = {
+                "a": emoji_name_cap,
+                "b": emoji_code,
+                "j": emoji_keywords
+        }
+
+# The keywords of unicode.org are usually quite sparse.
+# There is no official specification of keywords beyond that, but muan/emojilib is a well-maintained and
+# established repository with additional keywords. We extend our list with the keywords from there.
+# At the time of writing it had additional keyword information for all emojis except a few from the newest unicode 13.1.
+print("Fetching additional keywords from Emojilib...")
+req = requests.get("https://raw.githubusercontent.com/muan/emojilib/main/dist/emoji-en-US.json")
+emojilib_data = json.loads(req.content)
+
+# We just go over all the official emojis from unicode, and add the keywords there
+print("Adding keywords to emojis...")
+for emoji in emoji_picker_datasource_emojis:
+    emoji_name = emoji_picker_datasource_emojis[emoji]["a"]
+    emoji_code = emoji_picker_datasource_emojis[emoji]["b"]
+
+    # Convert back to actual unicode emoji
+    emoji_unicode = ''.join(map(lambda s: chr(int(s, 16)), emoji_code.split("-")))
+
+    # Search for emoji in emojilib
+    if emoji_unicode in emojilib_data:
+        emoji_additional_keywords = emojilib_data[emoji_unicode]
+    elif emoji_unicode+chr(0xfe0f)  in emojilib_data:
+        emoji_additional_keywords = emojilib_data[emoji_unicode+chr(0xfe0f)]
+    else:
+        print("* No additional keywords for", emoji_unicode, emoji_picker_datasource_emojis[emoji])
+        continue
+
+    # If additional keywords exist, add them to emoji_picker_datasource_emojis
+    # Avoid duplicates and keep order. Put official unicode.org keywords first and extend with emojilib ones.
+    new_keywords = OrderedDict.fromkeys(emoji_picker_datasource_emojis[emoji]["j"] + emoji_additional_keywords).keys()
+    # Remove the ones derived from the unicode name
+    new_keywords = new_keywords - {emoji.replace("-", "_")} - {emoji.replace("-", " ")} - {emoji_name}
+    # Write new keywords back
+    emoji_picker_datasource_emojis[emoji]["j"] = list(new_keywords)
+
+# Filter out components from unicode 13.1 (as they are not suitable for single-emoji reactions)
+emoji_picker_datasource['categories'] = [x for x in emoji_picker_datasource['categories'] if x['id'] != 'component']
+
+# Write result to file (overwrite previous), without escaping unicode characters
+print("Writing emoji_picker_datasource.json...")
+scripts_dir = os.path.dirname(os.path.abspath(__file__))
+with open(os.path.join(scripts_dir, "../vector/src/main/res/raw/emoji_picker_datasource.json"), "w") as outfile:
+    json.dump(emoji_picker_datasource, outfile, ensure_ascii=False, separators=(',', ':'))
+print("Done.")
--- a/vector/src/androidTest/java/im/vector/app/features/reactions/data/EmojiDataSourceTest.kt
+++ b/vector/src/androidTest/java/im/vector/app/features/reactions/data/EmojiDataSourceTest.kt
@ -42,17 +42,15 @@ class EmojiDataSourceTest : InstrumentedTest {
    @Test
    fun checkNumberOfResult() {
        val emojiDataSource = EmojiDataSource(context().resources)
-
-        assertEquals("Wrong number of emojis", 1545, emojiDataSource.rawData.emojis.size)
-        assertEquals("Wrong number of categories", 8, emojiDataSource.rawData.categories.size)
-        assertEquals("Wrong number of aliases", 57, emojiDataSource.rawData.aliases.size)
+        assertTrue("Wrong number of emojis", emojiDataSource.rawData.emojis.size >= 500)
+        assertTrue("Wrong number of categories", emojiDataSource.rawData.categories.size >= 8)
    }

    @Test
    fun searchTestEmptySearch() {
        val emojiDataSource = EmojiDataSource(context().resources)

-        assertEquals("Empty search should return 1545 results", 1545, emojiDataSource.filterWith("").size)
+        assertTrue("Empty search should return at least 500 results", emojiDataSource.filterWith("").size >= 500)
    }

    @Test
--- a/vector/src/main/java/im/vector/app/features/reactions/data/EmojiDataSource.kt
+++ b/vector/src/main/java/im/vector/app/features/reactions/data/EmojiDataSource.kt
@ -16,6 +16,8 @@
 package im.vector.app.features.reactions.data

 import android.content.res.Resources
+import android.graphics.Paint
+import androidx.core.graphics.PaintCompat
 import com.squareup.moshi.Moshi
 import im.vector.app.R
 import javax.inject.Inject
@ -25,6 +27,7 @@ import javax.inject.Singleton
 class EmojiDataSource @Inject constructor(
        resources: Resources
 ) {
+    private val paint = Paint()
    val rawData = resources.openRawResource(R.raw.emoji_picker_datasource)
            .use { input ->
                Moshi.Builder()
@ -34,18 +37,32 @@ class EmojiDataSource @Inject constructor(
            }
            ?.let { parsedRawData ->
                // Add key as a keyword, it will solve the issue that ":tada" is not available in completion
+                // Only add emojis to emojis/categories that can be rendered by the system
                parsedRawData.copy(
                        emojis = mutableMapOf<String, EmojiItem>().apply {
                            parsedRawData.emojis.keys.forEach { key ->
                                val origin = parsedRawData.emojis[key] ?: return@forEach

                                // Do not add keys containing '_'
-                                if (origin.keywords.contains(key) || key.contains("_")) {
-                                    put(key, origin)
-                                } else {
-                                    put(key, origin.copy(keywords = origin.keywords + key))
+                                if (isEmojiRenderable(origin.emoji)) {
+                                    if (origin.keywords.contains(key) || key.contains("_")) {
+                                        put(key, origin)
+                                    } else {
+                                        put(key, origin.copy(keywords = origin.keywords + key))
+                                    }
                                }
                            }
+                        },
+                        categories = mutableListOf<EmojiCategory>().apply {
+                            parsedRawData.categories.forEach { entry ->
+                                add(EmojiCategory(entry.id, entry.name, mutableListOf<String>().apply {
+                                    entry.emojis.forEach { e ->
+                                        if (isEmojiRenderable(parsedRawData.emojis[e]!!.emoji)) {
+                                            add(e)
+                                        }
+                                    }
+                                }))
+                            }
                        }
                )
            }
@ -53,6 +70,10 @@ class EmojiDataSource @Inject constructor(

    private val quickReactions = mutableListOf<EmojiItem>()

+    private fun isEmojiRenderable(emoji: String): Boolean {
+        return PaintCompat.hasGlyph(paint, emoji)
+    }
+
    fun filterWith(query: String): List<EmojiItem> {
        val words = query.split("\\s".toRegex())

@ -79,12 +100,12 @@ class EmojiDataSource @Inject constructor(
    fun getQuickReactions(): List<EmojiItem> {
        if (quickReactions.isEmpty()) {
            listOf(
-                    "+1", // 👍
-                    "-1", // 👎
-                    "grinning", // 😄
-                    "tada", // 🎉
-                    "confused", // 😕
-                    "heart", // ❤️
+                    "thumbs-up", // 👍
+                    "thumbs-down", // 👎
+                    "grinning-face-with-smiling-eyes", // 😄
+                    "party-popper", // 🎉
+                    "confused-face", // 😕
+                    "red-heart", // ❤️
                    "rocket", // 🚀
                    "eyes" // 👀
            )
--- a/vector/src/main/res/raw/emoji_picker_datasource.json
+++ b/vector/src/main/res/raw/emoji_picker_datasource.json