From 125483db612616310b62c44e51def74b554fd83d Mon Sep 17 00:00:00 2001 From: Christophe Beyls Date: Sun, 16 Jun 2024 20:20:27 +0200 Subject: [PATCH] Improve BlurHashDecoder performance (#4515) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pull request aims to dramatically improve the performance of `BlurHashDecoder` while also reducing its memory allocations. - Precompute cosine tables before composing the image so each cosine value is only computed once. - Compute cosine tables once if both are identical (for square images with the same number of colors in both dimensions). - Store colors in a one-dimensional array instead of a two-dimensional array to reduce memory allocations. - Use a simple `String.indexOf()` to find the index of a Base83 char, which is faster and needs less memory than a `HashMap` thanks to better locality and no boxing of chars. - No cache is used, so computations may be performed in parallel on background threads without the need for synchronization, which would limit throughput. ## Benchmarks Simple: 4x4 colors, 32x32 pixels output. (This is what Mastodon and Tusky currently use) Complex: 9x9 colors, 256x256 pixels output. **Pixel 7 (Android 14)** ``` 365 738 ns 23 allocs Trace BlurHashDecoderBenchmark.tuskySimple 109 577 ns 8 allocs Trace BlurHashDecoderBenchmark.newSimple 108 771 647 ns 88 allocs Trace BlurHashDecoderBenchmark.tuskyComplex 12 932 076 ns 8 allocs Trace BlurHashDecoderBenchmark.newComplex ``` **Nexus 5 (Android 6)** ``` 4 600 937 ns 22 allocs Trace BlurHashDecoderBenchmark.tuskySimple 1 391 487 ns 7 allocs Trace BlurHashDecoderBenchmark.newSimple 1 260 644 948 ns 87 allocs Trace BlurHashDecoderBenchmark.tuskyComplex 125 274 063 ns 7 allocs Trace BlurHashDecoderBenchmark.newComplex ``` Conclusion: The new implementation is **3 times faster** than the old one for the current usage and up to **9 times faster** if we decide to increase the BlurHash quality in the future. 
The source code of the benchmark comparing the original untouched Kotlin implementation to the new one can be found [here](https://github.com/cbeyls/BlurHashAndroidBenchmark). --- .../tusky/util/BlurHashDecoder.kt | 82 ++++++++++--------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/app/src/main/java/com/keylesspalace/tusky/util/BlurHashDecoder.kt b/app/src/main/java/com/keylesspalace/tusky/util/BlurHashDecoder.kt index 117f59c09..940f5f138 100644 --- a/app/src/main/java/com/keylesspalace/tusky/util/BlurHashDecoder.kt +++ b/app/src/main/java/com/keylesspalace/tusky/util/BlurHashDecoder.kt @@ -2,6 +2,7 @@ * Blurhash implementation from blurhash project: * https://github.com/woltapp/blurhash * Minor modifications by charlag + * Major performance improvements by cbeyls */ package com.keylesspalace.tusky.util @@ -24,28 +25,27 @@ object BlurHashDecoder { val numCompEnc = decode83(blurHash, 0, 1) val numCompX = (numCompEnc % 9) + 1 val numCompY = (numCompEnc / 9) + 1 - if (blurHash.length != 4 + 2 * numCompX * numCompY) { + val totalComp = numCompX * numCompY + if (blurHash.length != 4 + 2 * totalComp) { return null } val maxAcEnc = decode83(blurHash, 1, 2) val maxAc = (maxAcEnc + 1) / 166f - val colors = Array(numCompX * numCompY) { i -> - if (i == 0) { - val colorEnc = decode83(blurHash, 2, 6) - decodeDc(colorEnc) - } else { - val from = 4 + i * 2 - val colorEnc = decode83(blurHash, from, from + 2) - decodeAc(colorEnc, maxAc * punch) - } + val colors = FloatArray(totalComp * 3) + var colorEnc = decode83(blurHash, 2, 6) + decodeDc(colorEnc, colors) + for (i in 1 until totalComp) { + val from = 4 + i * 2 + colorEnc = decode83(blurHash, from, from + 2) + decodeAc(colorEnc, maxAc * punch, colors, i * 3) } return composeBitmap(width, height, numCompX, numCompY, colors) } - private fun decode83(str: String, from: Int = 0, to: Int = str.length): Int { + private fun decode83(str: String, from: Int, to: Int): Int { var result = 0 for (i in from until to) { - 
val index = charMap[str[i]] ?: -1 + val index = CHARS.indexOf(str[i]) if (index != -1) { result = result * 83 + index } @@ -53,11 +53,13 @@ object BlurHashDecoder { return result } - private fun decodeDc(colorEnc: Int): FloatArray { - val r = colorEnc shr 16 - val g = (colorEnc shr 8) and 255 - val b = colorEnc and 255 - return floatArrayOf(srgbToLinear(r), srgbToLinear(g), srgbToLinear(b)) + private fun decodeDc(colorEnc: Int, outArray: FloatArray) { + val r = (colorEnc shr 16) and 0xFF + val g = (colorEnc shr 8) and 0xFF + val b = colorEnc and 0xFF + outArray[0] = srgbToLinear(r) + outArray[1] = srgbToLinear(g) + outArray[2] = srgbToLinear(b) } private fun srgbToLinear(colorEnc: Int): Float { @@ -69,15 +71,13 @@ object BlurHashDecoder { } } - private fun decodeAc(value: Int, maxAc: Float): FloatArray { + private fun decodeAc(value: Int, maxAc: Float, outArray: FloatArray, outIndex: Int) { val r = value / (19 * 19) val g = (value / 19) % 19 val b = value % 19 - return floatArrayOf( - signedPow2((r - 9) / 9.0f) * maxAc, - signedPow2((g - 9) / 9.0f) * maxAc, - signedPow2((b - 9) / 9.0f) * maxAc - ) + outArray[outIndex] = signedPow2((r - 9) / 9.0f) * maxAc + outArray[outIndex + 1] = signedPow2((g - 9) / 9.0f) * maxAc + outArray[outIndex + 2] = signedPow2((b - 9) / 9.0f) * maxAc } private fun signedPow2(value: Float) = value.pow(2f).withSign(value) @@ -87,21 +87,29 @@ object BlurHashDecoder { height: Int, numCompX: Int, numCompY: Int, - colors: Array + colors: FloatArray ): Bitmap { val imageArray = IntArray(width * height) + val cosinesX = createCosines(width, numCompX) + val cosinesY = if (width == height && numCompX == numCompY) { + cosinesX + } else { + createCosines(height, numCompY) + } for (y in 0 until height) { for (x in 0 until width) { var r = 0f var g = 0f var b = 0f for (j in 0 until numCompY) { + val cosY = cosinesY[y * numCompY + j] for (i in 0 until numCompX) { - val basis = (cos(PI * x * i / width) * cos(PI * y * j / height)).toFloat() - val color = 
colors[j * numCompX + i] - r += color[0] * basis - g += color[1] * basis - b += color[2] * basis + val cosX = cosinesX[x * numCompX + i] + val basis = cosX * cosY + val colorIndex = (j * numCompX + i) * 3 + r += colors[colorIndex] * basis + g += colors[colorIndex + 1] * basis + b += colors[colorIndex + 2] * basis } } imageArray[x + width * y] = Color.rgb(linearToSrgb(r), linearToSrgb(g), linearToSrgb(b)) @@ -110,6 +118,12 @@ object BlurHashDecoder { return Bitmap.createBitmap(imageArray, width, height, Bitmap.Config.ARGB_8888) } + private fun createCosines(size: Int, numComp: Int) = FloatArray(size * numComp) { index -> + val x = index / numComp + val i = index % numComp + cos(PI * x * i / size).toFloat() + } + private fun linearToSrgb(value: Float): Int { val v = value.coerceIn(0f, 1f) return if (v <= 0.0031308f) { @@ -119,13 +133,5 @@ object BlurHashDecoder { } } - private val charMap = listOf( - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', - 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '#', '$', '%', '*', '+', ',', - '-', '.', ':', ';', '=', '?', '@', '[', ']', '^', '_', '{', '|', '}', '~' - ) - .mapIndexed { i, c -> c to i } - .toMap() + private const val CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz#$%*+,-.:;=?@[]^_{|}~" }