Improve BlurHashDecoder performance (#4515)

This pull request aims to dramatically improve the performance of
`BlurHashDecoder` while also reducing its memory allocations.

- Precompute the cosine tables before composing the image, so that each
cosine value is computed only once.
- Compute the cosine table only once when the tables for both axes would be
identical (for square images with the same number of colors in both dimensions).
- Store colors in a one-dimensional array instead of a two-dimensional array
to reduce memory allocations.
- Use a simple `String.indexOf()` to find the index of a Base83 char,
which is both faster and needs less memory than a `HashMap` thanks to
better locality and no boxing of chars.
- No cache is used, so computations may be performed in parallel on
background threads without the need for synchronization, which would
otherwise limit throughput.

## Benchmarks

Simple: 4x4 colors, 32x32 pixels output (this is what Mastodon and
Tusky currently use).
Complex: 9x9 colors, 256x256 pixels output.

**Pixel 7 (Android 14)**

```
      365 738   ns          23 allocs    Trace    BlurHashDecoderBenchmark.tuskySimple
      109 577   ns           8 allocs    Trace    BlurHashDecoderBenchmark.newSimple
  108 771 647   ns          88 allocs    Trace    BlurHashDecoderBenchmark.tuskyComplex
   12 932 076   ns           8 allocs    Trace    BlurHashDecoderBenchmark.newComplex
```

**Nexus 5 (Android 6)**

```
    4 600 937   ns          22 allocs    Trace    BlurHashDecoderBenchmark.tuskySimple
    1 391 487   ns           7 allocs    Trace    BlurHashDecoderBenchmark.newSimple
1 260 644 948   ns          87 allocs    Trace    BlurHashDecoderBenchmark.tuskyComplex
  125 274 063   ns           7 allocs    Trace    BlurHashDecoderBenchmark.newComplex
```

Conclusion: The new implementation is about **3 times faster** than the old
one for the current usage, and roughly **8 to 10 times faster** (depending on
the device) if we decide to increase the BlurHash quality in the future.

The source code of the benchmark comparing the original untouched Kotlin
implementation to the new one can be found
[here](https://github.com/cbeyls/BlurHashAndroidBenchmark).
This commit is contained in:
Christophe Beyls 2024-06-16 20:20:27 +02:00 committed by GitHub
parent 51e1c20449
commit 125483db61
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 44 additions and 38 deletions

View File

@ -2,6 +2,7 @@
* Blurhash implementation from blurhash project:
* https://github.com/woltapp/blurhash
* Minor modifications by charlag
* Major performance improvements by cbeyls
*/
package com.keylesspalace.tusky.util
@ -24,28 +25,27 @@ object BlurHashDecoder {
val numCompEnc = decode83(blurHash, 0, 1)
val numCompX = (numCompEnc % 9) + 1
val numCompY = (numCompEnc / 9) + 1
if (blurHash.length != 4 + 2 * numCompX * numCompY) {
val totalComp = numCompX * numCompY
if (blurHash.length != 4 + 2 * totalComp) {
return null
}
val maxAcEnc = decode83(blurHash, 1, 2)
val maxAc = (maxAcEnc + 1) / 166f
val colors = Array(numCompX * numCompY) { i ->
if (i == 0) {
val colorEnc = decode83(blurHash, 2, 6)
decodeDc(colorEnc)
} else {
val from = 4 + i * 2
val colorEnc = decode83(blurHash, from, from + 2)
decodeAc(colorEnc, maxAc * punch)
}
val colors = FloatArray(totalComp * 3)
var colorEnc = decode83(blurHash, 2, 6)
decodeDc(colorEnc, colors)
for (i in 1 until totalComp) {
val from = 4 + i * 2
colorEnc = decode83(blurHash, from, from + 2)
decodeAc(colorEnc, maxAc * punch, colors, i * 3)
}
return composeBitmap(width, height, numCompX, numCompY, colors)
}
private fun decode83(str: String, from: Int = 0, to: Int = str.length): Int {
private fun decode83(str: String, from: Int, to: Int): Int {
var result = 0
for (i in from until to) {
val index = charMap[str[i]] ?: -1
val index = CHARS.indexOf(str[i])
if (index != -1) {
result = result * 83 + index
}
@ -53,11 +53,13 @@ object BlurHashDecoder {
return result
}
private fun decodeDc(colorEnc: Int): FloatArray {
val r = colorEnc shr 16
val g = (colorEnc shr 8) and 255
val b = colorEnc and 255
return floatArrayOf(srgbToLinear(r), srgbToLinear(g), srgbToLinear(b))
private fun decodeDc(colorEnc: Int, outArray: FloatArray) {
val r = (colorEnc shr 16) and 0xFF
val g = (colorEnc shr 8) and 0xFF
val b = colorEnc and 0xFF
outArray[0] = srgbToLinear(r)
outArray[1] = srgbToLinear(g)
outArray[2] = srgbToLinear(b)
}
private fun srgbToLinear(colorEnc: Int): Float {
@ -69,15 +71,13 @@ object BlurHashDecoder {
}
}
private fun decodeAc(value: Int, maxAc: Float): FloatArray {
private fun decodeAc(value: Int, maxAc: Float, outArray: FloatArray, outIndex: Int) {
val r = value / (19 * 19)
val g = (value / 19) % 19
val b = value % 19
return floatArrayOf(
signedPow2((r - 9) / 9.0f) * maxAc,
signedPow2((g - 9) / 9.0f) * maxAc,
signedPow2((b - 9) / 9.0f) * maxAc
)
outArray[outIndex] = signedPow2((r - 9) / 9.0f) * maxAc
outArray[outIndex + 1] = signedPow2((g - 9) / 9.0f) * maxAc
outArray[outIndex + 2] = signedPow2((b - 9) / 9.0f) * maxAc
}
/**
 * Squares [value] while keeping its original sign.
 *
 * The magnitude of the result is `value` raised to the power 2; the sign is
 * copied from [value], so negative inputs yield negative results.
 */
private fun signedPow2(value: Float): Float {
    val squared = value.pow(2f)
    return squared.withSign(value)
}
@ -87,21 +87,29 @@ object BlurHashDecoder {
height: Int,
numCompX: Int,
numCompY: Int,
colors: Array<FloatArray>
colors: FloatArray
): Bitmap {
val imageArray = IntArray(width * height)
val cosinesX = createCosines(width, numCompX)
val cosinesY = if (width == height && numCompX == numCompY) {
cosinesX
} else {
createCosines(height, numCompY)
}
for (y in 0 until height) {
for (x in 0 until width) {
var r = 0f
var g = 0f
var b = 0f
for (j in 0 until numCompY) {
val cosY = cosinesY[y * numCompY + j]
for (i in 0 until numCompX) {
val basis = (cos(PI * x * i / width) * cos(PI * y * j / height)).toFloat()
val color = colors[j * numCompX + i]
r += color[0] * basis
g += color[1] * basis
b += color[2] * basis
val cosX = cosinesX[x * numCompX + i]
val basis = cosX * cosY
val colorIndex = (j * numCompX + i) * 3
r += colors[colorIndex] * basis
g += colors[colorIndex + 1] * basis
b += colors[colorIndex + 2] * basis
}
}
imageArray[x + width * y] = Color.rgb(linearToSrgb(r), linearToSrgb(g), linearToSrgb(b))
@ -110,6 +118,12 @@ object BlurHashDecoder {
return Bitmap.createBitmap(imageArray, width, height, Bitmap.Config.ARGB_8888)
}
/**
 * Builds a flat lookup table of cosine values for one image axis.
 *
 * The entry at `position * numComp + component` holds
 * `cos(PI * position * component / size)` converted to [Float], where
 * `position` ranges over `0 until size` and `component` over `0 until numComp`.
 *
 * @param size number of positions along the axis (the table has `size * numComp` entries)
 * @param numComp number of components along the axis
 * @return the cosine table, laid out position-major
 */
private fun createCosines(size: Int, numComp: Int): FloatArray {
    val table = FloatArray(size * numComp)
    for (slot in table.indices) {
        // Recover the (position, component) pair from the flat slot index.
        val position = slot / numComp
        val component = slot % numComp
        table[slot] = cos(PI * position * component / size).toFloat()
    }
    return table
}
private fun linearToSrgb(value: Float): Int {
val v = value.coerceIn(0f, 1f)
return if (v <= 0.0031308f) {
@ -119,13 +133,5 @@ object BlurHashDecoder {
}
}
private val charMap = listOf(
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '#', '$', '%', '*', '+', ',',
'-', '.', ':', ';', '=', '?', '@', '[', ']', '^', '_', '{', '|', '}', '~'
)
.mapIndexed { i, c -> c to i }
.toMap()
// Base83 alphabet: decode83 looks each input character up with indexOf(), so a
// character's position in this string is its digit value (0..82).
private const val CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz#$%*+,-.:;=?@[]^_{|}~"
}