Make endpoints a global array
This commit is contained in:
		| @@ -94,6 +94,8 @@ uint result_index = 0; | |||||||
| uint result_vector_max_index; | uint result_vector_max_index; | ||||||
| bool result_limit_reached = false; | bool result_limit_reached = false; | ||||||
|  |  | ||||||
|  | uvec4 endpoints[2][4]; | ||||||
|  |  | ||||||
| // EncodingData helpers | // EncodingData helpers | ||||||
| uint Encoding(EncodingData val) { | uint Encoding(EncodingData val) { | ||||||
|     return bitfieldExtract(val.data, 0, 8); |     return bitfieldExtract(val.data, 0, 8); | ||||||
| @@ -673,7 +675,7 @@ ivec4 BlueContract(int a, int r, int g, int b) { | |||||||
|     return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); |     return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); | ||||||
| } | } | ||||||
|  |  | ||||||
| void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | void ComputeEndpoints(uint ep_index, uint color_endpoint_mode, | ||||||
|                       inout uint colvals_index) { |                       inout uint colvals_index) { | ||||||
| #define READ_UINT_VALUES(N)                                                                        \ | #define READ_UINT_VALUES(N)                                                                        \ | ||||||
|     uint v[N];                                                                                     \ |     uint v[N];                                                                                     \ | ||||||
| @@ -692,22 +694,22 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | |||||||
|     switch (color_endpoint_mode) { |     switch (color_endpoint_mode) { | ||||||
|     case 0: { |     case 0: { | ||||||
|         READ_UINT_VALUES(2) |         READ_UINT_VALUES(2) | ||||||
|         ep1 = uvec4(0xFF, v[0], v[0], v[0]); |         endpoints[0][ep_index] = uvec4(0xFF, v[0], v[0], v[0]); | ||||||
|         ep2 = uvec4(0xFF, v[1], v[1], v[1]); |         endpoints[1][ep_index] = uvec4(0xFF, v[1], v[1], v[1]); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case 1: { |     case 1: { | ||||||
|         READ_UINT_VALUES(2) |         READ_UINT_VALUES(2) | ||||||
|         const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); |         const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); | ||||||
|         const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); |         const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); | ||||||
|         ep1 = uvec4(0xFF, L0, L0, L0); |         endpoints[0][ep_index] = uvec4(0xFF, L0, L0, L0); | ||||||
|         ep2 = uvec4(0xFF, L1, L1, L1); |         endpoints[1][ep_index] = uvec4(0xFF, L1, L1, L1); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case 4: { |     case 4: { | ||||||
|         READ_UINT_VALUES(4) |         READ_UINT_VALUES(4) | ||||||
|         ep1 = uvec4(v[2], v[0], v[0], v[0]); |         endpoints[0][ep_index] = uvec4(v[2], v[0], v[0], v[0]); | ||||||
|         ep2 = uvec4(v[3], v[1], v[1], v[1]); |         endpoints[1][ep_index] = uvec4(v[3], v[1], v[1], v[1]); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case 5: { |     case 5: { | ||||||
| @@ -718,24 +720,24 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | |||||||
|         transferred = BitTransferSigned(v[3], v[2]); |         transferred = BitTransferSigned(v[3], v[2]); | ||||||
|         v[3] = transferred.x; |         v[3] = transferred.x; | ||||||
|         v[2] = transferred.y; |         v[2] = transferred.y; | ||||||
|         ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0])); |         endpoints[0][ep_index] = ClampByte(ivec4(v[2], v[0], v[0], v[0])); | ||||||
|         ep2 = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); |         endpoints[1][ep_index] = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case 6: { |     case 6: { | ||||||
|         READ_UINT_VALUES(4) |         READ_UINT_VALUES(4) | ||||||
|         ep1 = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); |         endpoints[0][ep_index] = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); | ||||||
|         ep2 = uvec4(0xFF, v[0], v[1], v[2]); |         endpoints[1][ep_index] = uvec4(0xFF, v[0], v[1], v[2]); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case 8: { |     case 8: { | ||||||
|         READ_UINT_VALUES(6) |         READ_UINT_VALUES(6) | ||||||
|         if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { |         if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { | ||||||
|             ep1 = uvec4(0xFF, v[0], v[2], v[4]); |             endpoints[0][ep_index] = uvec4(0xFF, v[0], v[2], v[4]); | ||||||
|             ep2 = uvec4(0xFF, v[1], v[3], v[5]); |             endpoints[1][ep_index] = uvec4(0xFF, v[1], v[3], v[5]); | ||||||
|         } else { |         } else { | ||||||
|             ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); |             endpoints[0][ep_index] = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); | ||||||
|             ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); |             endpoints[1][ep_index] = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| @@ -751,28 +753,28 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | |||||||
|         v[5] = transferred.x; |         v[5] = transferred.x; | ||||||
|         v[4] = transferred.y; |         v[4] = transferred.y; | ||||||
|         if ((v[1] + v[3] + v[5]) >= 0) { |         if ((v[1] + v[3] + v[5]) >= 0) { | ||||||
|             ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); |             endpoints[0][ep_index] = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); | ||||||
|             ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); |             endpoints[1][ep_index] = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); | ||||||
|         } else { |         } else { | ||||||
|             ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); |             endpoints[0][ep_index] = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); | ||||||
|             ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); |             endpoints[1][ep_index] = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case 10: { |     case 10: { | ||||||
|         READ_UINT_VALUES(6) |         READ_UINT_VALUES(6) | ||||||
|         ep1 = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); |         endpoints[0][ep_index] = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); | ||||||
|         ep2 = uvec4(v[5], v[0], v[1], v[2]); |         endpoints[1][ep_index] = uvec4(v[5], v[0], v[1], v[2]); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case 12: { |     case 12: { | ||||||
|         READ_UINT_VALUES(8) |         READ_UINT_VALUES(8) | ||||||
|         if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { |         if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { | ||||||
|             ep1 = uvec4(v[6], v[0], v[2], v[4]); |             endpoints[0][ep_index] = uvec4(v[6], v[0], v[2], v[4]); | ||||||
|             ep2 = uvec4(v[7], v[1], v[3], v[5]); |             endpoints[1][ep_index] = uvec4(v[7], v[1], v[3], v[5]); | ||||||
|         } else { |         } else { | ||||||
|             ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); |             endpoints[0][ep_index] = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); | ||||||
|             ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); |             endpoints[1][ep_index] = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| @@ -794,18 +796,18 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | |||||||
|         v[6] = transferred.y; |         v[6] = transferred.y; | ||||||
|  |  | ||||||
|         if ((v[1] + v[3] + v[5]) >= 0) { |         if ((v[1] + v[3] + v[5]) >= 0) { | ||||||
|             ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4])); |             endpoints[0][ep_index] = ClampByte(ivec4(v[6], v[0], v[2], v[4])); | ||||||
|             ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); |             endpoints[1][ep_index] = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); | ||||||
|         } else { |         } else { | ||||||
|             ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); |             endpoints[0][ep_index] = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); | ||||||
|             ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); |             endpoints[1][ep_index] = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     default: { |     default: { | ||||||
|         // HDR mode, or more likely a bug computing the color_endpoint_mode |         // HDR mode, or more likely a bug computing the color_endpoint_mode | ||||||
|         ep1 = uvec4(0xFF, 0xFF, 0, 0); |         endpoints[0][ep_index] = uvec4(0xFF, 0xFF, 0, 0); | ||||||
|         ep2 = uvec4(0xFF, 0xFF, 0, 0); |         endpoints[1][ep_index] = uvec4(0xFF, 0xFF, 0, 0); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     } |     } | ||||||
| @@ -1198,10 +1200,6 @@ void DecompressBlock(ivec3 coord) { | |||||||
|             color_endpoint_mode[i] = cem; |             color_endpoint_mode[i] = cem; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     uvec4 endpoints0[4]; |  | ||||||
|     uvec4 endpoints1[4]; |  | ||||||
|     { |  | ||||||
|         // This decode phase should at most push 32 elements into the vector |         // This decode phase should at most push 32 elements into the vector | ||||||
|         result_vector_max_index = 32; |         result_vector_max_index = 32; | ||||||
|  |  | ||||||
| @@ -1209,10 +1207,8 @@ void DecompressBlock(ivec3 coord) { | |||||||
|         uint colvals_index = 0; |         uint colvals_index = 0; | ||||||
|         DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); |         DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); | ||||||
|         for (uint i = 0; i < num_partitions; i++) { |         for (uint i = 0; i < num_partitions; i++) { | ||||||
|             ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], |             ComputeEndpoints(i, color_endpoint_mode[i], colvals_index); | ||||||
|                              colvals_index); |  | ||||||
|         } |         } | ||||||
|     } |  | ||||||
|     color_endpoint_data = local_buff; |     color_endpoint_data = local_buff; | ||||||
|     color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; |     color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; | ||||||
|     const uint clear_byte_start = (weight_bits >> 3) + 1; |     const uint clear_byte_start = (weight_bits >> 3) + 1; | ||||||
| @@ -1247,8 +1243,8 @@ void DecompressBlock(ivec3 coord) { | |||||||
|                 local_partition = Select2DPartition(partition_index, i, j, num_partitions, |                 local_partition = Select2DPartition(partition_index, i, j, num_partitions, | ||||||
|                                                     (block_dims.y * block_dims.x) < 32); |                                                     (block_dims.y * block_dims.x) < 32); | ||||||
|             } |             } | ||||||
|             const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); |             const uvec4 C0 = ReplicateByteTo16(endpoints[0][local_partition]); | ||||||
|             const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); |             const uvec4 C1 = ReplicateByteTo16(endpoints[1][local_partition]); | ||||||
|             const uint weight_offset = (j * block_dims.x + i); |             const uint weight_offset = (j * block_dims.x + i); | ||||||
|             const uint array_index = weight_offset / 4; |             const uint array_index = weight_offset / 4; | ||||||
|             const uint vector_index = bfe(weight_offset, 0, 2); |             const uint vector_index = bfe(weight_offset, 0, 2); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user