astc_decoder: Reimplement Layers
Reimplements the approach to decoding layers in the compute shader. Fixes multilayer ASTC decoding when using Vulkan.
This commit is contained in:
		| @@ -39,17 +39,15 @@ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; | |||||||
| BEGIN_PUSH_CONSTANTS | BEGIN_PUSH_CONSTANTS | ||||||
| UNIFORM(0) uvec2 num_image_blocks; | UNIFORM(0) uvec2 num_image_blocks; | ||||||
| UNIFORM(1) uvec2 block_dims; | UNIFORM(1) uvec2 block_dims; | ||||||
| UNIFORM(2) uint layer; |  | ||||||
|  |  | ||||||
| UNIFORM(3) uvec3 origin; |  | ||||||
| UNIFORM(4) ivec3 destination; |  | ||||||
| UNIFORM(5) uint bytes_per_block_log2; |  | ||||||
| UNIFORM(6) uint layer_stride; |  | ||||||
| UNIFORM(7) uint block_size; |  | ||||||
| UNIFORM(8) uint x_shift; |  | ||||||
| UNIFORM(9) uint block_height; |  | ||||||
| UNIFORM(10) uint block_height_mask; |  | ||||||
|  |  | ||||||
|  | UNIFORM(2) uvec3 origin; | ||||||
|  | UNIFORM(3) ivec3 destination; | ||||||
|  | UNIFORM(4) uint bytes_per_block_log2; | ||||||
|  | UNIFORM(5) uint layer_stride; | ||||||
|  | UNIFORM(6) uint block_size; | ||||||
|  | UNIFORM(7) uint x_shift; | ||||||
|  | UNIFORM(8) uint block_height; | ||||||
|  | UNIFORM(9) uint block_height_mask; | ||||||
| END_PUSH_CONSTANTS | END_PUSH_CONSTANTS | ||||||
|  |  | ||||||
| uint current_index = 0; | uint current_index = 0; | ||||||
| @@ -82,7 +80,7 @@ layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | |||||||
|     uint swizzle_table[]; |     uint swizzle_table[]; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { | ||||||
|     uint astc_data[]; |     uint astc_data[]; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -104,7 +102,7 @@ layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BY | |||||||
|     uint REPLICATE_BYTE_TO_16_TABLE[]; |     uint REPLICATE_BYTE_TO_16_TABLE[]; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2D dest_image; | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; | ||||||
|  |  | ||||||
| const uint GOB_SIZE_X = 64; | const uint GOB_SIZE_X = 64; | ||||||
| const uint GOB_SIZE_Y = 8; | const uint GOB_SIZE_Y = 8; | ||||||
| @@ -1086,10 +1084,9 @@ TexelWeightParams DecodeBlockInfo(uint block_index) { | |||||||
| void FillError(ivec3 coord) { | void FillError(ivec3 coord) { | ||||||
|     for (uint j = 0; j < block_dims.y; j++) { |     for (uint j = 0; j < block_dims.y; j++) { | ||||||
|         for (uint i = 0; i < block_dims.x; i++) { |         for (uint i = 0; i < block_dims.x; i++) { | ||||||
|             imageStore(dest_image, coord.xy + ivec2(i, j), vec4(1.0, 1.0, 0.0, 1.0)); |             imageStore(dest_image, coord + ivec3(i, j, 0), vec4(1.0, 1.0, 0.0, 1.0)); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     return; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void FillVoidExtentLDR(ivec3 coord, uint block_index) { | void FillVoidExtentLDR(ivec3 coord, uint block_index) { | ||||||
| @@ -1107,7 +1104,7 @@ void FillVoidExtentLDR(ivec3 coord, uint block_index) { | |||||||
|     float b = float(b_u) / 65535.0f; |     float b = float(b_u) / 65535.0f; | ||||||
|     for (uint j = 0; j < block_dims.y; j++) { |     for (uint j = 0; j < block_dims.y; j++) { | ||||||
|         for (uint i = 0; i < block_dims.x; i++) { |         for (uint i = 0; i < block_dims.x; i++) { | ||||||
|             imageStore(dest_image, coord.xy + ivec2(i, j), vec4(r, g, b, a)); |             imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -1264,7 +1261,7 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||||||
|             } |             } | ||||||
|             vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); |             vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); | ||||||
|             p = (Cf / 65535.0); |             p = (Cf / 65535.0); | ||||||
|             imageStore(dest_image, coord.xy + ivec2(i, j), p.gbar); |             imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -1279,7 +1276,7 @@ void main() { | |||||||
|     const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; |     const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; | ||||||
|  |  | ||||||
|     uint offset = 0; |     uint offset = 0; | ||||||
|     offset += layer * layer_stride; |     offset += pos.z * layer_stride; | ||||||
|     offset += (block_y >> block_height) * block_size; |     offset += (block_y >> block_height) * block_size; | ||||||
|     offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; |     offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; | ||||||
|     offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |     offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | ||||||
| @@ -1287,7 +1284,7 @@ void main() { | |||||||
|  |  | ||||||
|     const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0)); |     const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0)); | ||||||
|     uint block_index = |     uint block_index = | ||||||
|         layer * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; |         pos.z * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; | ||||||
|     current_index = 0; |     current_index = 0; | ||||||
|     bitsread = 0; |     bitsread = 0; | ||||||
|     for (int i = 0; i < 16; i++) { |     for (int i = 0; i < 16; i++) { | ||||||
|   | |||||||
| @@ -110,7 +110,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||||||
|     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||||||
|     static constexpr GLuint LOC_NUM_IMAGE_BLOCKS = 0; |     static constexpr GLuint LOC_NUM_IMAGE_BLOCKS = 0; | ||||||
|     static constexpr GLuint LOC_BLOCK_DIMS = 1; |     static constexpr GLuint LOC_BLOCK_DIMS = 1; | ||||||
|     static constexpr GLuint LOC_LAYER = 2; |  | ||||||
|  |  | ||||||
|     const Extent3D tile_size = { |     const Extent3D tile_size = { | ||||||
|         VideoCore::Surface::DefaultBlockWidth(image.info.format), |         VideoCore::Surface::DefaultBlockWidth(image.info.format), | ||||||
| @@ -130,35 +129,31 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||||||
|  |  | ||||||
|     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | ||||||
|     glUniform2ui(LOC_BLOCK_DIMS, tile_size.width, tile_size.height); |     glUniform2ui(LOC_BLOCK_DIMS, tile_size.width, tile_size.height); | ||||||
|  |  | ||||||
|     for (u32 layer = 0; layer < image.info.resources.layers; layer++) { |  | ||||||
|     for (const SwizzleParameters& swizzle : swizzles) { |     for (const SwizzleParameters& swizzle : swizzles) { | ||||||
|             glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_FALSE, |         glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, | ||||||
|                                layer, GL_WRITE_ONLY, GL_RGBA8); |                            GL_WRITE_ONLY, GL_RGBA8); | ||||||
|         const size_t input_offset = swizzle.buffer_offset + map.offset; |         const size_t input_offset = swizzle.buffer_offset + map.offset; | ||||||
|         const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); |         const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | ||||||
|         const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); |         const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | ||||||
|  |  | ||||||
|         glUniform2ui(LOC_NUM_IMAGE_BLOCKS, swizzle.num_tiles.width, swizzle.num_tiles.height); |         glUniform2ui(LOC_NUM_IMAGE_BLOCKS, swizzle.num_tiles.width, swizzle.num_tiles.height); | ||||||
|             glUniform1ui(LOC_LAYER, layer); |  | ||||||
|  |  | ||||||
|         // To unswizzle the ASTC data |         // To unswizzle the ASTC data | ||||||
|         const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |         const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | ||||||
|             glUniform3uiv(3, 1, params.origin.data()); |         glUniform3uiv(2, 1, params.origin.data()); | ||||||
|             glUniform3iv(4, 1, params.destination.data()); |         glUniform3iv(3, 1, params.destination.data()); | ||||||
|             glUniform1ui(5, params.bytes_per_block_log2); |         glUniform1ui(4, params.bytes_per_block_log2); | ||||||
|             glUniform1ui(6, params.layer_stride); |         glUniform1ui(5, params.layer_stride); | ||||||
|             glUniform1ui(7, params.block_size); |         glUniform1ui(6, params.block_size); | ||||||
|             glUniform1ui(8, params.x_shift); |         glUniform1ui(7, params.x_shift); | ||||||
|             glUniform1ui(9, params.block_height); |         glUniform1ui(8, params.block_height); | ||||||
|             glUniform1ui(10, params.block_height_mask); |         glUniform1ui(9, params.block_height_mask); | ||||||
|  |  | ||||||
|         // ASTC texture data |         // ASTC texture data | ||||||
|             glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, |         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, | ||||||
|                               input_offset, image.guest_size_bytes - swizzle.buffer_offset); |                           image.guest_size_bytes - swizzle.buffer_offset); | ||||||
|  |  | ||||||
|             glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); |         glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|     program_manager.RestoreGuestCompute(); |     program_manager.RestoreGuestCompute(); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -31,6 +31,7 @@ namespace Vulkan { | |||||||
|  |  | ||||||
| using Tegra::Texture::SWIZZLE_TABLE; | using Tegra::Texture::SWIZZLE_TABLE; | ||||||
| using Tegra::Texture::ASTC::EncodingsValues; | using Tegra::Texture::ASTC::EncodingsValues; | ||||||
|  | using namespace Tegra::Texture::ASTC; | ||||||
|  |  | ||||||
| namespace { | namespace { | ||||||
|  |  | ||||||
| @@ -214,7 +215,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||||||
| struct AstcPushConstants { | struct AstcPushConstants { | ||||||
|     std::array<u32, 2> num_image_blocks; |     std::array<u32, 2> num_image_blocks; | ||||||
|     std::array<u32, 2> blocks_dims; |     std::array<u32, 2> blocks_dims; | ||||||
|     u32 layer; |  | ||||||
|     VideoCommon::Accelerated::BlockLinearSwizzle2DParams params; |     VideoCommon::Accelerated::BlockLinearSwizzle2DParams params; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -226,6 +226,7 @@ struct AstcBufferData { | |||||||
|     decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; |     decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | ||||||
|     decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; |     decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | ||||||
| } constexpr ASTC_BUFFER_DATA; | } constexpr ASTC_BUFFER_DATA; | ||||||
|  |  | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | ||||||
| @@ -403,7 +404,6 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||||||
|     return {staging.buffer, staging.offset}; |     return {staging.buffer, staging.offset}; | ||||||
| } | } | ||||||
|  |  | ||||||
| using namespace Tegra::Texture::ASTC; |  | ||||||
| ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | ||||||
|                                  VKDescriptorPool& descriptor_pool_, |                                  VKDescriptorPool& descriptor_pool_, | ||||||
|                                  StagingBufferPool& staging_buffer_pool_, |                                  StagingBufferPool& staging_buffer_pool_, | ||||||
| @@ -464,63 +464,20 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||||||
|     if (!data_buffer) { |     if (!data_buffer) { | ||||||
|         MakeDataBuffer(); |         MakeDataBuffer(); | ||||||
|     } |     } | ||||||
|     const std::array<u32, 2> block_dims{tile_size.width, tile_size.height}; |     const VkImageAspectFlags aspect_mask = image.AspectMask(); | ||||||
|     for (s32 layer = 0; layer < image.info.resources.layers; layer++) { |     const VkImage vk_image = image.Handle(); | ||||||
|         for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { |     const bool is_initialized = image.ExchangeInitialization(); | ||||||
|             const size_t input_offset = swizzle.buffer_offset + map.offset; |     scheduler.Record([vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) { | ||||||
|             const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); |  | ||||||
|             const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); |  | ||||||
|             const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height}; |  | ||||||
|             const u32 layer_image_size = |  | ||||||
|                 image.guest_size_bytes - static_cast<u32>(swizzle.buffer_offset); |  | ||||||
|  |  | ||||||
|             update_descriptor_queue.Acquire(); |  | ||||||
|             update_descriptor_queue.AddBuffer(*data_buffer, |  | ||||||
|                                               offsetof(AstcBufferData, swizzle_table_buffer), |  | ||||||
|                                               sizeof(AstcBufferData::swizzle_table_buffer)); |  | ||||||
|             update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes); |  | ||||||
|             update_descriptor_queue.AddBuffer(*data_buffer, |  | ||||||
|                                               offsetof(AstcBufferData, encoding_values), |  | ||||||
|                                               sizeof(AstcBufferData::encoding_values)); |  | ||||||
|             update_descriptor_queue.AddBuffer(*data_buffer, |  | ||||||
|                                               offsetof(AstcBufferData, replicate_6_to_8), |  | ||||||
|                                               sizeof(AstcBufferData::replicate_6_to_8)); |  | ||||||
|             update_descriptor_queue.AddBuffer(*data_buffer, |  | ||||||
|                                               offsetof(AstcBufferData, replicate_7_to_8), |  | ||||||
|                                               sizeof(AstcBufferData::replicate_7_to_8)); |  | ||||||
|             update_descriptor_queue.AddBuffer(*data_buffer, |  | ||||||
|                                               offsetof(AstcBufferData, replicate_8_to_8), |  | ||||||
|                                               sizeof(AstcBufferData::replicate_8_to_8)); |  | ||||||
|             update_descriptor_queue.AddBuffer(*data_buffer, |  | ||||||
|                                               offsetof(AstcBufferData, replicate_byte_to_16), |  | ||||||
|                                               sizeof(AstcBufferData::replicate_byte_to_16)); |  | ||||||
|             update_descriptor_queue.AddImage(image.StorageImageView()); |  | ||||||
|  |  | ||||||
|             const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |  | ||||||
|             // To unswizzle the ASTC data |  | ||||||
|             const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |  | ||||||
|             scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = map.buffer, |  | ||||||
|                               num_dispatches_x, num_dispatches_y, layer_image_size, |  | ||||||
|                               num_image_blocks, block_dims, layer, params, set, |  | ||||||
|                               image = image.Handle(), input_offset, |  | ||||||
|                               aspect_mask = image.AspectMask()](vk::CommandBuffer cmdbuf) { |  | ||||||
|                 const AstcPushConstants uniforms{num_image_blocks, block_dims, layer, params}; |  | ||||||
|  |  | ||||||
|                 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |  | ||||||
|                 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); |  | ||||||
|                 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); |  | ||||||
|                 cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, 1); |  | ||||||
|  |  | ||||||
|         const VkImageMemoryBarrier image_barrier{ |         const VkImageMemoryBarrier image_barrier{ | ||||||
|             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||||
|             .pNext = nullptr, |             .pNext = nullptr, | ||||||
|             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, |             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|                     .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, |             .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|                     .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, |             .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | ||||||
|             .newLayout = VK_IMAGE_LAYOUT_GENERAL, |             .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|                     .image = image, |             .image = vk_image, | ||||||
|             .subresourceRange{ |             .subresourceRange{ | ||||||
|                 .aspectMask = aspect_mask, |                 .aspectMask = aspect_mask, | ||||||
|                 .baseMipLevel = 0, |                 .baseMipLevel = 0, | ||||||
| @@ -529,11 +486,72 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||||||
|                 .layerCount = VK_REMAINING_ARRAY_LAYERS, |                 .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||||||
|             }, |             }, | ||||||
|         }; |         }; | ||||||
|                 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |         cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); | ||||||
|                                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, image_barrier); |     }); | ||||||
|  |     const std::array<u32, 2> block_dims{tile_size.width, tile_size.height}; | ||||||
|  |     for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { | ||||||
|  |         const size_t input_offset = swizzle.buffer_offset + map.offset; | ||||||
|  |         const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | ||||||
|  |         const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | ||||||
|  |         const u32 num_dispatches_z = image.info.resources.layers; | ||||||
|  |         const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height}; | ||||||
|  |         const u32 layer_image_size = | ||||||
|  |             image.guest_size_bytes - static_cast<u32>(swizzle.buffer_offset); | ||||||
|  |  | ||||||
|  |         update_descriptor_queue.Acquire(); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, | ||||||
|  |                                           offsetof(AstcBufferData, swizzle_table_buffer), | ||||||
|  |                                           sizeof(AstcBufferData::swizzle_table_buffer)); | ||||||
|  |         update_descriptor_queue.AddBuffer(map.buffer, input_offset, layer_image_size); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values), | ||||||
|  |                                           sizeof(AstcBufferData::encoding_values)); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8), | ||||||
|  |                                           sizeof(AstcBufferData::replicate_6_to_8)); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8), | ||||||
|  |                                           sizeof(AstcBufferData::replicate_7_to_8)); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), | ||||||
|  |                                           sizeof(AstcBufferData::replicate_8_to_8)); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, | ||||||
|  |                                           offsetof(AstcBufferData, replicate_byte_to_16), | ||||||
|  |                                           sizeof(AstcBufferData::replicate_byte_to_16)); | ||||||
|  |         update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); | ||||||
|  |  | ||||||
|  |         const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||||||
|  |         const VkPipelineLayout vk_layout = *layout; | ||||||
|  |         const VkPipeline vk_pipeline = *pipeline; | ||||||
|  |         // To unswizzle the ASTC data | ||||||
|  |         const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | ||||||
|  |         scheduler.Record([vk_layout, vk_pipeline, buffer = map.buffer, num_dispatches_x, | ||||||
|  |                           num_dispatches_y, num_dispatches_z, num_image_blocks, block_dims, params, | ||||||
|  |                           set, input_offset](vk::CommandBuffer cmdbuf) { | ||||||
|  |             const AstcPushConstants uniforms{num_image_blocks, block_dims, params}; | ||||||
|  |             cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline); | ||||||
|  |             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); | ||||||
|  |             cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | ||||||
|  |             cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); | ||||||
|         }); |         }); | ||||||
|     } |     } | ||||||
|     } |     scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) { | ||||||
|  |         const VkImageMemoryBarrier image_barrier{ | ||||||
|  |             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||||
|  |             .pNext = nullptr, | ||||||
|  |             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|  |             .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|  |             .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|  |             .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|  |             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |             .image = vk_image, | ||||||
|  |             .subresourceRange{ | ||||||
|  |                 .aspectMask = aspect_mask, | ||||||
|  |                 .baseMipLevel = 0, | ||||||
|  |                 .levelCount = VK_REMAINING_MIP_LEVELS, | ||||||
|  |                 .baseArrayLayer = 0, | ||||||
|  |                 .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |         cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); | ||||||
|  |     }); | ||||||
| } | } | ||||||
|  |  | ||||||
| } // namespace Vulkan | } // namespace Vulkan | ||||||
|   | |||||||
| @@ -823,12 +823,15 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||||||
|         .usage = VK_IMAGE_USAGE_STORAGE_BIT, |         .usage = VK_IMAGE_USAGE_STORAGE_BIT, | ||||||
|     }; |     }; | ||||||
|     if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |     if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | ||||||
|         storage_image_view = runtime.device.GetLogical().CreateImageView(VkImageViewCreateInfo{ |         const auto& device = runtime.device.GetLogical(); | ||||||
|  |         storage_image_views.reserve(info.resources.levels); | ||||||
|  |         for (s32 level = 0; level < info.resources.levels; ++level) { | ||||||
|  |             storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{ | ||||||
|                 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |                 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||||||
|                 .pNext = &storage_image_view_usage_create_info, |                 .pNext = &storage_image_view_usage_create_info, | ||||||
|                 .flags = 0, |                 .flags = 0, | ||||||
|                 .image = *image, |                 .image = *image, | ||||||
|             .viewType = VK_IMAGE_VIEW_TYPE_2D, |                 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY, | ||||||
|                 .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, |                 .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, | ||||||
|                 .components{ |                 .components{ | ||||||
|                     .r = VK_COMPONENT_SWIZZLE_IDENTITY, |                     .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||||||
| @@ -838,12 +841,13 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||||||
|                 }, |                 }, | ||||||
|                 .subresourceRange{ |                 .subresourceRange{ | ||||||
|                     .aspectMask = aspect_mask, |                     .aspectMask = aspect_mask, | ||||||
|                 .baseMipLevel = 0, |                     .baseMipLevel = static_cast<u32>(level), | ||||||
|                 .levelCount = VK_REMAINING_MIP_LEVELS, |                     .levelCount = 1, | ||||||
|                     .baseArrayLayer = 0, |                     .baseArrayLayer = 0, | ||||||
|                     .layerCount = VK_REMAINING_ARRAY_LAYERS, |                     .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||||||
|                 }, |                 }, | ||||||
|         }); |             })); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -121,12 +121,17 @@ public: | |||||||
|         return *buffer; |         return *buffer; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept { |     [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { | ||||||
|         return aspect_mask; |         return aspect_mask; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     [[nodiscard]] VkImageView StorageImageView() const noexcept { |     [[nodiscard]] VkImageView StorageImageView(s32 level) const noexcept { | ||||||
|         return *storage_image_view; |         return *storage_image_views[level]; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns true when the image is already initialized and mark it as initialized | ||||||
|  |     [[nodiscard]] bool ExchangeInitialization() noexcept { | ||||||
|  |         return std::exchange(initialized, true); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| private: | private: | ||||||
| @@ -135,7 +140,7 @@ private: | |||||||
|     vk::Buffer buffer; |     vk::Buffer buffer; | ||||||
|     MemoryCommit commit; |     MemoryCommit commit; | ||||||
|     vk::ImageView image_view; |     vk::ImageView image_view; | ||||||
|     vk::ImageView storage_image_view; |     std::vector<vk::ImageView> storage_image_views; | ||||||
|     VkImageAspectFlags aspect_mask = 0; |     VkImageAspectFlags aspect_mask = 0; | ||||||
|     bool initialized = false; |     bool initialized = false; | ||||||
| }; | }; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user