Merge pull request #5927 from ameerj/astc-compute
video_core: Accelerate ASTC texture decoding using compute shaders
This commit is contained in:
		| @@ -236,7 +236,6 @@ add_library(video_core STATIC | |||||||
|     texture_cache/types.h |     texture_cache/types.h | ||||||
|     texture_cache/util.cpp |     texture_cache/util.cpp | ||||||
|     texture_cache/util.h |     texture_cache/util.h | ||||||
|     textures/astc.cpp |  | ||||||
|     textures/astc.h |     textures/astc.h | ||||||
|     textures/decoders.cpp |     textures/decoders.cpp | ||||||
|     textures/decoders.h |     textures/decoders.h | ||||||
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| set(SHADER_FILES | set(SHADER_FILES | ||||||
|  |     astc_decoder.comp | ||||||
|     block_linear_unswizzle_2d.comp |     block_linear_unswizzle_2d.comp | ||||||
|     block_linear_unswizzle_3d.comp |     block_linear_unswizzle_3d.comp | ||||||
|     convert_depth_to_float.frag |     convert_depth_to_float.frag | ||||||
|   | |||||||
| @@ -6,7 +6,27 @@ get_filename_component(CONTENTS_NAME ${SOURCE_FILE} NAME) | |||||||
| string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME}) | string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME}) | ||||||
| string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME) | string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME) | ||||||
|  |  | ||||||
| file(READ ${SOURCE_FILE} CONTENTS) | FILE(READ ${SOURCE_FILE} line_contents) | ||||||
|  |  | ||||||
|  | # Replace double quotes with single quotes, | ||||||
|  | # as double quotes will be used to wrap the lines | ||||||
|  | STRING(REGEX REPLACE "\"" "'" line_contents "${line_contents}") | ||||||
|  |  | ||||||
|  | # CMake separates list elements with semicolons, but semicolons | ||||||
|  | # are used extensively in the shader code. | ||||||
|  | # Replace with a temporary marker, to be reverted later. | ||||||
|  | STRING(REGEX REPLACE ";" "{{SEMICOLON}}" line_contents "${line_contents}") | ||||||
|  |  | ||||||
|  | # Make every line an individual element in the CMake list. | ||||||
|  | STRING(REGEX REPLACE "\n" ";" line_contents "${line_contents}") | ||||||
|  |  | ||||||
|  | # Build the shader string, wrapping each line in double quotes. | ||||||
|  | foreach(line IN LISTS line_contents) | ||||||
|  |     string(CONCAT CONTENTS "${CONTENTS}" \"${line}\\n\"\n) | ||||||
|  | endforeach() | ||||||
|  |  | ||||||
|  | # Revert the original semicolons in the source. | ||||||
|  | STRING(REGEX REPLACE "{{SEMICOLON}}" ";" CONTENTS "${CONTENTS}") | ||||||
|  |  | ||||||
| get_filename_component(OUTPUT_DIR ${HEADER_FILE} DIRECTORY) | get_filename_component(OUTPUT_DIR ${HEADER_FILE} DIRECTORY) | ||||||
| make_directory(${OUTPUT_DIR}) | make_directory(${OUTPUT_DIR}) | ||||||
|   | |||||||
							
								
								
									
										1339
									
								
								src/video_core/host_shaders/astc_decoder.comp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1339
									
								
								src/video_core/host_shaders/astc_decoder.comp
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -4,6 +4,8 @@ | |||||||
|  |  | ||||||
| namespace HostShaders { | namespace HostShaders { | ||||||
|  |  | ||||||
| constexpr std::string_view @CONTENTS_NAME@ = R"(@CONTENTS@)"; | constexpr std::string_view @CONTENTS_NAME@ = { | ||||||
|  | @CONTENTS@ | ||||||
|  | }; | ||||||
|  |  | ||||||
| } // namespace HostShaders | } // namespace HostShaders | ||||||
|   | |||||||
| @@ -307,7 +307,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 | |||||||
|  |  | ||||||
| [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, | [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, | ||||||
|                                     const VideoCommon::ImageInfo& info) { |                                     const VideoCommon::ImageInfo& info) { | ||||||
|     // Disable accelerated uploads for now as they don't implement swizzled uploads |     return !runtime.HasNativeASTC() && IsPixelFormatASTC(info.format); | ||||||
|  |     // Disable other accelerated uploads for now as they don't implement swizzled uploads | ||||||
|     return false; |     return false; | ||||||
|     switch (info.type) { |     switch (info.type) { | ||||||
|     case ImageType::e2D: |     case ImageType::e2D: | ||||||
| @@ -569,7 +570,11 @@ void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferM | |||||||
|                                                 std::span<const SwizzleParameters> swizzles) { |                                                 std::span<const SwizzleParameters> swizzles) { | ||||||
|     switch (image.info.type) { |     switch (image.info.type) { | ||||||
|     case ImageType::e2D: |     case ImageType::e2D: | ||||||
|         return util_shaders.BlockLinearUpload2D(image, map, swizzles); |         if (IsPixelFormatASTC(image.info.format)) { | ||||||
|  |             return util_shaders.ASTCDecode(image, map, swizzles); | ||||||
|  |         } else { | ||||||
|  |             return util_shaders.BlockLinearUpload2D(image, map, swizzles); | ||||||
|  |         } | ||||||
|     case ImageType::e3D: |     case ImageType::e3D: | ||||||
|         return util_shaders.BlockLinearUpload3D(image, map, swizzles); |         return util_shaders.BlockLinearUpload3D(image, map, swizzles); | ||||||
|     case ImageType::Linear: |     case ImageType::Linear: | ||||||
| @@ -599,6 +604,10 @@ FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | bool TextureCacheRuntime::HasNativeASTC() const noexcept { | ||||||
|  |     return device.HasASTC(); | ||||||
|  | } | ||||||
|  |  | ||||||
| TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) | TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) | ||||||
|     : storage_flags{storage_flags_}, map_flags{map_flags_} {} |     : storage_flags{storage_flags_}, map_flags{map_flags_} {} | ||||||
|  |  | ||||||
|   | |||||||
| @@ -95,6 +95,8 @@ public: | |||||||
|         return has_broken_texture_view_formats; |         return has_broken_texture_view_formats; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     bool HasNativeASTC() const noexcept; | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     struct StagingBuffers { |     struct StagingBuffers { | ||||||
|         explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); |         explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | ||||||
|   | |||||||
| @@ -2,7 +2,6 @@ | |||||||
| // Licensed under GPLv2 or any later version | // Licensed under GPLv2 or any later version | ||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
| #include <bit> |  | ||||||
| #include <span> | #include <span> | ||||||
| #include <string_view> | #include <string_view> | ||||||
|  |  | ||||||
| @@ -11,6 +10,7 @@ | |||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/div_ceil.h" | #include "common/div_ceil.h" | ||||||
|  | #include "video_core/host_shaders/astc_decoder_comp.h" | ||||||
| #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" | #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" | ||||||
| #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" | #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" | ||||||
| #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | ||||||
| @@ -20,16 +20,18 @@ | |||||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_texture_cache.h" | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||||||
| #include "video_core/renderer_opengl/util_shaders.h" | #include "video_core/renderer_opengl/util_shaders.h" | ||||||
| #include "video_core/surface.h" |  | ||||||
| #include "video_core/texture_cache/accelerated_swizzle.h" | #include "video_core/texture_cache/accelerated_swizzle.h" | ||||||
| #include "video_core/texture_cache/types.h" | #include "video_core/texture_cache/types.h" | ||||||
| #include "video_core/texture_cache/util.h" | #include "video_core/texture_cache/util.h" | ||||||
|  | #include "video_core/textures/astc.h" | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
|  |  | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
|  |  | ||||||
| using namespace HostShaders; | using namespace HostShaders; | ||||||
|  | using namespace Tegra::Texture::ASTC; | ||||||
|  |  | ||||||
|  | using VideoCommon::Extent2D; | ||||||
| using VideoCommon::Extent3D; | using VideoCommon::Extent3D; | ||||||
| using VideoCommon::ImageCopy; | using VideoCommon::ImageCopy; | ||||||
| using VideoCommon::ImageType; | using VideoCommon::ImageType; | ||||||
| @@ -57,7 +59,7 @@ size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { | |||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| UtilShaders::UtilShaders(ProgramManager& program_manager_) | UtilShaders::UtilShaders(ProgramManager& program_manager_) | ||||||
|     : program_manager{program_manager_}, |     : program_manager{program_manager_}, astc_decoder_program(MakeProgram(ASTC_DECODER_COMP)), | ||||||
|       block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), |       block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), | ||||||
|       block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), |       block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), | ||||||
|       pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), |       pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), | ||||||
| @@ -65,11 +67,79 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | |||||||
|       copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { |       copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { | ||||||
|     const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); |     const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); | ||||||
|     swizzle_table_buffer.Create(); |     swizzle_table_buffer.Create(); | ||||||
|  |     astc_buffer.Create(); | ||||||
|     glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); |     glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); | ||||||
|  |     glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0); | ||||||
| } | } | ||||||
|  |  | ||||||
| UtilShaders::~UtilShaders() = default; | UtilShaders::~UtilShaders() = default; | ||||||
|  |  | ||||||
|  | void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | ||||||
|  |                              std::span<const VideoCommon::SwizzleParameters> swizzles) { | ||||||
|  |     static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | ||||||
|  |     static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||||||
|  |     static constexpr GLuint BINDING_ENC_BUFFER = 2; | ||||||
|  |  | ||||||
|  |     static constexpr GLuint BINDING_6_TO_8_BUFFER = 3; | ||||||
|  |     static constexpr GLuint BINDING_7_TO_8_BUFFER = 4; | ||||||
|  |     static constexpr GLuint BINDING_8_TO_8_BUFFER = 5; | ||||||
|  |     static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6; | ||||||
|  |  | ||||||
|  |     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||||||
|  |  | ||||||
|  |     const Extent2D tile_size{ | ||||||
|  |         .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), | ||||||
|  |         .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), | ||||||
|  |     }; | ||||||
|  |     program_manager.BindHostCompute(astc_decoder_program.handle); | ||||||
|  |     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||||||
|  |     glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle, | ||||||
|  |                       offsetof(AstcBufferData, encoding_values), | ||||||
|  |                       sizeof(AstcBufferData::encoding_values)); | ||||||
|  |     glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle, | ||||||
|  |                       offsetof(AstcBufferData, replicate_6_to_8), | ||||||
|  |                       sizeof(AstcBufferData::replicate_6_to_8)); | ||||||
|  |     glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle, | ||||||
|  |                       offsetof(AstcBufferData, replicate_7_to_8), | ||||||
|  |                       sizeof(AstcBufferData::replicate_7_to_8)); | ||||||
|  |     glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle, | ||||||
|  |                       offsetof(AstcBufferData, replicate_8_to_8), | ||||||
|  |                       sizeof(AstcBufferData::replicate_8_to_8)); | ||||||
|  |     glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle, | ||||||
|  |                       offsetof(AstcBufferData, replicate_byte_to_16), | ||||||
|  |                       sizeof(AstcBufferData::replicate_byte_to_16)); | ||||||
|  |  | ||||||
|  |     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | ||||||
|  |     glUniform2ui(1, tile_size.width, tile_size.height); | ||||||
|  |     // Ensure buffer data is valid before dispatching | ||||||
|  |     glFlush(); | ||||||
|  |     for (const SwizzleParameters& swizzle : swizzles) { | ||||||
|  |         const size_t input_offset = swizzle.buffer_offset + map.offset; | ||||||
|  |         const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | ||||||
|  |         const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | ||||||
|  |  | ||||||
|  |         const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | ||||||
|  |         ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); | ||||||
|  |         ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); | ||||||
|  |  | ||||||
|  |         glUniform1ui(2, params.bytes_per_block_log2); | ||||||
|  |         glUniform1ui(3, params.layer_stride); | ||||||
|  |         glUniform1ui(4, params.block_size); | ||||||
|  |         glUniform1ui(5, params.x_shift); | ||||||
|  |         glUniform1ui(6, params.block_height); | ||||||
|  |         glUniform1ui(7, params.block_height_mask); | ||||||
|  |  | ||||||
|  |         glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, | ||||||
|  |                            GL_WRITE_ONLY, GL_RGBA8); | ||||||
|  |         // ASTC texture data | ||||||
|  |         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, | ||||||
|  |                           image.guest_size_bytes - swizzle.buffer_offset); | ||||||
|  |  | ||||||
|  |         glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | ||||||
|  |     } | ||||||
|  |     program_manager.RestoreGuestCompute(); | ||||||
|  | } | ||||||
|  |  | ||||||
| void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | ||||||
|                                       std::span<const SwizzleParameters> swizzles) { |                                       std::span<const SwizzleParameters> swizzles) { | ||||||
|     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; |     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | ||||||
|   | |||||||
| @@ -40,6 +40,9 @@ public: | |||||||
|     explicit UtilShaders(ProgramManager& program_manager); |     explicit UtilShaders(ProgramManager& program_manager); | ||||||
|     ~UtilShaders(); |     ~UtilShaders(); | ||||||
|  |  | ||||||
|  |     void ASTCDecode(Image& image, const ImageBufferMap& map, | ||||||
|  |                     std::span<const VideoCommon::SwizzleParameters> swizzles); | ||||||
|  |  | ||||||
|     void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, |     void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | ||||||
|                              std::span<const VideoCommon::SwizzleParameters> swizzles); |                              std::span<const VideoCommon::SwizzleParameters> swizzles); | ||||||
|  |  | ||||||
| @@ -59,7 +62,9 @@ private: | |||||||
|     ProgramManager& program_manager; |     ProgramManager& program_manager; | ||||||
|  |  | ||||||
|     OGLBuffer swizzle_table_buffer; |     OGLBuffer swizzle_table_buffer; | ||||||
|  |     OGLBuffer astc_buffer; | ||||||
|  |  | ||||||
|  |     OGLProgram astc_decoder_program; | ||||||
|     OGLProgram block_linear_unswizzle_2d_program; |     OGLProgram block_linear_unswizzle_2d_program; | ||||||
|     OGLProgram block_linear_unswizzle_3d_program; |     OGLProgram block_linear_unswizzle_3d_program; | ||||||
|     OGLProgram pitch_unswizzle_program; |     OGLProgram pitch_unswizzle_program; | ||||||
|   | |||||||
| @@ -166,7 +166,7 @@ struct FormatTuple { | |||||||
|     {VK_FORMAT_R16G16_SINT, Attachable | Storage},             // R16G16_SINT |     {VK_FORMAT_R16G16_SINT, Attachable | Storage},             // R16G16_SINT | ||||||
|     {VK_FORMAT_R16G16_SNORM, Attachable | Storage},            // R16G16_SNORM |     {VK_FORMAT_R16G16_SNORM, Attachable | Storage},            // R16G16_SNORM | ||||||
|     {VK_FORMAT_UNDEFINED},                                     // R32G32B32_FLOAT |     {VK_FORMAT_UNDEFINED},                                     // R32G32B32_FLOAT | ||||||
|     {VK_FORMAT_R8G8B8A8_SRGB, Attachable},                     // A8B8G8R8_SRGB |     {VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable},              // A8B8G8R8_SRGB | ||||||
|     {VK_FORMAT_R8G8_UNORM, Attachable | Storage},              // R8G8_UNORM |     {VK_FORMAT_R8G8_UNORM, Attachable | Storage},              // R8G8_UNORM | ||||||
|     {VK_FORMAT_R8G8_SNORM, Attachable | Storage},              // R8G8_SNORM |     {VK_FORMAT_R8G8_SNORM, Attachable | Storage},              // R8G8_SNORM | ||||||
|     {VK_FORMAT_R8G8_SINT, Attachable | Storage},               // R8G8_SINT |     {VK_FORMAT_R8G8_SINT, Attachable | Storage},               // R8G8_SINT | ||||||
|   | |||||||
| @@ -11,18 +11,39 @@ | |||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/div_ceil.h" | #include "common/div_ceil.h" | ||||||
|  | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | ||||||
| #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | ||||||
| #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | ||||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||||
|  | #include "video_core/texture_cache/accelerated_swizzle.h" | ||||||
|  | #include "video_core/texture_cache/types.h" | ||||||
|  | #include "video_core/textures/astc.h" | ||||||
|  | #include "video_core/textures/decoders.h" | ||||||
| #include "video_core/vulkan_common/vulkan_device.h" | #include "video_core/vulkan_common/vulkan_device.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
|  |  | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
|  |  | ||||||
|  | using Tegra::Texture::SWIZZLE_TABLE; | ||||||
|  | using Tegra::Texture::ASTC::EncodingsValues; | ||||||
|  | using namespace Tegra::Texture::ASTC; | ||||||
|  |  | ||||||
| namespace { | namespace { | ||||||
|  |  | ||||||
|  | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; | ||||||
|  | constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; | ||||||
|  | constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; | ||||||
|  | constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; | ||||||
|  | constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; | ||||||
|  | constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5; | ||||||
|  | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6; | ||||||
|  | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7; | ||||||
|  |  | ||||||
| VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | ||||||
|     return { |     return { | ||||||
|         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
| @@ -50,6 +71,67 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding | |||||||
|     }}; |     }}; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() { | ||||||
|  |     return {{ | ||||||
|  |         { | ||||||
|  |             .binding = ASTC_BINDING_INPUT_BUFFER, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
|  |             .pImmutableSamplers = nullptr, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .binding = ASTC_BINDING_ENC_BUFFER, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
|  |             .pImmutableSamplers = nullptr, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .binding = ASTC_BINDING_6_TO_8_BUFFER, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
|  |             .pImmutableSamplers = nullptr, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .binding = ASTC_BINDING_7_TO_8_BUFFER, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
|  |             .pImmutableSamplers = nullptr, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .binding = ASTC_BINDING_8_TO_8_BUFFER, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
|  |             .pImmutableSamplers = nullptr, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .binding = ASTC_BINDING_BYTE_TO_16_BUFFER, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
|  |             .pImmutableSamplers = nullptr, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .binding = ASTC_BINDING_SWIZZLE_BUFFER, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
|  |             .pImmutableSamplers = nullptr, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .binding = ASTC_BINDING_OUTPUT_IMAGE, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
|  |             .pImmutableSamplers = nullptr, | ||||||
|  |         }, | ||||||
|  |     }}; | ||||||
|  | } | ||||||
|  |  | ||||||
| VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | ||||||
|     return { |     return { | ||||||
|         .dstBinding = 0, |         .dstBinding = 0, | ||||||
| @@ -61,6 +143,94 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | |||||||
|     }; |     }; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() { | ||||||
|  |     return {{ | ||||||
|  |         { | ||||||
|  |             .dstBinding = ASTC_BINDING_INPUT_BUFFER, | ||||||
|  |             .dstArrayElement = 0, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry), | ||||||
|  |             .stride = sizeof(DescriptorUpdateEntry), | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .dstBinding = ASTC_BINDING_ENC_BUFFER, | ||||||
|  |             .dstArrayElement = 0, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry), | ||||||
|  |             .stride = sizeof(DescriptorUpdateEntry), | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .dstBinding = ASTC_BINDING_6_TO_8_BUFFER, | ||||||
|  |             .dstArrayElement = 0, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), | ||||||
|  |             .stride = sizeof(DescriptorUpdateEntry), | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .dstBinding = ASTC_BINDING_7_TO_8_BUFFER, | ||||||
|  |             .dstArrayElement = 0, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), | ||||||
|  |             .stride = sizeof(DescriptorUpdateEntry), | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .dstBinding = ASTC_BINDING_8_TO_8_BUFFER, | ||||||
|  |             .dstArrayElement = 0, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), | ||||||
|  |             .stride = sizeof(DescriptorUpdateEntry), | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER, | ||||||
|  |             .dstArrayElement = 0, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry), | ||||||
|  |             .stride = sizeof(DescriptorUpdateEntry), | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, | ||||||
|  |             .dstArrayElement = 0, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||||
|  |             .offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry), | ||||||
|  |             .stride = sizeof(DescriptorUpdateEntry), | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             .dstBinding = ASTC_BINDING_OUTPUT_IMAGE, | ||||||
|  |             .dstArrayElement = 0, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | ||||||
|  |             .offset = ASTC_BINDING_OUTPUT_IMAGE * sizeof(DescriptorUpdateEntry), | ||||||
|  |             .stride = sizeof(DescriptorUpdateEntry), | ||||||
|  |         }, | ||||||
|  |     }}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | struct AstcPushConstants { | ||||||
|  |     std::array<u32, 2> blocks_dims; | ||||||
|  |     u32 bytes_per_block_log2; | ||||||
|  |     u32 layer_stride; | ||||||
|  |     u32 block_size; | ||||||
|  |     u32 x_shift; | ||||||
|  |     u32 block_height; | ||||||
|  |     u32 block_height_mask; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | struct AstcBufferData { | ||||||
|  |     decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE; | ||||||
|  |     decltype(EncodingsValues) encoding_values = EncodingsValues; | ||||||
|  |     decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; | ||||||
|  |     decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; | ||||||
|  |     decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | ||||||
|  |     decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | ||||||
|  | } constexpr ASTC_BUFFER_DATA; | ||||||
|  |  | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | ||||||
| @@ -238,4 +408,167 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||||||
|     return {staging.buffer, staging.offset}; |     return {staging.buffer, staging.offset}; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | ||||||
|  |                                  VKDescriptorPool& descriptor_pool_, | ||||||
|  |                                  StagingBufferPool& staging_buffer_pool_, | ||||||
|  |                                  VKUpdateDescriptorQueue& update_descriptor_queue_, | ||||||
|  |                                  MemoryAllocator& memory_allocator_) | ||||||
|  |     : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), | ||||||
|  |                     BuildASTCPassDescriptorUpdateTemplateEntry(), | ||||||
|  |                     BuildComputePushConstantRange(sizeof(AstcPushConstants)), | ||||||
|  |                     ASTC_DECODER_COMP_SPV), | ||||||
|  |       device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||||||
|  |       update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} | ||||||
|  |  | ||||||
|  | ASTCDecoderPass::~ASTCDecoderPass() = default; | ||||||
|  |  | ||||||
|  | void ASTCDecoderPass::MakeDataBuffer() { | ||||||
|  |     constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE); | ||||||
|  |     data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||||||
|  |         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||||
|  |         .pNext = nullptr, | ||||||
|  |         .flags = 0, | ||||||
|  |         .size = TOTAL_BUFFER_SIZE, | ||||||
|  |         .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||||||
|  |         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||||
|  |         .queueFamilyIndexCount = 0, | ||||||
|  |         .pQueueFamilyIndices = nullptr, | ||||||
|  |     }); | ||||||
|  |     data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); | ||||||
|  |  | ||||||
|  |     const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); | ||||||
|  |     std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA)); | ||||||
|  |     // Tack on the swizzle table at the end of the buffer | ||||||
|  |     std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE, | ||||||
|  |                 sizeof(SWIZZLE_TABLE)); | ||||||
|  |  | ||||||
|  |     scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, | ||||||
|  |                       TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) { | ||||||
|  |         cmdbuf.CopyBuffer(src, dst, | ||||||
|  |                           VkBufferCopy{ | ||||||
|  |                               .srcOffset = offset, | ||||||
|  |                               .dstOffset = 0, | ||||||
|  |                               .size = TOTAL_BUFFER_SIZE, | ||||||
|  |                           }); | ||||||
|  |         cmdbuf.PipelineBarrier( | ||||||
|  |             VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, | ||||||
|  |             VkMemoryBarrier{ | ||||||
|  |                 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||||
|  |                 .pNext = nullptr, | ||||||
|  |                 .srcAccessMask = 0, | ||||||
|  |                 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||||||
|  |             }); | ||||||
|  |     }); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | ||||||
|  |                                std::span<const VideoCommon::SwizzleParameters> swizzles) { | ||||||
|  |     using namespace VideoCommon::Accelerated; | ||||||
|  |     const std::array<u32, 2> block_dims{ | ||||||
|  |         VideoCore::Surface::DefaultBlockWidth(image.info.format), | ||||||
|  |         VideoCore::Surface::DefaultBlockHeight(image.info.format), | ||||||
|  |     }; | ||||||
|  |     scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|  |     if (!data_buffer) { | ||||||
|  |         MakeDataBuffer(); | ||||||
|  |     } | ||||||
|  |     const VkPipeline vk_pipeline = *pipeline; | ||||||
|  |     const VkImageAspectFlags aspect_mask = image.AspectMask(); | ||||||
|  |     const VkImage vk_image = image.Handle(); | ||||||
|  |     const bool is_initialized = image.ExchangeInitialization(); | ||||||
|  |     scheduler.Record( | ||||||
|  |         [vk_pipeline, vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) { | ||||||
|  |             const VkImageMemoryBarrier image_barrier{ | ||||||
|  |                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||||
|  |                 .pNext = nullptr, | ||||||
|  |                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|  |                 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|  |                 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | ||||||
|  |                 .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|  |                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .image = vk_image, | ||||||
|  |                 .subresourceRange{ | ||||||
|  |                     .aspectMask = aspect_mask, | ||||||
|  |                     .baseMipLevel = 0, | ||||||
|  |                     .levelCount = VK_REMAINING_MIP_LEVELS, | ||||||
|  |                     .baseArrayLayer = 0, | ||||||
|  |                     .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||||||
|  |                 }, | ||||||
|  |             }; | ||||||
|  |             cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT : 0, | ||||||
|  |                                    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); | ||||||
|  |             cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline); | ||||||
|  |         }); | ||||||
|  |     for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { | ||||||
|  |         const size_t input_offset = swizzle.buffer_offset + map.offset; | ||||||
|  |         const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | ||||||
|  |         const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | ||||||
|  |         const u32 num_dispatches_z = image.info.resources.layers; | ||||||
|  |  | ||||||
|  |         update_descriptor_queue.Acquire(); | ||||||
|  |         update_descriptor_queue.AddBuffer(map.buffer, input_offset, | ||||||
|  |                                           image.guest_size_bytes - swizzle.buffer_offset); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values), | ||||||
|  |                                           sizeof(AstcBufferData::encoding_values)); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8), | ||||||
|  |                                           sizeof(AstcBufferData::replicate_6_to_8)); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8), | ||||||
|  |                                           sizeof(AstcBufferData::replicate_7_to_8)); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), | ||||||
|  |                                           sizeof(AstcBufferData::replicate_8_to_8)); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, | ||||||
|  |                                           offsetof(AstcBufferData, replicate_byte_to_16), | ||||||
|  |                                           sizeof(AstcBufferData::replicate_byte_to_16)); | ||||||
|  |         update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData), | ||||||
|  |                                           sizeof(SWIZZLE_TABLE)); | ||||||
|  |         update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); | ||||||
|  |  | ||||||
|  |         const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||||||
|  |         const VkPipelineLayout vk_layout = *layout; | ||||||
|  |  | ||||||
|  |         // To unswizzle the ASTC data | ||||||
|  |         const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | ||||||
|  |         ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); | ||||||
|  |         ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); | ||||||
|  |         scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, | ||||||
|  |                           block_dims, params, set](vk::CommandBuffer cmdbuf) { | ||||||
|  |             const AstcPushConstants uniforms{ | ||||||
|  |                 .blocks_dims = block_dims, | ||||||
|  |                 .bytes_per_block_log2 = params.bytes_per_block_log2, | ||||||
|  |                 .layer_stride = params.layer_stride, | ||||||
|  |                 .block_size = params.block_size, | ||||||
|  |                 .x_shift = params.x_shift, | ||||||
|  |                 .block_height = params.block_height, | ||||||
|  |                 .block_height_mask = params.block_height_mask, | ||||||
|  |             }; | ||||||
|  |             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); | ||||||
|  |             cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | ||||||
|  |             cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  |     scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) { | ||||||
|  |         const VkImageMemoryBarrier image_barrier{ | ||||||
|  |             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||||
|  |             .pNext = nullptr, | ||||||
|  |             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|  |             .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|  |             .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|  |             .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|  |             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |             .image = vk_image, | ||||||
|  |             .subresourceRange{ | ||||||
|  |                 .aspectMask = aspect_mask, | ||||||
|  |                 .baseMipLevel = 0, | ||||||
|  |                 .levelCount = VK_REMAINING_MIP_LEVELS, | ||||||
|  |                 .baseArrayLayer = 0, | ||||||
|  |                 .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||||||
|  |                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); | ||||||
|  |     }); | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace Vulkan | } // namespace Vulkan | ||||||
|   | |||||||
| @@ -11,14 +11,21 @@ | |||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
|  | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
|  |  | ||||||
|  | namespace VideoCommon { | ||||||
|  | struct SwizzleParameters; | ||||||
|  | } | ||||||
|  |  | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
|  |  | ||||||
| class Device; | class Device; | ||||||
| class StagingBufferPool; | class StagingBufferPool; | ||||||
| class VKScheduler; | class VKScheduler; | ||||||
| class VKUpdateDescriptorQueue; | class VKUpdateDescriptorQueue; | ||||||
|  | class Image; | ||||||
|  | struct StagingBufferRef; | ||||||
|  |  | ||||||
| class VKComputePass { | class VKComputePass { | ||||||
| public: | public: | ||||||
| @@ -77,4 +84,29 @@ private: | |||||||
|     VKUpdateDescriptorQueue& update_descriptor_queue; |     VKUpdateDescriptorQueue& update_descriptor_queue; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | class ASTCDecoderPass final : public VKComputePass { | ||||||
|  | public: | ||||||
|  |     explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | ||||||
|  |                              VKDescriptorPool& descriptor_pool_, | ||||||
|  |                              StagingBufferPool& staging_buffer_pool_, | ||||||
|  |                              VKUpdateDescriptorQueue& update_descriptor_queue_, | ||||||
|  |                              MemoryAllocator& memory_allocator_); | ||||||
|  |     ~ASTCDecoderPass(); | ||||||
|  |  | ||||||
|  |     void Assemble(Image& image, const StagingBufferRef& map, | ||||||
|  |                   std::span<const VideoCommon::SwizzleParameters> swizzles); | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     void MakeDataBuffer(); | ||||||
|  |  | ||||||
|  |     const Device& device; | ||||||
|  |     VKScheduler& scheduler; | ||||||
|  |     StagingBufferPool& staging_buffer_pool; | ||||||
|  |     VKUpdateDescriptorQueue& update_descriptor_queue; | ||||||
|  |     MemoryAllocator& memory_allocator; | ||||||
|  |  | ||||||
|  |     vk::Buffer data_buffer; | ||||||
|  |     MemoryCommit data_buffer_commit; | ||||||
|  | }; | ||||||
|  |  | ||||||
| } // namespace Vulkan | } // namespace Vulkan | ||||||
|   | |||||||
| @@ -241,7 +241,10 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||||||
|       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), |       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | ||||||
|       update_descriptor_queue(device, scheduler), |       update_descriptor_queue(device, scheduler), | ||||||
|       blit_image(device, scheduler, state_tracker, descriptor_pool), |       blit_image(device, scheduler, state_tracker, descriptor_pool), | ||||||
|       texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, |       astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, | ||||||
|  |                         memory_allocator), | ||||||
|  |       texture_cache_runtime{device,       scheduler,  memory_allocator, | ||||||
|  |                             staging_pool, blit_image, astc_decoder_pass}, | ||||||
|       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | ||||||
|       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | ||||||
|                            update_descriptor_queue, descriptor_pool), |                            update_descriptor_queue, descriptor_pool), | ||||||
|   | |||||||
| @@ -173,6 +173,7 @@ private: | |||||||
|     VKDescriptorPool descriptor_pool; |     VKDescriptorPool descriptor_pool; | ||||||
|     VKUpdateDescriptorQueue update_descriptor_queue; |     VKUpdateDescriptorQueue update_descriptor_queue; | ||||||
|     BlitImageHelper blit_image; |     BlitImageHelper blit_image; | ||||||
|  |     ASTCDecoderPass astc_decoder_pass; | ||||||
|  |  | ||||||
|     GraphicsPipelineCacheKey graphics_key; |     GraphicsPipelineCacheKey graphics_key; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -10,6 +10,7 @@ | |||||||
| #include "video_core/engines/fermi_2d.h" | #include "video_core/engines/fermi_2d.h" | ||||||
| #include "video_core/renderer_vulkan/blit_image.h" | #include "video_core/renderer_vulkan/blit_image.h" | ||||||
| #include "video_core/renderer_vulkan/maxwell_to_vk.h" | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||||
| #include "video_core/renderer_vulkan/vk_rasterizer.h" | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||||
| @@ -807,7 +808,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||||||
|         commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); |         commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||||||
|     } |     } | ||||||
|     if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |     if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | ||||||
|         flags |= VideoCommon::ImageFlagBits::Converted; |         flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; | ||||||
|     } |     } | ||||||
|     if (runtime.device.HasDebuggingToolAttached()) { |     if (runtime.device.HasDebuggingToolAttached()) { | ||||||
|         if (image) { |         if (image) { | ||||||
| @@ -816,6 +817,38 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||||||
|             buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |             buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |     static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ | ||||||
|  |         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, | ||||||
|  |         .pNext = nullptr, | ||||||
|  |         .usage = VK_IMAGE_USAGE_STORAGE_BIT, | ||||||
|  |     }; | ||||||
|  |     if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | ||||||
|  |         const auto& device = runtime.device.GetLogical(); | ||||||
|  |         storage_image_views.reserve(info.resources.levels); | ||||||
|  |         for (s32 level = 0; level < info.resources.levels; ++level) { | ||||||
|  |             storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{ | ||||||
|  |                 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||||||
|  |                 .pNext = &storage_image_view_usage_create_info, | ||||||
|  |                 .flags = 0, | ||||||
|  |                 .image = *image, | ||||||
|  |                 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY, | ||||||
|  |                 .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, | ||||||
|  |                 .components{ | ||||||
|  |                     .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||||||
|  |                     .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||||||
|  |                     .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||||||
|  |                     .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||||||
|  |                 }, | ||||||
|  |                 .subresourceRange{ | ||||||
|  |                     .aspectMask = aspect_mask, | ||||||
|  |                     .baseMipLevel = static_cast<u32>(level), | ||||||
|  |                     .levelCount = 1, | ||||||
|  |                     .baseArrayLayer = 0, | ||||||
|  |                     .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||||||
|  |                 }, | ||||||
|  |             })); | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | ||||||
| @@ -918,7 +951,6 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); |     const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); | ||||||
|     const VkFormat vk_format = format_info.format; |  | ||||||
|     const VkImageViewUsageCreateInfo image_view_usage{ |     const VkImageViewUsageCreateInfo image_view_usage{ | ||||||
|         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, |         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, | ||||||
|         .pNext = nullptr, |         .pNext = nullptr, | ||||||
| @@ -930,7 +962,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||||||
|         .flags = 0, |         .flags = 0, | ||||||
|         .image = image.Handle(), |         .image = image.Handle(), | ||||||
|         .viewType = VkImageViewType{}, |         .viewType = VkImageViewType{}, | ||||||
|         .format = vk_format, |         .format = format_info.format, | ||||||
|         .components{ |         .components{ | ||||||
|             .r = ComponentSwizzle(swizzle[0]), |             .r = ComponentSwizzle(swizzle[0]), | ||||||
|             .g = ComponentSwizzle(swizzle[1]), |             .g = ComponentSwizzle(swizzle[1]), | ||||||
| @@ -982,7 +1014,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||||||
|             .pNext = nullptr, |             .pNext = nullptr, | ||||||
|             .flags = 0, |             .flags = 0, | ||||||
|             .buffer = image.Buffer(), |             .buffer = image.Buffer(), | ||||||
|             .format = vk_format, |             .format = format_info.format, | ||||||
|             .offset = 0, // TODO: Redesign buffer cache to support this |             .offset = 0, // TODO: Redesign buffer cache to support this | ||||||
|             .range = image.guest_size_bytes, |             .range = image.guest_size_bytes, | ||||||
|         }); |         }); | ||||||
| @@ -1167,4 +1199,13 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void TextureCacheRuntime::AccelerateImageUpload( | ||||||
|  |     Image& image, const StagingBufferRef& map, | ||||||
|  |     std::span<const VideoCommon::SwizzleParameters> swizzles) { | ||||||
|  |     if (IsPixelFormatASTC(image.info.format)) { | ||||||
|  |         return astc_decoder_pass.Assemble(image, map, swizzles); | ||||||
|  |     } | ||||||
|  |     UNREACHABLE(); | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace Vulkan | } // namespace Vulkan | ||||||
|   | |||||||
| @@ -20,6 +20,7 @@ using VideoCommon::Offset2D; | |||||||
| using VideoCommon::RenderTargets; | using VideoCommon::RenderTargets; | ||||||
| using VideoCore::Surface::PixelFormat; | using VideoCore::Surface::PixelFormat; | ||||||
|  |  | ||||||
|  | class ASTCDecoderPass; | ||||||
| class BlitImageHelper; | class BlitImageHelper; | ||||||
| class Device; | class Device; | ||||||
| class Image; | class Image; | ||||||
| @@ -60,6 +61,7 @@ struct TextureCacheRuntime { | |||||||
|     MemoryAllocator& memory_allocator; |     MemoryAllocator& memory_allocator; | ||||||
|     StagingBufferPool& staging_buffer_pool; |     StagingBufferPool& staging_buffer_pool; | ||||||
|     BlitImageHelper& blit_image_helper; |     BlitImageHelper& blit_image_helper; | ||||||
|  |     ASTCDecoderPass& astc_decoder_pass; | ||||||
|     std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; |     std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; | ||||||
|  |  | ||||||
|     void Finish(); |     void Finish(); | ||||||
| @@ -83,9 +85,7 @@ struct TextureCacheRuntime { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     void AccelerateImageUpload(Image&, const StagingBufferRef&, |     void AccelerateImageUpload(Image&, const StagingBufferRef&, | ||||||
|                                std::span<const VideoCommon::SwizzleParameters>) { |                                std::span<const VideoCommon::SwizzleParameters>); | ||||||
|         UNREACHABLE(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     void InsertUploadMemoryBarrier() {} |     void InsertUploadMemoryBarrier() {} | ||||||
|  |  | ||||||
| @@ -121,15 +121,26 @@ public: | |||||||
|         return *buffer; |         return *buffer; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept { |     [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { | ||||||
|         return aspect_mask; |         return aspect_mask; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     [[nodiscard]] VkImageView StorageImageView(s32 level) const noexcept { | ||||||
|  |         return *storage_image_views[level]; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns true when the image is already initialized and mark it as initialized | ||||||
|  |     [[nodiscard]] bool ExchangeInitialization() noexcept { | ||||||
|  |         return std::exchange(initialized, true); | ||||||
|  |     } | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     VKScheduler* scheduler; |     VKScheduler* scheduler; | ||||||
|     vk::Image image; |     vk::Image image; | ||||||
|     vk::Buffer buffer; |     vk::Buffer buffer; | ||||||
|     MemoryCommit commit; |     MemoryCommit commit; | ||||||
|  |     vk::ImageView image_view; | ||||||
|  |     std::vector<vk::ImageView> storage_image_views; | ||||||
|     VkImageAspectFlags aspect_mask = 0; |     VkImageAspectFlags aspect_mask = 0; | ||||||
|     bool initialized = false; |     bool initialized = false; | ||||||
| }; | }; | ||||||
|   | |||||||
| @@ -13,8 +13,8 @@ | |||||||
| namespace VideoCommon::Accelerated { | namespace VideoCommon::Accelerated { | ||||||
|  |  | ||||||
| struct BlockLinearSwizzle2DParams { | struct BlockLinearSwizzle2DParams { | ||||||
|     std::array<u32, 3> origin; |     alignas(16) std::array<u32, 3> origin; | ||||||
|     std::array<s32, 3> destination; |     alignas(16) std::array<s32, 3> destination; | ||||||
|     u32 bytes_per_block_log2; |     u32 bytes_per_block_log2; | ||||||
|     u32 layer_stride; |     u32 layer_stride; | ||||||
|     u32 block_size; |     u32 block_size; | ||||||
|   | |||||||
| @@ -47,7 +47,6 @@ | |||||||
| #include "video_core/texture_cache/formatter.h" | #include "video_core/texture_cache/formatter.h" | ||||||
| #include "video_core/texture_cache/samples_helper.h" | #include "video_core/texture_cache/samples_helper.h" | ||||||
| #include "video_core/texture_cache/util.h" | #include "video_core/texture_cache/util.h" | ||||||
| #include "video_core/textures/astc.h" |  | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
|  |  | ||||||
| namespace VideoCommon { | namespace VideoCommon { | ||||||
| @@ -879,17 +878,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||||||
|         ASSERT(copy.image_extent == mip_size); |         ASSERT(copy.image_extent == mip_size); | ||||||
|         ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); |         ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); | ||||||
|         ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); |         ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); | ||||||
|  |         DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, | ||||||
|         if (IsPixelFormatASTC(info.format)) { |                       output.subspan(output_offset)); | ||||||
|             ASSERT(copy.image_extent.depth == 1); |  | ||||||
|             Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), |  | ||||||
|                                              copy.image_extent.width, copy.image_extent.height, |  | ||||||
|                                              copy.image_subresource.num_layers, tile_size.width, |  | ||||||
|                                              tile_size.height, output.subspan(output_offset)); |  | ||||||
|         } else { |  | ||||||
|             DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, |  | ||||||
|                           output.subspan(output_offset)); |  | ||||||
|         } |  | ||||||
|         copy.buffer_offset = output_offset; |         copy.buffer_offset = output_offset; | ||||||
|         copy.buffer_row_length = mip_size.width; |         copy.buffer_row_length = mip_size.width; | ||||||
|         copy.buffer_image_height = mip_size.height; |         copy.buffer_image_height = mip_size.height; | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -4,11 +4,129 @@ | |||||||
|  |  | ||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
| #include <cstdint> | #include <bit> | ||||||
|  | #include "common/common_types.h" | ||||||
|  |  | ||||||
| namespace Tegra::Texture::ASTC { | namespace Tegra::Texture::ASTC { | ||||||
|  |  | ||||||
| void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | enum class IntegerEncoding { JustBits, Quint, Trit }; | ||||||
|                 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); |  | ||||||
|  | struct IntegerEncodedValue { | ||||||
|  |     constexpr IntegerEncodedValue() = default; | ||||||
|  |  | ||||||
|  |     constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) | ||||||
|  |         : encoding{encoding_}, num_bits{num_bits_} {} | ||||||
|  |  | ||||||
|  |     constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { | ||||||
|  |         return encoding == other.encoding && num_bits == other.num_bits; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Returns the number of bits required to encode num_vals values. | ||||||
|  |     u32 GetBitLength(u32 num_vals) const { | ||||||
|  |         u32 total_bits = num_bits * num_vals; | ||||||
|  |         if (encoding == IntegerEncoding::Trit) { | ||||||
|  |             total_bits += (num_vals * 8 + 4) / 5; | ||||||
|  |         } else if (encoding == IntegerEncoding::Quint) { | ||||||
|  |             total_bits += (num_vals * 7 + 2) / 3; | ||||||
|  |         } | ||||||
|  |         return total_bits; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     IntegerEncoding encoding{}; | ||||||
|  |     u32 num_bits = 0; | ||||||
|  |     u32 bit_value = 0; | ||||||
|  |     union { | ||||||
|  |         u32 quint_value = 0; | ||||||
|  |         u32 trit_value; | ||||||
|  |     }; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Returns a new instance of this struct that corresponds to the | ||||||
|  | // can take no more than mav_value values | ||||||
|  | constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { | ||||||
|  |     while (mav_value > 0) { | ||||||
|  |         u32 check = mav_value + 1; | ||||||
|  |  | ||||||
|  |         // Is mav_value a power of two? | ||||||
|  |         if (!(check & (check - 1))) { | ||||||
|  |             return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Is mav_value of the type 3*2^n - 1? | ||||||
|  |         if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { | ||||||
|  |             return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Is mav_value of the type 5*2^n - 1? | ||||||
|  |         if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { | ||||||
|  |             return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Apparently it can't be represented with a bounded integer sequence... | ||||||
|  |         // just iterate. | ||||||
|  |         mav_value--; | ||||||
|  |     } | ||||||
|  |     return IntegerEncodedValue(IntegerEncoding::JustBits, 0); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { | ||||||
|  |     std::array<IntegerEncodedValue, 256> encodings{}; | ||||||
|  |     for (std::size_t i = 0; i < encodings.size(); ++i) { | ||||||
|  |         encodings[i] = CreateEncoding(static_cast<u32>(i)); | ||||||
|  |     } | ||||||
|  |     return encodings; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | constexpr std::array<IntegerEncodedValue, 256> EncodingsValues = MakeEncodedValues(); | ||||||
|  |  | ||||||
|  | // Takes the low num_bits of val as a pattern and appends copies of it below itself until | ||||||
|  | // to_bit bits are filled. A final copy that does not fit is cut short and keeps the | ||||||
|  | // pattern's high bits. Returns 0 when num_bits or to_bit is zero, and the bare pattern | ||||||
|  | // when to_bit <= num_bits. | ||||||
|  | template <typename IntType> | ||||||
|  | constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) { | ||||||
|  |     if (num_bits == 0 || to_bit == 0) { | ||||||
|  |         return 0; | ||||||
|  |     } | ||||||
|  |     // The mask is built by shifting an int literal, so num_bits must stay below the width of int. | ||||||
|  |     const IntType pattern = val & static_cast<IntType>((1 << num_bits) - 1); | ||||||
|  |     IntType result = pattern; | ||||||
|  |     for (u32 filled = num_bits; filled < to_bit;) { | ||||||
|  |         const u32 remaining = to_bit - filled; | ||||||
|  |         // Only the last copy can be shorter than the pattern. | ||||||
|  |         const u32 chunk = num_bits > remaining ? remaining : num_bits; | ||||||
|  |         const u32 dropped_low_bits = num_bits - chunk; | ||||||
|  |         result = static_cast<IntType>(result << chunk); | ||||||
|  |         result = static_cast<IntType>(result | (pattern >> dropped_low_bits)); | ||||||
|  |         filled += chunk; | ||||||
|  |     } | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Number of distinct values that fit in num_bits bits, i.e. 2^num_bits. | ||||||
|  | constexpr std::size_t NumReplicateEntries(u32 num_bits) { | ||||||
|  |     constexpr std::size_t one = 1; | ||||||
|  |     return one << num_bits; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Builds a table with one entry per num_bits-wide input, where entry i holds | ||||||
|  | // Replicate(i, num_bits, to_bit). | ||||||
|  | template <typename IntType, u32 num_bits, u32 to_bit> | ||||||
|  | constexpr auto MakeReplicateTable() { | ||||||
|  |     std::array<IntType, NumReplicateEntries(num_bits)> table{}; | ||||||
|  |     // The bound is expressed in IntType because the loop counter doubles as the input value. | ||||||
|  |     const auto entry_count = static_cast<IntType>(table.size()); | ||||||
|  |     IntType input = 0; | ||||||
|  |     while (input < entry_count) { | ||||||
|  |         table[input] = Replicate(input, num_bits, to_bit); | ||||||
|  |         ++input; | ||||||
|  |     } | ||||||
|  |     return table; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Precomputed Replicate() results: 6-, 7- and 8-bit inputs widened to 8 bits, | ||||||
|  | // and 8-bit inputs widened to 16 bits. | ||||||
|  | constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); | ||||||
|  | constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); | ||||||
|  | constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); | ||||||
|  | constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||||||
|  |  | ||||||
|  | // Bundles the encoding table and the replicate tables into a single object. | ||||||
|  | // NOTE(review): presumably consumed as one contiguous buffer by the ASTC decoder, | ||||||
|  | // so keep the member order as is - confirm against the user of ASTC_BUFFER_DATA. | ||||||
|  | struct AstcBufferData { | ||||||
|  |     decltype(EncodingsValues) encoding_values = EncodingsValues; | ||||||
|  |     decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; | ||||||
|  |     decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; | ||||||
|  |     decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | ||||||
|  |     decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Every member takes its default initializer, so the instance is a compile-time constant. | ||||||
|  | constexpr AstcBufferData ASTC_BUFFER_DATA{}; | ||||||
|  |  | ||||||
| } // namespace Tegra::Texture::ASTC | } // namespace Tegra::Texture::ASTC | ||||||
|   | |||||||
| @@ -17,26 +17,7 @@ | |||||||
| #include "video_core/textures/texture.h" | #include "video_core/textures/texture.h" | ||||||
|  |  | ||||||
| namespace Tegra::Texture { | namespace Tegra::Texture { | ||||||
|  |  | ||||||
| namespace { | namespace { | ||||||
| /** |  | ||||||
|  * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. |  | ||||||
|  * Calculates the offset of an (x, y) position within a swizzled texture. |  | ||||||
|  * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 |  | ||||||
|  */ |  | ||||||
| constexpr SwizzleTable MakeSwizzleTableConst() { |  | ||||||
|     SwizzleTable table{}; |  | ||||||
|     for (u32 y = 0; y < table.size(); ++y) { |  | ||||||
|         for (u32 x = 0; x < table[0].size(); ++x) { |  | ||||||
|             table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + |  | ||||||
|                           (y % 2) * 16 + (x % 16); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return table; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); |  | ||||||
|  |  | ||||||
| template <bool TO_LINEAR> | template <bool TO_LINEAR> | ||||||
| void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, | ||||||
|              u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { |              u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { | ||||||
| @@ -91,10 +72,6 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe | |||||||
| } | } | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| SwizzleTable MakeSwizzleTable() { |  | ||||||
|     return SWIZZLE_TABLE; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | ||||||
|                       u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, |                       u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, | ||||||
|                       u32 stride_alignment) { |                       u32 stride_alignment) { | ||||||
|   | |||||||
| @@ -23,8 +23,22 @@ constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_ | |||||||
|  |  | ||||||
| using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>; | using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>; | ||||||
|  |  | ||||||
| /// Returns a z-order swizzle table | /** | ||||||
| SwizzleTable MakeSwizzleTable(); |  * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. | ||||||
|  |  * Calculates the offset of an (x, y) position within a swizzled texture. | ||||||
|  |  * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 | ||||||
|  |  */ | ||||||
|  | constexpr SwizzleTable MakeSwizzleTable() { | ||||||
|  |     SwizzleTable table{}; | ||||||
|  |     const std::size_t num_rows = table.size(); | ||||||
|  |     const std::size_t num_columns = table[0].size(); | ||||||
|  |     for (u32 y = 0; y < num_rows; ++y) { | ||||||
|  |         // Part of the offset that depends only on the row | ||||||
|  |         const u32 row_offset = ((y % 8) / 2) * 64 + (y % 2) * 16; | ||||||
|  |         for (u32 x = 0; x < num_columns; ++x) { | ||||||
|  |             // Part of the offset that depends only on the column | ||||||
|  |             const u32 column_offset = ((x % 64) / 32) * 256 + ((x % 32) / 16) * 32 + (x % 16); | ||||||
|  |             table[y][x] = column_offset + row_offset; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     return table; | ||||||
|  | } | ||||||
|  | // Compile-time instance of the table built above. | ||||||
|  | constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTable(); | ||||||
|  |  | ||||||
| /// Unswizzles a block linear texture into linear memory. | /// Unswizzles a block linear texture into linear memory. | ||||||
| void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user