Merge pull request #5891 from ameerj/bgra-ogl
renderer_opengl: Use compute shaders to swizzle BGR textures on copy
This commit is contained in:
		| @@ -48,6 +48,15 @@ constexpr std::array VIEW_CLASS_32_BITS{ | |||||||
|     PixelFormat::A2B10G10R10_UINT, |     PixelFormat::A2B10G10R10_UINT, | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | constexpr std::array VIEW_CLASS_32_BITS_NO_BGR{ | ||||||
|  |     PixelFormat::R16G16_FLOAT,      PixelFormat::B10G11R11_FLOAT,  PixelFormat::R32_FLOAT, | ||||||
|  |     PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT,      PixelFormat::R32_UINT, | ||||||
|  |     PixelFormat::R16G16_SINT,       PixelFormat::R32_SINT,         PixelFormat::A8B8G8R8_UNORM, | ||||||
|  |     PixelFormat::R16G16_UNORM,      PixelFormat::A8B8G8R8_SNORM,   PixelFormat::R16G16_SNORM, | ||||||
|  |     PixelFormat::A8B8G8R8_SRGB,     PixelFormat::E5B9G9R9_FLOAT,   PixelFormat::A8B8G8R8_UINT, | ||||||
|  |     PixelFormat::A8B8G8R8_SINT,     PixelFormat::A2B10G10R10_UINT, | ||||||
|  | }; | ||||||
|  |  | ||||||
| // TODO: How should we handle 24 bits? | // TODO: How should we handle 24 bits? | ||||||
|  |  | ||||||
| constexpr std::array VIEW_CLASS_16_BITS{ | constexpr std::array VIEW_CLASS_16_BITS{ | ||||||
| @@ -205,7 +214,6 @@ constexpr Table MakeViewTable() { | |||||||
|     EnableRange(view, VIEW_CLASS_128_BITS); |     EnableRange(view, VIEW_CLASS_128_BITS); | ||||||
|     EnableRange(view, VIEW_CLASS_96_BITS); |     EnableRange(view, VIEW_CLASS_96_BITS); | ||||||
|     EnableRange(view, VIEW_CLASS_64_BITS); |     EnableRange(view, VIEW_CLASS_64_BITS); | ||||||
|     EnableRange(view, VIEW_CLASS_32_BITS); |  | ||||||
|     EnableRange(view, VIEW_CLASS_16_BITS); |     EnableRange(view, VIEW_CLASS_16_BITS); | ||||||
|     EnableRange(view, VIEW_CLASS_8_BITS); |     EnableRange(view, VIEW_CLASS_8_BITS); | ||||||
|     EnableRange(view, VIEW_CLASS_RGTC1_RED); |     EnableRange(view, VIEW_CLASS_RGTC1_RED); | ||||||
| @@ -231,20 +239,47 @@ constexpr Table MakeCopyTable() { | |||||||
|     EnableRange(copy, COPY_CLASS_64_BITS); |     EnableRange(copy, COPY_CLASS_64_BITS); | ||||||
|     return copy; |     return copy; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | constexpr Table MakeNativeBgrViewTable() { | ||||||
|  |     Table copy = MakeViewTable(); | ||||||
|  |     EnableRange(copy, VIEW_CLASS_32_BITS); | ||||||
|  |     return copy; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | constexpr Table MakeNonNativeBgrViewTable() { | ||||||
|  |     Table copy = MakeViewTable(); | ||||||
|  |     EnableRange(copy, VIEW_CLASS_32_BITS_NO_BGR); | ||||||
|  |     return copy; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | constexpr Table MakeNativeBgrCopyTable() { | ||||||
|  |     Table copy = MakeCopyTable(); | ||||||
|  |     EnableRange(copy, VIEW_CLASS_32_BITS); | ||||||
|  |     return copy; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | constexpr Table MakeNonNativeBgrCopyTable() { | ||||||
|  |     Table copy = MakeCopyTable(); | ||||||
|  |     EnableRange(copy, VIEW_CLASS_32_BITS); | ||||||
|  |     return copy; | ||||||
|  | } | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) { | bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views, | ||||||
|  |                       bool native_bgr) { | ||||||
|     if (broken_views) { |     if (broken_views) { | ||||||
|         // If format views are broken, only accept formats that are identical. |         // If format views are broken, only accept formats that are identical. | ||||||
|         return format_a == format_b; |         return format_a == format_b; | ||||||
|     } |     } | ||||||
|     static constexpr Table TABLE = MakeViewTable(); |     static constexpr Table BGR_TABLE = MakeNativeBgrViewTable(); | ||||||
|     return IsSupported(TABLE, format_a, format_b); |     static constexpr Table NO_BGR_TABLE = MakeNonNativeBgrViewTable(); | ||||||
|  |     return IsSupported(native_bgr ? BGR_TABLE : NO_BGR_TABLE, format_a, format_b); | ||||||
| } | } | ||||||
|  |  | ||||||
| bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) { | bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr) { | ||||||
|     static constexpr Table TABLE = MakeCopyTable(); |     static constexpr Table BGR_TABLE = MakeNativeBgrCopyTable(); | ||||||
|     return IsSupported(TABLE, format_a, format_b); |     static constexpr Table NO_BGR_TABLE = MakeNonNativeBgrCopyTable(); | ||||||
|  |     return IsSupported(native_bgr ? BGR_TABLE : NO_BGR_TABLE, format_a, format_b); | ||||||
| } | } | ||||||
|  |  | ||||||
| } // namespace VideoCore::Surface | } // namespace VideoCore::Surface | ||||||
|   | |||||||
| @@ -8,8 +8,9 @@ | |||||||
|  |  | ||||||
| namespace VideoCore::Surface { | namespace VideoCore::Surface { | ||||||
|  |  | ||||||
| bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views); | bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views, | ||||||
|  |                       bool native_bgr); | ||||||
|  |  | ||||||
| bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b); | bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr); | ||||||
|  |  | ||||||
| } // namespace VideoCore::Surface | } // namespace VideoCore::Surface | ||||||
|   | |||||||
| @@ -5,6 +5,7 @@ set(SHADER_FILES | |||||||
|     convert_float_to_depth.frag |     convert_float_to_depth.frag | ||||||
|     full_screen_triangle.vert |     full_screen_triangle.vert | ||||||
|     opengl_copy_bc4.comp |     opengl_copy_bc4.comp | ||||||
|  |     opengl_copy_bgra.comp | ||||||
|     opengl_present.frag |     opengl_present.frag | ||||||
|     opengl_present.vert |     opengl_present.vert | ||||||
|     pitch_unswizzle.comp |     pitch_unswizzle.comp | ||||||
|   | |||||||
							
								
								
									
										15
									
								
								src/video_core/host_shaders/opengl_copy_bgra.comp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								src/video_core/host_shaders/opengl_copy_bgra.comp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | |||||||
|  | // Copyright 2021 yuzu Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
#version 430 core

// Each work group covers a 4x4 tile; every invocation handles a single texel.
layout (local_size_x = 4, local_size_y = 4) in;

layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input;
layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output;

// Reads the texel addressed by the global invocation ID from bgr_input and writes it to the
// same coordinate of bgr_output with the red and blue channels exchanged.
void main() {
    ivec3 texel = ivec3(gl_GlobalInvocationID);
    vec4 source_color = imageLoad(bgr_input, texel);
    imageStore(bgr_output, texel, source_color.bgra);
}
| @@ -96,7 +96,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ | |||||||
|     {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT |     {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT | ||||||
|     {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT |     {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT | ||||||
|     {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM |     {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM | ||||||
|     {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM |     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM | ||||||
|     {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT |     {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT | ||||||
|     {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT |     {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT | ||||||
|     {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT |     {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT | ||||||
| @@ -125,7 +125,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ | |||||||
|     {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM |     {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM | ||||||
|     {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM |     {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM | ||||||
|     {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM |     {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM | ||||||
|     {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_UNORM |     {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_SRGB | ||||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                         // BC1_RGBA_SRGB |     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                         // BC1_RGBA_SRGB | ||||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                         // BC2_SRGB |     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                         // BC2_SRGB | ||||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                         // BC3_SRGB |     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                         // BC3_SRGB | ||||||
| @@ -396,6 +396,17 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | [[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) { | ||||||
|  |     switch (format) { | ||||||
|  |     case PixelFormat::B5G6R5_UNORM: | ||||||
|  |     case PixelFormat::B8G8R8A8_UNORM: | ||||||
|  |     case PixelFormat::B8G8R8A8_SRGB: | ||||||
|  |         return true; | ||||||
|  |     default: | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| ImageBufferMap::~ImageBufferMap() { | ImageBufferMap::~ImageBufferMap() { | ||||||
| @@ -512,6 +523,9 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { | |||||||
|     if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { |     if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  |     if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -520,6 +534,8 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, | |||||||
|     if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { |     if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { | ||||||
|         ASSERT(src.info.type == ImageType::e3D); |         ASSERT(src.info.type == ImageType::e3D); | ||||||
|         util_shaders.CopyBC4(dst, src, copies); |         util_shaders.CopyBC4(dst, src, copies); | ||||||
|  |     } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { | ||||||
|  |         util_shaders.CopyBGR(dst, src, copies); | ||||||
|     } else { |     } else { | ||||||
|         UNREACHABLE(); |         UNREACHABLE(); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -86,6 +86,11 @@ public: | |||||||
|  |  | ||||||
|     FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; |     FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; | ||||||
|  |  | ||||||
|  |     bool HasNativeBgr() const noexcept { | ||||||
|  |         // OpenGL does not have native support for the BGR internal format | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     bool HasBrokenTextureViewFormats() const noexcept { |     bool HasBrokenTextureViewFormats() const noexcept { | ||||||
|         return has_broken_texture_view_formats; |         return has_broken_texture_view_formats; | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -14,6 +14,7 @@ | |||||||
| #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" | #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" | ||||||
| #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" | #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" | ||||||
| #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | ||||||
|  | #include "video_core/host_shaders/opengl_copy_bgra_comp.h" | ||||||
| #include "video_core/host_shaders/pitch_unswizzle_comp.h" | #include "video_core/host_shaders/pitch_unswizzle_comp.h" | ||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||||
| @@ -48,6 +49,11 @@ OGLProgram MakeProgram(std::string_view source) { | |||||||
|     return program; |     return program; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { | ||||||
|  |     return static_cast<size_t>(copy.extent.width * copy.extent.height * | ||||||
|  |                                copy.src_subresource.num_layers); | ||||||
|  | } | ||||||
|  |  | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| UtilShaders::UtilShaders(ProgramManager& program_manager_) | UtilShaders::UtilShaders(ProgramManager& program_manager_) | ||||||
| @@ -55,6 +61,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | |||||||
|       block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), |       block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), | ||||||
|       block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), |       block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), | ||||||
|       pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), |       pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), | ||||||
|  |       copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), | ||||||
|       copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { |       copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { | ||||||
|     const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); |     const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); | ||||||
|     swizzle_table_buffer.Create(); |     swizzle_table_buffer.Create(); | ||||||
| @@ -205,6 +212,43 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im | |||||||
|     program_manager.RestoreGuestCompute(); |     program_manager.RestoreGuestCompute(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, | ||||||
|  |                           std::span<const VideoCommon::ImageCopy> copies) { | ||||||
|  |     static constexpr GLuint BINDING_INPUT_IMAGE = 0; | ||||||
|  |     static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; | ||||||
|  |     static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; | ||||||
|  |     const u32 bytes_per_block = BytesPerBlock(dst_image.info.format); | ||||||
|  |     switch (bytes_per_block) { | ||||||
|  |     case 2: | ||||||
|  |         // BGR565 copy | ||||||
|  |         for (const ImageCopy& copy : copies) { | ||||||
|  |             ASSERT(copy.src_offset == zero_offset); | ||||||
|  |             ASSERT(copy.dst_offset == zero_offset); | ||||||
|  |             bgr_copy_pass.Execute(dst_image, src_image, copy); | ||||||
|  |         } | ||||||
|  |         break; | ||||||
|  |     case 4: { | ||||||
|  |         // BGRA8 copy | ||||||
|  |         program_manager.BindHostCompute(copy_bgra_program.handle); | ||||||
|  |         constexpr GLenum FORMAT = GL_RGBA8; | ||||||
|  |         for (const ImageCopy& copy : copies) { | ||||||
|  |             ASSERT(copy.src_offset == zero_offset); | ||||||
|  |             ASSERT(copy.dst_offset == zero_offset); | ||||||
|  |             glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), | ||||||
|  |                                copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT); | ||||||
|  |             glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), | ||||||
|  |                                copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT); | ||||||
|  |             glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); | ||||||
|  |         } | ||||||
|  |         program_manager.RestoreGuestCompute(); | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  |     default: | ||||||
|  |         UNREACHABLE(); | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| GLenum StoreFormat(u32 bytes_per_block) { | GLenum StoreFormat(u32 bytes_per_block) { | ||||||
|     switch (bytes_per_block) { |     switch (bytes_per_block) { | ||||||
|     case 1: |     case 1: | ||||||
| @@ -222,4 +266,36 @@ GLenum StoreFormat(u32 bytes_per_block) { | |||||||
|     return GL_R8UI; |     return GL_R8UI; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image, | ||||||
|  |                              const ImageCopy& copy) { | ||||||
|  |     if (CopyBufferCreationNeeded(copy)) { | ||||||
|  |         CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565); | ||||||
|  |     } | ||||||
|  |     // Copy from source to PBO | ||||||
|  |     glPixelStorei(GL_PACK_ALIGNMENT, 1); | ||||||
|  |     glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); | ||||||
|  |     glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle); | ||||||
|  |     glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, | ||||||
|  |                          copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, | ||||||
|  |                          static_cast<GLsizei>(bgr16_pbo_size), nullptr); | ||||||
|  |  | ||||||
|  |     // Copy from PBO to destination in reverse order | ||||||
|  |     glPixelStorei(GL_UNPACK_ALIGNMENT, 1); | ||||||
|  |     glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); | ||||||
|  |     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle); | ||||||
|  |     glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, | ||||||
|  |                         copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, | ||||||
|  |                         nullptr); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) { | ||||||
|  |     return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) { | ||||||
|  |     bgr16_pbo.Create(); | ||||||
|  |     bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16); | ||||||
|  |     glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY); | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace OpenGL | } // namespace OpenGL | ||||||
|   | |||||||
| @@ -19,6 +19,22 @@ class ProgramManager; | |||||||
|  |  | ||||||
| struct ImageBufferMap; | struct ImageBufferMap; | ||||||
|  |  | ||||||
|  | class Bgr565CopyPass { | ||||||
|  | public: | ||||||
|  |     Bgr565CopyPass() = default; | ||||||
|  |     ~Bgr565CopyPass() = default; | ||||||
|  |  | ||||||
|  |     void Execute(const Image& dst_image, const Image& src_image, | ||||||
|  |                  const VideoCommon::ImageCopy& copy); | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy); | ||||||
|  |     void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format); | ||||||
|  |  | ||||||
|  |     OGLBuffer bgr16_pbo; | ||||||
|  |     size_t bgr16_pbo_size{}; | ||||||
|  | }; | ||||||
|  |  | ||||||
| class UtilShaders { | class UtilShaders { | ||||||
| public: | public: | ||||||
|     explicit UtilShaders(ProgramManager& program_manager); |     explicit UtilShaders(ProgramManager& program_manager); | ||||||
| @@ -36,6 +52,9 @@ public: | |||||||
|     void CopyBC4(Image& dst_image, Image& src_image, |     void CopyBC4(Image& dst_image, Image& src_image, | ||||||
|                  std::span<const VideoCommon::ImageCopy> copies); |                  std::span<const VideoCommon::ImageCopy> copies); | ||||||
|  |  | ||||||
|  |     void CopyBGR(Image& dst_image, Image& src_image, | ||||||
|  |                  std::span<const VideoCommon::ImageCopy> copies); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     ProgramManager& program_manager; |     ProgramManager& program_manager; | ||||||
|  |  | ||||||
| @@ -44,7 +63,10 @@ private: | |||||||
|     OGLProgram block_linear_unswizzle_2d_program; |     OGLProgram block_linear_unswizzle_2d_program; | ||||||
|     OGLProgram block_linear_unswizzle_3d_program; |     OGLProgram block_linear_unswizzle_3d_program; | ||||||
|     OGLProgram pitch_unswizzle_program; |     OGLProgram pitch_unswizzle_program; | ||||||
|  |     OGLProgram copy_bgra_program; | ||||||
|     OGLProgram copy_bc4_program; |     OGLProgram copy_bc4_program; | ||||||
|  |  | ||||||
|  |     Bgr565CopyPass bgr_copy_pass; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| GLenum StoreFormat(u32 bytes_per_block); | GLenum StoreFormat(u32 bytes_per_block); | ||||||
|   | |||||||
| @@ -93,6 +93,11 @@ struct TextureCacheRuntime { | |||||||
|         // No known Vulkan driver has broken image views |         // No known Vulkan driver has broken image views | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     bool HasNativeBgr() const noexcept { | ||||||
|  |         // All known Vulkan drivers can natively handle BGR textures | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class Image : public VideoCommon::ImageBase { | class Image : public VideoCommon::ImageBase { | ||||||
|   | |||||||
| @@ -120,9 +120,10 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i | |||||||
|     if (lhs.info.type == ImageType::Linear) { |     if (lhs.info.type == ImageType::Linear) { | ||||||
|         base = SubresourceBase{.level = 0, .layer = 0}; |         base = SubresourceBase{.level = 0, .layer = 0}; | ||||||
|     } else { |     } else { | ||||||
|         // We are passing relaxed formats as an option, having broken views or not won't matter |         // We are passing relaxed formats as an option, having broken views/bgr or not won't matter | ||||||
|         static constexpr bool broken_views = false; |         static constexpr bool broken_views = false; | ||||||
|         base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views); |         static constexpr bool native_bgr = true; | ||||||
|  |         base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views, native_bgr); | ||||||
|     } |     } | ||||||
|     if (!base) { |     if (!base) { | ||||||
|         LOG_ERROR(HW_GPU, "Image alias should have been flipped"); |         LOG_ERROR(HW_GPU, "Image alias should have been flipped"); | ||||||
|   | |||||||
| @@ -24,7 +24,7 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i | |||||||
|           .height = std::max(image_info.size.height >> range.base.level, 1u), |           .height = std::max(image_info.size.height >> range.base.level, 1u), | ||||||
|           .depth = std::max(image_info.size.depth >> range.base.level, 1u), |           .depth = std::max(image_info.size.depth >> range.base.level, 1u), | ||||||
|       } { |       } { | ||||||
|     ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false), |     ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false, true), | ||||||
|                "Image view format {} is incompatible with image format {}", info.format, |                "Image view format {} is incompatible with image format {}", info.format, | ||||||
|                image_info.format); |                image_info.format); | ||||||
|     const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); |     const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||||||
|   | |||||||
| @@ -876,6 +876,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||||||
|         return ImageId{}; |         return ImageId{}; | ||||||
|     } |     } | ||||||
|     const bool broken_views = runtime.HasBrokenTextureViewFormats(); |     const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||||||
|  |     const bool native_bgr = runtime.HasNativeBgr(); | ||||||
|     ImageId image_id; |     ImageId image_id; | ||||||
|     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | ||||||
|         if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { |         if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { | ||||||
| @@ -885,11 +886,12 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||||||
|             if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && |             if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && | ||||||
|                 existing.pitch == info.pitch && |                 existing.pitch == info.pitch && | ||||||
|                 IsPitchLinearSameSize(existing, info, strict_size) && |                 IsPitchLinearSameSize(existing, info, strict_size) && | ||||||
|                 IsViewCompatible(existing.format, info.format, broken_views)) { |                 IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { | ||||||
|                 image_id = existing_image_id; |                 image_id = existing_image_id; | ||||||
|                 return true; |                 return true; | ||||||
|             } |             } | ||||||
|         } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) { |         } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, | ||||||
|  |                                  native_bgr)) { | ||||||
|             image_id = existing_image_id; |             image_id = existing_image_id; | ||||||
|             return true; |             return true; | ||||||
|         } |         } | ||||||
| @@ -920,6 +922,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||||||
|     ImageInfo new_info = info; |     ImageInfo new_info = info; | ||||||
|     const size_t size_bytes = CalculateGuestSizeInBytes(new_info); |     const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||||||
|     const bool broken_views = runtime.HasBrokenTextureViewFormats(); |     const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||||||
|  |     const bool native_bgr = runtime.HasNativeBgr(); | ||||||
|     std::vector<ImageId> overlap_ids; |     std::vector<ImageId> overlap_ids; | ||||||
|     std::vector<ImageId> left_aliased_ids; |     std::vector<ImageId> left_aliased_ids; | ||||||
|     std::vector<ImageId> right_aliased_ids; |     std::vector<ImageId> right_aliased_ids; | ||||||
| @@ -935,8 +938,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|         static constexpr bool strict_size = true; |         static constexpr bool strict_size = true; | ||||||
|         const std::optional<OverlapResult> solution = |         const std::optional<OverlapResult> solution = ResolveOverlap( | ||||||
|             ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views); |             new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); | ||||||
|         if (solution) { |         if (solution) { | ||||||
|             gpu_addr = solution->gpu_addr; |             gpu_addr = solution->gpu_addr; | ||||||
|             cpu_addr = solution->cpu_addr; |             cpu_addr = solution->cpu_addr; | ||||||
| @@ -946,10 +949,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||||||
|         } |         } | ||||||
|         static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; |         static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; | ||||||
|         const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); |         const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); | ||||||
|         if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) { |         if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { | ||||||
|             left_aliased_ids.push_back(overlap_id); |             left_aliased_ids.push_back(overlap_id); | ||||||
|         } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, |         } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | ||||||
|                                  broken_views)) { |                                  broken_views, native_bgr)) { | ||||||
|             right_aliased_ids.push_back(overlap_id); |             right_aliased_ids.push_back(overlap_id); | ||||||
|         } |         } | ||||||
|     }); |     }); | ||||||
|   | |||||||
| @@ -1035,13 +1035,13 @@ bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool stri | |||||||
|  |  | ||||||
| std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, | std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, | ||||||
|                                             VAddr cpu_addr, const ImageBase& overlap, |                                             VAddr cpu_addr, const ImageBase& overlap, | ||||||
|                                             bool strict_size, bool broken_views) { |                                             bool strict_size, bool broken_views, bool native_bgr) { | ||||||
|     ASSERT(new_info.type != ImageType::Linear); |     ASSERT(new_info.type != ImageType::Linear); | ||||||
|     ASSERT(overlap.info.type != ImageType::Linear); |     ASSERT(overlap.info.type != ImageType::Linear); | ||||||
|     if (!IsLayerStrideCompatible(new_info, overlap.info)) { |     if (!IsLayerStrideCompatible(new_info, overlap.info)) { | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
|     if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) { |     if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views, native_bgr)) { | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
|     if (gpu_addr == overlap.gpu_addr) { |     if (gpu_addr == overlap.gpu_addr) { | ||||||
| @@ -1085,14 +1085,14 @@ bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { | |||||||
|  |  | ||||||
| std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, | std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, | ||||||
|                                                GPUVAddr candidate_addr, RelaxedOptions options, |                                                GPUVAddr candidate_addr, RelaxedOptions options, | ||||||
|                                                bool broken_views) { |                                                bool broken_views, bool native_bgr) { | ||||||
|     const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); |     const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); | ||||||
|     if (!base) { |     if (!base) { | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
|     const ImageInfo& existing = image.info; |     const ImageInfo& existing = image.info; | ||||||
|     if (False(options & RelaxedOptions::Format)) { |     if (False(options & RelaxedOptions::Format)) { | ||||||
|         if (!IsViewCompatible(existing.format, candidate.format, broken_views)) { |         if (!IsViewCompatible(existing.format, candidate.format, broken_views, native_bgr)) { | ||||||
|             return std::nullopt; |             return std::nullopt; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -1129,8 +1129,9 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const | |||||||
| } | } | ||||||
|  |  | ||||||
| bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, | bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, | ||||||
|                    RelaxedOptions options, bool broken_views) { |                    RelaxedOptions options, bool broken_views, bool native_bgr) { | ||||||
|     return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value(); |     return FindSubresource(candidate, image, candidate_addr, options, broken_views, native_bgr) | ||||||
|  |         .has_value(); | ||||||
| } | } | ||||||
|  |  | ||||||
| void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | ||||||
|   | |||||||
| @@ -87,7 +87,8 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima | |||||||
| [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, | [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, | ||||||
|                                                           GPUVAddr gpu_addr, VAddr cpu_addr, |                                                           GPUVAddr gpu_addr, VAddr cpu_addr, | ||||||
|                                                           const ImageBase& overlap, |                                                           const ImageBase& overlap, | ||||||
|                                                           bool strict_size, bool broken_views); |                                                           bool strict_size, bool broken_views, | ||||||
|  |                                                           bool native_bgr); | ||||||
|  |  | ||||||
| [[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); | [[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); | ||||||
|  |  | ||||||
| @@ -95,11 +96,11 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima | |||||||
|                                                              const ImageBase& image, |                                                              const ImageBase& image, | ||||||
|                                                              GPUVAddr candidate_addr, |                                                              GPUVAddr candidate_addr, | ||||||
|                                                              RelaxedOptions options, |                                                              RelaxedOptions options, | ||||||
|                                                              bool broken_views); |                                                              bool broken_views, bool native_bgr); | ||||||
|  |  | ||||||
| [[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, | [[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, | ||||||
|                                  GPUVAddr candidate_addr, RelaxedOptions options, |                                  GPUVAddr candidate_addr, RelaxedOptions options, bool broken_views, | ||||||
|                                  bool broken_views); |                                  bool native_bgr); | ||||||
|  |  | ||||||
| void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | ||||||
|                       const ImageBase* src); |                       const ImageBase* src); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user