diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index d357c7024..48abc4a15 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -26,6 +26,7 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ui->graphics_api_combo->setEnabled(not_running); ui->toggle_shader_jit->setEnabled(not_running); ui->toggle_disk_shader_cache->setEnabled(hw_renderer_enabled && not_running); + ui->physical_device_combo->setEnabled(not_running); SetPhysicalDeviceComboVisibility(ui->graphics_api_combo->currentIndex()); connect(ui->graphics_api_combo, qOverload(&QComboBox::currentIndexChanged), this, diff --git a/src/video_core/rasterizer_cache/morton_swizzle.h b/src/video_core/rasterizer_cache/morton_swizzle.h index c3bf55c44..3cc3099dd 100644 --- a/src/video_core/rasterizer_cache/morton_swizzle.h +++ b/src/video_core/rasterizer_cache/morton_swizzle.h @@ -22,13 +22,31 @@ inline T MakeInt(const std::byte* bytes) { return integer; } -template +template inline void DecodePixel(const std::byte* source, std::byte* dest) { constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; if constexpr (format == PixelFormat::D24S8) { const u32 d24s8 = std::rotl(MakeInt(source), 8); std::memcpy(dest, &d24s8, sizeof(u32)); + } else if constexpr (format == PixelFormat::RGBA8 && converted) { + const u32 rgba = MakeInt(source); + const u32 abgr = Common::swap32(rgba); + std::memcpy(dest, &abgr, 4); + } else if constexpr (format == PixelFormat::RGB8 && converted) { + u32 rgb{}; + std::memcpy(&rgb, source, 3); + const u32 abgr = Common::swap32(rgb << 8) | 0xFF000000; + std::memcpy(dest, &abgr, 4); + } else if constexpr (format == PixelFormat::RGB565 && converted) { + const auto abgr = Color::DecodeRGB565(reinterpret_cast(source)); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::RGB5A1 && converted) { + const auto abgr = 
Color::DecodeRGB5A1(reinterpret_cast(source)); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::RGBA4 && converted) { + const auto abgr = Color::DecodeRGBA4(reinterpret_cast(source)); + std::memcpy(dest, abgr.AsArray(), 4); } else if constexpr (format == PixelFormat::IA8) { std::memset(dest, static_cast(source[1]), 3); dest[3] = source[0]; @@ -95,23 +113,43 @@ inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byt dest_pixel[3] = std::byte{alpha}; } -template +template inline void EncodePixel(const std::byte* source, std::byte* dest) { constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; if constexpr (format == PixelFormat::D24S8) { const u32 s8d24 = std::rotr(MakeInt(source), 8); std::memcpy(dest, &s8d24, sizeof(u32)); + } else if constexpr (format == PixelFormat::RGBA8 && converted) { + const u32 abgr = MakeInt(source); + const u32 rgba = Common::swap32(abgr); + std::memcpy(dest, &rgba, 4); + } else if constexpr (format == PixelFormat::RGB8 && converted) { + const u32 abgr = MakeInt(source); + const u32 rgb = Common::swap32(abgr << 8); + std::memcpy(dest, &rgb, 3); + } else if constexpr (format == PixelFormat::RGB565 && converted) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Color::EncodeRGB565(rgba, reinterpret_cast(dest)); + } else if constexpr (format == PixelFormat::RGB5A1 && converted) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Color::EncodeRGB5A1(rgba, reinterpret_cast(dest)); + } else if constexpr (format == PixelFormat::RGBA4 && converted) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Color::EncodeRGBA4(rgba, reinterpret_cast(dest)); } else { std::memcpy(dest, source, bytes_per_pixel); } } -template +template inline void MortonCopyTile(u32 stride, std::span tile_buffer, std::span linear_buffer) { constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; - constexpr u32 linear_bytes_per_pixel = GetBytesPerPixel(format); + 
constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format); constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4; constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4; @@ -127,10 +165,10 @@ inline void MortonCopyTile(u32 stride, std::span tile_buffer, } else if constexpr (is_4bit) { DecodePixel4(x, y, tile_buffer.data(), linear_pixel.data()); } else { - DecodePixel(tiled_pixel.data(), linear_pixel.data()); + DecodePixel(tiled_pixel.data(), linear_pixel.data()); } } else { - EncodePixel(linear_pixel.data(), tiled_pixel.data()); + EncodePixel(linear_pixel.data(), tiled_pixel.data()); } } } @@ -138,6 +176,7 @@ inline void MortonCopyTile(u32 stride, std::span tile_buffer, /** * @brief Performs morton to/from linear convertions on the provided pixel data + * @param converted If true performs RGBA8 to/from convertion to all color formats * @param width, height The dimentions of the rectangular region of pixels in linear_buffer * @param start_offset The number of bytes from the start of the first tile to the start of * tiled_buffer @@ -160,11 +199,11 @@ inline void MortonCopyTile(u32 stride, std::span tile_buffer, * start_offset/end_offset are useful here as they tell us exactly where the data should be placed * in the linear_buffer. */ -template +template static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset, std::span linear_buffer, std::span tiled_buffer) { constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; - constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format); + constexpr u32 aligned_bytes_per_pixel = converted ? 
4 : GetBytesPerPixel(format); constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8; static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, ""); @@ -202,7 +241,7 @@ static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset, if (start_offset < aligned_start_offset && !morton_to_linear) { std::array tmp_buf; auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); - MortonCopyTile(width, tmp_buf, linear_data); + MortonCopyTile(width, tmp_buf, linear_data); std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset, std::min(aligned_start_offset, end_offset) - start_offset); @@ -215,7 +254,7 @@ static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset, while (tiled_offset < buffer_end) { auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size); - MortonCopyTile(width, tiled_data, linear_data); + MortonCopyTile(width, tiled_data, linear_data); tiled_offset += tile_size; LinearNextTile(); } @@ -225,7 +264,7 @@ static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset, if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) { std::array tmp_buf; auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); - MortonCopyTile(width, tmp_buf, linear_data); + MortonCopyTile(width, tmp_buf, linear_data); std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end_offset - aligned_end_offset); } @@ -254,6 +293,14 @@ static constexpr std::array UNSWIZZLE_TABLE = { MortonCopy // 17 }; +static constexpr std::array UNSWIZZLE_TABLE_CONVERTED = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy // 4 +}; + static constexpr std::array SWIZZLE_TABLE = { MortonCopy, // 0 MortonCopy, // 1 @@ -275,4 +322,12 @@ static constexpr std::array SWIZZLE_TABLE = { MortonCopy // 17 }; +static constexpr 
std::array SWIZZLE_TABLE_CONVERTED = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy // 4 +}; + } // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 8c5a38c75..56155e7e4 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -916,10 +916,8 @@ void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval i const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr); if (surface->is_tiled) { - std::vector unswizzled_data(load_info.width * load_info.height * - GetBytesPerPixel(load_info.pixel_format)); - UnswizzleTexture(load_info, load_info.addr, load_info.end, upload_data, unswizzled_data); - runtime.FormatConvert(*surface, true, unswizzled_data, staging.mapped); + UnswizzleTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped, + runtime.NeedsConvertion(surface->pixel_format)); } else { runtime.FormatConvert(*surface, true, upload_data, staging.mapped); } @@ -959,10 +957,8 @@ void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); if (surface->is_tiled) { - std::vector temp_data(flush_info.width * flush_info.height * - GetBytesPerPixel(flush_info.pixel_format)); - runtime.FormatConvert(*surface, false, mapped, temp_data); - SwizzleTexture(flush_info, flush_start, flush_end, temp_data, download_dest); + SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest, + runtime.NeedsConvertion(surface->pixel_format)); } else { runtime.FormatConvert(*surface, false, mapped, download_dest); } diff --git a/src/video_core/rasterizer_cache/utils.cpp b/src/video_core/rasterizer_cache/utils.cpp index 52363d21c..6655575d3 100644 --- a/src/video_core/rasterizer_cache/utils.cpp +++ 
b/src/video_core/rasterizer_cache/utils.cpp @@ -48,17 +48,20 @@ ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_d } void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr, - std::span source_linear, std::span dest_tiled) { + std::span source_linear, std::span dest_tiled, + bool convert) { const u32 func_index = static_cast(swizzle_info.pixel_format); - const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index]; + const MortonFunc SwizzleImpl = (convert ? SWIZZLE_TABLE_CONVERTED : SWIZZLE_TABLE)[func_index]; SwizzleImpl(swizzle_info.width, swizzle_info.height, start_addr - swizzle_info.addr, end_addr - swizzle_info.addr, source_linear, dest_tiled); } void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr, - std::span source_tiled, std::span dest_linear) { + std::span source_tiled, std::span dest_linear, + bool convert) { const u32 func_index = static_cast(unswizzle_info.pixel_format); - const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index]; + const MortonFunc UnswizzleImpl = + (convert ? UNSWIZZLE_TABLE_CONVERTED : UNSWIZZLE_TABLE)[func_index]; UnswizzleImpl(unswizzle_info.width, unswizzle_info.height, start_addr - unswizzle_info.addr, end_addr - unswizzle_info.addr, dest_linear, source_tiled); } diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index 58bab1eee..91da22f84 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -116,7 +116,8 @@ struct TextureCubeConfig { * @param dest_linear The output buffer where the generated linear data will be written to. */ void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr, - std::span source_tiled, std::span dest_linear); + std::span source_tiled, std::span dest_linear, + bool convert = false); /** * Swizzles a linear texture according to the morton code. 
@@ -128,7 +129,8 @@ void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAd * @param dest_linear The output buffer where the generated linear data will be written to. */ void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr, - std::span source_linear, std::span dest_tiled); + std::span source_linear, std::span dest_tiled, + bool convert = false); } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp index 2577714e5..f6ed56d18 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp @@ -11,7 +11,7 @@ namespace OpenGL { D24S8toRGBA8::D24S8toRGBA8(bool use_texture_view) : use_texture_view{use_texture_view} { constexpr std::string_view cs_source = R"( -layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; layout(binding = 0) uniform highp sampler2D depth; layout(binding = 1) uniform lowp usampler2D stencil; layout(rgba8, binding = 2) uniform highp writeonly image2D color; @@ -77,7 +77,7 @@ void D24S8toRGBA8::Reinterpret(const Surface& source, VideoCore::Rect2D src_rect glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); - glDispatchCompute(src_rect.GetWidth() / 32, src_rect.GetHeight() / 32, 1); + glDispatchCompute(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1); if (use_texture_view) { temp_tex.Release(); diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index 716f162bb..5bd5136a9 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -319,6 +319,11 @@ const ReinterpreterList& 
TextureRuntime::GetPossibleReinterpretations( return reinterpreters[static_cast(dest_format)]; } +bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const { + return driver.IsOpenGLES() && + (format == VideoCore::PixelFormat::RGB8 || format == VideoCore::PixelFormat::RGBA8); +} + void TextureRuntime::BindFramebuffer(GLenum target, GLint level, GLenum textarget, VideoCore::SurfaceType type, OGLTexture& texture) const { const GLint framebuffer = target == GL_DRAW_FRAMEBUFFER ? draw_fbo.handle : read_fbo.handle; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 01d7085b7..7add29201 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -97,6 +97,9 @@ public: [[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations( VideoCore::PixelFormat dest_format) const; + /// Returns true if the provided pixel format needs convertion + [[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const; + private: /// Returns the framebuffer used for texture downloads void BindFramebuffer(GLenum target, GLint level, GLenum textarget, VideoCore::SurfaceType type, diff --git a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp index 5119ac2d1..dc4eae00c 100644 --- a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp +++ b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp @@ -14,7 +14,7 @@ D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, constexpr std::string_view cs_source = R"( #version 450 core #extension GL_EXT_samplerless_texture_functions : require -layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; layout(set = 0, binding = 0) uniform highp texture2D depth; layout(set = 0, binding = 1) uniform lowp 
utexture2D stencil; layout(set = 0, binding = 2, rgba8) uniform highp writeonly image2D color; @@ -154,7 +154,7 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(Common::Vec2i), src_offset.AsArray()); - command_buffer.dispatch(src_rect.GetWidth() / 32, src_rect.GetHeight() / 32, 1); + command_buffer.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3e396e473..e31f7c3bf 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -16,6 +16,8 @@ #include "video_core/renderer_vulkan/vk_task_scheduler.h" #include "video_core/video_core.h" +#include + namespace Vulkan { MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0)); @@ -93,7 +95,7 @@ constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() { return layout; } -constexpr u32 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; +constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024; constexpr u32 INDEX_BUFFER_SIZE = 8 * 1024 * 1024; constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024; constexpr u32 TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024; @@ -121,10 +123,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} { // Create a 1x1 clear texture to use in the NULL case, - default_texture = - runtime.Allocate(1, 1, VideoCore::PixelFormat::RGBA8, VideoCore::TextureType::Texture2D); - runtime.Transition(scheduler.GetUploadCommandBuffer(), default_texture, - vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); + CreateDefaultTextures(); uniform_block_data.lighting_lut_dirty.fill(true); @@ -162,7 +161,7 @@ 
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan } for (u32 i = 0; i < 7; i++) { - pipeline_cache.BindStorageImage(i, default_texture.image_view); + pipeline_cache.BindStorageImage(i, default_storage_texture.image_view); } // Explicitly call the derived version to avoid warnings about calling virtual @@ -174,6 +173,7 @@ RasterizerVulkan::~RasterizerVulkan() { renderpass_cache.ExitRenderpass(); scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown); + VmaAllocator allocator = instance.GetAllocator(); vk::Device device = instance.GetDevice(); for (auto& [key, sampler] : samplers) { @@ -184,10 +184,10 @@ RasterizerVulkan::~RasterizerVulkan() { device.destroyFramebuffer(framebuffer); } - const VideoCore::HostTextureTag tag = { - .format = VideoCore::PixelFormat::RGBA8, .width = 1, .height = 1}; - - runtime.Recycle(tag, std::move(default_texture)); + vmaDestroyImage(allocator, default_texture.image, default_texture.allocation); + vmaDestroyImage(allocator, default_storage_texture.image, default_storage_texture.allocation); + device.destroyImageView(default_texture.image_view); + device.destroyImageView(default_storage_texture.image_view); device.destroySampler(default_sampler); } @@ -374,7 +374,6 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi enable_attributes[input_reg] = true; offset += vertex_attributes.GetStride(attribute_index); } - } else { // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings // respectively @@ -675,7 +674,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { if (surface != nullptr) { pipeline_cache.BindStorageImage(binding, surface->alloc.image_view); } else { - pipeline_cache.BindStorageImage(binding, default_texture.image_view); + pipeline_cache.BindStorageImage(binding, default_storage_texture.image_view); } }; @@ -687,16 +686,15 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // filters GL_LINEAR/GL_NEAREST so emulate 
them by setting minLod = 0, and maxLod = 0.25, // and using minFilter = VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST const bool skip_mipmap = config.type == Pica::TexturingRegs::TextureConfig::TextureCube; - info = - SamplerInfo{.mag_filter = config.mag_filter, - .min_filter = config.min_filter, - .mip_filter = config.mip_filter, - .wrap_s = config.wrap_s, - .wrap_t = config.wrap_t, - .border_color = config.border_color.raw, - .lod_min = skip_mipmap ? 0.f : static_cast(config.lod.min_level), - .lod_max = skip_mipmap ? 0.25f : static_cast(config.lod.max_level), - .lod_bias = static_cast(config.lod.bias)}; + info = SamplerInfo{.mag_filter = config.mag_filter, + .min_filter = config.min_filter, + .mip_filter = config.mip_filter, + .wrap_s = config.wrap_s, + .wrap_t = config.wrap_t, + .border_color = config.border_color.raw, + .lod_min = skip_mipmap ? 0.f : static_cast(config.lod.min_level), + .lod_max = skip_mipmap ? 0.f : static_cast(config.lod.max_level), + .lod_bias = static_cast(config.lod.bias)}; // Search the cache and bind the appropriate sampler if (auto it = samplers.find(info); it != samplers.end()) { @@ -708,8 +706,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { } }; - renderpass_cache.ExitRenderpass(); - // Sync and bind the texture surfaces const auto pica_textures = regs.texturing.GetTextures(); for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { @@ -722,9 +718,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { case TextureType::Shadow2D: { auto surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { - pipeline_cache.BindStorageImage(0, surface->alloc.image_view); + pipeline_cache.BindStorageImage(0, surface->GetImageView()); } else { - pipeline_cache.BindStorageImage(0, default_texture.image_view); + pipeline_cache.BindStorageImage(0, default_storage_texture.image_view); } continue; } @@ -874,13 +870,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool 
is_indexed) { pipeline_cache.UseTrivialGeometryShader(); pipeline_cache.BindPipeline(pipeline_info); - // Bind the vertex buffer at the current mapped offset. This effectively means - // that when base_vertex is zero the GPU will start drawing from the current mapped - // offset not the start of the buffer. - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), - vertex_buffer.GetBufferOffset()); - const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex); const u32 batch_size = static_cast(vertex_batch.size()); for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) { @@ -892,6 +881,12 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size); vertex_buffer.Commit(vertex_size); + // Bind the vertex buffer at the current mapped offset. This effectively means + // that when base_vertex is zero the GPU will start drawing from the current mapped + // offset not the start of the buffer. 
+ vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset); + command_buffer.draw(vertices, 1, base_vertex, 0); } } @@ -1642,6 +1637,66 @@ vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info) return device.createFramebuffer(framebuffer_info); } +void RasterizerVulkan::CreateDefaultTextures() { + default_texture.format = vk::Format::eR8G8B8A8Unorm; + default_storage_texture.format = vk::Format::eR8G8B8A8Uint; + + vk::ImageCreateInfo image_info = {.imageType = vk::ImageType::e2D, + .format = default_texture.format, + .extent = {1, 1, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .usage = GetImageUsage(vk::ImageAspectFlagBits::eColor)}; + + const VmaAllocationCreateInfo alloc_info = {.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE}; + + VkImage unsafe_image{}; + VkImageCreateInfo& unsafe_image_info = static_cast(image_info); + + VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, + &unsafe_image, &default_texture.allocation, nullptr); + if (result != VK_SUCCESS) { + LOG_CRITICAL(Render_Vulkan, "Failed allocating default texture with error {}", result); + UNREACHABLE(); + } + + default_texture.image = vk::Image{unsafe_image}; + vk::ImageViewCreateInfo view_info = { + .image = default_texture.image, + .viewType = vk::ImageViewType::e2D, + .format = default_texture.format, + .subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1}}; + + vk::Device device = instance.GetDevice(); + default_texture.image_view = device.createImageView(view_info); + + // Define the default texture for storage descriptors + image_info.format = default_storage_texture.format; + result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, &unsafe_image, + &default_storage_texture.allocation, 
nullptr); + if (result != VK_SUCCESS) { + LOG_CRITICAL(Render_Vulkan, "Failed allocating default storage texture with error {}", + result); + UNREACHABLE(); + } + + default_storage_texture.image = vk::Image{unsafe_image}; + + view_info.format = default_storage_texture.format; + view_info.image = default_storage_texture.image; + default_storage_texture.image_view = device.createImageView(view_info); + + runtime.Transition(scheduler.GetUploadCommandBuffer(), default_texture, + vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); + runtime.Transition(scheduler.GetUploadCommandBuffer(), default_storage_texture, + vk::ImageLayout::eGeneral, 0, 1); +} + void RasterizerVulkan::FlushBuffers() { vertex_buffer.Flush(); uniform_buffer.Flush(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 14ed6c95b..e71254774 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -222,6 +222,9 @@ private: /// Internal implementation for AccelerateDrawBatch bool AccelerateDrawBatchInternal(bool is_indexed); + /// Copies vertex data performing needed convertions and casts + void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data); + struct VertexArrayInfo { u32 vs_input_index_min; u32 vs_input_index_max; @@ -246,6 +249,8 @@ private: /// Creates a new Vulkan framebuffer object vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info); + void CreateDefaultTextures(); + private: const Instance& instance; TaskScheduler& scheduler; @@ -274,8 +279,9 @@ private: std::vector vertex_batch; std::array binding_offsets{}; - ImageAlloc default_texture; vk::Sampler default_sampler; + ImageAlloc default_texture; + ImageAlloc default_storage_texture; struct { Pica::Shader::UniformData data{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 79c9ced63..77c1bb349 100644 --- 
a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -1642,7 +1642,16 @@ layout (set = 0, binding = 0, std140) uniform vs_config { UNREACHABLE(); } - out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_reg{0};\n", i, prefix); + out += + fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, prefix); + } + } + out += '\n'; + + // cast input registers to float to avoid computational errors + for (std::size_t i = 0; i < used_regs.size(); ++i) { + if (used_regs[i]) { + out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i); } } out += '\n'; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index a766023e2..0c3dc3a8b 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -33,6 +33,15 @@ void Swapchain::Create(u32 width, u32 height) { is_outdated = false; is_suboptimal = false; + // Destroy the previous image views + vk::Device device = instance.GetDevice(); + for (auto& image : swapchain_images) { + if (image.image) { + device.destroyImageView(image.image_view); + device.destroyFramebuffer(image.framebuffer); + } + } + // Fetch information about the provided surface Configure(width, height); @@ -61,7 +70,6 @@ void Swapchain::Create(u32 width, u32 height) { .clipped = true, .oldSwapchain = swapchain}; - vk::Device device = instance.GetDevice(); vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info); // If an old swapchain exists, destroy it and move the new one to its place. 
@@ -71,12 +79,6 @@ void Swapchain::Create(u32 width, u32 height) { auto images = device.getSwapchainImagesKHR(swapchain); - // Destroy the previous images - for (auto& image : swapchain_images) { - device.destroyImageView(image.image_view); - device.destroyFramebuffer(image.framebuffer); - } - swapchain_images.clear(); swapchain_images.resize(images.size()); @@ -113,6 +115,10 @@ void Swapchain::Create(u32 width, u32 height) { constexpr u64 ACQUIRE_TIMEOUT = 1000000000; void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) { + if (NeedsRecreation()) [[unlikely]] { + return; + } + vk::Device device = instance.GetDevice(); vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired, VK_NULL_HANDLE, &current_image); @@ -132,6 +138,10 @@ void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) { } void Swapchain::Present(vk::Semaphore wait_for_present) { + if (NeedsRecreation()) [[unlikely]] { + return; + } + const vk::PresentInfoKHR present_info = {.waitSemaphoreCount = 1, .pWaitSemaphores = &wait_for_present, .swapchainCount = 1, @@ -145,6 +155,7 @@ void Swapchain::Present(vk::Semaphore wait_for_present) { case vk::Result::eSuccess: break; case vk::Result::eSuboptimalKHR: + is_suboptimal = true; LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case vk::Result::eErrorOutOfDateKHR: @@ -154,8 +165,6 @@ void Swapchain::Present(vk::Semaphore wait_for_present) { LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed"); break; } - - current_frame = (current_frame + 1) % swapchain_images.size(); } void Swapchain::Configure(u32 width, u32 height) { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 972042b65..d7b30280d 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -82,7 +82,6 @@ private: // Swapchain state std::vector swapchain_images; u32 current_image = 0; - u32 current_frame = 0; bool
is_outdated = true; bool is_suboptimal = true; }; diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index f4a2aea78..ebf846caa 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -210,50 +210,28 @@ void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& a void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::span source, std::span dest) { - if (!surface.NeedsConvert()) { + if (!NeedsConvertion(surface.pixel_format)) { std::memcpy(dest.data(), source.data(), source.size()); return; } - // Since this is the most common case handle it separately - if (surface.pixel_format == VideoCore::PixelFormat::RGBA8) { - return Pica::Texture::ConvertABGRToRGBA(source, dest); - } - - // Handle simple D24S8 interleave case - if (surface.GetInternalFormat() == vk::Format::eD24UnormS8Uint) { - if (!upload) { - return Pica::Texture::InterleaveD24S8(source, dest); - } else { - UNREACHABLE(); - } - } - if (upload) { switch (surface.pixel_format) { + case VideoCore::PixelFormat::RGBA8: + return Pica::Texture::ConvertABGRToRGBA(source, dest); case VideoCore::PixelFormat::RGB8: return Pica::Texture::ConvertBGRToRGBA(source, dest); - case VideoCore::PixelFormat::RGBA4: - return Pica::Texture::ConvertRGBA4ToRGBA8(source, dest); - case VideoCore::PixelFormat::RGB5A1: - return Pica::Texture::ConvertRGB5A1ToRGBA8(source, dest); default: break; } } else { switch (surface.pixel_format) { - case VideoCore::PixelFormat::D24S8: - return Pica::Texture::ConvertD32S8ToD24S8(source, dest); case VideoCore::PixelFormat::RGBA4: return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest); - case VideoCore::PixelFormat::RGB8: - return Pica::Texture::ConvertRGBAToBGR(source, dest); - default: - break; } } - LOG_WARNING(Render_Vulkan, "Missing format convertion: {} {} {}", + LOG_WARNING(Render_Vulkan, "Missing linear 
format convertion: {} {} {}", vk::to_string(surface.traits.native), upload ? "->" : "<-", vk::to_string(surface.alloc.format)); } @@ -457,6 +435,14 @@ const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations( return reinterpreters[static_cast(dest_format)]; } +bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const { + const FormatTraits traits = instance.GetTraits(format); + const VideoCore::SurfaceType type = VideoCore::GetFormatType(format); + return type == VideoCore::SurfaceType::Color && + (format == VideoCore::PixelFormat::RGBA8 || !traits.blit_support || + !traits.attachment_support); +} + void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level, u32 level_count, u32 layer, u32 layer_count) { @@ -588,9 +574,12 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa if (is_scaled) { ScaledUpload(upload); } else { + u32 region_count = 0; + std::array copy_regions; + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); const VideoCore::Rect2D rect = upload.texture_rect; - const vk::BufferImageCopy copy_region = { + vk::BufferImageCopy copy_region = { .bufferOffset = staging.buffer_offset, .bufferRowLength = rect.GetWidth(), .bufferImageHeight = rect.GetHeight(), @@ -601,11 +590,25 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; + if (alloc.aspect & vk::ImageAspectFlagBits::eColor) { + copy_regions[region_count++] = copy_region; + } else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) { + copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; + copy_regions[region_count++] = copy_region; + + if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) { + copy_region.bufferOffset += 4 * staging.size / 5; + copy_region.imageSubresource.aspectMask 
= vk::ImageAspectFlagBits::eStencil; + copy_regions[region_count++] = copy_region; + } + } + runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferDstOptimal, 0, alloc.levels, 0, texture_type == VideoCore::TextureType::CubeMap ? 6 : 1); command_buffer.copyBufferToImage(staging.buffer, alloc.image, - vk::ImageLayout::eTransferDstOptimal, copy_region); + vk::ImageLayout::eTransferDstOptimal, region_count, + copy_regions.data()); } InvalidateAllWatcher(); @@ -667,13 +670,6 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi runtime.staging_offsets[current_slot] += staging.size; } -bool Surface::NeedsConvert() const { - // RGBA8 needs a byteswap since R8G8B8A8UnormPack32 does not exist - // D24S8 always needs an interleave pass even if natively supported - return alloc.format != traits.native || pixel_format == VideoCore::PixelFormat::RGBA8 || - pixel_format == VideoCore::PixelFormat::D24S8; -} - u32 Surface::GetInternalBytesPerPixel() const { return vk::blockSize(alloc.format); } diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index 64f7ba776..d8f992d05 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -33,7 +33,7 @@ struct ImageAlloc { vk::ImageUsageFlags usage; vk::Format format; vk::ImageLayout layout = vk::ImageLayout::eUndefined; - vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone; + vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor; u32 levels = 1; u32 layers = 1; }; @@ -92,6 +92,9 @@ public: [[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations( VideoCore::PixelFormat dest_format) const; + /// Returns true if the provided pixel format needs convertion + [[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const; + /// Performs operations that need to be done on every scheduler slot switch void OnSlotSwitch(u32 new_slot); 
@@ -131,9 +134,6 @@ public: /// Downloads pixel data to staging from a rectangle region of the surface texture void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging); - /// Returns true if the surface requires pixel data convertion - bool NeedsConvert() const; - /// Returns the bpp of the internal surface format u32 GetInternalBytesPerPixel() const; diff --git a/src/video_core/texture/texture_decode.cpp b/src/video_core/texture/texture_decode.cpp index 82c0faf34..cbc177ae7 100644 --- a/src/video_core/texture/texture_decode.cpp +++ b/src/video_core/texture/texture_decode.cpp @@ -289,6 +289,16 @@ void ConvertRGB5A1ToRGBA8(std::span source, std::span source, std::span dest) { + u32 j = 0; + for (std::size_t i = 0; i < dest.size(); i += 2) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source.data() + j, sizeof(rgba)); + Color::EncodeRGB5A1(rgba, reinterpret_cast(dest.data() + i)); + j += 4; + } +} + void ConvertD32S8ToD24S8(std::span source, std::span dest) { std::size_t depth_offset = 0; std::size_t stencil_offset = 4 * source.size() / 5; @@ -316,4 +326,15 @@ void InterleaveD24S8(std::span source, std::span des } } +void DeinterleaveD24S8(std::span source, std::span dest) { + std::size_t depth_offset = 0; + std::size_t stencil_offset = 3 * source.size() / 4; + for (std::size_t i = 0; i < dest.size(); i += 4) { + dest[stencil_offset] = source[i]; + std::memcpy(dest.data() + depth_offset, source.data() + i + 1, 3); + depth_offset += 3; + stencil_offset += 1; + } +} + } // namespace Pica::Texture diff --git a/src/video_core/texture/texture_decode.h b/src/video_core/texture/texture_decode.h index a7158e351..bc27aaaf2 100644 --- a/src/video_core/texture/texture_decode.h +++ b/src/video_core/texture/texture_decode.h @@ -87,8 +87,12 @@ void ConvertRGBA8ToRGBA4(std::span source, std::span void ConvertRGB5A1ToRGBA8(std::span source, std::span dest); +void ConvertRGBA8ToRGB5A1(std::span source, std::span dest); + void 
ConvertD32S8ToD24S8(std::span source, std::span dest); void InterleaveD24S8(std::span source, std::span dest); +void DeinterleaveD24S8(std::span source, std::span dest); + } // namespace Pica::Texture