diff --git a/src/android/app/src/main/AndroidManifest.xml b/src/android/app/src/main/AndroidManifest.xml index cfc9a7474..2831a12bc 100644 --- a/src/android/app/src/main/AndroidManifest.xml +++ b/src/android/app/src/main/AndroidManifest.xml @@ -30,7 +30,8 @@ android:supportsRtl="true" android:isGame="true" android:banner="@mipmap/ic_launcher" - android:requestLegacyExternalStorage="true"> + android:requestLegacyExternalStorage="true" + android:debuggable="true"> (sdl2_config->GetInteger("Renderer", "graphics_api", 2)); Settings::values.async_command_recording = - sdl2_config->GetBoolean("Renderer", "async_command_recording", true); + sdl2_config->GetBoolean("Renderer", "async_command_recording", false); + Settings::values.spirv_shader_gen = sdl2_config->GetBoolean("Renderer", "spirv_shader_gen", true); + Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "renderer_debug", true); Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); Settings::values.use_hw_shader = sdl2_config->GetBoolean("Renderer", "use_hw_shader", true); Settings::values.shaders_accurate_mul = diff --git a/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp b/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp new file mode 100644 index 000000000..31c903830 --- /dev/null +++ b/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp @@ -0,0 +1,176 @@ +// Copyright 2019 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include +#include "common/logging/log.h" +#include "common/settings.h" +#include "input_common/main.h" +#include "jni/emu_window/emu_window_vk.h" +#include "jni/id_cache.h" +#include "jni/input_manager.h" +#include "network/network.h" +#include "video_core/renderer_base.h" +#include "video_core/video_core.h" + +static bool IsPortraitMode() { + return JNI_FALSE != IDCache::GetEnvForThread()->CallStaticBooleanMethod( + IDCache::GetNativeLibraryClass(), IDCache::GetIsPortraitMode()); +} + +static void UpdateLandscapeScreenLayout() { + Settings::values.layout_option = + static_cast(IDCache::GetEnvForThread()->CallStaticIntMethod( + IDCache::GetNativeLibraryClass(), IDCache::GetLandscapeScreenLayout())); +} + +void EmuWindow_Android_Vulkan::OnSurfaceChanged(ANativeWindow* surface) { + render_window = surface; + StopPresenting(); +} + +bool EmuWindow_Android_Vulkan::OnTouchEvent(int x, int y, bool pressed) { + if (pressed) { + return TouchPressed((unsigned)std::max(x, 0), (unsigned)std::max(y, 0)); + } + + TouchReleased(); + return true; +} + +void EmuWindow_Android_Vulkan::OnTouchMoved(int x, int y) { + TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0)); +} + +void EmuWindow_Android_Vulkan::OnFramebufferSizeChanged() { + UpdateLandscapeScreenLayout(); + const bool is_portrait_mode{IsPortraitMode()}; + const int bigger{window_width > window_height ? window_width : window_height}; + const int smaller{window_width < window_height ? 
window_width : window_height}; + if (is_portrait_mode) { + UpdateCurrentFramebufferLayout(smaller, bigger, is_portrait_mode); + } else { + UpdateCurrentFramebufferLayout(bigger, smaller, is_portrait_mode); + } +} + +EmuWindow_Android_Vulkan::EmuWindow_Android_Vulkan(ANativeWindow* surface) { + LOG_DEBUG(Frontend, "Initializing EmuWindow_Android_Vulkan"); + + if (!surface) { + LOG_CRITICAL(Frontend, "surface is nullptr"); + return; + } + + Network::Init(); + + host_window = surface; + CreateWindowSurface(); + + if (core_context = CreateSharedContext(); !core_context) { + LOG_CRITICAL(Frontend, "CreateSharedContext() failed"); + return; + } + + OnFramebufferSizeChanged(); +} + +bool EmuWindow_Android_Vulkan::CreateWindowSurface() { + if (!host_window) { + return true; + } + + window_info.type = Frontend::WindowSystemType::Android; + window_info.render_surface = host_window; + + return true; +} + +void EmuWindow_Android_Vulkan::DestroyWindowSurface() { + /*if (!egl_surface) { + return; + } + if (eglGetCurrentSurface(EGL_DRAW) == egl_surface) { + eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + } + if (!eglDestroySurface(egl_display, egl_surface)) { + LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); + } + egl_surface = EGL_NO_SURFACE;*/ +} + +void EmuWindow_Android_Vulkan::DestroyContext() { + /*if (!egl_context) { + return; + } + if (eglGetCurrentContext() == egl_context) { + eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + } + if (!eglDestroyContext(egl_display, egl_context)) { + LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); + } + if (!eglTerminate(egl_display)) { + LOG_CRITICAL(Frontend, "eglTerminate() failed"); + } + egl_context = EGL_NO_CONTEXT; + egl_display = EGL_NO_DISPLAY;*/ +} + +EmuWindow_Android_Vulkan::~EmuWindow_Android_Vulkan() { + DestroyWindowSurface(); + DestroyContext(); +} + +std::unique_ptr EmuWindow_Android_Vulkan::CreateSharedContext() const { + return std::make_unique(); 
+} + +void EmuWindow_Android_Vulkan::StopPresenting() { + /*if (presenting_state == PresentingState::Running) { + eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + }*/ + presenting_state = PresentingState::Stopped; +} + +void EmuWindow_Android_Vulkan::TryPresenting() { + if (presenting_state != PresentingState::Running) { + if (presenting_state == PresentingState::Initial) { + /*eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);*/ + presenting_state = PresentingState::Running; + } else { + return; + } + } + /*eglSwapInterval(egl_display, Settings::values.use_vsync_new ? 1 : 0); + if (VideoCore::g_renderer) { + VideoCore::g_renderer->TryPresent(0); + eglSwapBuffers(egl_display, egl_surface); + }*/ +} + +void EmuWindow_Android_Vulkan::PollEvents() { + if (!render_window) { + return; + } + + host_window = render_window; + render_window = nullptr; + + DestroyWindowSurface(); + CreateWindowSurface(); + OnFramebufferSizeChanged(); + presenting_state = PresentingState::Initial; +} + +void EmuWindow_Android_Vulkan::MakeCurrent() { + core_context->MakeCurrent(); +} + +void EmuWindow_Android_Vulkan::DoneCurrent() { + core_context->DoneCurrent(); +} diff --git a/src/android/app/src/main/jni/emu_window/emu_window_vk.h b/src/android/app/src/main/jni/emu_window/emu_window_vk.h new file mode 100644 index 000000000..87a48fa06 --- /dev/null +++ b/src/android/app/src/main/jni/emu_window/emu_window_vk.h @@ -0,0 +1,59 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "core/frontend/emu_window.h" + +struct ANativeWindow; + +class SharedContext_Android : public Frontend::GraphicsContext {}; + +class EmuWindow_Android_Vulkan : public Frontend::EmuWindow { +public: + EmuWindow_Android_Vulkan(ANativeWindow* surface); + ~EmuWindow_Android_Vulkan(); + + void Present(); + + /// Called by the onSurfaceChanges() method to change the surface + void OnSurfaceChanged(ANativeWindow* surface); + + /// Handles touch event that occur.(Touched or released) + bool OnTouchEvent(int x, int y, bool pressed); + + /// Handles movement of touch pointer + void OnTouchMoved(int x, int y); + + void PollEvents() override; + void MakeCurrent() override; + void DoneCurrent() override; + + void TryPresenting(); + void StopPresenting(); + + std::unique_ptr CreateSharedContext() const override; + +private: + void OnFramebufferSizeChanged(); + bool CreateWindowSurface(); + void DestroyWindowSurface(); + void DestroyContext(); + + ANativeWindow* render_window{}; + ANativeWindow* host_window{}; + + int window_width{1080}; + int window_height{2220}; + + std::unique_ptr core_context; + + enum class PresentingState { + Initial, + Running, + Stopped, + }; + PresentingState presenting_state{}; +}; diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index c22f06c39..35ff03bd6 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -32,7 +32,7 @@ #include "jni/camera/ndk_camera.h" #include "jni/camera/still_image_camera.h" #include "jni/config.h" -#include "jni/emu_window/emu_window.h" +#include "jni/emu_window/emu_window_vk.h" #include "jni/game_info.h" #include "jni/game_settings.h" #include "jni/id_cache.h" @@ -48,7 +48,7 @@ namespace { ANativeWindow* s_surf; -std::unique_ptr window; +std::unique_ptr window; std::atomic stop_run{true}; std::atomic pause_emulation{false}; @@ -146,7 +146,7 @@ static Core::System::ResultStatus RunCitra(const 
std::string& filepath) { return Core::System::ResultStatus::ErrorLoader; } - window = std::make_unique(s_surf); + window = std::make_unique(s_surf); Core::System& system{Core::System::GetInstance()}; diff --git a/src/android/app/src/main/jniLibs/arm64-v8a/libVkLayer_khronos_timeline_semaphore.so b/src/android/app/src/main/jniLibs/arm64-v8a/libVkLayer_khronos_timeline_semaphore.so new file mode 100644 index 000000000..06545335e Binary files /dev/null and b/src/android/app/src/main/jniLibs/arm64-v8a/libVkLayer_khronos_timeline_semaphore.so differ diff --git a/src/android/app/src/main/jniLibs/arm64-v8a/libVkLayer_khronos_validation.so b/src/android/app/src/main/jniLibs/arm64-v8a/libVkLayer_khronos_validation.so new file mode 100644 index 000000000..b999455c4 Binary files /dev/null and b/src/android/app/src/main/jniLibs/arm64-v8a/libVkLayer_khronos_validation.so differ diff --git a/src/android/app/src/main/jniLibs/arm64-v8a/libc++_shared.so b/src/android/app/src/main/jniLibs/arm64-v8a/libc++_shared.so new file mode 100644 index 000000000..65ac529d1 Binary files /dev/null and b/src/android/app/src/main/jniLibs/arm64-v8a/libc++_shared.so differ diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 99b4fdb1b..05cb21660 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -167,27 +167,7 @@ void RendererVulkan::PrepareRendertarget() { LCD::Read(color_fill.raw, lcd_color_addr); if (color_fill.is_enabled) { - TextureInfo& texture = screen_infos[i].texture; - runtime.Transition(texture.alloc, vk::ImageLayout::eTransferDstOptimal, 0, - texture.alloc.levels); - - scheduler.Record([image = texture.alloc.image, - color_fill](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - const vk::ClearColorValue clear_color = { - .float32 = std::array{color_fill.color_r / 255.0f, color_fill.color_g / 255.0f, - color_fill.color_b / 255.0f, 1.0f}}; - - 
const vk::ImageSubresourceRange range = { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }; - - render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, - clear_color, range); - }); + LoadColorToActiveVkTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture); } else { TextureInfo& texture = screen_infos[i].texture; if (texture.width != framebuffer.width || texture.height != framebuffer.height || @@ -217,7 +197,7 @@ void RendererVulkan::BeginRendering() { present_textures[i] = vk::DescriptorImageInfo{ .imageView = info.display_texture ? info.display_texture->image_view : info.texture.alloc.image_view, - .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; + .imageLayout = vk::ImageLayout::eGeneral}; } present_textures[3] = vk::DescriptorImageInfo{.sampler = present_samplers[current_sampler]}; @@ -301,7 +281,7 @@ void RendererVulkan::CompileShaders() { .mipmapMode = vk::SamplerMipmapMode::eLinear, .addressModeU = vk::SamplerAddressMode::eClampToEdge, .addressModeV = vk::SamplerAddressMode::eClampToEdge, - .anisotropyEnable = true, + .anisotropyEnable = instance.IsAnisotropicFilteringSupported(), .maxAnisotropy = properties.limits.maxSamplerAnisotropy, .compareEnable = false, .compareOp = vk::CompareOp::eAlways, @@ -492,6 +472,55 @@ void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, } } +void RendererVulkan::LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture) { + const vk::ClearColorValue clear_color = { + .float32 = std::array{color_r / 255.0f, color_g / 255.0f, color_b / 255.0f, 1.0f}}; + + renderpass_cache.ExitRenderpass(); + scheduler.Record([image = texture.alloc.image, + clear_color](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::ImageSubresourceRange range = {.aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 
VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS}; + + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = vk::AccessFlagBits::eShaderRead | + vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = range + }; + + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | + vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = range + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear_color, range); + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); +} + void RendererVulkan::ReloadSampler() { current_sampler = !Settings::values.filter_mode.GetValue(); } @@ -857,14 +886,21 @@ void RendererVulkan::SwapBuffers() { render_cmdbuf.setScissor(0, scissor); }); + DrawScreens(layout, false); + renderpass_cache.ExitRenderpass(); - for (auto& info : screen_infos) { - ImageAlloc* alloc = info.display_texture ? 
info.display_texture : &info.texture.alloc; - runtime.Transition(*alloc, vk::ImageLayout::eShaderReadOnlyOptimal, 0, alloc->levels); - } + scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; - DrawScreens(layout, false); + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, + memory_write_barrier, {}, {}); + }); const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 0bccba311..4a907a97e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -88,6 +88,7 @@ private: void BuildPipelines(); void ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer); + void LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); void ConfigureRenderPipeline(); void PrepareRendertarget(); void BeginRendering(); diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.cpp b/src/video_core/renderer_vulkan/vk_blit_helper.cpp index 85fa39cbd..3db44f015 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_helper.cpp @@ -8,13 +8,13 @@ #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" namespace Vulkan { -BlitHelper::BlitHelper(const Instance& 
instance, Scheduler& scheduler, - DescriptorManager& desc_manager) - : scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} { +BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager) + : scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} { constexpr std::string_view cs_source = R"( #version 450 core #extension GL_EXT_samplerless_texture_functions : require @@ -22,95 +22,91 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; layout(set = 0, binding = 0) uniform highp texture2D depth; layout(set = 0, binding = 1) uniform lowp utexture2D stencil; layout(set = 0, binding = 2, r32ui) uniform highp writeonly uimage2D color; - layout(push_constant, std140) uniform ComputeInfo { - mediump ivec2 src_offset; +mediump ivec2 src_offset; }; - void main() { - ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy); - ivec2 tex_coord = src_offset + dst_coord; - - highp uint depth_val = - uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0)); - lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; - highp uint value = stencil_val | (depth_val << 8); - imageStore(color, dst_coord, uvec4(value)); +ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy); +ivec2 tex_coord = src_offset + dst_coord; +highp uint depth_val = + uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0)); +lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; +highp uint value = stencil_val | (depth_val << 8); +imageStore(color, dst_coord, uvec4(value)); } - )"; compute_shader = - Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High); + Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High); const std::array compute_layout_bindings = { - vk::DescriptorSetLayoutBinding{.binding = 0, - .descriptorType = vk::DescriptorType::eSampledImage, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute}, - 
vk::DescriptorSetLayoutBinding{.binding = 1, - .descriptorType = vk::DescriptorType::eSampledImage, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute}, - vk::DescriptorSetLayoutBinding{.binding = 2, - .descriptorType = vk::DescriptorType::eStorageImage, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute}}; + vk::DescriptorSetLayoutBinding{.binding = 0, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute}, + vk::DescriptorSetLayoutBinding{.binding = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute}, + vk::DescriptorSetLayoutBinding{.binding = 2, + .descriptorType = vk::DescriptorType::eStorageImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute}}; const vk::DescriptorSetLayoutCreateInfo compute_layout_info = { - .bindingCount = static_cast(compute_layout_bindings.size()), - .pBindings = compute_layout_bindings.data()}; + .bindingCount = static_cast(compute_layout_bindings.size()), + .pBindings = compute_layout_bindings.data()}; descriptor_layout = device.createDescriptorSetLayout(compute_layout_info); const std::array update_template_entries = { - vk::DescriptorUpdateTemplateEntry{.dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampledImage, - .offset = 0, - .stride = sizeof(vk::DescriptorImageInfo)}, - vk::DescriptorUpdateTemplateEntry{.dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampledImage, - .offset = sizeof(vk::DescriptorImageInfo), - .stride = 0}, - vk::DescriptorUpdateTemplateEntry{.dstBinding = 2, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eStorageImage, - .offset = 2 * sizeof(vk::DescriptorImageInfo), - .stride = 0}}; + vk::DescriptorUpdateTemplateEntry{.dstBinding 
= 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = 0, + .stride = sizeof(vk::DescriptorImageInfo)}, + vk::DescriptorUpdateTemplateEntry{.dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = sizeof(vk::DescriptorImageInfo), + .stride = 0}, + vk::DescriptorUpdateTemplateEntry{.dstBinding = 2, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageImage, + .offset = 2 * sizeof(vk::DescriptorImageInfo), + .stride = 0}}; const vk::DescriptorUpdateTemplateCreateInfo template_info = { - .descriptorUpdateEntryCount = static_cast(update_template_entries.size()), - .pDescriptorUpdateEntries = update_template_entries.data(), - .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, - .descriptorSetLayout = descriptor_layout}; + .descriptorUpdateEntryCount = static_cast(update_template_entries.size()), + .pDescriptorUpdateEntries = update_template_entries.data(), + .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = descriptor_layout}; update_template = device.createDescriptorUpdateTemplate(template_info); const vk::PushConstantRange push_range = { - .stageFlags = vk::ShaderStageFlagBits::eCompute, - .offset = 0, - .size = sizeof(Common::Vec2i), + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 0, + .size = sizeof(Common::Vec2i), }; const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = 1, - .pSetLayouts = &descriptor_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &push_range}; + .pSetLayouts = &descriptor_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_range}; compute_pipeline_layout = device.createPipelineLayout(layout_info); const vk::PipelineShaderStageCreateInfo compute_stage = { - .stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"}; + 
.stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"}; const vk::ComputePipelineCreateInfo compute_info = {.stage = compute_stage, - .layout = compute_pipeline_layout}; + .layout = compute_pipeline_layout}; if (const auto result = device.createComputePipeline({}, compute_info); - result.result == vk::Result::eSuccess) { + result.result == vk::Result::eSuccess) { compute_pipeline = result.value; } else { LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!"); @@ -128,23 +124,98 @@ BlitHelper::~BlitHelper() { void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) { - source.Transition(vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, source.alloc.levels); - dest.Transition(vk::ImageLayout::eGeneral, 0, dest.alloc.levels); - const std::array textures = { - vk::DescriptorImageInfo{.imageView = source.GetDepthView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, - vk::DescriptorImageInfo{.imageView = source.GetStencilView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, - vk::DescriptorImageInfo{.imageView = dest.GetImageView(), - .imageLayout = vk::ImageLayout::eGeneral}}; + vk::DescriptorImageInfo{.imageView = source.GetDepthView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, + vk::DescriptorImageInfo{.imageView = source.GetStencilView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, + vk::DescriptorImageInfo{.imageView = dest.GetImageView(), + .imageLayout = vk::ImageLayout::eGeneral}}; vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout); device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); - scheduler.Record([this, set, blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, - 0, set, {}); + scheduler.Record([this, set, blit, + src_image = source.alloc.image, + 
dst_image = dest.alloc.image](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const std::array pre_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth | + vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eShaderWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + } + }; + const std::array post_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderRead, + .dstAccessMask = vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth | + vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = 
VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + } + }; + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); + + render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {}); render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom); @@ -152,6 +223,10 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, sizeof(Common::Vec2i), src_offset.AsArray()); render_cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1); + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); }); } diff --git a/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp b/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp index 2f837d803..73b085498 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp @@ -34,7 +34,7 @@ constexpr static std::array RASTERIZER_SETS = { vk::DescriptorType::eStorageImage, vk::DescriptorType::eStorageImage, vk::DescriptorType::eStorageImage, vk::DescriptorType::eStorageImage, 
vk::DescriptorType::eStorageImage}, - .binding_count = 7}}; + .binding_count = 4}}; constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) { vk::ShaderStageFlags flags; diff --git a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp index 020db7b20..f5504563a 100644 --- a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp +++ b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp @@ -6,14 +6,14 @@ #include "video_core/renderer_vulkan/vk_format_reinterpreter.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" namespace Vulkan { D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager, TextureRuntime& runtime) - : FormatReinterpreterBase{instance, scheduler, desc_manager, runtime}, - device{instance.GetDevice()} { + : FormatReinterpreterBase{instance, scheduler, desc_manager, runtime}, device{instance.GetDevice()} { constexpr std::string_view cs_source = R"( #version 450 core #extension GL_EXT_samplerless_texture_functions : require @@ -21,95 +21,91 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; layout(set = 0, binding = 0) uniform highp texture2D depth; layout(set = 0, binding = 1) uniform lowp utexture2D stencil; layout(set = 0, binding = 2, rgba8) uniform highp writeonly image2D color; - layout(push_constant, std140) uniform ComputeInfo { - mediump ivec2 src_offset; +mediump ivec2 src_offset; }; - void main() { - ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy); - - highp uint depth_val = - uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); - lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; - highp uvec4 components = - uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 
0x000000FFu); - imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0)); +ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy); +highp uint depth_val = + uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); +lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; +highp uvec4 components = + uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); +imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0)); } - )"; compute_shader = - Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High); + Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High); const std::array compute_layout_bindings = { - vk::DescriptorSetLayoutBinding{.binding = 0, - .descriptorType = vk::DescriptorType::eSampledImage, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute}, - vk::DescriptorSetLayoutBinding{.binding = 1, - .descriptorType = vk::DescriptorType::eSampledImage, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute}, - vk::DescriptorSetLayoutBinding{.binding = 2, - .descriptorType = vk::DescriptorType::eStorageImage, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute}}; + vk::DescriptorSetLayoutBinding{.binding = 0, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute}, + vk::DescriptorSetLayoutBinding{.binding = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute}, + vk::DescriptorSetLayoutBinding{.binding = 2, + .descriptorType = vk::DescriptorType::eStorageImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute}}; const vk::DescriptorSetLayoutCreateInfo compute_layout_info = { - .bindingCount = static_cast(compute_layout_bindings.size()), - .pBindings = compute_layout_bindings.data()}; + .bindingCount = 
static_cast(compute_layout_bindings.size()), + .pBindings = compute_layout_bindings.data()}; descriptor_layout = device.createDescriptorSetLayout(compute_layout_info); const std::array update_template_entries = { - vk::DescriptorUpdateTemplateEntry{.dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampledImage, - .offset = 0, - .stride = sizeof(vk::DescriptorImageInfo)}, - vk::DescriptorUpdateTemplateEntry{.dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampledImage, - .offset = sizeof(vk::DescriptorImageInfo), - .stride = 0}, - vk::DescriptorUpdateTemplateEntry{.dstBinding = 2, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eStorageImage, - .offset = 2 * sizeof(vk::DescriptorImageInfo), - .stride = 0}}; + vk::DescriptorUpdateTemplateEntry{.dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = 0, + .stride = sizeof(vk::DescriptorImageInfo)}, + vk::DescriptorUpdateTemplateEntry{.dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = sizeof(vk::DescriptorImageInfo), + .stride = 0}, + vk::DescriptorUpdateTemplateEntry{.dstBinding = 2, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageImage, + .offset = 2 * sizeof(vk::DescriptorImageInfo), + .stride = 0}}; const vk::DescriptorUpdateTemplateCreateInfo template_info = { - .descriptorUpdateEntryCount = static_cast(update_template_entries.size()), - .pDescriptorUpdateEntries = update_template_entries.data(), - .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, - .descriptorSetLayout = descriptor_layout}; + .descriptorUpdateEntryCount = static_cast(update_template_entries.size()), + .pDescriptorUpdateEntries = update_template_entries.data(), + .templateType = 
vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = descriptor_layout}; update_template = device.createDescriptorUpdateTemplate(template_info); const vk::PushConstantRange push_range = { - .stageFlags = vk::ShaderStageFlagBits::eCompute, - .offset = 0, - .size = sizeof(Common::Vec2i), + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 0, + .size = sizeof(Common::Vec2i), }; const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = 1, - .pSetLayouts = &descriptor_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &push_range}; + .pSetLayouts = &descriptor_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_range}; compute_pipeline_layout = device.createPipelineLayout(layout_info); const vk::PipelineShaderStageCreateInfo compute_stage = { - .stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"}; + .stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"}; const vk::ComputePipelineCreateInfo compute_info = {.stage = compute_stage, - .layout = compute_pipeline_layout}; + .layout = compute_pipeline_layout}; if (const auto result = device.createComputePipeline({}, compute_info); - result.result == vk::Result::eSuccess) { + result.result == vk::Result::eSuccess) { compute_pipeline = result.value; } else { LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!"); @@ -127,23 +123,81 @@ D24S8toRGBA8::~D24S8toRGBA8() { void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surface& dest, VideoCore::Rect2D dst_rect) { - source.Transition(vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, source.alloc.levels); - dest.Transition(vk::ImageLayout::eGeneral, 0, dest.alloc.levels); - const std::array textures = { - vk::DescriptorImageInfo{.imageView = source.GetDepthView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, - vk::DescriptorImageInfo{.imageView = source.GetStencilView(), - .imageLayout 
= vk::ImageLayout::eDepthStencilReadOnlyOptimal}, - vk::DescriptorImageInfo{.imageView = dest.GetImageView(), - .imageLayout = vk::ImageLayout::eGeneral}}; + vk::DescriptorImageInfo{.imageView = source.GetDepthView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, + vk::DescriptorImageInfo{.imageView = source.GetStencilView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, + vk::DescriptorImageInfo{.imageView = dest.GetImageView(), + .imageLayout = vk::ImageLayout::eGeneral}}; vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout); device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); - scheduler.Record([this, set, src_rect](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, - 0, set, {}); + runtime.GetRenderpassCache().ExitRenderpass(); + scheduler.Record([this, set, src_rect, + src_image = source.alloc.image, + dst_image = dest.alloc.image](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth | + vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const std::array post_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderRead, + .dstAccessMask = 
vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth | + vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + } + }; + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {}); render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); const auto src_offset = Common::MakeVec(src_rect.left, src_rect.bottom); @@ -151,6 +205,10 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf sizeof(Common::Vec2i), src_offset.AsArray()); render_cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1); + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); }); } diff --git 
a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d6935e941..46432c73c 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -164,7 +164,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); // Enable the instance extensions the backend uses - auto extensions = GetInstanceExtensions(window_info.type, enable_validation); + auto extensions = GetInstanceExtensions(window_info.type, false); // Use required platform-specific flags auto flags = GetInstanceFlags(); @@ -182,8 +182,13 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) .engineVersion = VK_MAKE_VERSION(1, 0, 0), .apiVersion = available_version}; + std::array layers; +#ifdef ANDROID + u32 layer_count = 1; + layers[0] = "VK_LAYER_KHRONOS_timeline_semaphore"; +#else u32 layer_count = 0; - std::array layers; +#endif if (enable_validation) { layers[layer_count++] = "VK_LAYER_KHRONOS_validation"; @@ -285,12 +290,12 @@ void Instance::CreateFormatTable() { const vk::FormatFeatureFlagBits attachment_usage = (aspect & vk::ImageAspectFlagBits::eDepth) ? 
vk::FormatFeatureFlagBits::eDepthStencilAttachment - : vk::FormatFeatureFlagBits::eColorAttachment; + : vk::FormatFeatureFlagBits::eColorAttachmentBlend; const bool supports_transfer = (properties.optimalTilingFeatures & transfer_usage) == transfer_usage; const bool supports_blit = (properties.optimalTilingFeatures & blit_usage) == blit_usage; - const bool supports_attachment = + bool supports_attachment = (properties.optimalTilingFeatures & attachment_usage) == attachment_usage; const bool supports_storage = (properties.optimalTilingFeatures & storage_usage) == storage_usage; @@ -325,6 +330,10 @@ void Instance::CreateFormatTable() { vk::to_string(format), vk::to_string(fallback)); } + if (pixel_format == VideoCore::PixelFormat::RGB8) { + supports_attachment = false; + } + const u32 index = static_cast(pixel_format); format_table[index] = FormatTraits{.transfer_support = supports_transfer, .blit_support = supports_blit, @@ -448,9 +457,10 @@ bool Instance::CreateDevice() { .shaderStorageImageMultisample = available.shaderStorageImageMultisample, .shaderClipDistance = available.shaderClipDistance}}, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{.indexTypeUint8 = true}, - feature_chain.get(), + //feature_chain.get(), feature_chain.get(), - feature_chain.get()}; + //feature_chain.get() + }; // Create logical device try { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index c645ff17a..751030fb6 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -90,6 +90,21 @@ public: return !features.logicOp; } + bool UseGeometryShaders() const { +#ifndef __ANDROID__ + return features.geometryShader; +#else + // Geometry shaders are extremely expensive on tilers so avoid them at all + // cost even if it hurts accuracy somewhat.
TODO: Make this an option + return false; +#endif + } + + /// Returns true if anisotropic filtering is supported + bool IsAnisotropicFilteringSupported() const { + return features.samplerAnisotropy; + } + /// Returns true when VK_KHR_timeline_semaphore is supported bool IsTimelineSemaphoreSupported() const { return timeline_semaphores; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5fe3db7bc..05253250d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -206,6 +206,7 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, const VertexLayout& layout) { PicaVSConfig config{regs.rasterizer, regs.vs, setup}; + config.state.use_geometry_shader = instance.UseGeometryShaders(); u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES; for (u32 i = 0; i < layout.attribute_count; i++) { @@ -243,7 +244,10 @@ void PipelineCache::UseTrivialVertexShader() { } void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { - return UseTrivialGeometryShader(); + if (!instance.UseGeometryShaders()) { + return UseTrivialGeometryShader(); + } + const PicaFixedGSConfig gs_config{regs}; const vk::ShaderModule handle = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry, @@ -285,7 +289,7 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) { const vk::DescriptorImageInfo image_info = { - .imageView = image_view, .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; + .imageView = image_view, .imageLayout = vk::ImageLayout::eGeneral}; desc_manager.SetBinding(1, binding, DescriptorData{image_info}); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5a6a4f9e7..2fd4b428c 100644 --- 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -24,7 +24,7 @@ namespace Vulkan { constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024; constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024; constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024; -constexpr u32 TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024; +constexpr u32 TEXTURE_BUFFER_SIZE = 512 * 1024; constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {vk::Format::eR32G32Sfloat}; @@ -62,9 +62,6 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} { - null_surface.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); - null_storage_surface.Transition(vk::ImageLayout::eGeneral, 0, 1); - uniform_buffer_alignment = instance.UniformMinAlignment(); uniform_size_aligned_vs = Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); @@ -563,7 +560,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { case TextureType::Shadow2D: { auto surface = res_cache.GetTextureSurface(texture); if (surface) { - surface->Transition(vk::ImageLayout::eGeneral, 0, surface->alloc.levels); pipeline_cache.BindStorageImage(0, surface->GetStorageView()); } else { pipeline_cache.BindStorageImage(0, null_storage_surface.GetImageView()); @@ -596,8 +592,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { auto surface = res_cache.GetTextureCube(config); if (surface) { - surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, - surface->alloc.levels); pipeline_cache.BindTexture(3, surface->GetImageView()); } else { pipeline_cache.BindTexture(3, null_surface.GetImageView()); @@ -628,12 +622,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { .extent = VideoCore::Extent{temp.GetScaledWidth(), temp.GetScaledHeight()}}; runtime.CopyTextures(*color_surface, temp, 
copy); - temp.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, temp.alloc.levels); - pipeline_cache.BindTexture(texture_index, temp.GetImageView()); } else { - surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, - surface->alloc.levels); pipeline_cache.BindTexture(texture_index, surface->GetImageView()); } @@ -708,21 +698,28 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { it->second = CreateFramebuffer(framebuffer_info); } - if (color_surface) { - color_surface->Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1); - } - - if (depth_surface) { - depth_surface->Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1); - } - const RenderpassState renderpass_info = { .renderpass = framebuffer_info.renderpass, .framebuffer = it->second, .render_area = vk::Rect2D{.offset = {static_cast(draw_rect.left), static_cast(draw_rect.bottom)}, .extent = {draw_rect.GetWidth(), draw_rect.GetHeight()}}, - .clear = {}}; + .clear = {}, + }; + + renderpass_cache.ExitRenderpass(); + + scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, + memory_write_barrier, {}, {}); + }); renderpass_cache.EnterRenderpass(renderpass_info); @@ -772,10 +769,10 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { depth_surface); } - static int submit_threshold = 80; + static int submit_threshold = 120; submit_threshold--; if (!submit_threshold) { - submit_threshold = 80; + submit_threshold = 120; scheduler.Flush(); } @@ -1094,7 +1091,7 @@ vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) { .addressModeU = PicaToVK::WrapMode(info.wrap_s), .addressModeV = 
PicaToVK::WrapMode(info.wrap_t), .mipLodBias = info.lod_bias / 256.0f, - .anisotropyEnable = true, + .anisotropyEnable = instance.IsAnisotropicFilteringSupported(), .maxAnisotropy = properties.limits.maxSamplerAnisotropy, .compareEnable = false, .compareOp = vk::CompareOp::eAlways, diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 75f16f5a1..81fce7156 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -63,10 +63,10 @@ RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler) cached_renderpasses[color][depth][0] = CreateRenderPass( color_format, depth_format, vk::AttachmentLoadOp::eLoad, - vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eColorAttachmentOptimal); + vk::ImageLayout::eGeneral, vk::ImageLayout::eGeneral); cached_renderpasses[color][depth][1] = CreateRenderPass( color_format, depth_format, vk::AttachmentLoadOp::eClear, - vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eColorAttachmentOptimal); + vk::ImageLayout::eGeneral, vk::ImageLayout::eGeneral); } } } @@ -170,7 +170,7 @@ vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format de .finalLayout = final_layout}; color_attachment_ref = vk::AttachmentReference{ - .attachment = attachment_count++, .layout = vk::ImageLayout::eColorAttachmentOptimal}; + .attachment = attachment_count++, .layout = vk::ImageLayout::eGeneral}; use_color = true; } @@ -182,12 +182,12 @@ vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format de .storeOp = vk::AttachmentStoreOp::eStore, .stencilLoadOp = load_op, .stencilStoreOp = vk::AttachmentStoreOp::eStore, - .initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal, - .finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal}; + .initialLayout = vk::ImageLayout::eGeneral, + .finalLayout = vk::ImageLayout::eGeneral}; 
depth_attachment_ref = vk::AttachmentReference{.attachment = attachment_count++, - .layout = vk::ImageLayout::eDepthStencilAttachmentOptimal}; + .layout = vk::ImageLayout::eGeneral}; use_depth = true; } diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index ba7a620c6..f64fee8b4 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -1633,12 +1633,12 @@ void main() { gl_Position = vert_position; gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0; - gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 - if (enable_clip1) { - gl_ClipDistance[1] = dot(clip_coef, vert_position); - } else { - gl_ClipDistance[1] = 0; - } + //gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 + //if (enable_clip1) { + // gl_ClipDistance[1] = dot(clip_coef, vert_position); + //} else { + // gl_ClipDistance[1] = 0; + //} } )"; @@ -1682,7 +1682,9 @@ layout (set = 0, binding = 0, std140) uniform vs_config { }; )"; - out += GetVertexInterfaceDeclaration(true); + if (!config.state.use_geometry_shader) { + out += GetVertexInterfaceDeclaration(true); + } // input attributes declaration for (std::size_t i = 0; i < used_regs.size(); ++i) { @@ -1754,61 +1756,63 @@ layout (set = 0, binding = 0, std140) uniform vs_config { } out += '\n'; - // output attributes declaration - for (u32 i = 0; i < config.state.num_outputs; ++i) { - out += fmt::format("vec4 vs_out_attr{};\n", i, i); - } - - const auto semantic = [&config = config.state](VSOutputAttributes::Semantic slot_semantic) -> std::string { - const u32 slot = static_cast(slot_semantic); - const u32 attrib = config.semantic_maps[slot].attribute_index; - const u32 comp = config.semantic_maps[slot].component_index; - if (attrib < config.gs_output_attributes) { - return fmt::format("vs_out_attr{}.{}", attrib, "xyzw"[comp]); + if (config.state.use_geometry_shader) { + // output 
attributes declaration + for (u32 i = 0; i < config.state.num_outputs; ++i) { + out += fmt::format("layout(location = {0}) out vec4 vs_out_attr{0};\n", i); + } + out += "void EmitVtx() {}\n"; + } else { + // output attributes declaration + for (u32 i = 0; i < config.state.num_outputs; ++i) { + out += fmt::format("vec4 vs_out_attr{};\n", i); } - return "0.0"; - }; - out += "vec4 GetVertexQuaternion() {\n"; - out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + - semantic(VSOutputAttributes::QUATERNION_Y) + ", " + - semantic(VSOutputAttributes::QUATERNION_Z) + ", " + - semantic(VSOutputAttributes::QUATERNION_W) + ");\n"; - out += "}\n\n"; + const auto semantic = [&config = config.state](VSOutputAttributes::Semantic slot_semantic) -> std::string { + const u32 slot = static_cast(slot_semantic); + const u32 attrib = config.semantic_maps[slot].attribute_index; + const u32 comp = config.semantic_maps[slot].component_index; + if (attrib < config.gs_output_attributes) { + return fmt::format("vs_out_attr{}.{}", attrib, "xyzw"[comp]); + } + return "0.0"; + }; - out += "void EmitVtx() {\n"; - out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " + - semantic(VSOutputAttributes::POSITION_Y) + ", " + - semantic(VSOutputAttributes::POSITION_Z) + ", " + - semantic(VSOutputAttributes::POSITION_W) + ");\n"; - out += " gl_Position = vtx_pos;\n"; - out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n"; - //out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n"; - //out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 - //out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; - //out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n"; + out += "vec4 GetVertexQuaternion() {\n"; + out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + + semantic(VSOutputAttributes::QUATERNION_Y) + ", " + + 
semantic(VSOutputAttributes::QUATERNION_Z) + ", " + + semantic(VSOutputAttributes::QUATERNION_W) + ");\n"; + out += "}\n\n"; - // This is inaccurate! - out += " normquat = GetVertexQuaternion();\n"; + out += "void EmitVtx() {\n"; + out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " + + semantic(VSOutputAttributes::POSITION_Y) + ", " + + semantic(VSOutputAttributes::POSITION_Z) + ", " + + semantic(VSOutputAttributes::POSITION_W) + ");\n"; + out += " gl_Position = vtx_pos;\n"; + out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n"; + out += " normquat = GetVertexQuaternion();\n"; - out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " + - semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) + - ", " + semantic(VSOutputAttributes::COLOR_A) + ");\n"; - out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n"; + out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " + + semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) + + ", " + semantic(VSOutputAttributes::COLOR_A) + ");\n"; + out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n"; - out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " + - semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n"; - out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " + - semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n"; + out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n"; + out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n"; - out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n"; - out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " + - semantic(VSOutputAttributes::VIEW_Y) + ", " + 
semantic(VSOutputAttributes::VIEW_Z) + - ");\n\n"; + out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n"; + out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " + + semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) + + ");\n\n"; - out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " + - semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n"; - out += "}\n"; + out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n"; + out += "}\n"; + } out += "\nvoid main() {\n"; for (u32 i = 0; i < config.state.num_outputs; ++i) { diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index 1b00bf9e0..f4b92257f 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -165,9 +165,7 @@ struct PicaShaderConfigCommon { // output_map[output register index] -> output attribute index std::array output_map; - - - + bool use_geometry_shader; u32 vs_output_attributes; u32 gs_output_attributes; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index 7a19cd1d9..1e4975b48 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -4,6 +4,7 @@ #pragma once +#include #include "video_core/renderer_vulkan/vk_common.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 9c65a0b9b..118a3f8de 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -40,7 +40,6 @@ namespace Vulkan { return vk::PipelineStageFlagBits::eVertexInput; case vk::BufferUsageFlagBits::eUniformBuffer: return vk::PipelineStageFlagBits::eVertexShader | - 
vk::PipelineStageFlagBits::eGeometryShader | vk::PipelineStageFlagBits::eFragmentShader; case vk::BufferUsageFlagBits::eUniformTexelBuffer: return vk::PipelineStageFlagBits::eFragmentShader; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 4acd1c8c1..eb329478d 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -29,7 +29,7 @@ struct StagingBuffer { class StreamBuffer { static constexpr u32 MAX_BUFFER_VIEWS = 3; - static constexpr u32 BUCKET_COUNT = 4; + static constexpr u32 BUCKET_COUNT = 2; public: /// Staging only constructor diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 55ce5a497..05105101a 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -66,6 +66,7 @@ void Swapchain::Create() { .queueFamilyIndexCount = queue_family_indices_count, .pQueueFamilyIndices = queue_family_indices.data(), .preTransform = transform, + .compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eInherit, .presentMode = present_mode, .clipped = true, .oldSwapchain = swapchain}; diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index 32b0401fc..b46a41d50 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -16,23 +16,56 @@ namespace Vulkan { -vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) { +[[nodiscard]] vk::ImageAspectFlags MakeAspect(VideoCore::SurfaceType type) { switch (type) { - case VideoCore::SurfaceType::Color: - case VideoCore::SurfaceType::Texture: - case VideoCore::SurfaceType::Fill: - return vk::ImageAspectFlagBits::eColor; - case VideoCore::SurfaceType::Depth: - return vk::ImageAspectFlagBits::eDepth; - case VideoCore::SurfaceType::DepthStencil: - 
return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; - default: - UNREACHABLE_MSG("Invalid surface type!"); + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + return vk::ImageAspectFlagBits::eColor; + case VideoCore::SurfaceType::Depth: + return vk::ImageAspectFlagBits::eDepth; + case VideoCore::SurfaceType::DepthStencil: + return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + default: + LOG_CRITICAL(Render_Vulkan, "Invalid surface type {}", type); + UNREACHABLE(); } return vk::ImageAspectFlagBits::eColor; } +[[nodiscard]] vk::Filter MakeFilter(VideoCore::PixelFormat pixel_format) { + switch (pixel_format) { + case VideoCore::PixelFormat::D16: + case VideoCore::PixelFormat::D24: + case VideoCore::PixelFormat::D24S8: + return vk::Filter::eNearest; + default: + return vk::Filter::eLinear; + } +} + +[[nodiscard]] vk::ClearValue MakeClearValue(VideoCore::ClearValue clear) { + static_assert(sizeof(VideoCore::ClearValue) == sizeof(vk::ClearValue)); + + vk::ClearValue value{}; + std::memcpy(&value, &clear, sizeof(vk::ClearValue)); + return value; + } + + [[nodiscard]] vk::ClearColorValue MakeClearColorValue(VideoCore::ClearValue clear) { + return vk::ClearColorValue{ + .float32 = std::array{clear.color[0], clear.color[1], clear.color[2], clear.color[3]} + }; + } + + [[nodiscard]] vk::ClearDepthStencilValue MakeClearDepthStencilValue(VideoCore::ClearValue clear) { + return vk::ClearDepthStencilValue{ + .depth = clear.depth, + .stencil = clear.stencil + }; + } + u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) { u32 depth_offset = 0; u32 stencil_offset = 4 * data.size / 5; @@ -133,7 +166,7 @@ void TextureRuntime::Finish() { ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type) { const FormatTraits traits = instance.GetTraits(format); - const vk::ImageAspectFlags aspect = 
ToVkAspect(VideoCore::GetFormatType(format)); + const vk::ImageAspectFlags aspect = MakeAspect(VideoCore::GetFormatType(format)); // Depth buffers are not supposed to support blit by the spec so don't require it. const bool is_suitable = traits.transfer_support && traits.attachment_support && @@ -271,6 +304,31 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma alloc.storage_view = device.createImageView(storage_view_info); } + scheduler.Record([image = alloc.image, + aspect = alloc.aspect](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) { + const vk::ImageMemoryBarrier init_barrier = { + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eNone, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }; + + upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTopOfPipe, + vk::DependencyFlagBits::eByRegion, {}, {}, init_barrier); + + }); + return alloc; } @@ -316,110 +374,305 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::spa bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, VideoCore::ClearValue value) { - const vk::ImageAspectFlags aspect = ToVkAspect(surface.type); renderpass_cache.ExitRenderpass(); - surface.Transition(vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1); - - vk::ClearValue clear_value{}; - if (aspect & vk::ImageAspectFlagBits::eColor) { - clear_value.color = vk::ClearColorValue{ - .float32 = - std::to_array({value.color[0], value.color[1], value.color[2], value.color[3]})}; - } else if (aspect & vk::ImageAspectFlagBits::eDepth || - 
aspect & vk::ImageAspectFlagBits::eStencil) { - clear_value.depthStencil = - vk::ClearDepthStencilValue{.depth = value.depth, .stencil = value.stencil}; - } + const bool is_color = surface.type != VideoCore::SurfaceType::Depth && + surface.type != VideoCore::SurfaceType::DepthStencil; if (clear.texture_rect == surface.GetScaledRect()) { - scheduler.Record([aspect, image = surface.alloc.image, clear_value, - clear](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + scheduler.Record([aspect = MakeAspect(surface.type), + image = surface.alloc.image, + value, is_color, clear](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { const vk::ImageSubresourceRange range = {.aspectMask = aspect, - .baseMipLevel = clear.texture_level, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1}; + .baseMipLevel = clear.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1}; - if (aspect & vk::ImageAspectFlagBits::eColor) { + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eColorAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = clear.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }; + + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eColorAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead | + 
vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = clear.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + if (is_color) { render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, - clear_value.color, range); - } else if (aspect & vk::ImageAspectFlagBits::eDepth || - aspect & vk::ImageAspectFlagBits::eStencil) { + MakeClearColorValue(value), range); + } else { render_cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, - clear_value.depthStencil, range); + MakeClearDepthStencilValue(value), range); } + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); }); - } else { - vk::RenderPass clear_renderpass; - if (aspect & vk::ImageAspectFlagBits::eColor) { - clear_renderpass = renderpass_cache.GetRenderpass( - surface.pixel_format, VideoCore::PixelFormat::Invalid, true); - surface.Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1); - } else if (aspect & vk::ImageAspectFlagBits::eDepth) { - clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, - surface.pixel_format, true); - surface.Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1); - } + return true; + } - const vk::ImageView framebuffer_view = surface.GetFramebufferView(); + ClearTextureWithRenderpass(surface, clear, value); + 
return true; +} - auto [it, new_framebuffer] = - clear_framebuffers.try_emplace(framebuffer_view, vk::Framebuffer{}); - if (new_framebuffer) { - const vk::FramebufferCreateInfo framebuffer_info = {.renderPass = clear_renderpass, - .attachmentCount = 1, - .pAttachments = &framebuffer_view, - .width = surface.GetScaledWidth(), - .height = surface.GetScaledHeight(), - .layers = 1}; +void TextureRuntime::ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value) { + const bool is_color = surface.type != VideoCore::SurfaceType::Depth && + surface.type != VideoCore::SurfaceType::DepthStencil; - vk::Device device = instance.GetDevice(); - it->second = device.createFramebuffer(framebuffer_info); - } + const vk::RenderPass clear_renderpass = + is_color ? renderpass_cache.GetRenderpass(surface.pixel_format, + VideoCore::PixelFormat::Invalid, true) + : renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, + surface.pixel_format, true); - const RenderpassState clear_info = { + const vk::ImageView framebuffer_view = surface.GetFramebufferView(); + + auto [it, new_framebuffer] = + clear_framebuffers.try_emplace(framebuffer_view, vk::Framebuffer{}); + if (new_framebuffer) { + const vk::FramebufferCreateInfo framebuffer_info = {.renderPass = clear_renderpass, + .attachmentCount = 1, + .pAttachments = &framebuffer_view, + .width = surface.GetScaledWidth(), + .height = surface.GetScaledHeight(), + .layers = 1}; + + vk::Device device = instance.GetDevice(); + it->second = device.createFramebuffer(framebuffer_info); + } + + const RenderpassState clear_info = { .renderpass = clear_renderpass, .framebuffer = it->second, .render_area = vk::Rect2D{.offset = {static_cast(clear.texture_rect.left), static_cast(clear.texture_rect.bottom)}, - .extent = {clear.texture_rect.GetWidth(), - clear.texture_rect.GetHeight()}}, - .clear = clear_value}; + .extent = {clear.texture_rect.GetWidth(), + clear.texture_rect.GetHeight()}}, + 
.clear = MakeClearValue(value) + }; - renderpass_cache.EnterRenderpass(clear_info); - renderpass_cache.ExitRenderpass(); - } + scheduler.Record([aspect = MakeAspect(surface.type), + image = surface.alloc.image, + level = clear.texture_level](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eColorAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }; - return true; + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + }); + + renderpass_cache.EnterRenderpass(clear_info); + renderpass_cache.ExitRenderpass(); + + scheduler.Record([aspect = MakeAspect(surface.type), + image = surface.alloc.image, + level = clear.texture_level](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eColorAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); } bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { renderpass_cache.ExitRenderpass(); - source.Transition(vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1); - dest.Transition(vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1); + scheduler.Record([src_image = source.alloc.image, + dst_image = dest.alloc.image, + aspect = MakeAspect(source.type), copy](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::ImageCopy image_copy = { + .srcSubresource = {.aspectMask = aspect, + .mipLevel = copy.src_level, + .baseArrayLayer = 0, + .layerCount = 1}, + .srcOffset = {static_cast(copy.src_offset.x), static_cast(copy.src_offset.y), 0}, + .dstSubresource = {.aspectMask = aspect, + .mipLevel = copy.dst_level, + .baseArrayLayer = 0, + .layerCount = 1}, + .dstOffset = {static_cast(copy.dst_offset.x), static_cast(copy.dst_offset.y), 0}, + .extent = {copy.extent.width, copy.extent.height, 1}}; - scheduler.Record([src_image = source.alloc.image, src_type = source.type, - dst_image = dest.alloc.image, dst_type = dest.type, - copy](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - const vk::ImageCopy image_copy = {.srcSubresource = {.aspectMask = ToVkAspect(src_type), - .mipLevel = copy.src_level, - .baseArrayLayer = 0, - .layerCount = 1}, - .srcOffset = {static_cast(copy.src_offset.x), - static_cast(copy.src_offset.y), 0}, - .dstSubresource = {.aspectMask = ToVkAspect(dst_type), - .mipLevel = copy.dst_level, - .baseArrayLayer = 0, - .layerCount = 1}, - .dstOffset = 
{static_cast(copy.dst_offset.x), - static_cast(copy.dst_offset.y), 0}, - .extent = {copy.extent.width, copy.extent.height, 1}}; + const std::array pre_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eColorAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = copy.src_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eColorAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = copy.dst_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }, + }; + const std::array post_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eNone, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = copy.src_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount 
= VK_REMAINING_ARRAY_LAYERS, + } + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | + vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eColorAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = copy.dst_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }, + }; - render_cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, - vk::ImageLayout::eTransferDstOptimal, image_copy); + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); + + render_cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, + dst_image, vk::ImageLayout::eTransferDstOptimal, image_copy); + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); + + const vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, + memory_write_barrier, {}, {}); }); return true; @@ -429,13 +682,10 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const 
VideoCore::TextureBlit& blit) { renderpass_cache.ExitRenderpass(); - source.Transition(vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1); - dest.Transition(vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1); - - scheduler.Record([src_iamge = source.alloc.image, src_type = source.type, - dst_image = dest.alloc.image, dst_type = dest.type, - format = source.pixel_format, - blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + scheduler.Record([src_image = source.alloc.image, + aspect = MakeAspect(source.type), + filter = MakeFilter(source.pixel_format), + dst_image = dest.alloc.image, blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { const std::array source_offsets = {vk::Offset3D{static_cast(blit.src_rect.left), static_cast(blit.src_rect.bottom), 0}, vk::Offset3D{static_cast(blit.src_rect.right), @@ -446,26 +696,111 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, vk::Offset3D{static_cast(blit.dst_rect.right), static_cast(blit.dst_rect.top), 1}}; - const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = ToVkAspect(src_type), - .mipLevel = blit.src_level, - .baseArrayLayer = blit.src_layer, - .layerCount = 1}, - .srcOffsets = source_offsets, - .dstSubresource = {.aspectMask = ToVkAspect(dst_type), - .mipLevel = blit.dst_level, - .baseArrayLayer = blit.dst_layer, - .layerCount = 1}, - .dstOffsets = dest_offsets}; + const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = aspect, + .mipLevel = blit.src_level, + .baseArrayLayer = blit.src_layer, + .layerCount = 1}, + .srcOffsets = source_offsets, + .dstSubresource = {.aspectMask = aspect, + .mipLevel = blit.dst_level, + .baseArrayLayer = blit.dst_layer, + .layerCount = 1}, + .dstOffsets = dest_offsets}; - // Don't use linear filtering on depth attachments - const vk::Filter filtering = format == VideoCore::PixelFormat::D24S8 || - format == VideoCore::PixelFormat::D24 || - format == VideoCore::PixelFormat::D16 - ? 
vk::Filter::eNearest - : vk::Filter::eLinear; + const std::array read_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = blit.src_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderRead | + vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = blit.dst_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + } + }; + const std::array write_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = blit.src_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite | 
vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = blit.dst_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + } + } + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers); + + render_cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, + dst_image, vk::ImageLayout::eTransferDstOptimal, blit_area, + filter); + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, write_barriers); + + const vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, + memory_write_barrier, {}, {}); - render_cmdbuf.blitImage(src_iamge, vk::ImageLayout::eTransferSrcOptimal, dst_image, - vk::ImageLayout::eTransferDstOptimal, blit_area, filtering); }); return true; @@ -522,115 +857,6 @@ bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const { !traits.attachment_support); } -void TextureRuntime::Transition(ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level, - u32 level_count) { - LayoutTracker& tracker = alloc.tracker; - if (tracker.IsRangeEqual(new_layout, level, level_count) || !alloc.image) { - return; - } - - renderpass_cache.ExitRenderpass(); - - struct LayoutInfo { - vk::AccessFlags access; - vk::PipelineStageFlags stage; - }; - - // Get 
optimal transition settings for every image layout. Settings taken from Dolphin - auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo { - LayoutInfo info; - switch (layout) { - case vk::ImageLayout::eUndefined: - // Layout undefined therefore contents undefined, and we don't care what happens to it. - info.access = vk::AccessFlagBits::eNone; - info.stage = vk::PipelineStageFlagBits::eTopOfPipe; - break; - case vk::ImageLayout::ePreinitialized: - // Image has been pre-initialized by the host, so ensure all writes have completed. - info.access = vk::AccessFlagBits::eHostWrite; - info.stage = vk::PipelineStageFlagBits::eHost; - break; - case vk::ImageLayout::eColorAttachmentOptimal: - // Image was being used as a color attachment, so ensure all writes have completed. - info.access = vk::AccessFlagBits::eColorAttachmentRead | - vk::AccessFlagBits::eColorAttachmentWrite; - info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput; - break; - case vk::ImageLayout::eDepthStencilAttachmentOptimal: - // Image was being used as a depthstencil attachment, so ensure all writes have - // completed. - info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead | - vk::AccessFlagBits::eDepthStencilAttachmentWrite; - info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests | - vk::PipelineStageFlagBits::eLateFragmentTests; - break; - case vk::ImageLayout::ePresentSrcKHR: - info.access = vk::AccessFlagBits::eNone; - info.stage = vk::PipelineStageFlagBits::eBottomOfPipe; - break; - case vk::ImageLayout::eShaderReadOnlyOptimal: - // Image was being used as a shader resource, make sure all reads have finished. - info.access = vk::AccessFlagBits::eShaderRead; - info.stage = vk::PipelineStageFlagBits::eFragmentShader; - break; - case vk::ImageLayout::eTransferSrcOptimal: - // Image was being used as a copy source, ensure all reads have finished. 
- info.access = vk::AccessFlagBits::eTransferRead; - info.stage = vk::PipelineStageFlagBits::eTransfer; - break; - case vk::ImageLayout::eTransferDstOptimal: - // Image was being used as a copy destination, ensure all writes have finished. - info.access = vk::AccessFlagBits::eTransferWrite; - info.stage = vk::PipelineStageFlagBits::eTransfer; - break; - case vk::ImageLayout::eGeneral: - info.access = vk::AccessFlagBits::eInputAttachmentRead; - info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput | - vk::PipelineStageFlagBits::eFragmentShader | - vk::PipelineStageFlagBits::eComputeShader; - break; - case vk::ImageLayout::eDepthStencilReadOnlyOptimal: - // Image is going to be sampled from a compute shader - info.access = vk::AccessFlagBits::eShaderRead; - info.stage = vk::PipelineStageFlagBits::eComputeShader; - break; - default: - LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout); - UNREACHABLE(); - } - - return info; - }; - - LayoutInfo dest = GetLayoutInfo(new_layout); - tracker.ForEachLayoutRange( - level, level_count, new_layout, [&](u32 start, u32 count, vk::ImageLayout old_layout) { - scheduler.Record([old_layout, new_layout, dest, start, count, image = alloc.image, - aspect = alloc.aspect, layers = alloc.layers, - GetLayoutInfo](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - LayoutInfo source = GetLayoutInfo(old_layout); - const vk::ImageMemoryBarrier barrier = {.srcAccessMask = source.access, - .dstAccessMask = dest.access, - .oldLayout = old_layout, - .newLayout = new_layout, - .image = image, - .subresourceRange = {.aspectMask = aspect, - .baseMipLevel = start, - .levelCount = count, - .baseArrayLayer = 0, - .layerCount = layers}}; - - render_cmdbuf.pipelineBarrier(source.stage, dest.stage, - vk::DependencyFlagBits::eByRegion, {}, {}, barrier); - }); - }); - - tracker.SetLayout(new_layout, level, level_count); - for (u32 i = 0; i < level_count; i++) { - ASSERT(alloc.tracker.GetLayout(level + i) == new_layout); - 
} -} - Surface::Surface(TextureRuntime& runtime) : runtime{runtime}, instance{runtime.GetInstance()}, scheduler{runtime.GetScheduler()} {} @@ -666,10 +892,6 @@ Surface::~Surface() { } } -void Surface::Transition(vk::ImageLayout new_layout, u32 level, u32 level_count) { - runtime.Transition(alloc, new_layout, level, level_count); -} - MICROPROFILE_DEFINE(Vulkan_Upload, "Vulkan", "Texture Upload", MP_RGB(128, 192, 64)); void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { MICROPROFILE_SCOPE(Vulkan_Upload); @@ -685,40 +907,92 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa if (is_scaled) { ScaledUpload(upload, staging); } else { - Transition(vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1); - scheduler.Record([aspect = alloc.aspect, image = alloc.image, format = alloc.format, - staging, upload](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - u32 region_count = 0; - std::array copy_regions; + scheduler.Record([aspect = alloc.aspect, image = alloc.image, + format = alloc.format, staging, upload](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + u32 num_copies = 1; + std::array buffer_image_copies; const VideoCore::Rect2D rect = upload.texture_rect; - vk::BufferImageCopy copy_region = { - .bufferOffset = staging.buffer_offset + upload.buffer_offset, - .bufferRowLength = rect.GetWidth(), - .bufferImageHeight = rect.GetHeight(), - .imageSubresource = {.aspectMask = aspect, - .mipLevel = upload.texture_level, - .baseArrayLayer = 0, - .layerCount = 1}, - .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, - .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; + buffer_image_copies[0] = vk::BufferImageCopy{ + .bufferOffset = staging.buffer_offset + upload.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource = {.aspectMask = aspect, + .mipLevel = upload.texture_level, + .baseArrayLayer = 0, + 
.layerCount = 1}, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; - if (aspect & vk::ImageAspectFlagBits::eColor) { - copy_regions[region_count++] = copy_region; - } else if (aspect & vk::ImageAspectFlagBits::eDepth) { - copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; - copy_regions[region_count++] = copy_region; - - if (aspect & vk::ImageAspectFlagBits::eStencil) { - copy_region.bufferOffset += UnpackDepthStencil(staging, format); - copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; - copy_regions[region_count++] = copy_region; - } + if (aspect & vk::ImageAspectFlagBits::eStencil) { + buffer_image_copies[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; + vk::BufferImageCopy& stencil_copy = buffer_image_copies[1]; + stencil_copy = buffer_image_copies[0]; + stencil_copy.bufferOffset += UnpackDepthStencil(staging, format); + stencil_copy.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; + num_copies++; } - render_cmdbuf.copyBufferToImage(staging.buffer, image, - vk::ImageLayout::eTransferDstOptimal, region_count, - copy_regions.data()); + static constexpr vk::AccessFlags WRITE_ACCESS_FLAGS = + vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eColorAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + static constexpr vk::AccessFlags READ_ACCESS_FLAGS = + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentRead; + + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = WRITE_ACCESS_FLAGS, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = { + .aspectMask = aspect, + 
.baseMipLevel = upload.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const vk::ImageMemoryBarrier write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = { + .aspectMask = aspect, + .baseMipLevel = upload.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); + + render_cmdbuf.copyBufferToImage(staging.buffer, image, vk::ImageLayout::eTransferDstOptimal, + num_copies, buffer_image_copies.data()); + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier); + + const vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, + memory_write_barrier, {}, {}); }); runtime.upload_buffer.Commit(staging.size); @@ -744,23 +1018,68 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi if (is_scaled) { ScaledDownload(download, staging); } else { - Transition(vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1); - scheduler.Record([aspect = alloc.aspect, image = alloc.image, staging, - download](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { 
+ scheduler.Record([aspect = alloc.aspect, image = alloc.image, + staging, download](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer){ const VideoCore::Rect2D rect = download.texture_rect; - const vk::BufferImageCopy copy_region = { - .bufferOffset = staging.buffer_offset + download.buffer_offset, - .bufferRowLength = rect.GetWidth(), - .bufferImageHeight = rect.GetHeight(), - .imageSubresource = {.aspectMask = aspect, - .mipLevel = download.texture_level, - .baseArrayLayer = 0, - .layerCount = 1}, - .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, - .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; + const vk::BufferImageCopy buffer_image_copy = { + .bufferOffset = staging.buffer_offset + download.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource = {.aspectMask = aspect, + .mipLevel = download.texture_level, + .baseArrayLayer = 0, + .layerCount = 1}, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; + + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = { + .aspectMask = aspect, + .baseMipLevel = download.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const vk::ImageMemoryBarrier image_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + 
.image = image, + .subresourceRange = { + .aspectMask = aspect, + .baseMipLevel = download.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); render_cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, - staging.buffer, copy_region); + staging.buffer, buffer_image_copy); + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, + memory_write_barrier, {}, image_write_barrier); }); runtime.download_buffer.Commit(staging.size); } diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index 191a54567..5b17dedb5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -114,9 +114,6 @@ public: void FormatConvert(const Surface& surface, bool upload, std::span source, std::span dest); - /// Transitions the mip level range of the surface to new_layout - void Transition(ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level, u32 level_count); - /// Fills the rectangle of the texture with the clear value provided bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, VideoCore::ClearValue value); @@ -140,7 +137,16 @@ public: /// Returns true if the provided pixel format needs convertion [[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const; + /// Returns a reference to the renderpass cache + [[nodiscard]] RenderpassCache& GetRenderpassCache() { + return renderpass_cache; + } 
+ private: + /// Clears a sub-rectangle of the texture by entering and exiting a clear renderpass whose render area is that rectangle + void ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value); + /// Returns the current Vulkan instance const Instance& GetInstance() const { return instance; @@ -175,9 +181,6 @@ public: TextureRuntime& runtime); ~Surface() override; - /// Transitions the mip level range of the surface to new_layout - void Transition(vk::ImageLayout new_layout, u32 level, u32 level_count); - /// Uploads pixel data in staging to a rectangle region of the surface texture void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);