android: Add vulkan support to frontend

This commit is contained in:
GPUCode
2022-12-28 14:01:39 +02:00
parent 0d1646e4df
commit ad45b9880d
27 changed files with 1311 additions and 550 deletions

View File

@@ -30,7 +30,8 @@
android:supportsRtl="true" android:supportsRtl="true"
android:isGame="true" android:isGame="true"
android:banner="@mipmap/ic_launcher" android:banner="@mipmap/ic_launcher"
android:requestLegacyExternalStorage="true"> android:requestLegacyExternalStorage="true"
android:debuggable="true">
<activity <activity
android:name="org.citra.citra_emu.ui.main.MainActivity" android:name="org.citra.citra_emu.ui.main.MainActivity"

View File

@@ -19,6 +19,8 @@ add_library(citra-android SHARED
default_ini.h default_ini.h
emu_window/emu_window.cpp emu_window/emu_window.cpp
emu_window/emu_window.h emu_window/emu_window.h
emu_window/emu_window_vk.cpp
emu_window/emu_window_vk.h
game_info.cpp game_info.cpp
game_info.h game_info.h
game_settings.cpp game_settings.cpp

View File

@@ -117,7 +117,9 @@ void Config::ReadValues() {
Settings::values.graphics_api = Settings::values.graphics_api =
static_cast<Settings::GraphicsAPI>(sdl2_config->GetInteger("Renderer", "graphics_api", 2)); static_cast<Settings::GraphicsAPI>(sdl2_config->GetInteger("Renderer", "graphics_api", 2));
Settings::values.async_command_recording = Settings::values.async_command_recording =
sdl2_config->GetBoolean("Renderer", "async_command_recording", true); sdl2_config->GetBoolean("Renderer", "async_command_recording", false);
Settings::values.spirv_shader_gen = sdl2_config->GetBoolean("Renderer", "spirv_shader_gen", true);
Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "renderer_debug", true);
Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true);
Settings::values.use_hw_shader = sdl2_config->GetBoolean("Renderer", "use_hw_shader", true); Settings::values.use_hw_shader = sdl2_config->GetBoolean("Renderer", "use_hw_shader", true);
Settings::values.shaders_accurate_mul = Settings::values.shaders_accurate_mul =

View File

@@ -0,0 +1,176 @@
// Copyright 2019 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstdlib>
#include <string>
#include <android/native_window_jni.h>
#include "common/logging/log.h"
#include "common/settings.h"
#include "input_common/main.h"
#include "jni/emu_window/emu_window_vk.h"
#include "jni/id_cache.h"
#include "jni/input_manager.h"
#include "network/network.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
/// Asks the Java side (static method on the native library class) whether the device is
/// currently in portrait orientation.
static bool IsPortraitMode() {
    auto* env = IDCache::GetEnvForThread();
    const auto portrait_flag = env->CallStaticBooleanMethod(IDCache::GetNativeLibraryClass(),
                                                            IDCache::GetIsPortraitMode());
    return portrait_flag != JNI_FALSE;
}
/// Fetches the landscape screen layout selected on the Java side and stores it in
/// Settings::values.layout_option.
static void UpdateLandscapeScreenLayout() {
    auto* env = IDCache::GetEnvForThread();
    const auto layout_id = env->CallStaticIntMethod(IDCache::GetNativeLibraryClass(),
                                                    IDCache::GetLandscapeScreenLayout());
    Settings::values.layout_option = static_cast<Settings::LayoutOption>(layout_id);
}
/// Records a new native window and halts presentation.
/// The surface is only stored in render_window here; PollEvents() is what later promotes it
/// to host_window and re-creates the window surface.
void EmuWindow_Android_Vulkan::OnSurfaceChanged(ANativeWindow* surface) {
render_window = surface;
// Presentation stays stopped until PollEvents() resets the state to Initial.
StopPresenting();
}
/// Forwards a touch press or release to the emulated touch screen.
/// @param x Horizontal touch position; negative values are clamped to 0.
/// @param y Vertical touch position; negative values are clamped to 0.
/// @param pressed True for a press, false for a release.
/// @return The result of TouchPressed() for a press; always true for a release.
bool EmuWindow_Android_Vulkan::OnTouchEvent(int x, int y, bool pressed) {
    if (!pressed) {
        TouchReleased();
        return true;
    }

    const auto touch_x = static_cast<unsigned>(std::max(x, 0));
    const auto touch_y = static_cast<unsigned>(std::max(y, 0));
    return TouchPressed(touch_x, touch_y);
}
/// Forwards a touch-move event to the emulated touch screen.
/// Negative coordinates are clamped to 0 before the conversion to unsigned.
void EmuWindow_Android_Vulkan::OnTouchMoved(int x, int y) {
    const auto touch_x = static_cast<unsigned>(std::max(x, 0));
    const auto touch_y = static_cast<unsigned>(std::max(y, 0));
    TouchMoved(touch_x, touch_y);
}
void EmuWindow_Android_Vulkan::OnFramebufferSizeChanged() {
UpdateLandscapeScreenLayout();
const bool is_portrait_mode{IsPortraitMode()};
const int bigger{window_width > window_height ? window_width : window_height};
const int smaller{window_width < window_height ? window_width : window_height};
if (is_portrait_mode) {
UpdateCurrentFramebufferLayout(smaller, bigger, is_portrait_mode);
} else {
UpdateCurrentFramebufferLayout(bigger, smaller, is_portrait_mode);
}
}
/// Builds the Vulkan emulator window on top of the given native window.
/// On a null surface, or if the shared context cannot be created, the failure is logged and
/// construction stops early, leaving the remaining setup undone.
EmuWindow_Android_Vulkan::EmuWindow_Android_Vulkan(ANativeWindow* surface) {
    LOG_DEBUG(Frontend, "Initializing EmuWindow_Android_Vulkan");

    if (surface == nullptr) {
        LOG_CRITICAL(Frontend, "surface is nullptr");
        return;
    }

    Network::Init();

    host_window = surface;
    CreateWindowSurface();

    core_context = CreateSharedContext();
    if (core_context == nullptr) {
        LOG_CRITICAL(Frontend, "CreateSharedContext() failed");
        return;
    }

    OnFramebufferSizeChanged();
}
bool EmuWindow_Android_Vulkan::CreateWindowSurface() {
if (!host_window) {
return true;
}
window_info.type = Frontend::WindowSystemType::Android;
window_info.render_surface = host_window;
return true;
}
/// Currently a no-op: the body contains only the commented-out EGL surface teardown below,
/// so nothing is released here.
void EmuWindow_Android_Vulkan::DestroyWindowSurface() {
/*if (!egl_surface) {
return;
}
if (eglGetCurrentSurface(EGL_DRAW) == egl_surface) {
eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
}
if (!eglDestroySurface(egl_display, egl_surface)) {
LOG_CRITICAL(Frontend, "eglDestroySurface() failed");
}
egl_surface = EGL_NO_SURFACE;*/
}
/// Currently a no-op: the body contains only the commented-out EGL context teardown below.
/// core_context is not touched here; it is a std::unique_ptr member.
void EmuWindow_Android_Vulkan::DestroyContext() {
/*if (!egl_context) {
return;
}
if (eglGetCurrentContext() == egl_context) {
eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
}
if (!eglDestroyContext(egl_display, egl_context)) {
LOG_CRITICAL(Frontend, "eglDestroySurface() failed");
}
if (!eglTerminate(egl_display)) {
LOG_CRITICAL(Frontend, "eglTerminate() failed");
}
egl_context = EGL_NO_CONTEXT;
egl_display = EGL_NO_DISPLAY;*/
}
/// Tears down the window surface first and the context second.
/// Both helpers are currently empty, so this performs no explicit cleanup.
EmuWindow_Android_Vulkan::~EmuWindow_Android_Vulkan() {
DestroyWindowSurface();
DestroyContext();
}
/// Creates a new SharedContext_Android and returns it as a Frontend::GraphicsContext.
/// SharedContext_Android adds nothing to its base class, so the context carries no state of
/// its own.
std::unique_ptr<Frontend::GraphicsContext> EmuWindow_Android_Vulkan::CreateSharedContext() const {
return std::make_unique<SharedContext_Android>();
}
/// Marks presentation as stopped.
/// The commented-out block is EGL code that is not executed; only the state flag changes.
void EmuWindow_Android_Vulkan::StopPresenting() {
/*if (presenting_state == PresentingState::Running) {
eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
}*/
presenting_state = PresentingState::Stopped;
}
/// Advances the presenting state machine.
/// The only effect is the Initial -> Running transition; the Running and Stopped states are
/// left unchanged. No frame is presented from this function.
void EmuWindow_Android_Vulkan::TryPresenting() {
    if (presenting_state == PresentingState::Initial) {
        presenting_state = PresentingState::Running;
    }
}
void EmuWindow_Android_Vulkan::PollEvents() {
if (!render_window) {
return;
}
host_window = render_window;
render_window = nullptr;
DestroyWindowSurface();
CreateWindowSurface();
OnFramebufferSizeChanged();
presenting_state = PresentingState::Initial;
}
/// Makes the core graphics context current on the calling thread.
/// core_context is still null when the constructor returned early (for example on a null
/// surface, which it already logs), so the call is skipped rather than dereferencing null.
void EmuWindow_Android_Vulkan::MakeCurrent() {
    if (!core_context) {
        return;
    }
    core_context->MakeCurrent();
}
/// Releases the core graphics context from the calling thread.
/// core_context is still null when the constructor returned early (for example on a null
/// surface, which it already logs), so the call is skipped rather than dereferencing null.
void EmuWindow_Android_Vulkan::DoneCurrent() {
    if (!core_context) {
        return;
    }
    core_context->DoneCurrent();
}

View File

@@ -0,0 +1,59 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "core/frontend/emu_window.h"
struct ANativeWindow;
/// Graphics context type handed out by EmuWindow_Android_Vulkan::CreateSharedContext().
/// It overrides nothing, so all behavior comes from Frontend::GraphicsContext.
class SharedContext_Android : public Frontend::GraphicsContext {};
/// Emulator window for the Android frontend that exposes an ANativeWindow to the renderer
/// through window_info (WindowSystemType::Android).
class EmuWindow_Android_Vulkan : public Frontend::EmuWindow {
public:
/// Wraps the given native window; a null surface is logged and leaves the object without a
/// core context.
EmuWindow_Android_Vulkan(ANativeWindow* surface);
~EmuWindow_Android_Vulkan();
// NOTE(review): no definition of Present() is visible in emu_window_vk.cpp — confirm it is
// defined elsewhere or drop the declaration.
void Present();
/// Called when the Android surface changes; stores the new surface and stops presenting.
/// The surface is applied on the next PollEvents() call.
void OnSurfaceChanged(ANativeWindow* surface);
/// Handles a touch press or release. Returns the result of TouchPressed() for a press and
/// true for a release.
bool OnTouchEvent(int x, int y, bool pressed);
/// Handles movement of the touch pointer.
void OnTouchMoved(int x, int y);
/// Applies a pending surface change, if any.
void PollEvents() override;
/// Forwards to the core context's MakeCurrent().
void MakeCurrent() override;
/// Forwards to the core context's DoneCurrent().
void DoneCurrent() override;
/// Moves the presenting state from Initial to Running; otherwise does nothing.
void TryPresenting();
/// Sets the presenting state to Stopped.
void StopPresenting();
/// Returns a new SharedContext_Android.
std::unique_ptr<GraphicsContext> CreateSharedContext() const override;
private:
/// Recomputes the framebuffer layout from window_width/window_height and the orientation.
void OnFramebufferSizeChanged();
/// Fills window_info from host_window; always returns true.
bool CreateWindowSurface();
/// Currently empty (legacy EGL teardown is commented out in the implementation).
void DestroyWindowSurface();
/// Currently empty (legacy EGL teardown is commented out in the implementation).
void DestroyContext();
/// Surface received from OnSurfaceChanged() that PollEvents() has not applied yet.
ANativeWindow* render_window{};
/// Surface currently published to the renderer via window_info.
ANativeWindow* host_window{};
/// Window dimensions used to compute the framebuffer layout; initialized to 1080x2220.
int window_width{1080};
int window_height{2220};
/// Context used by MakeCurrent()/DoneCurrent(); created in the constructor.
std::unique_ptr<Frontend::GraphicsContext> core_context;
/// Presentation state: Initial after construction or a surface swap, Running once
/// TryPresenting() has been called, Stopped after StopPresenting().
enum class PresentingState {
Initial,
Running,
Stopped,
};
/// Value-initialized, i.e. starts as PresentingState::Initial.
PresentingState presenting_state{};
};

View File

@@ -32,7 +32,7 @@
#include "jni/camera/ndk_camera.h" #include "jni/camera/ndk_camera.h"
#include "jni/camera/still_image_camera.h" #include "jni/camera/still_image_camera.h"
#include "jni/config.h" #include "jni/config.h"
#include "jni/emu_window/emu_window.h" #include "jni/emu_window/emu_window_vk.h"
#include "jni/game_info.h" #include "jni/game_info.h"
#include "jni/game_settings.h" #include "jni/game_settings.h"
#include "jni/id_cache.h" #include "jni/id_cache.h"
@@ -48,7 +48,7 @@ namespace {
ANativeWindow* s_surf; ANativeWindow* s_surf;
std::unique_ptr<EmuWindow_Android> window; std::unique_ptr<EmuWindow_Android_Vulkan> window;
std::atomic<bool> stop_run{true}; std::atomic<bool> stop_run{true};
std::atomic<bool> pause_emulation{false}; std::atomic<bool> pause_emulation{false};
@@ -146,7 +146,7 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) {
return Core::System::ResultStatus::ErrorLoader; return Core::System::ResultStatus::ErrorLoader;
} }
window = std::make_unique<EmuWindow_Android>(s_surf); window = std::make_unique<EmuWindow_Android_Vulkan>(s_surf);
Core::System& system{Core::System::GetInstance()}; Core::System& system{Core::System::GetInstance()};

View File

@@ -167,27 +167,7 @@ void RendererVulkan::PrepareRendertarget() {
LCD::Read(color_fill.raw, lcd_color_addr); LCD::Read(color_fill.raw, lcd_color_addr);
if (color_fill.is_enabled) { if (color_fill.is_enabled) {
TextureInfo& texture = screen_infos[i].texture; LoadColorToActiveVkTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture);
runtime.Transition(texture.alloc, vk::ImageLayout::eTransferDstOptimal, 0,
texture.alloc.levels);
scheduler.Record([image = texture.alloc.image,
color_fill](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ClearColorValue clear_color = {
.float32 = std::array{color_fill.color_r / 255.0f, color_fill.color_g / 255.0f,
color_fill.color_b / 255.0f, 1.0f}};
const vk::ImageSubresourceRange range = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
};
render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal,
clear_color, range);
});
} else { } else {
TextureInfo& texture = screen_infos[i].texture; TextureInfo& texture = screen_infos[i].texture;
if (texture.width != framebuffer.width || texture.height != framebuffer.height || if (texture.width != framebuffer.width || texture.height != framebuffer.height ||
@@ -217,7 +197,7 @@ void RendererVulkan::BeginRendering() {
present_textures[i] = vk::DescriptorImageInfo{ present_textures[i] = vk::DescriptorImageInfo{
.imageView = info.display_texture ? info.display_texture->image_view .imageView = info.display_texture ? info.display_texture->image_view
: info.texture.alloc.image_view, : info.texture.alloc.image_view,
.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; .imageLayout = vk::ImageLayout::eGeneral};
} }
present_textures[3] = vk::DescriptorImageInfo{.sampler = present_samplers[current_sampler]}; present_textures[3] = vk::DescriptorImageInfo{.sampler = present_samplers[current_sampler]};
@@ -301,7 +281,7 @@ void RendererVulkan::CompileShaders() {
.mipmapMode = vk::SamplerMipmapMode::eLinear, .mipmapMode = vk::SamplerMipmapMode::eLinear,
.addressModeU = vk::SamplerAddressMode::eClampToEdge, .addressModeU = vk::SamplerAddressMode::eClampToEdge,
.addressModeV = vk::SamplerAddressMode::eClampToEdge, .addressModeV = vk::SamplerAddressMode::eClampToEdge,
.anisotropyEnable = true, .anisotropyEnable = instance.IsAnisotropicFilteringSupported(),
.maxAnisotropy = properties.limits.maxSamplerAnisotropy, .maxAnisotropy = properties.limits.maxSamplerAnisotropy,
.compareEnable = false, .compareEnable = false,
.compareOp = vk::CompareOp::eAlways, .compareOp = vk::CompareOp::eAlways,
@@ -492,6 +472,55 @@ void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture,
} }
} }
/// Fills every mip level and array layer of a screen texture with a solid colour.
///
/// Any active render pass is ended first. The recorded commands then transition the image
/// from eGeneral to eTransferDstOptimal, clear it, and transition it back to eGeneral.
///
/// @param color_r Red channel, 0-255.
/// @param color_g Green channel, 0-255.
/// @param color_b Blue channel, 0-255.
/// @param texture Texture whose allocation image is cleared. The barriers name eGeneral as the
///                old layout, so the image is expected to be in that layout when called.
void RendererVulkan::LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b,
                                                const TextureInfo& texture) {
    // Normalise the 8-bit channels to [0, 1]; alpha is always opaque.
    const vk::ClearColorValue fill_value = {
        .float32 = std::array{color_r / 255.0f, color_g / 255.0f, color_b / 255.0f, 1.0f}};

    renderpass_cache.ExitRenderpass();

    scheduler.Record([target = texture.alloc.image,
                      fill_value](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
        const vk::ImageSubresourceRange whole_image = {
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .baseMipLevel = 0,
            .levelCount = VK_REMAINING_MIP_LEVELS,
            .baseArrayLayer = 0,
            .layerCount = VK_REMAINING_ARRAY_LAYERS};

        const vk::AccessFlags read_access =
            vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead;
        const vk::AccessFlags clear_access = vk::AccessFlagBits::eTransferWrite;

        // The two barriers mirror each other, so both come from one helper.
        const auto make_transition = [&](vk::AccessFlags src_access, vk::AccessFlags dst_access,
                                         vk::ImageLayout from, vk::ImageLayout to) {
            return vk::ImageMemoryBarrier{.srcAccessMask = src_access,
                                          .dstAccessMask = dst_access,
                                          .oldLayout = from,
                                          .newLayout = to,
                                          .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                                          .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                                          .image = target,
                                          .subresourceRange = whole_image};
        };

        const vk::ImageMemoryBarrier to_transfer_dst =
            make_transition(read_access, clear_access, vk::ImageLayout::eGeneral,
                            vk::ImageLayout::eTransferDstOptimal);
        const vk::ImageMemoryBarrier back_to_general =
            make_transition(clear_access, read_access, vk::ImageLayout::eTransferDstOptimal,
                            vk::ImageLayout::eGeneral);

        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
                               vk::PipelineStageFlagBits::eTransfer,
                               vk::DependencyFlagBits::eByRegion, {}, {}, to_transfer_dst);

        cmdbuf.clearColorImage(target, vk::ImageLayout::eTransferDstOptimal, fill_value,
                               whole_image);

        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                               vk::PipelineStageFlagBits::eAllCommands,
                               vk::DependencyFlagBits::eByRegion, {}, {}, back_to_general);
    });
}
void RendererVulkan::ReloadSampler() { void RendererVulkan::ReloadSampler() {
current_sampler = !Settings::values.filter_mode.GetValue(); current_sampler = !Settings::values.filter_mode.GetValue();
} }
@@ -857,14 +886,21 @@ void RendererVulkan::SwapBuffers() {
render_cmdbuf.setScissor(0, scissor); render_cmdbuf.setScissor(0, scissor);
}); });
DrawScreens(layout, false);
renderpass_cache.ExitRenderpass(); renderpass_cache.ExitRenderpass();
for (auto& info : screen_infos) { scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
ImageAlloc* alloc = info.display_texture ? info.display_texture : &info.texture.alloc; const vk::MemoryBarrier memory_write_barrier = {
runtime.Transition(*alloc, vk::ImageLayout::eShaderReadOnlyOptimal, 0, alloc->levels); .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
} .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
};
DrawScreens(layout, false); render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion,
memory_write_barrier, {}, {});
});
const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore(); const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore();

View File

@@ -88,6 +88,7 @@ private:
void BuildPipelines(); void BuildPipelines();
void ConfigureFramebufferTexture(TextureInfo& texture, void ConfigureFramebufferTexture(TextureInfo& texture,
const GPU::Regs::FramebufferConfig& framebuffer); const GPU::Regs::FramebufferConfig& framebuffer);
void LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture);
void ConfigureRenderPipeline(); void ConfigureRenderPipeline();
void PrepareRendertarget(); void PrepareRendertarget();
void BeginRendering(); void BeginRendering();

View File

@@ -8,13 +8,13 @@
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan { namespace Vulkan {
BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler, BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager)
DescriptorManager& desc_manager) : scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} {
: scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} {
constexpr std::string_view cs_source = R"( constexpr std::string_view cs_source = R"(
#version 450 core #version 450 core
#extension GL_EXT_samplerless_texture_functions : require #extension GL_EXT_samplerless_texture_functions : require
@@ -22,95 +22,91 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform highp texture2D depth; layout(set = 0, binding = 0) uniform highp texture2D depth;
layout(set = 0, binding = 1) uniform lowp utexture2D stencil; layout(set = 0, binding = 1) uniform lowp utexture2D stencil;
layout(set = 0, binding = 2, r32ui) uniform highp writeonly uimage2D color; layout(set = 0, binding = 2, r32ui) uniform highp writeonly uimage2D color;
layout(push_constant, std140) uniform ComputeInfo { layout(push_constant, std140) uniform ComputeInfo {
mediump ivec2 src_offset; mediump ivec2 src_offset;
}; };
void main() { void main() {
ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy); ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy);
ivec2 tex_coord = src_offset + dst_coord; ivec2 tex_coord = src_offset + dst_coord;
highp uint depth_val =
highp uint depth_val = uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0));
uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0)); lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; highp uint value = stencil_val | (depth_val << 8);
highp uint value = stencil_val | (depth_val << 8); imageStore(color, dst_coord, uvec4(value));
imageStore(color, dst_coord, uvec4(value));
} }
)"; )";
compute_shader = compute_shader =
Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High); Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High);
const std::array compute_layout_bindings = { const std::array compute_layout_bindings = {
vk::DescriptorSetLayoutBinding{.binding = 0, vk::DescriptorSetLayoutBinding{.binding = 0,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute}, .stageFlags = vk::ShaderStageFlagBits::eCompute},
vk::DescriptorSetLayoutBinding{.binding = 1, vk::DescriptorSetLayoutBinding{.binding = 1,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute}, .stageFlags = vk::ShaderStageFlagBits::eCompute},
vk::DescriptorSetLayoutBinding{.binding = 2, vk::DescriptorSetLayoutBinding{.binding = 2,
.descriptorType = vk::DescriptorType::eStorageImage, .descriptorType = vk::DescriptorType::eStorageImage,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute}}; .stageFlags = vk::ShaderStageFlagBits::eCompute}};
const vk::DescriptorSetLayoutCreateInfo compute_layout_info = { const vk::DescriptorSetLayoutCreateInfo compute_layout_info = {
.bindingCount = static_cast<u32>(compute_layout_bindings.size()), .bindingCount = static_cast<u32>(compute_layout_bindings.size()),
.pBindings = compute_layout_bindings.data()}; .pBindings = compute_layout_bindings.data()};
descriptor_layout = device.createDescriptorSetLayout(compute_layout_info); descriptor_layout = device.createDescriptorSetLayout(compute_layout_info);
const std::array update_template_entries = { const std::array update_template_entries = {
vk::DescriptorUpdateTemplateEntry{.dstBinding = 0, vk::DescriptorUpdateTemplateEntry{.dstBinding = 0,
.dstArrayElement = 0, .dstArrayElement = 0,
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = vk::DescriptorType::eSampledImage,
.offset = 0, .offset = 0,
.stride = sizeof(vk::DescriptorImageInfo)}, .stride = sizeof(vk::DescriptorImageInfo)},
vk::DescriptorUpdateTemplateEntry{.dstBinding = 1, vk::DescriptorUpdateTemplateEntry{.dstBinding = 1,
.dstArrayElement = 0, .dstArrayElement = 0,
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = vk::DescriptorType::eSampledImage,
.offset = sizeof(vk::DescriptorImageInfo), .offset = sizeof(vk::DescriptorImageInfo),
.stride = 0}, .stride = 0},
vk::DescriptorUpdateTemplateEntry{.dstBinding = 2, vk::DescriptorUpdateTemplateEntry{.dstBinding = 2,
.dstArrayElement = 0, .dstArrayElement = 0,
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageImage, .descriptorType = vk::DescriptorType::eStorageImage,
.offset = 2 * sizeof(vk::DescriptorImageInfo), .offset = 2 * sizeof(vk::DescriptorImageInfo),
.stride = 0}}; .stride = 0}};
const vk::DescriptorUpdateTemplateCreateInfo template_info = { const vk::DescriptorUpdateTemplateCreateInfo template_info = {
.descriptorUpdateEntryCount = static_cast<u32>(update_template_entries.size()), .descriptorUpdateEntryCount = static_cast<u32>(update_template_entries.size()),
.pDescriptorUpdateEntries = update_template_entries.data(), .pDescriptorUpdateEntries = update_template_entries.data(),
.templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
.descriptorSetLayout = descriptor_layout}; .descriptorSetLayout = descriptor_layout};
update_template = device.createDescriptorUpdateTemplate(template_info); update_template = device.createDescriptorUpdateTemplate(template_info);
const vk::PushConstantRange push_range = { const vk::PushConstantRange push_range = {
.stageFlags = vk::ShaderStageFlagBits::eCompute, .stageFlags = vk::ShaderStageFlagBits::eCompute,
.offset = 0, .offset = 0,
.size = sizeof(Common::Vec2i), .size = sizeof(Common::Vec2i),
}; };
const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = 1, const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = 1,
.pSetLayouts = &descriptor_layout, .pSetLayouts = &descriptor_layout,
.pushConstantRangeCount = 1, .pushConstantRangeCount = 1,
.pPushConstantRanges = &push_range}; .pPushConstantRanges = &push_range};
compute_pipeline_layout = device.createPipelineLayout(layout_info); compute_pipeline_layout = device.createPipelineLayout(layout_info);
const vk::PipelineShaderStageCreateInfo compute_stage = { const vk::PipelineShaderStageCreateInfo compute_stage = {
.stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"}; .stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"};
const vk::ComputePipelineCreateInfo compute_info = {.stage = compute_stage, const vk::ComputePipelineCreateInfo compute_info = {.stage = compute_stage,
.layout = compute_pipeline_layout}; .layout = compute_pipeline_layout};
if (const auto result = device.createComputePipeline({}, compute_info); if (const auto result = device.createComputePipeline({}, compute_info);
result.result == vk::Result::eSuccess) { result.result == vk::Result::eSuccess) {
compute_pipeline = result.value; compute_pipeline = result.value;
} else { } else {
LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!"); LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!");
@@ -128,23 +124,98 @@ BlitHelper::~BlitHelper() {
void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) { const VideoCore::TextureBlit& blit) {
source.Transition(vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, source.alloc.levels);
dest.Transition(vk::ImageLayout::eGeneral, 0, dest.alloc.levels);
const std::array textures = { const std::array textures = {
vk::DescriptorImageInfo{.imageView = source.GetDepthView(), vk::DescriptorImageInfo{.imageView = source.GetDepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal},
vk::DescriptorImageInfo{.imageView = source.GetStencilView(), vk::DescriptorImageInfo{.imageView = source.GetStencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal},
vk::DescriptorImageInfo{.imageView = dest.GetImageView(), vk::DescriptorImageInfo{.imageView = dest.GetImageView(),
.imageLayout = vk::ImageLayout::eGeneral}}; .imageLayout = vk::ImageLayout::eGeneral}};
vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout); vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout);
device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); device.updateDescriptorSetWithTemplate(set, update_template, textures[0]);
scheduler.Record([this, set, blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { scheduler.Record([this, set, blit,
render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, src_image = source.alloc.image,
0, set, {}); dst_image = dest.alloc.image](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const std::array pre_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eDepth |
vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
}
};
const std::array post_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eDepth |
vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
}
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {});
render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);
const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom); const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom);
@@ -152,6 +223,10 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
sizeof(Common::Vec2i), src_offset.AsArray()); sizeof(Common::Vec2i), src_offset.AsArray());
render_cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1); render_cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1);
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
}); });
} }

View File

@@ -34,7 +34,7 @@ constexpr static std::array RASTERIZER_SETS = {
vk::DescriptorType::eStorageImage, vk::DescriptorType::eStorageImage, vk::DescriptorType::eStorageImage, vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage, vk::DescriptorType::eStorageImage, vk::DescriptorType::eStorageImage, vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage}, vk::DescriptorType::eStorageImage},
.binding_count = 7}}; .binding_count = 4}};
constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) { constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) {
vk::ShaderStageFlags flags; vk::ShaderStageFlags flags;

View File

@@ -6,14 +6,14 @@
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h" #include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan { namespace Vulkan {
D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, Scheduler& scheduler, D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager, TextureRuntime& runtime) DescriptorManager& desc_manager, TextureRuntime& runtime)
: FormatReinterpreterBase{instance, scheduler, desc_manager, runtime}, : FormatReinterpreterBase{instance, scheduler, desc_manager, runtime}, device{instance.GetDevice()} {
device{instance.GetDevice()} {
constexpr std::string_view cs_source = R"( constexpr std::string_view cs_source = R"(
#version 450 core #version 450 core
#extension GL_EXT_samplerless_texture_functions : require #extension GL_EXT_samplerless_texture_functions : require
@@ -21,95 +21,91 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform highp texture2D depth; layout(set = 0, binding = 0) uniform highp texture2D depth;
layout(set = 0, binding = 1) uniform lowp utexture2D stencil; layout(set = 0, binding = 1) uniform lowp utexture2D stencil;
layout(set = 0, binding = 2, rgba8) uniform highp writeonly image2D color; layout(set = 0, binding = 2, rgba8) uniform highp writeonly image2D color;
layout(push_constant, std140) uniform ComputeInfo { layout(push_constant, std140) uniform ComputeInfo {
mediump ivec2 src_offset; mediump ivec2 src_offset;
}; };
void main() { void main() {
ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy); ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy);
highp uint depth_val =
highp uint depth_val = uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; highp uvec4 components =
highp uvec4 components = uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0));
imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0));
} }
)"; )";
compute_shader = compute_shader =
Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High); Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High);
const std::array compute_layout_bindings = { const std::array compute_layout_bindings = {
vk::DescriptorSetLayoutBinding{.binding = 0, vk::DescriptorSetLayoutBinding{.binding = 0,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute}, .stageFlags = vk::ShaderStageFlagBits::eCompute},
vk::DescriptorSetLayoutBinding{.binding = 1, vk::DescriptorSetLayoutBinding{.binding = 1,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute}, .stageFlags = vk::ShaderStageFlagBits::eCompute},
vk::DescriptorSetLayoutBinding{.binding = 2, vk::DescriptorSetLayoutBinding{.binding = 2,
.descriptorType = vk::DescriptorType::eStorageImage, .descriptorType = vk::DescriptorType::eStorageImage,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute}}; .stageFlags = vk::ShaderStageFlagBits::eCompute}};
const vk::DescriptorSetLayoutCreateInfo compute_layout_info = { const vk::DescriptorSetLayoutCreateInfo compute_layout_info = {
.bindingCount = static_cast<u32>(compute_layout_bindings.size()), .bindingCount = static_cast<u32>(compute_layout_bindings.size()),
.pBindings = compute_layout_bindings.data()}; .pBindings = compute_layout_bindings.data()};
descriptor_layout = device.createDescriptorSetLayout(compute_layout_info); descriptor_layout = device.createDescriptorSetLayout(compute_layout_info);
const std::array update_template_entries = { const std::array update_template_entries = {
vk::DescriptorUpdateTemplateEntry{.dstBinding = 0, vk::DescriptorUpdateTemplateEntry{.dstBinding = 0,
.dstArrayElement = 0, .dstArrayElement = 0,
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = vk::DescriptorType::eSampledImage,
.offset = 0, .offset = 0,
.stride = sizeof(vk::DescriptorImageInfo)}, .stride = sizeof(vk::DescriptorImageInfo)},
vk::DescriptorUpdateTemplateEntry{.dstBinding = 1, vk::DescriptorUpdateTemplateEntry{.dstBinding = 1,
.dstArrayElement = 0, .dstArrayElement = 0,
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = vk::DescriptorType::eSampledImage,
.offset = sizeof(vk::DescriptorImageInfo), .offset = sizeof(vk::DescriptorImageInfo),
.stride = 0}, .stride = 0},
vk::DescriptorUpdateTemplateEntry{.dstBinding = 2, vk::DescriptorUpdateTemplateEntry{.dstBinding = 2,
.dstArrayElement = 0, .dstArrayElement = 0,
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageImage, .descriptorType = vk::DescriptorType::eStorageImage,
.offset = 2 * sizeof(vk::DescriptorImageInfo), .offset = 2 * sizeof(vk::DescriptorImageInfo),
.stride = 0}}; .stride = 0}};
const vk::DescriptorUpdateTemplateCreateInfo template_info = { const vk::DescriptorUpdateTemplateCreateInfo template_info = {
.descriptorUpdateEntryCount = static_cast<u32>(update_template_entries.size()), .descriptorUpdateEntryCount = static_cast<u32>(update_template_entries.size()),
.pDescriptorUpdateEntries = update_template_entries.data(), .pDescriptorUpdateEntries = update_template_entries.data(),
.templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
.descriptorSetLayout = descriptor_layout}; .descriptorSetLayout = descriptor_layout};
update_template = device.createDescriptorUpdateTemplate(template_info); update_template = device.createDescriptorUpdateTemplate(template_info);
const vk::PushConstantRange push_range = { const vk::PushConstantRange push_range = {
.stageFlags = vk::ShaderStageFlagBits::eCompute, .stageFlags = vk::ShaderStageFlagBits::eCompute,
.offset = 0, .offset = 0,
.size = sizeof(Common::Vec2i), .size = sizeof(Common::Vec2i),
}; };
const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = 1, const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = 1,
.pSetLayouts = &descriptor_layout, .pSetLayouts = &descriptor_layout,
.pushConstantRangeCount = 1, .pushConstantRangeCount = 1,
.pPushConstantRanges = &push_range}; .pPushConstantRanges = &push_range};
compute_pipeline_layout = device.createPipelineLayout(layout_info); compute_pipeline_layout = device.createPipelineLayout(layout_info);
const vk::PipelineShaderStageCreateInfo compute_stage = { const vk::PipelineShaderStageCreateInfo compute_stage = {
.stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"}; .stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"};
const vk::ComputePipelineCreateInfo compute_info = {.stage = compute_stage, const vk::ComputePipelineCreateInfo compute_info = {.stage = compute_stage,
.layout = compute_pipeline_layout}; .layout = compute_pipeline_layout};
if (const auto result = device.createComputePipeline({}, compute_info); if (const auto result = device.createComputePipeline({}, compute_info);
result.result == vk::Result::eSuccess) { result.result == vk::Result::eSuccess) {
compute_pipeline = result.value; compute_pipeline = result.value;
} else { } else {
LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!"); LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!");
@@ -127,23 +123,81 @@ D24S8toRGBA8::~D24S8toRGBA8() {
void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surface& dest, void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surface& dest,
VideoCore::Rect2D dst_rect) { VideoCore::Rect2D dst_rect) {
source.Transition(vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, source.alloc.levels);
dest.Transition(vk::ImageLayout::eGeneral, 0, dest.alloc.levels);
const std::array textures = { const std::array textures = {
vk::DescriptorImageInfo{.imageView = source.GetDepthView(), vk::DescriptorImageInfo{.imageView = source.GetDepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal},
vk::DescriptorImageInfo{.imageView = source.GetStencilView(), vk::DescriptorImageInfo{.imageView = source.GetStencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal},
vk::DescriptorImageInfo{.imageView = dest.GetImageView(), vk::DescriptorImageInfo{.imageView = dest.GetImageView(),
.imageLayout = vk::ImageLayout::eGeneral}}; .imageLayout = vk::ImageLayout::eGeneral}};
vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout); vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout);
device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); device.updateDescriptorSetWithTemplate(set, update_template, textures[0]);
scheduler.Record([this, set, src_rect](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { runtime.GetRenderpassCache().ExitRenderpass();
render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, scheduler.Record([this, set, src_rect,
0, set, {}); src_image = source.alloc.image,
dst_image = dest.alloc.image](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ImageMemoryBarrier pre_barrier = {
.srcAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eDepth |
vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const std::array post_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eDepth |
vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
}
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {});
render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);
const auto src_offset = Common::MakeVec(src_rect.left, src_rect.bottom); const auto src_offset = Common::MakeVec(src_rect.left, src_rect.bottom);
@@ -151,6 +205,10 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
sizeof(Common::Vec2i), src_offset.AsArray()); sizeof(Common::Vec2i), src_offset.AsArray());
render_cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1); render_cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1);
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
}); });
} }

View File

@@ -164,7 +164,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index)
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
// Enable the instance extensions the backend uses // Enable the instance extensions the backend uses
auto extensions = GetInstanceExtensions(window_info.type, enable_validation); auto extensions = GetInstanceExtensions(window_info.type, false);
// Use required platform-specific flags // Use required platform-specific flags
auto flags = GetInstanceFlags(); auto flags = GetInstanceFlags();
@@ -182,8 +182,13 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index)
.engineVersion = VK_MAKE_VERSION(1, 0, 0), .engineVersion = VK_MAKE_VERSION(1, 0, 0),
.apiVersion = available_version}; .apiVersion = available_version};
std::array<const char*, 3> layers;
#ifdef ANDROID
u32 layer_count = 1;
layers[0] = "VK_LAYER_KHRONOS_timeline_semaphore";
#else
u32 layer_count = 0; u32 layer_count = 0;
std::array<const char*, 2> layers; #endif
if (enable_validation) { if (enable_validation) {
layers[layer_count++] = "VK_LAYER_KHRONOS_validation"; layers[layer_count++] = "VK_LAYER_KHRONOS_validation";
@@ -285,12 +290,12 @@ void Instance::CreateFormatTable() {
const vk::FormatFeatureFlagBits attachment_usage = const vk::FormatFeatureFlagBits attachment_usage =
(aspect & vk::ImageAspectFlagBits::eDepth) (aspect & vk::ImageAspectFlagBits::eDepth)
? vk::FormatFeatureFlagBits::eDepthStencilAttachment ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
: vk::FormatFeatureFlagBits::eColorAttachment; : vk::FormatFeatureFlagBits::eColorAttachmentBlend;
const bool supports_transfer = const bool supports_transfer =
(properties.optimalTilingFeatures & transfer_usage) == transfer_usage; (properties.optimalTilingFeatures & transfer_usage) == transfer_usage;
const bool supports_blit = (properties.optimalTilingFeatures & blit_usage) == blit_usage; const bool supports_blit = (properties.optimalTilingFeatures & blit_usage) == blit_usage;
const bool supports_attachment = bool supports_attachment =
(properties.optimalTilingFeatures & attachment_usage) == attachment_usage; (properties.optimalTilingFeatures & attachment_usage) == attachment_usage;
const bool supports_storage = const bool supports_storage =
(properties.optimalTilingFeatures & storage_usage) == storage_usage; (properties.optimalTilingFeatures & storage_usage) == storage_usage;
@@ -325,6 +330,10 @@ void Instance::CreateFormatTable() {
vk::to_string(format), vk::to_string(fallback)); vk::to_string(format), vk::to_string(fallback));
} }
if (pixel_format == VideoCore::PixelFormat::RGB8) {
supports_attachment = false;
}
const u32 index = static_cast<u32>(pixel_format); const u32 index = static_cast<u32>(pixel_format);
format_table[index] = FormatTraits{.transfer_support = supports_transfer, format_table[index] = FormatTraits{.transfer_support = supports_transfer,
.blit_support = supports_blit, .blit_support = supports_blit,
@@ -448,9 +457,10 @@ bool Instance::CreateDevice() {
.shaderStorageImageMultisample = available.shaderStorageImageMultisample, .shaderStorageImageMultisample = available.shaderStorageImageMultisample,
.shaderClipDistance = available.shaderClipDistance}}, .shaderClipDistance = available.shaderClipDistance}},
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{.indexTypeUint8 = true}, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{.indexTypeUint8 = true},
feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(), //feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
feature_chain.get<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>(), feature_chain.get<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>(),
feature_chain.get<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>()}; //feature_chain.get<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>()
};
// Create logical device // Create logical device
try { try {

View File

@@ -90,6 +90,21 @@ public:
return !features.logicOp; return !features.logicOp;
} }
/// Returns true when geometry shaders should be used by the backend
bool UseGeometryShaders() const {
#ifdef __ANDROID__
    // Geometry shaders are extremely expensive on tilers, so avoid them at all
    // costs even if it hurts accuracy somewhat. TODO: Make this an option
    return false;
#else
    // Elsewhere, follow the device's geometryShader feature flag
    return features.geometryShader;
#endif
}
/// Returns true when the samplerAnisotropy feature flag is set
bool IsAnisotropicFilteringSupported() const {
    const bool anisotropy_available = features.samplerAnisotropy;
    return anisotropy_available;
}
/// Returns true when VK_KHR_timeline_semaphore is supported /// Returns true when VK_KHR_timeline_semaphore is supported
bool IsTimelineSemaphoreSupported() const { bool IsTimelineSemaphoreSupported() const {
return timeline_semaphores; return timeline_semaphores;

View File

@@ -206,6 +206,7 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
Pica::Shader::ShaderSetup& setup, Pica::Shader::ShaderSetup& setup,
const VertexLayout& layout) { const VertexLayout& layout) {
PicaVSConfig config{regs.rasterizer, regs.vs, setup}; PicaVSConfig config{regs.rasterizer, regs.vs, setup};
config.state.use_geometry_shader = instance.UseGeometryShaders();
u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES; u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
for (u32 i = 0; i < layout.attribute_count; i++) { for (u32 i = 0; i < layout.attribute_count; i++) {
@@ -243,7 +244,10 @@ void PipelineCache::UseTrivialVertexShader() {
} }
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
return UseTrivialGeometryShader(); if (!instance.UseGeometryShaders()) {
return UseTrivialGeometryShader();
}
const PicaFixedGSConfig gs_config{regs}; const PicaFixedGSConfig gs_config{regs};
const vk::ShaderModule handle = const vk::ShaderModule handle =
fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry, fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
@@ -285,7 +289,7 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) { void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) {
const vk::DescriptorImageInfo image_info = { const vk::DescriptorImageInfo image_info = {
.imageView = image_view, .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; .imageView = image_view, .imageLayout = vk::ImageLayout::eGeneral};
desc_manager.SetBinding(1, binding, DescriptorData{image_info}); desc_manager.SetBinding(1, binding, DescriptorData{image_info});
} }

View File

@@ -24,7 +24,7 @@ namespace Vulkan {
constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024; constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024; constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024; constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024; constexpr u32 TEXTURE_BUFFER_SIZE = 512 * 1024;
constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {vk::Format::eR32G32Sfloat}; constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {vk::Format::eR32G32Sfloat};
@@ -62,9 +62,6 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} { vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} {
null_surface.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1);
null_storage_surface.Transition(vk::ImageLayout::eGeneral, 0, 1);
uniform_buffer_alignment = instance.UniformMinAlignment(); uniform_buffer_alignment = instance.UniformMinAlignment();
uniform_size_aligned_vs = uniform_size_aligned_vs =
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); Common::AlignUp<std::size_t>(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
@@ -563,7 +560,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
case TextureType::Shadow2D: { case TextureType::Shadow2D: {
auto surface = res_cache.GetTextureSurface(texture); auto surface = res_cache.GetTextureSurface(texture);
if (surface) { if (surface) {
surface->Transition(vk::ImageLayout::eGeneral, 0, surface->alloc.levels);
pipeline_cache.BindStorageImage(0, surface->GetStorageView()); pipeline_cache.BindStorageImage(0, surface->GetStorageView());
} else { } else {
pipeline_cache.BindStorageImage(0, null_storage_surface.GetImageView()); pipeline_cache.BindStorageImage(0, null_storage_surface.GetImageView());
@@ -596,8 +592,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto surface = res_cache.GetTextureCube(config); auto surface = res_cache.GetTextureCube(config);
if (surface) { if (surface) {
surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0,
surface->alloc.levels);
pipeline_cache.BindTexture(3, surface->GetImageView()); pipeline_cache.BindTexture(3, surface->GetImageView());
} else { } else {
pipeline_cache.BindTexture(3, null_surface.GetImageView()); pipeline_cache.BindTexture(3, null_surface.GetImageView());
@@ -628,12 +622,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
.extent = VideoCore::Extent{temp.GetScaledWidth(), temp.GetScaledHeight()}}; .extent = VideoCore::Extent{temp.GetScaledWidth(), temp.GetScaledHeight()}};
runtime.CopyTextures(*color_surface, temp, copy); runtime.CopyTextures(*color_surface, temp, copy);
temp.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, temp.alloc.levels);
pipeline_cache.BindTexture(texture_index, temp.GetImageView()); pipeline_cache.BindTexture(texture_index, temp.GetImageView());
} else { } else {
surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0,
surface->alloc.levels);
pipeline_cache.BindTexture(texture_index, surface->GetImageView()); pipeline_cache.BindTexture(texture_index, surface->GetImageView());
} }
@@ -708,21 +698,28 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
it->second = CreateFramebuffer(framebuffer_info); it->second = CreateFramebuffer(framebuffer_info);
} }
if (color_surface) {
color_surface->Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1);
}
if (depth_surface) {
depth_surface->Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1);
}
const RenderpassState renderpass_info = { const RenderpassState renderpass_info = {
.renderpass = framebuffer_info.renderpass, .renderpass = framebuffer_info.renderpass,
.framebuffer = it->second, .framebuffer = it->second,
.render_area = vk::Rect2D{.offset = {static_cast<s32>(draw_rect.left), .render_area = vk::Rect2D{.offset = {static_cast<s32>(draw_rect.left),
static_cast<s32>(draw_rect.bottom)}, static_cast<s32>(draw_rect.bottom)},
.extent = {draw_rect.GetWidth(), draw_rect.GetHeight()}}, .extent = {draw_rect.GetWidth(), draw_rect.GetHeight()}},
.clear = {}}; .clear = {},
};
renderpass_cache.ExitRenderpass();
scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::MemoryBarrier memory_write_barrier = {
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion,
memory_write_barrier, {}, {});
});
renderpass_cache.EnterRenderpass(renderpass_info); renderpass_cache.EnterRenderpass(renderpass_info);
@@ -772,10 +769,10 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
depth_surface); depth_surface);
} }
static int submit_threshold = 80; static int submit_threshold = 120;
submit_threshold--; submit_threshold--;
if (!submit_threshold) { if (!submit_threshold) {
submit_threshold = 80; submit_threshold = 120;
scheduler.Flush(); scheduler.Flush();
} }
@@ -1094,7 +1091,7 @@ vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) {
.addressModeU = PicaToVK::WrapMode(info.wrap_s), .addressModeU = PicaToVK::WrapMode(info.wrap_s),
.addressModeV = PicaToVK::WrapMode(info.wrap_t), .addressModeV = PicaToVK::WrapMode(info.wrap_t),
.mipLodBias = info.lod_bias / 256.0f, .mipLodBias = info.lod_bias / 256.0f,
.anisotropyEnable = true, .anisotropyEnable = instance.IsAnisotropicFilteringSupported(),
.maxAnisotropy = properties.limits.maxSamplerAnisotropy, .maxAnisotropy = properties.limits.maxSamplerAnisotropy,
.compareEnable = false, .compareEnable = false,
.compareOp = vk::CompareOp::eAlways, .compareOp = vk::CompareOp::eAlways,

View File

@@ -63,10 +63,10 @@ RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler)
cached_renderpasses[color][depth][0] = CreateRenderPass( cached_renderpasses[color][depth][0] = CreateRenderPass(
color_format, depth_format, vk::AttachmentLoadOp::eLoad, color_format, depth_format, vk::AttachmentLoadOp::eLoad,
vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eColorAttachmentOptimal); vk::ImageLayout::eGeneral, vk::ImageLayout::eGeneral);
cached_renderpasses[color][depth][1] = CreateRenderPass( cached_renderpasses[color][depth][1] = CreateRenderPass(
color_format, depth_format, vk::AttachmentLoadOp::eClear, color_format, depth_format, vk::AttachmentLoadOp::eClear,
vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eColorAttachmentOptimal); vk::ImageLayout::eGeneral, vk::ImageLayout::eGeneral);
} }
} }
} }
@@ -170,7 +170,7 @@ vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format de
.finalLayout = final_layout}; .finalLayout = final_layout};
color_attachment_ref = vk::AttachmentReference{ color_attachment_ref = vk::AttachmentReference{
.attachment = attachment_count++, .layout = vk::ImageLayout::eColorAttachmentOptimal}; .attachment = attachment_count++, .layout = vk::ImageLayout::eGeneral};
use_color = true; use_color = true;
} }
@@ -182,12 +182,12 @@ vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format de
.storeOp = vk::AttachmentStoreOp::eStore, .storeOp = vk::AttachmentStoreOp::eStore,
.stencilLoadOp = load_op, .stencilLoadOp = load_op,
.stencilStoreOp = vk::AttachmentStoreOp::eStore, .stencilStoreOp = vk::AttachmentStoreOp::eStore,
.initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal, .initialLayout = vk::ImageLayout::eGeneral,
.finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal}; .finalLayout = vk::ImageLayout::eGeneral};
depth_attachment_ref = depth_attachment_ref =
vk::AttachmentReference{.attachment = attachment_count++, vk::AttachmentReference{.attachment = attachment_count++,
.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal}; .layout = vk::ImageLayout::eGeneral};
use_depth = true; use_depth = true;
} }

View File

@@ -1633,12 +1633,12 @@ void main() {
gl_Position = vert_position; gl_Position = vert_position;
gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0; gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 //gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
if (enable_clip1) { //if (enable_clip1) {
gl_ClipDistance[1] = dot(clip_coef, vert_position); // gl_ClipDistance[1] = dot(clip_coef, vert_position);
} else { //} else {
gl_ClipDistance[1] = 0; // gl_ClipDistance[1] = 0;
} //}
} }
)"; )";
@@ -1682,7 +1682,9 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
}; };
)"; )";
out += GetVertexInterfaceDeclaration(true); if (!config.state.use_geometry_shader) {
out += GetVertexInterfaceDeclaration(true);
}
// input attributes declaration // input attributes declaration
for (std::size_t i = 0; i < used_regs.size(); ++i) { for (std::size_t i = 0; i < used_regs.size(); ++i) {
@@ -1754,61 +1756,63 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
} }
out += '\n'; out += '\n';
// output attributes declaration if (config.state.use_geometry_shader) {
for (u32 i = 0; i < config.state.num_outputs; ++i) { // output attributes declaration
out += fmt::format("vec4 vs_out_attr{};\n", i, i); for (u32 i = 0; i < config.state.num_outputs; ++i) {
} out += fmt::format("layout(location = {0}) out vec4 vs_out_attr{0};\n", i);
}
const auto semantic = [&config = config.state](VSOutputAttributes::Semantic slot_semantic) -> std::string { out += "void EmitVtx() {}\n";
const u32 slot = static_cast<u32>(slot_semantic); } else {
const u32 attrib = config.semantic_maps[slot].attribute_index; // output attributes declaration
const u32 comp = config.semantic_maps[slot].component_index; for (u32 i = 0; i < config.state.num_outputs; ++i) {
if (attrib < config.gs_output_attributes) { out += fmt::format("vec4 vs_out_attr{};\n", i);
return fmt::format("vs_out_attr{}.{}", attrib, "xyzw"[comp]);
} }
return "0.0";
};
out += "vec4 GetVertexQuaternion() {\n"; const auto semantic = [&config = config.state](VSOutputAttributes::Semantic slot_semantic) -> std::string {
out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + const u32 slot = static_cast<u32>(slot_semantic);
semantic(VSOutputAttributes::QUATERNION_Y) + ", " + const u32 attrib = config.semantic_maps[slot].attribute_index;
semantic(VSOutputAttributes::QUATERNION_Z) + ", " + const u32 comp = config.semantic_maps[slot].component_index;
semantic(VSOutputAttributes::QUATERNION_W) + ");\n"; if (attrib < config.gs_output_attributes) {
out += "}\n\n"; return fmt::format("vs_out_attr{}.{}", attrib, "xyzw"[comp]);
}
return "0.0";
};
out += "void EmitVtx() {\n"; out += "vec4 GetVertexQuaternion() {\n";
out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " + out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " +
semantic(VSOutputAttributes::POSITION_Y) + ", " + semantic(VSOutputAttributes::QUATERNION_Y) + ", " +
semantic(VSOutputAttributes::POSITION_Z) + ", " + semantic(VSOutputAttributes::QUATERNION_Z) + ", " +
semantic(VSOutputAttributes::POSITION_W) + ");\n"; semantic(VSOutputAttributes::QUATERNION_W) + ");\n";
out += " gl_Position = vtx_pos;\n"; out += "}\n\n";
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
//out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n";
//out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
//out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n";
//out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n";
// This is inaccurate! out += "void EmitVtx() {\n";
out += " normquat = GetVertexQuaternion();\n"; out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " +
semantic(VSOutputAttributes::POSITION_Y) + ", " +
semantic(VSOutputAttributes::POSITION_Z) + ", " +
semantic(VSOutputAttributes::POSITION_W) + ");\n";
out += " gl_Position = vtx_pos;\n";
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
out += " normquat = GetVertexQuaternion();\n";
out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " + out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " +
semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) + semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) +
", " + semantic(VSOutputAttributes::COLOR_A) + ");\n"; ", " + semantic(VSOutputAttributes::COLOR_A) + ");\n";
out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n"; out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n";
out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " + out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " +
semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n"; semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n";
out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " + out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " +
semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n"; semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n";
out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n"; out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n";
out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " + out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " +
semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) + semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) +
");\n\n"; ");\n\n";
out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " + out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " +
semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n"; semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n";
out += "}\n"; out += "}\n";
}
out += "\nvoid main() {\n"; out += "\nvoid main() {\n";
for (u32 i = 0; i < config.state.num_outputs; ++i) { for (u32 i = 0; i < config.state.num_outputs; ++i) {

View File

@@ -165,9 +165,7 @@ struct PicaShaderConfigCommon {
// output_map[output register index] -> output attribute index // output_map[output register index] -> output attribute index
std::array<u32, 16> output_map; std::array<u32, 16> output_map;
bool use_geometry_shader;
u32 vs_output_attributes; u32 vs_output_attributes;
u32 gs_output_attributes; u32 gs_output_attributes;

View File

@@ -4,6 +4,7 @@
#pragma once #pragma once
#include <span>
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan { namespace Vulkan {

View File

@@ -40,7 +40,6 @@ namespace Vulkan {
return vk::PipelineStageFlagBits::eVertexInput; return vk::PipelineStageFlagBits::eVertexInput;
case vk::BufferUsageFlagBits::eUniformBuffer: case vk::BufferUsageFlagBits::eUniformBuffer:
return vk::PipelineStageFlagBits::eVertexShader | return vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eGeometryShader |
vk::PipelineStageFlagBits::eFragmentShader; vk::PipelineStageFlagBits::eFragmentShader;
case vk::BufferUsageFlagBits::eUniformTexelBuffer: case vk::BufferUsageFlagBits::eUniformTexelBuffer:
return vk::PipelineStageFlagBits::eFragmentShader; return vk::PipelineStageFlagBits::eFragmentShader;

View File

@@ -29,7 +29,7 @@ struct StagingBuffer {
class StreamBuffer { class StreamBuffer {
static constexpr u32 MAX_BUFFER_VIEWS = 3; static constexpr u32 MAX_BUFFER_VIEWS = 3;
static constexpr u32 BUCKET_COUNT = 4; static constexpr u32 BUCKET_COUNT = 2;
public: public:
/// Staging only constructor /// Staging only constructor

View File

@@ -66,6 +66,7 @@ void Swapchain::Create() {
.queueFamilyIndexCount = queue_family_indices_count, .queueFamilyIndexCount = queue_family_indices_count,
.pQueueFamilyIndices = queue_family_indices.data(), .pQueueFamilyIndices = queue_family_indices.data(),
.preTransform = transform, .preTransform = transform,
.compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eInherit,
.presentMode = present_mode, .presentMode = present_mode,
.clipped = true, .clipped = true,
.oldSwapchain = swapchain}; .oldSwapchain = swapchain};

View File

@@ -16,23 +16,56 @@
namespace Vulkan { namespace Vulkan {
vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) { [[nodiscard]] vk::ImageAspectFlags MakeAspect(VideoCore::SurfaceType type) {
switch (type) { switch (type) {
case VideoCore::SurfaceType::Color: case VideoCore::SurfaceType::Color:
case VideoCore::SurfaceType::Texture: case VideoCore::SurfaceType::Texture:
case VideoCore::SurfaceType::Fill: case VideoCore::SurfaceType::Fill:
return vk::ImageAspectFlagBits::eColor; return vk::ImageAspectFlagBits::eColor;
case VideoCore::SurfaceType::Depth: case VideoCore::SurfaceType::Depth:
return vk::ImageAspectFlagBits::eDepth; return vk::ImageAspectFlagBits::eDepth;
case VideoCore::SurfaceType::DepthStencil: case VideoCore::SurfaceType::DepthStencil:
return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
default: default:
UNREACHABLE_MSG("Invalid surface type!"); LOG_CRITICAL(Render_Vulkan, "Invalid surface type {}", type);
UNREACHABLE();
} }
return vk::ImageAspectFlagBits::eColor; return vk::ImageAspectFlagBits::eColor;
} }
/// Selects the vk::Filter for a pixel format: D16, D24 and D24S8 map to nearest
/// filtering, every other format maps to linear filtering.
[[nodiscard]] vk::Filter MakeFilter(VideoCore::PixelFormat pixel_format) {
    const bool wants_nearest = pixel_format == VideoCore::PixelFormat::D16 ||
                               pixel_format == VideoCore::PixelFormat::D24 ||
                               pixel_format == VideoCore::PixelFormat::D24S8;
    return wants_nearest ? vk::Filter::eNearest : vk::Filter::eLinear;
}
/// Converts a VideoCore::ClearValue into a vk::ClearValue by copying its raw bytes.
/// The static_assert only guarantees that both types have the same size.
/// NOTE(review): presumably their member layouts also match - confirm.
[[nodiscard]] vk::ClearValue MakeClearValue(VideoCore::ClearValue clear) {
    static_assert(sizeof(VideoCore::ClearValue) == sizeof(vk::ClearValue));

    vk::ClearValue converted{};
    std::memcpy(&converted, &clear, sizeof(converted));
    return converted;
}
/// Builds a vk::ClearColorValue whose float32 member holds the four entries of
/// clear.color, in index order 0..3.
[[nodiscard]] vk::ClearColorValue MakeClearColorValue(VideoCore::ClearValue clear) {
    const std::array components{clear.color[0], clear.color[1], clear.color[2], clear.color[3]};
    return vk::ClearColorValue{.float32 = components};
}
/// Builds a vk::ClearDepthStencilValue from the depth and stencil members of clear.
[[nodiscard]] vk::ClearDepthStencilValue MakeClearDepthStencilValue(VideoCore::ClearValue clear) {
    vk::ClearDepthStencilValue depth_stencil{};
    depth_stencil.depth = clear.depth;
    depth_stencil.stencil = clear.stencil;
    return depth_stencil;
}
u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) { u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
u32 depth_offset = 0; u32 depth_offset = 0;
u32 stencil_offset = 4 * data.size / 5; u32 stencil_offset = 4 * data.size / 5;
@@ -133,7 +166,7 @@ void TextureRuntime::Finish() {
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type) { VideoCore::TextureType type) {
const FormatTraits traits = instance.GetTraits(format); const FormatTraits traits = instance.GetTraits(format);
const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format)); const vk::ImageAspectFlags aspect = MakeAspect(VideoCore::GetFormatType(format));
// Depth buffers are not supposed to support blit by the spec so don't require it. // Depth buffers are not supposed to support blit by the spec so don't require it.
const bool is_suitable = traits.transfer_support && traits.attachment_support && const bool is_suitable = traits.transfer_support && traits.attachment_support &&
@@ -271,6 +304,31 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
alloc.storage_view = device.createImageView(storage_view_info); alloc.storage_view = device.createImageView(storage_view_info);
} }
scheduler.Record([image = alloc.image,
aspect = alloc.aspect](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
const vk::ImageMemoryBarrier init_barrier = {
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eNone,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
};
upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe,
vk::PipelineStageFlagBits::eTopOfPipe,
vk::DependencyFlagBits::eByRegion, {}, {}, init_barrier);
});
return alloc; return alloc;
} }
@@ -316,110 +374,305 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::spa
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
VideoCore::ClearValue value) { VideoCore::ClearValue value) {
const vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
renderpass_cache.ExitRenderpass(); renderpass_cache.ExitRenderpass();
surface.Transition(vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1); const bool is_color = surface.type != VideoCore::SurfaceType::Depth &&
surface.type != VideoCore::SurfaceType::DepthStencil;
vk::ClearValue clear_value{};
if (aspect & vk::ImageAspectFlagBits::eColor) {
clear_value.color = vk::ClearColorValue{
.float32 =
std::to_array({value.color[0], value.color[1], value.color[2], value.color[3]})};
} else if (aspect & vk::ImageAspectFlagBits::eDepth ||
aspect & vk::ImageAspectFlagBits::eStencil) {
clear_value.depthStencil =
vk::ClearDepthStencilValue{.depth = value.depth, .stencil = value.stencil};
}
if (clear.texture_rect == surface.GetScaledRect()) { if (clear.texture_rect == surface.GetScaledRect()) {
scheduler.Record([aspect, image = surface.alloc.image, clear_value, scheduler.Record([aspect = MakeAspect(surface.type),
clear](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { image = surface.alloc.image,
value, is_color, clear](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ImageSubresourceRange range = {.aspectMask = aspect, const vk::ImageSubresourceRange range = {.aspectMask = aspect,
.baseMipLevel = clear.texture_level, .baseMipLevel = clear.texture_level,
.levelCount = 1, .levelCount = 1,
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = 1}; .layerCount = 1};
if (aspect & vk::ImageAspectFlagBits::eColor) { const vk::ImageMemoryBarrier pre_barrier = {
.srcAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eColorAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = clear.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
};
const vk::ImageMemoryBarrier post_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eColorAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = clear.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
if (is_color) {
render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal,
clear_value.color, range); MakeClearColorValue(value), range);
} else if (aspect & vk::ImageAspectFlagBits::eDepth || } else {
aspect & vk::ImageAspectFlagBits::eStencil) {
render_cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, render_cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal,
clear_value.depthStencil, range); MakeClearDepthStencilValue(value), range);
} }
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
}); });
} else { return true;
vk::RenderPass clear_renderpass; }
if (aspect & vk::ImageAspectFlagBits::eColor) {
clear_renderpass = renderpass_cache.GetRenderpass(
surface.pixel_format, VideoCore::PixelFormat::Invalid, true);
surface.Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1);
} else if (aspect & vk::ImageAspectFlagBits::eDepth) {
clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid,
surface.pixel_format, true);
surface.Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1);
}
const vk::ImageView framebuffer_view = surface.GetFramebufferView(); ClearTextureWithRenderpass(surface, clear, value);
return true;
}
auto [it, new_framebuffer] = void TextureRuntime::ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear,
clear_framebuffers.try_emplace(framebuffer_view, vk::Framebuffer{}); VideoCore::ClearValue value) {
if (new_framebuffer) { const bool is_color = surface.type != VideoCore::SurfaceType::Depth &&
const vk::FramebufferCreateInfo framebuffer_info = {.renderPass = clear_renderpass, surface.type != VideoCore::SurfaceType::DepthStencil;
.attachmentCount = 1,
.pAttachments = &framebuffer_view,
.width = surface.GetScaledWidth(),
.height = surface.GetScaledHeight(),
.layers = 1};
vk::Device device = instance.GetDevice(); const vk::RenderPass clear_renderpass =
it->second = device.createFramebuffer(framebuffer_info); is_color ? renderpass_cache.GetRenderpass(surface.pixel_format,
} VideoCore::PixelFormat::Invalid, true)
: renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid,
surface.pixel_format, true);
const RenderpassState clear_info = { const vk::ImageView framebuffer_view = surface.GetFramebufferView();
auto [it, new_framebuffer] =
clear_framebuffers.try_emplace(framebuffer_view, vk::Framebuffer{});
if (new_framebuffer) {
const vk::FramebufferCreateInfo framebuffer_info = {.renderPass = clear_renderpass,
.attachmentCount = 1,
.pAttachments = &framebuffer_view,
.width = surface.GetScaledWidth(),
.height = surface.GetScaledHeight(),
.layers = 1};
vk::Device device = instance.GetDevice();
it->second = device.createFramebuffer(framebuffer_info);
}
const RenderpassState clear_info = {
.renderpass = clear_renderpass, .renderpass = clear_renderpass,
.framebuffer = it->second, .framebuffer = it->second,
.render_area = vk::Rect2D{.offset = {static_cast<s32>(clear.texture_rect.left), .render_area = vk::Rect2D{.offset = {static_cast<s32>(clear.texture_rect.left),
static_cast<s32>(clear.texture_rect.bottom)}, static_cast<s32>(clear.texture_rect.bottom)},
.extent = {clear.texture_rect.GetWidth(), .extent = {clear.texture_rect.GetWidth(),
clear.texture_rect.GetHeight()}}, clear.texture_rect.GetHeight()}},
.clear = clear_value}; .clear = MakeClearValue(value)
};
renderpass_cache.EnterRenderpass(clear_info); scheduler.Record([aspect = MakeAspect(surface.type),
renderpass_cache.ExitRenderpass(); image = surface.alloc.image,
} level = clear.texture_level](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ImageMemoryBarrier pre_barrier = {
.srcAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eColorAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
};
return true; render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
});
renderpass_cache.EnterRenderpass(clear_info);
renderpass_cache.ExitRenderpass();
scheduler.Record([aspect = MakeAspect(surface.type),
image = surface.alloc.image,
level = clear.texture_level](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ImageMemoryBarrier post_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eColorAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
});
} }
bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
const VideoCore::TextureCopy& copy) { const VideoCore::TextureCopy& copy) {
renderpass_cache.ExitRenderpass(); renderpass_cache.ExitRenderpass();
source.Transition(vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1); scheduler.Record([src_image = source.alloc.image,
dest.Transition(vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1); dst_image = dest.alloc.image,
aspect = MakeAspect(source.type), copy](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ImageCopy image_copy = {
.srcSubresource = {.aspectMask = aspect,
.mipLevel = copy.src_level,
.baseArrayLayer = 0,
.layerCount = 1},
.srcOffset = {static_cast<s32>(copy.src_offset.x), static_cast<s32>(copy.src_offset.y), 0},
.dstSubresource = {.aspectMask = aspect,
.mipLevel = copy.dst_level,
.baseArrayLayer = 0,
.layerCount = 1},
.dstOffset = {static_cast<s32>(copy.dst_offset.x), static_cast<s32>(copy.dst_offset.y), 0},
.extent = {copy.extent.width, copy.extent.height, 1}};
scheduler.Record([src_image = source.alloc.image, src_type = source.type, const std::array pre_barriers = {
dst_image = dest.alloc.image, dst_type = dest.type, vk::ImageMemoryBarrier{
copy](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { .srcAccessMask = vk::AccessFlagBits::eShaderWrite |
const vk::ImageCopy image_copy = {.srcSubresource = {.aspectMask = ToVkAspect(src_type), vk::AccessFlagBits::eColorAttachmentWrite |
.mipLevel = copy.src_level, vk::AccessFlagBits::eDepthStencilAttachmentWrite |
.baseArrayLayer = 0, vk::AccessFlagBits::eTransferWrite,
.layerCount = 1}, .dstAccessMask = vk::AccessFlagBits::eTransferRead,
.srcOffset = {static_cast<s32>(copy.src_offset.x), .oldLayout = vk::ImageLayout::eGeneral,
static_cast<s32>(copy.src_offset.y), 0}, .newLayout = vk::ImageLayout::eTransferSrcOptimal,
.dstSubresource = {.aspectMask = ToVkAspect(dst_type), .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.mipLevel = copy.dst_level, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.baseArrayLayer = 0, .image = src_image,
.layerCount = 1}, .subresourceRange{
.dstOffset = {static_cast<s32>(copy.dst_offset.x), .aspectMask = aspect,
static_cast<s32>(copy.dst_offset.y), 0}, .baseMipLevel = copy.src_level,
.extent = {copy.extent.width, copy.extent.height, 1}}; .levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eColorAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = copy.dst_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
},
};
const std::array post_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eNone,
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = copy.src_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eColorAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = copy.dst_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
},
};
render_cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::ImageLayout::eTransferDstOptimal, image_copy); vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
render_cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal,
dst_image, vk::ImageLayout::eTransferDstOptimal, image_copy);
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
const vk::MemoryBarrier memory_write_barrier = {
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion,
memory_write_barrier, {}, {});
}); });
return true; return true;
@@ -429,13 +682,10 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) { const VideoCore::TextureBlit& blit) {
renderpass_cache.ExitRenderpass(); renderpass_cache.ExitRenderpass();
source.Transition(vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1); scheduler.Record([src_image = source.alloc.image,
dest.Transition(vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1); aspect = MakeAspect(source.type),
filter = MakeFilter(source.pixel_format),
scheduler.Record([src_iamge = source.alloc.image, src_type = source.type, dst_image = dest.alloc.image, blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
dst_image = dest.alloc.image, dst_type = dest.type,
format = source.pixel_format,
blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const std::array source_offsets = {vk::Offset3D{static_cast<s32>(blit.src_rect.left), const std::array source_offsets = {vk::Offset3D{static_cast<s32>(blit.src_rect.left),
static_cast<s32>(blit.src_rect.bottom), 0}, static_cast<s32>(blit.src_rect.bottom), 0},
vk::Offset3D{static_cast<s32>(blit.src_rect.right), vk::Offset3D{static_cast<s32>(blit.src_rect.right),
@@ -446,26 +696,111 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
vk::Offset3D{static_cast<s32>(blit.dst_rect.right), vk::Offset3D{static_cast<s32>(blit.dst_rect.right),
static_cast<s32>(blit.dst_rect.top), 1}}; static_cast<s32>(blit.dst_rect.top), 1}};
const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = ToVkAspect(src_type), const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = aspect,
.mipLevel = blit.src_level, .mipLevel = blit.src_level,
.baseArrayLayer = blit.src_layer, .baseArrayLayer = blit.src_layer,
.layerCount = 1}, .layerCount = 1},
.srcOffsets = source_offsets, .srcOffsets = source_offsets,
.dstSubresource = {.aspectMask = ToVkAspect(dst_type), .dstSubresource = {.aspectMask = aspect,
.mipLevel = blit.dst_level, .mipLevel = blit.dst_level,
.baseArrayLayer = blit.dst_layer, .baseArrayLayer = blit.dst_layer,
.layerCount = 1}, .layerCount = 1},
.dstOffsets = dest_offsets}; .dstOffsets = dest_offsets};
// Don't use linear filtering on depth attachments const std::array read_barriers = {
const vk::Filter filtering = format == VideoCore::PixelFormat::D24S8 || vk::ImageMemoryBarrier{
format == VideoCore::PixelFormat::D24 || .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
format == VideoCore::PixelFormat::D16 .dstAccessMask = vk::AccessFlagBits::eTransferRead,
? vk::Filter::eNearest .oldLayout = vk::ImageLayout::eGeneral,
: vk::Filter::eLinear; .newLayout = vk::ImageLayout::eTransferSrcOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = blit.src_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderRead |
vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eTransferRead,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = blit.dst_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
}
};
const std::array write_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = blit.src_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = aspect,
.baseMipLevel = blit.dst_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}
}
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers);
render_cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal,
dst_image, vk::ImageLayout::eTransferDstOptimal, blit_area,
filter);
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, write_barriers);
const vk::MemoryBarrier memory_write_barrier = {
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion,
memory_write_barrier, {}, {});
render_cmdbuf.blitImage(src_iamge, vk::ImageLayout::eTransferSrcOptimal, dst_image,
vk::ImageLayout::eTransferDstOptimal, blit_area, filtering);
}); });
return true; return true;
@@ -522,115 +857,6 @@ bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const {
!traits.attachment_support); !traits.attachment_support);
} }
void TextureRuntime::Transition(ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level,
u32 level_count) {
LayoutTracker& tracker = alloc.tracker;
if (tracker.IsRangeEqual(new_layout, level, level_count) || !alloc.image) {
return;
}
renderpass_cache.ExitRenderpass();
struct LayoutInfo {
vk::AccessFlags access;
vk::PipelineStageFlags stage;
};
// Get optimal transition settings for every image layout. Settings taken from Dolphin
auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo {
LayoutInfo info;
switch (layout) {
case vk::ImageLayout::eUndefined:
// Layout undefined therefore contents undefined, and we don't care what happens to it.
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eTopOfPipe;
break;
case vk::ImageLayout::ePreinitialized:
// Image has been pre-initialized by the host, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eHostWrite;
info.stage = vk::PipelineStageFlagBits::eHost;
break;
case vk::ImageLayout::eColorAttachmentOptimal:
// Image was being used as a color attachment, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eColorAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
break;
case vk::ImageLayout::eDepthStencilAttachmentOptimal:
// Image was being used as a depthstencil attachment, so ensure all writes have
// completed.
info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests;
break;
case vk::ImageLayout::ePresentSrcKHR:
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eBottomOfPipe;
break;
case vk::ImageLayout::eShaderReadOnlyOptimal:
// Image was being used as a shader resource, make sure all reads have finished.
info.access = vk::AccessFlagBits::eShaderRead;
info.stage = vk::PipelineStageFlagBits::eFragmentShader;
break;
case vk::ImageLayout::eTransferSrcOptimal:
// Image was being used as a copy source, ensure all reads have finished.
info.access = vk::AccessFlagBits::eTransferRead;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
case vk::ImageLayout::eTransferDstOptimal:
// Image was being used as a copy destination, ensure all writes have finished.
info.access = vk::AccessFlagBits::eTransferWrite;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
case vk::ImageLayout::eGeneral:
info.access = vk::AccessFlagBits::eInputAttachmentRead;
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput |
vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eComputeShader;
break;
case vk::ImageLayout::eDepthStencilReadOnlyOptimal:
// Image is going to be sampled from a compute shader
info.access = vk::AccessFlagBits::eShaderRead;
info.stage = vk::PipelineStageFlagBits::eComputeShader;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout);
UNREACHABLE();
}
return info;
};
LayoutInfo dest = GetLayoutInfo(new_layout);
tracker.ForEachLayoutRange(
level, level_count, new_layout, [&](u32 start, u32 count, vk::ImageLayout old_layout) {
scheduler.Record([old_layout, new_layout, dest, start, count, image = alloc.image,
aspect = alloc.aspect, layers = alloc.layers,
GetLayoutInfo](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
LayoutInfo source = GetLayoutInfo(old_layout);
const vk::ImageMemoryBarrier barrier = {.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = image,
.subresourceRange = {.aspectMask = aspect,
.baseMipLevel = start,
.levelCount = count,
.baseArrayLayer = 0,
.layerCount = layers}};
render_cmdbuf.pipelineBarrier(source.stage, dest.stage,
vk::DependencyFlagBits::eByRegion, {}, {}, barrier);
});
});
tracker.SetLayout(new_layout, level, level_count);
for (u32 i = 0; i < level_count; i++) {
ASSERT(alloc.tracker.GetLayout(level + i) == new_layout);
}
}
Surface::Surface(TextureRuntime& runtime) Surface::Surface(TextureRuntime& runtime)
: runtime{runtime}, instance{runtime.GetInstance()}, scheduler{runtime.GetScheduler()} {} : runtime{runtime}, instance{runtime.GetInstance()}, scheduler{runtime.GetScheduler()} {}
@@ -666,10 +892,6 @@ Surface::~Surface() {
} }
} }
void Surface::Transition(vk::ImageLayout new_layout, u32 level, u32 level_count) {
runtime.Transition(alloc, new_layout, level, level_count);
}
MICROPROFILE_DEFINE(Vulkan_Upload, "Vulkan", "Texture Upload", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(Vulkan_Upload, "Vulkan", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
MICROPROFILE_SCOPE(Vulkan_Upload); MICROPROFILE_SCOPE(Vulkan_Upload);
@@ -685,40 +907,92 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
if (is_scaled) { if (is_scaled) {
ScaledUpload(upload, staging); ScaledUpload(upload, staging);
} else { } else {
Transition(vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1); scheduler.Record([aspect = alloc.aspect, image = alloc.image,
scheduler.Record([aspect = alloc.aspect, image = alloc.image, format = alloc.format, format = alloc.format, staging, upload](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
staging, upload](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { u32 num_copies = 1;
u32 region_count = 0; std::array<vk::BufferImageCopy, 2> buffer_image_copies;
std::array<vk::BufferImageCopy, 2> copy_regions;
const VideoCore::Rect2D rect = upload.texture_rect; const VideoCore::Rect2D rect = upload.texture_rect;
vk::BufferImageCopy copy_region = { buffer_image_copies[0] = vk::BufferImageCopy{
.bufferOffset = staging.buffer_offset + upload.buffer_offset, .bufferOffset = staging.buffer_offset + upload.buffer_offset,
.bufferRowLength = rect.GetWidth(), .bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(), .bufferImageHeight = rect.GetHeight(),
.imageSubresource = {.aspectMask = aspect, .imageSubresource = {.aspectMask = aspect,
.mipLevel = upload.texture_level, .mipLevel = upload.texture_level,
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = 1}, .layerCount = 1},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0}, .imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}};
if (aspect & vk::ImageAspectFlagBits::eColor) { if (aspect & vk::ImageAspectFlagBits::eStencil) {
copy_regions[region_count++] = copy_region; buffer_image_copies[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
} else if (aspect & vk::ImageAspectFlagBits::eDepth) { vk::BufferImageCopy& stencil_copy = buffer_image_copies[1];
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; stencil_copy = buffer_image_copies[0];
copy_regions[region_count++] = copy_region; stencil_copy.bufferOffset += UnpackDepthStencil(staging, format);
stencil_copy.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
if (aspect & vk::ImageAspectFlagBits::eStencil) { num_copies++;
copy_region.bufferOffset += UnpackDepthStencil(staging, format);
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region;
}
} }
render_cmdbuf.copyBufferToImage(staging.buffer, image, static constexpr vk::AccessFlags WRITE_ACCESS_FLAGS =
vk::ImageLayout::eTransferDstOptimal, region_count, vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eColorAttachmentWrite |
copy_regions.data()); vk::AccessFlagBits::eDepthStencilAttachmentWrite;
static constexpr vk::AccessFlags READ_ACCESS_FLAGS =
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentRead;
const vk::ImageMemoryBarrier read_barrier = {
.srcAccessMask = WRITE_ACCESS_FLAGS,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = upload.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const vk::ImageMemoryBarrier write_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = upload.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier);
render_cmdbuf.copyBufferToImage(staging.buffer, image, vk::ImageLayout::eTransferDstOptimal,
num_copies, buffer_image_copies.data());
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier);
const vk::MemoryBarrier memory_write_barrier = {
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion,
memory_write_barrier, {}, {});
}); });
runtime.upload_buffer.Commit(staging.size); runtime.upload_buffer.Commit(staging.size);
@@ -744,23 +1018,68 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
if (is_scaled) { if (is_scaled) {
ScaledDownload(download, staging); ScaledDownload(download, staging);
} else { } else {
Transition(vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1); scheduler.Record([aspect = alloc.aspect, image = alloc.image,
scheduler.Record([aspect = alloc.aspect, image = alloc.image, staging, staging, download](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer){
download](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const VideoCore::Rect2D rect = download.texture_rect; const VideoCore::Rect2D rect = download.texture_rect;
const vk::BufferImageCopy copy_region = { const vk::BufferImageCopy buffer_image_copy = {
.bufferOffset = staging.buffer_offset + download.buffer_offset, .bufferOffset = staging.buffer_offset + download.buffer_offset,
.bufferRowLength = rect.GetWidth(), .bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(), .bufferImageHeight = rect.GetHeight(),
.imageSubresource = {.aspectMask = aspect, .imageSubresource = {.aspectMask = aspect,
.mipLevel = download.texture_level, .mipLevel = download.texture_level,
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = 1}, .layerCount = 1},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0}, .imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}};
const vk::ImageMemoryBarrier read_barrier = {
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = download.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const vk::ImageMemoryBarrier image_write_barrier = {
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eMemoryWrite,
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = download.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const vk::MemoryBarrier memory_write_barrier = {
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
};
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier);
render_cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, render_cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, copy_region); staging.buffer, buffer_image_copy);
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion,
memory_write_barrier, {}, image_write_barrier);
}); });
runtime.download_buffer.Commit(staging.size); runtime.download_buffer.Commit(staging.size);
} }

View File

@@ -114,9 +114,6 @@ public:
void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source, void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
std::span<std::byte> dest); std::span<std::byte> dest);
/// Transitions the mip level range of the surface to new_layout
void Transition(ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level, u32 level_count);
/// Fills the rectangle of the texture with the clear value provided /// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
VideoCore::ClearValue value); VideoCore::ClearValue value);
@@ -140,7 +137,16 @@ public:
/// Returns true if the provided pixel format needs convertion /// Returns true if the provided pixel format needs convertion
[[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const; [[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const;
/// Returns a reference to the renderpass cache
[[nodiscard]] RenderpassCache& GetRenderpassCache() {
return renderpass_cache;
}
private: private:
/// Clears a partial texture rect using a clear rectangle
void ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear,
VideoCore::ClearValue value);
/// Returns the current Vulkan instance /// Returns the current Vulkan instance
const Instance& GetInstance() const { const Instance& GetInstance() const {
return instance; return instance;
@@ -175,9 +181,6 @@ public:
TextureRuntime& runtime); TextureRuntime& runtime);
~Surface() override; ~Surface() override;
/// Transitions the mip level range of the surface to new_layout
void Transition(vk::ImageLayout new_layout, u32 level, u32 level_count);
/// Uploads pixel data in staging to a rectangle region of the surface texture /// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);