renderer_vulkan: Implement renderer and rasterizer classes

* Also WIP. Vulkan crashes when allocating command buffers, need to investigate...
This commit is contained in:
emufan4568
2022-09-18 01:11:37 +03:00
committed by GPUCode
parent 7ae0d0ef27
commit ab3a228e5e
34 changed files with 4191 additions and 347 deletions

View File

@ -235,6 +235,7 @@ void DebuggerBackend::Write(const Entry& entry) {
CLS(Render) \
SUB(Render, Software) \
SUB(Render, OpenGL) \
SUB(Render, Vulkan) \
CLS(Audio) \
SUB(Audio, DSP) \
SUB(Audio, Sink) \

View File

@ -83,8 +83,12 @@ add_library(video_core STATIC
renderer_opengl/gl_format_reinterpreter.cpp
renderer_opengl/gl_format_reinterpreter.h
renderer_vulkan/pica_to_vk.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_instance.cpp
renderer_vulkan/vk_instance.h
renderer_vulkan/vk_pipeline_cache.cpp
@ -110,6 +114,8 @@ add_library(video_core STATIC
shader/shader_cache.h
shader/shader_interpreter.cpp
shader/shader_interpreter.h
shader/shader_uniforms.cpp
shader/shader_uniforms.h
swrasterizer/clipper.cpp
swrasterizer/clipper.h
swrasterizer/framebuffer.cpp
@ -183,7 +189,7 @@ create_target_directory_groups(video_core)
target_include_directories(video_core PRIVATE ../../externals/vulkan-headers/include)
target_include_directories(video_core PRIVATE ../../externals/vma)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad glslang nihstro-headers Boost::serialization)
target_link_libraries(video_core PRIVATE glad glslang SPIRV nihstro-headers Boost::serialization)
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
if (ARCHITECTURE_x86_64)

View File

@ -6,6 +6,7 @@
#include <algorithm>
#include <unordered_map>
#include <optional>
#include <vector>
#include <boost/range/iterator_range.hpp>
#include "common/alignment.h"
#include "common/logging/log.h"
@ -901,24 +902,15 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start);
const u32 start_offset = load_start - surface->addr;
const u32 upload_size = static_cast<u32>(upload_data.size());
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
if (!surface->is_tiled) {
ASSERT(surface->type == SurfaceType::Color);
const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size);
/*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) {
Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer);
} else if (surface->pixel_format == PixelFormat::RGB8 && GLES) {
Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer);
} else {
std::memcpy(dest_buffer.data(), upload_data.data(), upload_size);
}*/
std::memcpy(dest_buffer.data(), upload_data.data(), upload_size);
if (surface->is_tiled) {
std::vector<std::byte> unswizzled_data(staging.size);
UnswizzleTexture(*surface, start_offset, upload_data, unswizzled_data);
runtime.FormatConvert(surface->pixel_format, true, unswizzled_data, staging.mapped);
} else {
UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped);
runtime.FormatConvert(surface->pixel_format, true, upload_data, staging.mapped);
}
const BufferTextureCopy upload = {
@ -957,24 +949,15 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
const u32 start_offset = flush_start - surface->addr;
const u32 download_size = static_cast<u32>(download_dest.size());
MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
if (!surface->is_tiled) {
ASSERT(surface->type == SurfaceType::Color);
const auto download_data = staging.mapped.subspan(start_offset, download_size);
/*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) {
Pica::Texture::ConvertABGRToRGBA(download_data, download_dest);
} else if (surface->pixel_format == PixelFormat::RGB8 && GLES) {
Pica::Texture::ConvertBGRToRGB(download_data, download_dest);
} else {
std::memcpy(download_dest.data(), download_data.data(), download_size);
}*/
std::memcpy(download_dest.data(), download_data.data(), download_size);
if (surface->is_tiled) {
std::vector<std::byte> swizzled_data(staging.size);
SwizzleTexture(*surface, start_offset, staging.mapped, swizzled_data);
runtime.FormatConvert(surface->pixel_format, false, swizzled_data, download_dest);
} else {
SwizzleTexture(*surface, start_offset, staging.mapped, download_dest);
runtime.FormatConvert(surface->pixel_format, false, staging.mapped, download_dest);
}
}

View File

@ -14,7 +14,6 @@ struct HostTextureTag {
PixelFormat format{};
u32 width = 0;
u32 height = 0;
u32 levels = 1;
u32 layers = 1;
auto operator<=>(const HostTextureTag&) const noexcept = default;

View File

@ -9,10 +9,6 @@
#include "common/common_types.h"
#include "core/hw/gpu.h"
namespace OpenGL {
struct ScreenInfo;
}
namespace Pica::Shader {
struct OutputVertex;
} // namespace Pica::Shader
@ -73,13 +69,6 @@ public:
return false;
}
/// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
PAddr framebuffer_addr, u32 pixel_stride,
OpenGL::ScreenInfo& screen_info) {
return false;
}
/// Attempt to draw using hardware shaders
virtual bool AccelerateDrawBatch(bool is_indexed) {
return false;

View File

@ -159,6 +159,7 @@ struct FramebufferRegs {
} stencil_test;
union {
u32 depth_color_mask;
BitField<0, 1, u32> depth_test_enable;
BitField<4, 3, CompareFunc> depth_test_func;
BitField<8, 1, u32> red_enable;

View File

@ -12,7 +12,3 @@ void RendererBase::UpdateCurrentFramebufferLayout(bool is_portrait_mode) {
const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout();
render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height, is_portrait_mode);
}
/// Forwards to SyncEntireState() on the rasterizer held by the renderer.
void RendererBase::Sync() {
rasterizer->SyncEntireState();
}

View File

@ -21,6 +21,9 @@ public:
/// Initialize the renderer
virtual VideoCore::ResultStatus Init() = 0;
/// Returns the rasterizer owned by the renderer
virtual VideoCore::RasterizerInterface* Rasterizer() = 0;
/// Shutdown the renderer
virtual void ShutDown() = 0;
@ -37,6 +40,8 @@ public:
/// Cleans up after video dumping is ended
virtual void CleanupVideoDumping() = 0;
virtual void Sync() = 0;
/// Updates the framebuffer layout of the contained render window handle.
void UpdateCurrentFramebufferLayout(bool is_portrait_mode = {});
@ -51,10 +56,6 @@ public:
return m_current_frame;
}
VideoCore::RasterizerInterface* Rasterizer() const {
return rasterizer.get();
}
Frontend::EmuWindow& GetRenderWindow() {
return render_window;
}
@ -63,11 +64,8 @@ public:
return render_window;
}
void Sync();
protected:
Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
};

View File

@ -20,6 +20,9 @@ class EmuWindow;
}
namespace OpenGL {
struct ScreenInfo;
class Driver;
class ShaderProgramManager;
@ -43,7 +46,7 @@ public:
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) override;
u32 pixel_stride, ScreenInfo& screen_info);
bool AccelerateDrawBatch(bool is_indexed) override;
/// Syncs entire status to match PICA registers

View File

@ -41,10 +41,12 @@ struct LightSrc {
float dist_atten_scale;
};
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
/**
* Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
* NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
* the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
* Not following that rule will cause problems on some AMD drivers.
*/
struct UniformData {
int framebuffer_scale;
int alphatest_ref;
@ -81,8 +83,10 @@ static_assert(sizeof(UniformData) == 0x4F0,
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
// NOTE: the same rule from UniformData also applies here.
/**
* Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
* NOTE: the same rule from UniformData also applies here.
*/
struct PicaUniformsData {
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);

View File

@ -124,6 +124,17 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f
return DEFAULT_TUPLE;
}
void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
std::span<std::byte> source, std::span<std::byte> dest) {
if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) {
Pica::Texture::ConvertABGRToRGBA(source, dest);
} else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) {
Pica::Texture::ConvertBGRToRGB(source, dest);
} else {
std::memcpy(dest.data(), source.data(), source.size());
}
}
OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type) {
@ -302,6 +313,17 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type);
}
/// Moves the surface's texture into the runtime's texture_recycler, stored under a tag built
/// from the pixel format, the scaled dimensions and the layer count (6 for cube maps, else 1).
Surface::~Surface() {
    const bool is_cube_map = texture_type == VideoCore::TextureType::CubeMap;
    const u32 layer_count = is_cube_map ? 6u : 1u;

    const VideoCore::HostTextureTag recycle_key = {
        .format = pixel_format,
        .width = GetScaledWidth(),
        .height = GetScaledHeight(),
        .layers = layer_count,
    };

    runtime.texture_recycler.emplace(recycle_key, std::move(texture));
}
MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) {
MICROPROFILE_SCOPE(OpenGL_Upload);
@ -327,8 +349,7 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu
upload.texture_rect.left, upload.texture_rect.bottom,
upload.texture_rect.GetWidth(),
upload.texture_rect.GetHeight(),
tuple.format, tuple.type,
reinterpret_cast<void*>(upload.buffer_offset));
tuple.format, tuple.type, 0);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
@ -361,7 +382,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
const auto& tuple = runtime.GetFormatTuple(pixel_format);
glReadPixels(download.texture_rect.left, download.texture_rect.bottom,
download.texture_rect.GetWidth(), download.texture_rect.GetHeight(),
tuple.format, tuple.type, reinterpret_cast<void*>(download.buffer_offset));
tuple.format, tuple.type, 0);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@ -390,11 +411,9 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
if (driver.IsOpenGLES()) {
const auto& downloader_es = runtime.GetDownloaderES();
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect_height, rect_width,
reinterpret_cast<void*>(download.buffer_offset));
rect_height, rect_width, 0);
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
reinterpret_cast<void*>(download.buffer_offset));
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, 0);
}
}
@ -409,7 +428,7 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
const auto& tuple = runtime.GetFormatTuple(pixel_format);
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));
tuple.format, tuple.type, 0);
const auto scaled_rect = upload.texture_rect * res_scale;
const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};

View File

@ -70,6 +70,10 @@ public:
/// Returns the OpenGL format tuple associated with the provided pixel format
const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format);
/// Performs required format conversions on the staging data
void FormatConvert(VideoCore::PixelFormat format, bool upload,
std::span<std::byte> source, std::span<std::byte> dest);
/// Allocates an OpenGL texture with the specified dimensions and format
OGLTexture Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type);
@ -124,7 +128,7 @@ private:
class Surface : public VideoCore::SurfaceBase<Surface> {
public:
Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime);
~Surface() override = default;
~Surface() override;
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging);

View File

@ -15,7 +15,6 @@
#include "core/settings.h"
#include "core/tracer/recorder.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state.h"
@ -381,6 +380,10 @@ VideoCore::ResultStatus RendererOpenGL::Init() {
return VideoCore::ResultStatus::Success;
}
/// Returns a non-owning pointer to the rasterizer held by this renderer
VideoCore::RasterizerInterface* RendererOpenGL::Rasterizer() {
return rasterizer.get();
}
/// Shutdown the renderer
void RendererOpenGL::ShutDown() {}
@ -570,7 +573,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
// only allows rows to have a memory alignment of 4.
ASSERT(pixel_stride % 4 == 0);
if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr,
if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr,
static_cast<u32>(pixel_stride), screen_info)) {
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
@ -1190,4 +1193,8 @@ void RendererOpenGL::CleanupVideoDumping() {
mailbox->free_cv.notify_one();
}
/// Forwards to SyncEntireState() on the rasterizer held by this renderer
void RendererOpenGL::Sync() {
rasterizer->SyncEntireState();
}
} // namespace OpenGL

View File

@ -55,29 +55,21 @@ struct PresentationTexture {
OGLTexture texture;
};
class RasterizerOpenGL;
class RendererOpenGL : public RendererBase {
public:
explicit RendererOpenGL(Frontend::EmuWindow& window);
~RendererOpenGL() override;
/// Initialize the renderer
VideoCore::ResultStatus Init() override;
/// Shutdown the renderer
VideoCore::RasterizerInterface* Rasterizer() override;
void ShutDown() override;
/// Finalizes rendering the guest frame
void SwapBuffers() override;
/// Draws the latest frame from texture mailbox to the currently bound draw framebuffer in this
/// context
void TryPresent(int timeout_ms) override;
/// Prepares for video dumping (e.g. create necessary buffers, etc)
void PrepareVideoDumping() override;
/// Cleans up after video dumping is ended
void CleanupVideoDumping() override;
void Sync() override;
private:
void InitOpenGLObjects();
@ -108,6 +100,7 @@ private:
private:
Driver driver;
OpenGLState state;
std::unique_ptr<RasterizerOpenGL> rasterizer;
// OpenGL object IDs
OGLVertexArray vertex_array;

View File

@ -0,0 +1,966 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/frontend/framebuffer_layout.h"
#include "core/hw/gpu.h"
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
#include "core/settings.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/video_core.h"
namespace Vulkan {
/// GLSL source of the vertex shader compiled for the presentation pipelines. It transforms
/// vert_position by the push-constant modelview_matrix, negates the resulting Y coordinate
/// and passes vert_tex_coord through to the fragment stage unchanged.
constexpr std::string_view vertex_shader = R"(
#version 450 core
#extension GL_ARB_separate_shader_objects : enable
layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec2 vert_tex_coord;
layout (location = 0) out vec2 frag_tex_coord;
// This is a truncated 3x3 matrix for 2D transformations:
// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
// The third column performs translation.
// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
// implicitly be [0, 0, 1]
layout (push_constant) uniform DrawInfo {
mat3x2 modelview_matrix;
vec4 i_resolution;
vec4 o_resolution;
int screen_id_l;
int screen_id_r;
int layer;
};
void main() {
// Multiply input position by the rotscale part of the matrix and then manually translate by
// the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
// to `vec3(vert_position.xy, 1.0)`
gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
gl_Position.y = -gl_Position.y;
frag_tex_coord = vert_tex_coord;
}
)";
/// GLSL source of the default presentation fragment shader. It samples the screen texture
/// selected by the push-constant screen_id_l at the interpolated texture coordinate.
/// The version directive must start with '#', otherwise the source is not valid GLSL.
constexpr std::string_view fragment_shader = R"(
#version 450 core
#extension GL_ARB_separate_shader_objects : enable
layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color;
layout (push_constant) uniform DrawInfo {
mat3x2 modelview_matrix;
vec4 i_resolution;
vec4 o_resolution;
int screen_id_l;
int screen_id_r;
int layer;
};
layout (set = 0, binding = 0) uniform sampler2D screen_textures[3];
void main() {
color = texture(screen_textures[screen_id_l], frag_tex_coord);
}
)";
/// GLSL source of the anaglyph presentation fragment shader. It samples the textures selected
/// by screen_id_l and screen_id_r and combines their RGB values through the constant matrices
/// l and r, keeping the alpha of the left sample.
/// The version directive must start with '#', otherwise the source is not valid GLSL.
constexpr std::string_view fragment_shader_anaglyph = R"(
#version 450 core
#extension GL_ARB_separate_shader_objects : enable
layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color;
// Anaglyph Red-Cyan shader based on Dubois algorithm
// Constants taken from the paper:
// "Conversion of a Stereo Pair to Anaglyph with
// the Least-Squares Projection Method"
// Eric Dubois, March 2009
const mat3 l = mat3( 0.437, 0.449, 0.164,
-0.062,-0.062,-0.024,
-0.048,-0.050,-0.017);
const mat3 r = mat3(-0.011,-0.032,-0.007,
0.377, 0.761, 0.009,
-0.026,-0.093, 1.234);
layout (push_constant) uniform DrawInfo {
mat3x2 modelview_matrix;
vec4 i_resolution;
vec4 o_resolution;
int screen_id_l;
int screen_id_r;
int layer;
};
layout (set = 0, binding = 0) uniform sampler2D screen_textures[3];
void main() {
vec4 color_tex_l = texture(screen_textures[screen_id_l], frag_tex_coord);
vec4 color_tex_r = texture(screen_textures[screen_id_r], frag_tex_coord);
color = vec4(color_tex_l.rgb*l+color_tex_r.rgb*r, color_tex_l.a);
}
)";
/// GLSL source of the interlaced presentation fragment shader. It computes
/// int(o_resolution.x * frag_tex_coord.x) % 2 and samples the screen_id_l texture when that
/// parity equals reverse_interlaced, otherwise the screen_id_r texture.
/// The version directive must start with '#', otherwise the source is not valid GLSL.
constexpr std::string_view fragment_shader_interlaced = R"(
#version 450 core
#extension GL_ARB_separate_shader_objects : enable
layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color;
layout (push_constant) uniform DrawInfo {
mat3x2 modelview_matrix;
vec4 i_resolution;
vec4 o_resolution;
int screen_id_l;
int screen_id_r;
int layer;
int reverse_interlaced;
};
layout (set = 0, binding = 0) uniform sampler2D screen_textures[3];
void main() {
float screen_row = o_resolution.x * frag_tex_coord.x;
if (int(screen_row) % 2 == reverse_interlaced)
color = texture(screen_textures[screen_id_l], frag_tex_coord);
else
color = texture(screen_textures[screen_id_r], frag_tex_coord);
}
)";
/// Vertex structure that the drawn screen rectangles are composed of.
/// Its size and member offsets are used as the vertex binding stride and attribute offsets
/// when the presentation pipelines are built, so the member order is part of the contract.
struct ScreenRectVertex {
ScreenRectVertex() = default;
/// Builds a vertex from a position (x, y) and a texture coordinate (u, v)
ScreenRectVertex(float x, float y, float u, float v) :
position{Common::MakeVec(x, y)}, tex_coord{Common::MakeVec(u, v)} {}
/// Vertex position, exposed to the vertex shader at attribute location 0
Common::Vec2f position;
/// Texture coordinate, exposed to the vertex shader at attribute location 1
Common::Vec2f tex_coord;
};
/// Byte size requested for the presentation vertex buffer: room for 64 ScreenRectVertex entries
constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 64;
/**
 * Builds a 1:1 pixel orthographic projection as a 3x2 matrix stored in column-major order.
 * The last row of the full 3x3 matrix is implicitly [0, 0, 1] and therefore omitted.
 *
 * X is always mapped from [0, width] to [-1, 1]. Without flipping, Y is mapped from
 * [0, height] to [1, -1]; with flipping it is mapped from [0, height] to [-1, 1].
 *
 * @param width Width of the target area in pixels
 * @param height Height of the target area in pixels
 * @param flipped Whether the frame should be flipped upside down.
 * @return The six matrix coefficients: {m00, m10, m01, m11, m02, m12}
 */
static std::array<float, 3 * 2> MakeOrthographicMatrix(float width, float height, bool flipped) {
    const float scale_x = 2.f / width;
    const float scale_y = flipped ? (2.f / height) : (-2.f / height);
    const float translate_y = flipped ? -1.f : 1.f;

    // Columns: X scale, Y scale, translation
    return {scale_x, 0.f, 0.f, scale_y, -1.f, translate_y};
}
/// Constructs the renderer's Vulkan members for the given window, registers GPU telemetry
/// fields and clears the window's mailbox pointer.
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
: RendererBase{window}, instance{window}, scheduler{instance}, renderpass_cache{instance, scheduler},
runtime{instance, scheduler, renderpass_cache}, swapchain{instance, renderpass_cache},
vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}} {
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
// NOTE(review): vendor, model and API version below are hard-coded string literals, not
// values read from the selected device - TODO report the real device properties
telemetry_session.AddField(user_system, "GPU_Vendor", "NVIDIA");
telemetry_session.AddField(user_system, "GPU_Model", "GTX 1650");
telemetry_session.AddField(user_system, "GPU_Vulkan_Version", "Vulkan 1.1");
window.mailbox = nullptr;
}
/// Destroys the presentation objects held by the renderer: pipeline layout, descriptor set
/// layout, descriptor update template, shader modules, pipelines and samplers.
RendererVulkan::~RendererVulkan() {
    vk::Device device = instance.GetDevice();

    // Layout objects and the shared vertex stage
    device.destroyPipelineLayout(present_pipeline_layout);
    device.destroyDescriptorSetLayout(present_descriptor_layout);
    device.destroyDescriptorUpdateTemplate(present_update_template);
    device.destroyShaderModule(present_vertex_shader);

    // Each presentation pipeline together with its fragment shader module
    for (u32 index = 0; index < PRESENT_PIPELINES; ++index) {
        device.destroyPipeline(present_pipelines[index]);
        device.destroyShaderModule(present_shaders[index]);
    }

    for (const auto& sampler : present_samplers) {
        device.destroySampler(sampler);
    }
}
/// Initializes the renderer: compiles the presentation shaders, builds the descriptor and
/// pipeline layouts, builds the presentation pipelines and creates the rasterizer.
/// @return Always VideoCore::ResultStatus::Success
VideoCore::ResultStatus RendererVulkan::Init() {
// The pipelines reference the shader modules and the pipeline layout, so they are built last
CompileShaders();
BuildLayouts();
BuildPipelines();
// Create the rasterizer
rasterizer = std::make_unique<RasterizerVulkan>(render_window, instance, scheduler,
runtime, renderpass_cache);
return VideoCore::ResultStatus::Success;
}
/// Returns a non-owning pointer to the rasterizer held by this renderer
VideoCore::RasterizerInterface* RendererVulkan::Rasterizer() {
return rasterizer.get();
}
/// Shutdown hook of the renderer; intentionally empty here
void RendererVulkan::ShutDown() {}
/// Forwards to SyncEntireState() on the rasterizer held by this renderer
void RendererVulkan::Sync() {
rasterizer->SyncEntireState();
}
void RendererVulkan::PrepareRendertarget() {
for (int i = 0; i < 3; i++) {
int fb_id = i == 2 ? 1 : 0;
const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id];
// Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
u32 lcd_color_addr =
(fb_id == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom);
lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr;
LCD::Regs::ColorFill color_fill = {0};
LCD::Read(color_fill.raw, lcd_color_addr);
if (color_fill.is_enabled) {
LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b,
screen_infos[i].texture);
} else {
if (screen_infos[i].texture.width != framebuffer.width ||
screen_infos[i].texture.height != framebuffer.height ||
screen_infos[i].texture.format != framebuffer.color_format) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer);
}
LoadFBToScreenInfo(framebuffer, screen_infos[i], i == 1);
// Resize the texture in case the framebuffer size has changed
screen_infos[i].texture.width = framebuffer.width;
screen_infos[i].texture.height = framebuffer.height;
}
}
}
void RendererVulkan::BeginRendering() {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipelines[current_pipeline]);
for (std::size_t i = 0; i < screen_infos.size(); i++) {
runtime.Transition(command_buffer, screen_infos[i].display_texture,
vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1);
}
const std::array present_textures = {
vk::DescriptorImageInfo{
.sampler = present_samplers[current_sampler],
.imageView = screen_infos[0].display_texture.image_view,
.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal
},
vk::DescriptorImageInfo{
.sampler = present_samplers[current_sampler],
.imageView = screen_infos[1].display_texture.image_view,
.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal
},
vk::DescriptorImageInfo{
.sampler = present_samplers[current_sampler],
.imageView = screen_infos[2].display_texture.image_view,
.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal
},
};
const vk::DescriptorSetAllocateInfo alloc_info = {
.descriptorPool = scheduler.GetDescriptorPool(),
.descriptorSetCount = 1,
.pSetLayouts = &present_descriptor_layout
};
vk::Device device = instance.GetDevice();
vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0];
device.updateDescriptorSetWithTemplate(set, present_update_template, present_textures.data());
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout,
0, 1, &set, 0, nullptr);
}
/**
 * Selects the guest framebuffer address to display and asks the rasterizer to provide it
 * through AccelerateDisplay. The non-accelerated fallback is not implemented yet, so a
 * failed acceleration trips an assertion.
 *
 * @param framebuffer Framebuffer configuration registers of the screen
 * @param screen_info Screen info handed to the rasterizer
 * @param right_eye Whether the right-eye image is requested; ignored when either right-eye
 *                  address is zero
 */
void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
                                        ScreenInfo& screen_info, bool right_eye) {
    // The right eye can only be used when both of its buffers have an address
    const bool right_available =
        framebuffer.address_right1 != 0 && framebuffer.address_right2 != 0;
    const bool use_right = right_eye && right_available;

    // active_fb picks between the first and the second buffer of the chosen eye
    PAddr framebuffer_addr;
    if (framebuffer.active_fb == 0) {
        framebuffer_addr = use_right ? framebuffer.address_right1 : framebuffer.address_left1;
    } else {
        framebuffer_addr = use_right ? framebuffer.address_right2 : framebuffer.address_left2;
    }

    LOG_TRACE(Render_Vulkan, "0x{:08x} bytes from 0x{:08x}({}x{}), fmt {:x}",
              framebuffer.stride * framebuffer.height, framebuffer_addr, framebuffer.width.Value(),
              framebuffer.height.Value(), framebuffer.format);

    int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
    std::size_t pixel_stride = framebuffer.stride / bpp;

    // The stride must be a whole number of pixels, since it is passed on in pixel units
    ASSERT(pixel_stride * bpp == framebuffer.stride);

    // Rows are required to keep a memory alignment of 4
    // (the original note refers to GL_UNPACK_ALIGNMENT)
    ASSERT(pixel_stride % 4 == 0);

    const bool accelerated = rasterizer->AccelerateDisplay(
        framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info);
    if (!accelerated) {
        ASSERT(false);
        // Reset the screen info's display texture to its own permanent texture
        /*screen_info.display_texture = &screen_info.texture;
        screen_info.display_texcoords = Common::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
        Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height);
        vk::Rect2D region{{0, 0}, {framebuffer.width, framebuffer.height}};
        std::span<u8> framebuffer_data(VideoCore::g_memory->GetPhysicalPointer(framebuffer_addr),
        screen_info.texture.GetSize());
        screen_info.texture.Upload(0, 1, pixel_stride, region, framebuffer_data);*/
    }
}
/**
 * Records a clear of the given texture's image to a solid, fully opaque color.
 *
 * @param color_r Red component, 0-255
 * @param color_g Green component, 0-255
 * @param color_b Blue component, 0-255
 * @param texture Texture whose image is cleared
 */
void RendererVulkan::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture) {
    // Normalize the 8-bit components to [0, 1]; alpha is always 1
    const vk::ClearColorValue fill_value = {
        .float32 = std::array<float, 4>{color_r / 255.0f, color_g / 255.0f, color_b / 255.0f, 1}
    };

    // First mip level and first array layer of the color aspect only
    const vk::ImageSubresourceRange whole_image = {
        .aspectMask = vk::ImageAspectFlagBits::eColor,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = 1,
    };

    // NOTE(review): the clear is recorded with the image in the shader-read-only layout;
    // confirm against the vkCmdClearColorImage layout requirements.
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    command_buffer.clearColorImage(texture.alloc.image, vk::ImageLayout::eShaderReadOnlyOptimal,
                                   fill_value, whole_image);
}
void RendererVulkan::CompileShaders() {
vk::Device device = instance.GetDevice();
present_vertex_shader = Compile(vertex_shader, vk::ShaderStageFlagBits::eVertex,
device, ShaderOptimization::Debug);
present_shaders[0] = Compile(fragment_shader, vk::ShaderStageFlagBits::eFragment,
device, ShaderOptimization::Debug);
present_shaders[1] = Compile(fragment_shader_anaglyph, vk::ShaderStageFlagBits::eFragment,
device, ShaderOptimization::Debug);
present_shaders[2] = Compile(fragment_shader_interlaced, vk::ShaderStageFlagBits::eFragment,
device, ShaderOptimization::Debug);
auto properties = instance.GetPhysicalDevice().getProperties();
for (std::size_t i = 0; i < present_samplers.size(); i++) {
const vk::Filter filter_mode = i == 0 ? vk::Filter::eLinear : vk::Filter::eNearest;
const vk::SamplerCreateInfo sampler_info = {
.magFilter = filter_mode,
.minFilter = filter_mode,
.mipmapMode = vk::SamplerMipmapMode::eLinear,
.addressModeU = vk::SamplerAddressMode::eClampToEdge,
.addressModeV = vk::SamplerAddressMode::eClampToEdge,
.anisotropyEnable = true,
.maxAnisotropy = properties.limits.maxSamplerAnisotropy,
.compareEnable = false,
.compareOp = vk::CompareOp::eAlways,
.borderColor = vk::BorderColor::eIntOpaqueBlack,
.unnormalizedCoordinates = false
};
present_samplers[i] = device.createSampler(sampler_info);
}
}
/// Creates the presentation descriptor set layout (three combined image samplers visible to
/// the fragment stage), the descriptor update template that fills it from an array of
/// vk::DescriptorImageInfo, and the pipeline layout with its push constant range.
void RendererVulkan::BuildLayouts() {
    vk::Device device = instance.GetDevice();

    const vk::DescriptorSetLayoutBinding present_layout_binding = {
        .binding = 0,
        .descriptorType = vk::DescriptorType::eCombinedImageSampler,
        .descriptorCount = 3,
        .stageFlags = vk::ShaderStageFlagBits::eFragment
    };

    const vk::DescriptorSetLayoutCreateInfo present_layout_info = {
        .bindingCount = 1,
        .pBindings = &present_layout_binding
    };

    // The layout has to exist before the template create info is filled in: the create info
    // stores the handle by value, so building it earlier would capture a not-yet-created handle.
    present_descriptor_layout = device.createDescriptorSetLayout(present_layout_info);

    // One entry covering all three image infos, tightly packed in the source array
    const vk::DescriptorUpdateTemplateEntry update_template_entry = {
        .dstBinding = 0,
        .dstArrayElement = 0,
        .descriptorCount = 3,
        .descriptorType = vk::DescriptorType::eCombinedImageSampler,
        .offset = 0,
        .stride = sizeof(vk::DescriptorImageInfo)
    };

    const vk::DescriptorUpdateTemplateCreateInfo template_info = {
        .descriptorUpdateEntryCount = 1,
        .pDescriptorUpdateEntries = &update_template_entry,
        .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
        .descriptorSetLayout = present_descriptor_layout
    };

    present_update_template = device.createDescriptorUpdateTemplate(template_info);

    // The push constants are declared in both the vertex and the fragment shaders
    const vk::PushConstantRange push_range = {
        .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
        .offset = 0,
        .size = sizeof(PresentUniformData),
    };

    const vk::PipelineLayoutCreateInfo layout_info = {
        .setLayoutCount = 1,
        .pSetLayouts = &present_descriptor_layout,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &push_range
    };

    present_pipeline_layout = device.createPipelineLayout(layout_info);
}
void RendererVulkan::BuildPipelines() {
const vk::VertexInputBindingDescription binding = {
.binding = 0,
.stride = sizeof(ScreenRectVertex),
.inputRate = vk::VertexInputRate::eVertex
};
const std::array attributes = {
vk::VertexInputAttributeDescription{
.location = 0,
.binding = 0,
.format = vk::Format::eR32G32Sfloat,
.offset = offsetof(ScreenRectVertex, position)
},
vk::VertexInputAttributeDescription{
.location = 1,
.binding = 0,
.format = vk::Format::eR32G32Sfloat,
.offset = offsetof(ScreenRectVertex, tex_coord)
}
};
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = &binding,
.vertexAttributeDescriptionCount = static_cast<u32>(attributes.size()),
.pVertexAttributeDescriptions = attributes.data()
};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = vk::PrimitiveTopology::eTriangleStrip,
.primitiveRestartEnable = false
};
const vk::PipelineRasterizationStateCreateInfo raster_state = {
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.cullMode = vk::CullModeFlagBits::eNone,
.frontFace = vk::FrontFace::eClockwise,
.depthBiasEnable = false,
.lineWidth = 1.0f
};
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.sampleShadingEnable = false
};
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
.blendEnable = false,
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
};
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = false,
.attachmentCount = 1,
.pAttachments = &colorblend_attachment,
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}
};
const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}};
const vk::PipelineViewportStateCreateInfo viewport_info = {
.viewportCount = 1,
.pViewports = &placeholder_viewport,
.scissorCount = 1,
.pScissors = &placeholder_scissor,
};
const std::array dynamic_states = {
vk::DynamicState::eViewport,
vk::DynamicState::eScissor
};
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
.pDynamicStates = dynamic_states.data()
};
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
.depthTestEnable = false,
.depthWriteEnable = false,
.depthCompareOp = vk::CompareOp::eAlways,
.depthBoundsTestEnable = false,
.stencilTestEnable = false
};
for (u32 i = 0; i < PRESENT_PIPELINES; i++) {
const std::array shader_stages = {
vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eVertex,
.module = present_vertex_shader,
.pName = "main"
},
vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eFragment,
.module = present_shaders[i],
.pName = "main"
},
};
const vk::GraphicsPipelineCreateInfo pipeline_info = {
.stageCount = static_cast<u32>(shader_stages.size()),
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_info,
.pInputAssemblyState = &input_assembly,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = present_pipeline_layout,
.renderPass = renderpass_cache.GetPresentRenderpass()
};
vk::Device device = instance.GetDevice();
if (const auto result = device.createGraphicsPipeline({}, pipeline_info);
result.result == vk::Result::eSuccess) {
present_pipelines[i] = result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "Unable to build present pipelines");
UNREACHABLE();
}
}
}
/// Picks the present sampler index from the filter_mode setting:
/// index 0 when the setting is enabled, index 1 when it is disabled.
void RendererVulkan::ReloadSampler() {
    current_sampler = Settings::values.filter_mode ? 0 : 1;
}
void RendererVulkan::ReloadPipeline() {
switch (Settings::values.render_3d) {
case Settings::StereoRenderOption::Anaglyph:
current_pipeline = 1;
break;
case Settings::StereoRenderOption::Interlaced:
case Settings::StereoRenderOption::ReverseInterlaced:
current_pipeline = 2;
draw_info.reverse_interlaced =
Settings::values.render_3d == Settings::StereoRenderOption::ReverseInterlaced;
break;
default:
current_pipeline = 0;
break;
}
}
/// Records the format and dimensions of the emulated framebuffer in the texture info and
/// allocates a matching 2D texture from the texture runtime.
void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture,
                                                 const GPU::Regs::FramebufferConfig& framebuffer) {
    // Translate the GPU framebuffer format into the format used by the texture runtime
    const auto pixel_format =
        VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format);

    texture.format = framebuffer.color_format;
    texture.width = framebuffer.width;
    texture.height = framebuffer.height;
    texture.alloc = runtime.Allocate(framebuffer.width, framebuffer.height, pixel_format,
                                     VideoCore::TextureType::Texture2D);
}
/// Draws the texture of one screen into the rectangle (x, y, w, h) with rotated texture
/// coordinates. Uploads the quad to the vertex stream buffer, pushes the present uniforms and
/// records a four vertex draw on the render command buffer.
void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, float w, float h) {
    const ScreenInfo& screen = screen_infos[screen_id];
    const auto& uv = screen.display_texcoords;

    const float right = x + w;
    const float bottom = y + h;
    const std::array quad = {
        ScreenRectVertex{x, y, uv.bottom, uv.left},
        ScreenRectVertex{right, y, uv.bottom, uv.right},
        ScreenRectVertex{x, bottom, uv.top, uv.left},
        ScreenRectVertex{right, bottom, uv.top, uv.right},
    };

    // Stream the quad into the vertex buffer
    const u32 quad_bytes = static_cast<u32>(quad.size() * sizeof(ScreenRectVertex));
    auto [mapped, buffer_offset, invalidated] = vertex_buffer.Map(quad_bytes);
    std::memcpy(mapped, quad.data(), quad_bytes);
    vertex_buffer.Commit(quad_bytes);

    // Input resolution is the screen texture size multiplied by the resolution scale factor
    const u16 scale = VideoCore::GetResolutionScaleFactor();
    const float scaled_width = static_cast<float>(screen.texture.width) * scale;
    const float scaled_height = static_cast<float>(screen.texture.height) * scale;
    draw_info.i_resolution = Common::Vec4f{scaled_width, scaled_height,
                                           1.0f / scaled_width, 1.0f / scaled_height};

    // Because this is the rotated draw, the output dimensions are passed swapped. An upright
    // draw is expected to use the regular (w, h, 1.0 / w, 1.0 / h) ordering instead.
    draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w};
    draw_info.screen_id_l = screen_id;

    const auto stages = vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex;
    vk::CommandBuffer cmdbuf = scheduler.GetRenderCommandBuffer();
    cmdbuf.pushConstants(present_pipeline_layout, stages, 0, sizeof(draw_info), &draw_info);
    cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});

    // The buffer is bound at offset zero, so the quad is addressed through the first vertex index
    cmdbuf.draw(4, 1, buffer_offset / sizeof(ScreenRectVertex), 0);
}
/// Draws the texture of one screen upright (non-rotated) into the rectangle (x, y, w, h).
/// @param screen_id Index into screen_infos of the screen to draw
/// @param x Left edge of the destination rectangle
/// @param y Top edge of the destination rectangle
/// @param w Width of the destination rectangle
/// @param h Height of the destination rectangle
void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, float h) {
    auto& screen_info = screen_infos[screen_id];
    const auto& texcoords = screen_info.display_texcoords;

    // Upload the four corners of the quad to the vertex stream buffer
    const u32 size = sizeof(ScreenRectVertex) * 4;
    auto [ptr, offset, invalidate] = vertex_buffer.Map(size);

    const std::array vertices = {
        ScreenRectVertex{x, y, texcoords.bottom, texcoords.right},
        ScreenRectVertex{x + w, y, texcoords.top, texcoords.right},
        ScreenRectVertex{x, y + h, texcoords.bottom, texcoords.left},
        ScreenRectVertex{x + w, y + h, texcoords.top, texcoords.left},
    };

    std::memcpy(ptr, vertices.data(), size);
    vertex_buffer.Commit(size);

    // Input resolution is the screen texture size multiplied by the resolution scale factor
    const u16 scale_factor = VideoCore::GetResolutionScaleFactor();
    const float width = static_cast<float>(screen_info.texture.width);
    const float height = static_cast<float>(screen_info.texture.height);

    draw_info.i_resolution = Common::Vec4f{width * scale_factor, height * scale_factor,
                                           1.0f / (width * scale_factor),
                                           1.0f / (height * scale_factor)};

    // Upright draw: the output resolution uses the standard (w, h, 1 / w, 1 / h) ordering,
    // matching DrawSingleScreenStereo. Only the rotated draws swap width and height.
    draw_info.o_resolution = Common::Vec4f{w, h, 1.0f / w, 1.0f / h};
    draw_info.screen_id_l = screen_id;

    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    command_buffer.pushConstants(present_pipeline_layout,
                                 vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex,
                                 0, sizeof(draw_info), &draw_info);

    // The buffer is bound at offset zero, so the quad is addressed through the first vertex index
    command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
    command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
}
/// Draws a stereo pair (left and right eye screens) in a single rotated quad covering
/// (x, y, w, h). Both screen ids are handed to the shader through the present uniforms; the
/// texture coordinates and input resolution are taken from the left eye screen.
void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r,
                                                   float x, float y, float w, float h) {
    const ScreenInfo& left_eye = screen_infos[screen_id_l];
    const auto& uv = left_eye.display_texcoords;

    const float right = x + w;
    const float bottom = y + h;
    const std::array quad = {
        ScreenRectVertex{x, y, uv.bottom, uv.left},
        ScreenRectVertex{right, y, uv.bottom, uv.right},
        ScreenRectVertex{x, bottom, uv.top, uv.left},
        ScreenRectVertex{right, bottom, uv.top, uv.right}
    };

    // Stream the quad into the vertex buffer
    const u32 quad_bytes = static_cast<u32>(quad.size() * sizeof(ScreenRectVertex));
    auto [mapped, buffer_offset, invalidated] = vertex_buffer.Map(quad_bytes);
    std::memcpy(mapped, quad.data(), quad_bytes);
    vertex_buffer.Commit(quad_bytes);

    // Input resolution is the left eye texture size multiplied by the resolution scale factor
    const u16 scale = VideoCore::GetResolutionScaleFactor();
    const float scaled_width = static_cast<float>(left_eye.texture.width) * scale;
    const float scaled_height = static_cast<float>(left_eye.texture.height) * scale;
    draw_info.i_resolution = Common::Vec4f{scaled_width, scaled_height,
                                           1.0f / scaled_width, 1.0f / scaled_height};

    // Rotated draw: width and height of the output are passed swapped
    draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w};
    draw_info.screen_id_l = screen_id_l;
    draw_info.screen_id_r = screen_id_r;

    const auto stages = vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex;
    vk::CommandBuffer cmdbuf = scheduler.GetRenderCommandBuffer();
    cmdbuf.pushConstants(present_pipeline_layout, stages, 0, sizeof(draw_info), &draw_info);
    cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
    cmdbuf.draw(4, 1, buffer_offset / sizeof(ScreenRectVertex), 0);
}
/// Draws a stereo pair (left and right eye screens) in a single upright quad covering
/// (x, y, w, h). Both screen ids are handed to the shader through the present uniforms; the
/// texture coordinates and input resolution are taken from the left eye screen.
void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r,
                                            float x, float y, float w, float h) {
    const ScreenInfo& left_eye = screen_infos[screen_id_l];
    const auto& uv = left_eye.display_texcoords;

    const float right = x + w;
    const float bottom = y + h;
    const std::array quad = {
        ScreenRectVertex{x, y, uv.bottom, uv.right},
        ScreenRectVertex{right, y, uv.top, uv.right},
        ScreenRectVertex{x, bottom, uv.bottom, uv.left},
        ScreenRectVertex{right, bottom, uv.top, uv.left},
    };

    // Stream the quad into the vertex buffer
    const u32 quad_bytes = static_cast<u32>(quad.size() * sizeof(ScreenRectVertex));
    auto [mapped, buffer_offset, invalidated] = vertex_buffer.Map(quad_bytes);
    std::memcpy(mapped, quad.data(), quad_bytes);
    vertex_buffer.Commit(quad_bytes);

    // Input resolution is the left eye texture size multiplied by the resolution scale factor
    const u16 scale = VideoCore::GetResolutionScaleFactor();
    const float scaled_width = static_cast<float>(left_eye.texture.width) * scale;
    const float scaled_height = static_cast<float>(left_eye.texture.height) * scale;
    draw_info.i_resolution = Common::Vec4f{scaled_width, scaled_height,
                                           1.0f / scaled_width, 1.0f / scaled_height};

    // Upright draw: output dimensions are passed in their natural order
    draw_info.o_resolution = Common::Vec4f{w, h, 1.0f / w, 1.0f / h};
    draw_info.screen_id_l = screen_id_l;
    draw_info.screen_id_r = screen_id_r;

    const auto stages = vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex;
    vk::CommandBuffer cmdbuf = scheduler.GetRenderCommandBuffer();
    cmdbuf.pushConstants(present_pipeline_layout, stages, 0, sizeof(draw_info), &draw_info);
    cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
    cmdbuf.draw(4, 1, buffer_offset / sizeof(ScreenRectVertex), 0);
}
/// Draws the enabled top and bottom screens of the given layout.
/// Pending background colour, sampler and shader update requests are applied first. Each
/// screen is then drawn according to the current stereo render option:
///  - Off: one draw of the left eye screen
///  - SideBySide: both eyes at half width, the second shifted by half the layout width
///  - CardboardVR: both eyes at full size, the second at the cardboard right eye position
///  - Anaglyph / Interlaced / ReverseInterlaced: one combined stereo draw
void RendererVulkan::DrawScreens(const Layout::FramebufferLayout& layout, bool flipped) {
    if (VideoCore::g_renderer_bg_color_update_requested.exchange(false)) {
        // Refresh the clear colour from the settings before drawing
        clear_color.float32[0] = Settings::values.bg_red;
        clear_color.float32[1] = Settings::values.bg_green;
        clear_color.float32[2] = Settings::values.bg_blue;
    }

    if (VideoCore::g_renderer_sampler_update_requested.exchange(false)) {
        // Pick up the new filtering mode
        ReloadSampler();
    }

    if (VideoCore::g_renderer_shader_update_requested.exchange(false)) {
        ReloadPipeline();
    }

    const auto& top_screen = layout.top_screen;
    const auto& bottom_screen = layout.bottom_screen;

    // Projection matrix covering the whole layout
    draw_info.modelview =
        MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height), flipped);

    const auto mode_is = [](Settings::StereoRenderOption option) {
        return Settings::values.render_3d == option;
    };

    const bool stereo_single_screen = mode_is(Settings::StereoRenderOption::Anaglyph) ||
                                      mode_is(Settings::StereoRenderOption::Interlaced) ||
                                      mode_is(Settings::StereoRenderOption::ReverseInterlaced);

    // Bind necessary state before drawing the screens
    BeginRendering();

    // Draws a single eye, choosing the rotated or upright helper from the layout
    const auto draw_eye = [&](u32 screen_id, float x, float y, float w, float h) {
        if (layout.is_rotated) {
            DrawSingleScreenRotated(screen_id, x, y, w, h);
        } else {
            DrawSingleScreen(screen_id, x, y, w, h);
        }
    };

    // Draws both eyes in one quad, choosing the rotated or upright helper from the layout
    const auto draw_stereo = [&](u32 id_l, u32 id_r, float x, float y, float w, float h) {
        if (layout.is_rotated) {
            DrawSingleScreenStereoRotated(id_l, id_r, x, y, w, h);
        } else {
            DrawSingleScreenStereo(id_l, id_r, x, y, w, h);
        }
    };

    // Draws one physical screen (rect) for the active stereo mode. cardboard_right_x is the
    // horizontal position of the second eye in CardboardVR mode.
    const auto draw_screen = [&](const auto& rect, u32 id_l, u32 id_r, float cardboard_right_x) {
        const float x = static_cast<float>(rect.left);
        const float y = static_cast<float>(rect.top);
        const float w = static_cast<float>(rect.GetWidth());
        const float h = static_cast<float>(rect.GetHeight());

        if (mode_is(Settings::StereoRenderOption::Off)) {
            draw_eye(id_l, x, y, w, h);
        } else if (mode_is(Settings::StereoRenderOption::SideBySide)) {
            draw_eye(id_l, x / 2, y, w / 2, h);
            draw_info.layer = 1;
            draw_eye(id_r, (x / 2) + (static_cast<float>(layout.width) / 2), y, w / 2, h);
        } else if (mode_is(Settings::StereoRenderOption::CardboardVR)) {
            draw_eye(id_l, x, y, w, h);
            draw_info.layer = 1;
            draw_eye(id_r, cardboard_right_x, y, w, h);
        } else if (stereo_single_screen) {
            draw_stereo(id_l, id_r, x, y, w, h);
        }
    };

    // Top screen: screen 0 holds the left eye, screen 1 the right eye
    draw_info.layer = 0;
    if (layout.top_screen_enabled) {
        draw_screen(top_screen, 0, 1,
                    layout.cardboard.top_screen_right_eye +
                        (static_cast<float>(layout.width) / 2));
    }

    // Bottom screen: screen 2 is used for both eyes
    draw_info.layer = 0;
    if (layout.bottom_screen_enabled) {
        draw_screen(bottom_screen, 2, 2,
                    layout.cardboard.bottom_screen_right_eye +
                        (static_cast<float>(layout.width) / 2));
    }
}
/// Presents the current frame: prepares the render target, (re)creates the swapchain when
/// required, records the present renderpass that draws the screens and submits the work.
void RendererVulkan::SwapBuffers() {
    const auto& layout = render_window.GetFramebufferLayout();
    PrepareRendertarget();

    // Create swapchain if needed
    if (swapchain.NeedsRecreation()) {
        swapchain.Create(layout.width, layout.height, false);
    }

    const vk::Viewport viewport = {
        .x = 0.0f,
        .y = 0.0f,
        .width = static_cast<float>(layout.width),
        .height = static_cast<float>(layout.height),
        .minDepth = 0.0f,
        .maxDepth = 1.0f
    };

    const vk::Rect2D scissor = {
        .offset = {0, 0},
        .extent = {layout.width, layout.height}
    };

    const vk::ClearValue clear_value = {
        .color = clear_color
    };

    // The render area must cover everything that is cleared and drawn. Left at its default it
    // is a zero-sized rectangle, so it is set to the full layout here (same as the scissor).
    const vk::RenderPassBeginInfo begin_info = {
        .renderPass = renderpass_cache.GetPresentRenderpass(),
        .framebuffer = swapchain.GetFramebuffer(),
        .renderArea = scissor,
        .clearValueCount = 1,
        .pClearValues = &clear_value,
    };

    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    command_buffer.setViewport(0, viewport);
    command_buffer.setScissor(0, scissor);
    command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline);

    DrawScreens(layout, false);

    // Flush all buffers to make the data visible to the GPU before submitting
    vertex_buffer.Flush();
    rasterizer->FlushBuffers();

    command_buffer.endRenderPass();
    scheduler.Submit(false, true, swapchain.GetAvailableSemaphore(), swapchain.GetPresentSemaphore());

    // Inform texture runtime about the switch
    runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
}
} // namespace Vulkan

View File

@ -0,0 +1,128 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/hw/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Layout {
struct FramebufferLayout;
}
namespace Vulkan {
/// Structure used for storing information about the textures for each 3DS screen.
/// All members are filled in by RendererVulkan::ConfigureFramebufferTexture; none of them has
/// a default initializer.
struct TextureInfo {
    /// Allocation obtained from TextureRuntime::Allocate for the framebuffer dimensions
    ImageAlloc alloc;
    /// Width copied from the framebuffer configuration
    u32 width;
    /// Height copied from the framebuffer configuration
    u32 height;
    /// Colour format copied from the framebuffer configuration
    GPU::Regs::PixelFormat format;
};
/// Structure used for storing information about the display target for each 3DS screen
struct ScreenInfo {
    ImageAlloc display_texture;
    /// Texture coordinate rectangle; its left/right/top/bottom values become the texture
    /// coordinates of the quad drawn for this screen
    Common::Rectangle<float> display_texcoords;
    /// Source texture description; its width and height feed the input resolution uniform
    TextureInfo texture;
    vk::Sampler sampler;
};
/// Uniform data used for presenting the 3DS screens.
/// The renderer uploads the whole structure as push constants before every screen draw, so
/// its byte layout has to match the push constant block of the present shaders.
/// NOTE(review): i_resolution directly follows the 6 float modelview here; confirm that the
/// shaders declare the same member offsets.
struct PresentUniformData {
    /// Orthographic projection for the current layout
    std::array<float, 3 * 2> modelview;
    /// Scaled source texture size as (width, height, 1 / width, 1 / height)
    Common::Vec4f i_resolution;
    /// Output rectangle size and its reciprocals; width and height are swapped for rotated draws
    Common::Vec4f o_resolution;
    /// Index of the screen to draw (left eye for stereo draws)
    int screen_id_l = 0;
    /// Index of the right eye screen for stereo draws
    int screen_id_r = 0;
    /// 0 for the first eye, 1 for the second eye of side by side and cardboard draws
    int layer = 0;
    /// Non-zero when the reverse interlaced stereo mode is selected
    int reverse_interlaced = 0;

    // Returns an immutable byte view of the uniform data
    auto AsBytes() const {
        return std::as_bytes(std::span{this, 1});
    }
};

// The structure is used as a push constant range and Vulkan only guarantees 128 bytes of push
// constant storage, so anything above that limit is not portable.
static_assert(sizeof(PresentUniformData) <= 128,
              "PresentUniformData must fit in 128 bytes of push constants!");
/// Number of present pipelines (Normal, Anaglyph, Interlaced)
constexpr u32 PRESENT_PIPELINES = 3;

class RasterizerVulkan;

/// Vulkan implementation of the renderer: owns the Vulkan objects used for presentation and
/// draws the emulated screens to the swapchain.
class RendererVulkan : public RendererBase {
public:
    /// Explicit to avoid accidental implicit conversions from an EmuWindow
    explicit RendererVulkan(Frontend::EmuWindow& window);
    ~RendererVulkan() override;

    VideoCore::ResultStatus Init() override;
    VideoCore::RasterizerInterface* Rasterizer() override;
    void ShutDown() override;

    /// Draws the screens for the current layout and submits the frame
    void SwapBuffers() override;

    // The following hooks are intentionally empty in this renderer
    void TryPresent(int timeout_ms) override {}
    void PrepareVideoDumping() override {}
    void CleanupVideoDumping() override {}
    void Sync() override;

private:
    /// Selects the present sampler index from the filter mode setting
    void ReloadSampler();
    /// Selects the present pipeline index from the stereo render setting
    void ReloadPipeline();
    void CompileShaders();
    void BuildLayouts();
    /// Creates one present pipeline per present fragment shader
    void BuildPipelines();
    void ConfigureRenderPipeline();
    void PrepareRendertarget();
    void BeginRendering();
    /// Stores the framebuffer format and size in the texture info and allocates a texture for it
    void ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer);

    /// Draws the enabled screens of the layout according to the stereo render setting
    void DrawScreens(const Layout::FramebufferLayout& layout, bool flipped);
    void DrawSingleScreenRotated(u32 screen_id, float x, float y, float w, float h);
    void DrawSingleScreen(u32 screen_id, float x, float y, float w, float h);
    void DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h);
    void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h);

    void UpdateFramerate();

    /// Loads framebuffer from emulated memory into the display information structure
    void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
                            ScreenInfo& screen_info, bool right_eye);

    /// Fills the given texture with the given RGB color.
    /// NOTE(review): the name still mentions GL, presumably carried over from the OpenGL renderer
    void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture);

private:
    // Declaration order is significant: members are constructed in this order
    Instance instance;
    TaskScheduler scheduler;
    RenderpassCache renderpass_cache;
    TextureRuntime runtime;
    Swapchain swapchain;
    std::unique_ptr<RasterizerVulkan> rasterizer;
    StreamBuffer vertex_buffer;

    // Present pipelines (Normal, Anaglyph, Interlaced)
    vk::PipelineLayout present_pipeline_layout;
    vk::DescriptorSetLayout present_descriptor_layout;
    vk::DescriptorUpdateTemplate present_update_template;
    std::array<vk::Pipeline, PRESENT_PIPELINES> present_pipelines;
    std::array<vk::DescriptorSet, PRESENT_PIPELINES> present_descriptor_sets;
    std::array<vk::ShaderModule, PRESENT_PIPELINES> present_shaders;
    std::array<vk::Sampler, 2> present_samplers;
    vk::ShaderModule present_vertex_shader;
    u32 current_pipeline = 0;
    u32 current_sampler = 0;

    /// Display information for the top screen left eye (0), top screen right eye (1) and
    /// bottom screen (2)
    std::array<ScreenInfo, 3> screen_infos;
    PresentUniformData draw_info{};
    vk::ClearColorValue clear_color{};
};
} // namespace Vulkan

View File

@ -139,6 +139,8 @@ PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler,
descriptor_dirty.fill(true);
LoadDiskCache();
trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::Debug);
}
PipelineCache::~PipelineCache() {
@ -152,6 +154,18 @@ PipelineCache::~PipelineCache() {
device.destroyDescriptorUpdateTemplate(update_templates[i]);
}
for (auto& [key, module] : programmable_vertex_shaders.shader_cache) {
device.destroyShaderModule(module);
}
for (auto& [key, module] : fixed_geometry_shaders.shaders) {
device.destroyShaderModule(module);
}
for (auto& [key, module] : fragment_shaders.shaders) {
device.destroyShaderModule(module);
}
for (const auto& [hash, pipeline] : graphics_pipelines) {
device.destroyPipeline(pipeline);
}
@ -224,7 +238,7 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
shader_hashes[ProgramType::FS] = config.Hash();
}
void PipelineCache::BindTexture(u32 set, u32 descriptor, vk::ImageView image_view) {
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) {
const DescriptorData data = {
.image_info = vk::DescriptorImageInfo{
.imageView = image_view,
@ -232,10 +246,21 @@ void PipelineCache::BindTexture(u32 set, u32 descriptor, vk::ImageView image_vie
}
};
SetBinding(set, descriptor, data);
SetBinding(1, binding, data);
}
void PipelineCache::BindBuffer(u32 set, u32 descriptor, vk::Buffer buffer, u32 offset, u32 size) {
/// Binds a storage image to the specified binding of descriptor set 3.
/// @param binding Binding index inside the storage image descriptor set
/// @param image_view View of the image to expose to the shaders
void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) {
    // Storage image descriptors must reference the image in the general layout; the read-only
    // shader layout is only valid for sampled images.
    // NOTE(review): the image has to be transitioned to eGeneral before use; confirm in callers.
    const DescriptorData data = {
        .image_info = vk::DescriptorImageInfo{
            .imageView = image_view,
            .imageLayout = vk::ImageLayout::eGeneral
        }
    };

    SetBinding(3, binding, data);
}
void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) {
const DescriptorData data = {
.buffer_info = vk::DescriptorBufferInfo{
.buffer = buffer,
@ -244,25 +269,25 @@ void PipelineCache::BindBuffer(u32 set, u32 descriptor, vk::Buffer buffer, u32 o
}
};
SetBinding(set, descriptor, data);
SetBinding(0, binding, data);
}
void PipelineCache::BindTexelBuffer(u32 set, u32 descriptor, vk::BufferView buffer_view) {
void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) {
const DescriptorData data = {
.buffer_view = buffer_view
};
SetBinding(set, descriptor, data);
SetBinding(0, binding, data);
}
void PipelineCache::BindSampler(u32 set, u32 descriptor, vk::Sampler sampler) {
void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) {
const DescriptorData data = {
.image_info = vk::DescriptorImageInfo{
.sampler = sampler
}
};
SetBinding(set, descriptor, data);
SetBinding(2, binding, data);
}
void PipelineCache::SetViewport(float x, float y, float width, float height) {
@ -454,13 +479,25 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}
};
const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}};
const vk::Viewport viewport = {
.x = 0.0f,
.y = 0.0f,
.width = 1.0f,
.height = 1.0f,
.minDepth = 0.0f,
.maxDepth = 1.0f
};
const vk::Rect2D scissor = {
.offset = {0, 0},
.extent = {1, 1}
};
const vk::PipelineViewportStateCreateInfo viewport_info = {
.viewportCount = 1,
.pViewports = &placeholder_viewport,
.pViewports = &viewport,
.scissorCount = 1,
.pScissors = &placeholder_scissor,
.pScissors = &scissor,
};
const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported();
@ -483,7 +520,8 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
};
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = extended_dynamic_states ? 14u : 6u,
.dynamicStateCount =
extended_dynamic_states ? static_cast<u32>(dynamic_states.size()) : 6u,
.pDynamicStates = dynamic_states.data()
};

View File

@ -106,6 +106,16 @@ struct PipelineInfo {
VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8;
RasterizationState rasterization{};
DepthStencilState depth_stencil{};
bool IsDepthWriteEnabled() const {
const bool has_stencil = depth_attachment == VideoCore::PixelFormat::D24S8;
const bool depth_write =
depth_stencil.depth_test_enable && depth_stencil.depth_write_enable;
const bool stencil_write =
has_stencil && depth_stencil.stencil_test_enable && depth_stencil.stencil_write_mask != 0;
return depth_write || stencil_write;
}
};
union DescriptorData {
@ -164,17 +174,20 @@ public:
/// Binds a fragment shader generated from PICA state
void UseFragmentShader(const Pica::Regs& regs);
/// Binds a texture to the specified descriptor
void BindTexture(u32 set, u32 binding, vk::ImageView view);
/// Binds a texture to the specified binding
void BindTexture(u32 binding, vk::ImageView image_view);
/// Binds a buffer to the specified descriptor
void BindBuffer(u32 set, u32 binding, vk::Buffer buffer, u32 offset, u32 size);
/// Binds a storage image to the specified binding
void BindStorageImage(u32 binding, vk::ImageView image_view);
/// Binds a buffer to the specified descriptor
void BindTexelBuffer(u32 set, u32 binding, vk::BufferView buffer_view);
/// Binds a buffer to the specified binding
void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size);
/// Binds a sampler to the specified descriptor
void BindSampler(u32 set, u32 binding, vk::Sampler sampler);
/// Binds a buffer to the specified binding
void BindTexelBuffer(u32 binding, vk::BufferView buffer_view);
/// Binds a sampler to the specified binding
void BindSampler(u32 binding, vk::Sampler sampler);
/// Sets the viewport rectangle to the provided values
void SetViewport(float x, float y, float width, float height);
@ -185,6 +198,10 @@ public:
/// Marks all descriptor sets as dirty
void MarkDescriptorSetsDirty();
/// Returns the image view currently recorded for the given descriptor set and binding
vk::ImageView GetTexture(u32 set, u32 binding) const {
    const auto& slot = update_data[set][binding];
    return slot.image_info.imageView;
}
private:
/// Binds a resource to the provided binding
void SetBinding(u32 set, u32 binding, DescriptorData data);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,314 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_uniforms.h"
namespace Frontend {
class EmuWindow;
}
namespace Vulkan {
struct ScreenInfo;
class Instance;
class TaskScheduler;
class RenderpassCache;
/// Sampler parameters expressed with PICA texture configuration types. The structure is
/// comparable (defaulted operator<=>) and hashable (std::hash specialization in this header).
/// NOTE(review): the std::hash specialization hashes the raw object bytes, so any padding
/// bytes would take part in the hash - confirm instances are fully zero-initialized.
struct SamplerInfo {
    using TextureConfig = Pica::TexturingRegs::TextureConfig;

    // Filtering modes
    TextureConfig::TextureFilter mag_filter;
    TextureConfig::TextureFilter min_filter;
    TextureConfig::TextureFilter mip_filter;
    // Wrap modes for the s and t texture coordinates
    TextureConfig::WrapMode wrap_s;
    TextureConfig::WrapMode wrap_t;
    u32 border_color = 0;
    // Level of detail range and bias
    u32 lod_min = 0;
    u32 lod_max = 0;
    s32 lod_bias = 0;

    // TODO(wwylele): remove this once mipmap for cube is implemented
    bool supress_mipmap_for_cube = false;

    auto operator<=>(const SamplerInfo&) const noexcept = default;
};
/// Description of a framebuffer: its colour and depth image views, the renderpass and its
/// dimensions. The structure is comparable (defaulted operator<=>) and hashable (std::hash
/// specialization in this header).
struct FramebufferInfo {
    vk::ImageView color;
    vk::ImageView depth;
    vk::RenderPass renderpass;
    // Dimensions default to a 1x1 framebuffer
    u32 width = 1;
    u32 height = 1;

    auto operator<=>(const FramebufferInfo&) const noexcept = default;
};
}
namespace std {

/// Hashes a SamplerInfo by running ComputeHash64 over the bytes of the object
template <>
struct hash<Vulkan::SamplerInfo> {
    std::size_t operator()(const Vulkan::SamplerInfo& sampler) const noexcept {
        return Common::ComputeHash64(&sampler, sizeof(sampler));
    }
};

/// Hashes a FramebufferInfo by running ComputeHash64 over the bytes of the object
template <>
struct hash<Vulkan::FramebufferInfo> {
    std::size_t operator()(const Vulkan::FramebufferInfo& framebuffer) const noexcept {
        return Common::ComputeHash64(&framebuffer, sizeof(framebuffer));
    }
};

} // namespace std
namespace Vulkan {
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
friend class RendererVulkan;
public:
explicit RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, TaskScheduler& scheduler,
TextureRuntime& runtime, RenderpassCache& renderpass_cache);
~RasterizerVulkan() override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void DrawTriangles() override;
void NotifyPicaRegisterChanged(u32 id) override;
void FlushAll() override;
void FlushRegion(PAddr addr, u32 size) override;
void InvalidateRegion(PAddr addr, u32 size) override;
void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info);
bool AccelerateDrawBatch(bool is_indexed) override;
/// Syncs entire status to match PICA registers
void SyncEntireState() override;
/// Flushes all rasterizer owned buffers
void FlushBuffers();
private:
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
/// Syncs the clip coefficients to match the PICA register
void SyncClipCoef();
/// Sets the OpenGL shader in accordance with the current PICA register state
void SetShader();
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
/// Syncs the blend functions to match the PICA register
void SyncBlendFuncs();
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture bias configuration to match the PICA register
void SyncProcTexBias();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
/// Syncs the color write mask to match the PICA register state
void SyncColorWriteMask();
/// Syncs the stencil write mask to match the PICA register state
void SyncStencilWriteMask();
/// Syncs the depth write mask to match the PICA register state
void SyncDepthWriteMask();
/// Syncs the stencil test states to match the PICA register
void SyncStencilTest();
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(std::size_t tev_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direction to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs the shadow texture bias to match the PICA register
void SyncShadowTextureBias();
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
void SyncAndUploadLUTsLF();
/// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw);
/// Generic draw function for DrawTriangles and AccelerateDrawBatch
bool Draw(bool accelerate, bool is_indexed);
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);
/// Range and size of the vertex shader input, as returned by AnalyzeVertexArray.
/// NOTE(review): the members have no default initializers, so a default-constructed
/// instance holds indeterminate values — presumably AnalyzeVertexArray always fills
/// all three; confirm.
struct VertexArrayInfo {
u32 vs_input_index_min; ///< Lower bound of the input vertex index range
u32 vs_input_index_max; ///< Upper bound of the input vertex index range
u32 vs_input_size;      ///< Size of the input vertex data (unit not visible here)
};
/// Retrieve the range and the size of the input vertex
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
/// Setup vertex array for AccelerateDrawBatch
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
/// Setup vertex shader for AccelerateDrawBatch
bool SetupVertexShader();
/// Setup geometry shader for AccelerateDrawBatch
bool SetupGeometryShader();
/// Creates a new sampler object
vk::Sampler CreateSampler(const SamplerInfo& info);
/// Creates a new Vulkan framebuffer object
vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info);
private:
const Instance& instance;
TaskScheduler& scheduler;
TextureRuntime& runtime;
RenderpassCache& renderpass_cache;
RasterizerCache res_cache;
PipelineCache pipeline_cache;
bool shader_dirty = true;
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
/// Builds a hardware vertex from a PICA shader output vertex.
/// NOTE(review): flip_quaternion presumably negates normquat — confirm in the definition.
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
/// Returns the VertexLayout for this structure (definition not visible here).
constexpr static VertexLayout GetVertexLayout();
// Per-vertex attributes. NOTE(review): member order presumably has to agree with
// what GetVertexLayout() reports — do not reorder without checking.
Common::Vec4f position;
Common::Vec4f color;
Common::Vec2f tex_coord0;
Common::Vec2f tex_coord1;
Common::Vec2f tex_coord2;
float tex_coord0_w;
Common::Vec4f normquat;
Common::Vec3f view;
};
std::vector<HardwareVertex> vertex_batch;
ImageAlloc default_texture;
vk::Sampler default_sampler;
/// Shader uniform data together with the dirty flags that track which parts of it
/// (and which lookup tables) still need to be synced.
/// NOTE(review): presumably consumed by UploadUniforms / SyncAndUploadLUTs — confirm.
struct {
Pica::Shader::UniformData data{};
// Value-initialized, so every per-sampler flag starts out false...
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
// ...while the aggregate flag starts out true.
// NOTE(review): confirm this mismatch with the per-sampler flags is intended.
bool lighting_lut_dirty_any = true;
bool fog_lut_dirty = true;
bool proctex_noise_lut_dirty = true;
bool proctex_color_map_dirty = true;
bool proctex_alpha_map_dirty = true;
bool proctex_lut_dirty = true;
bool proctex_diff_lut_dirty = true;
// Starts true, so the uniform data is considered out of date initially
bool dirty = true;
} uniform_block_data = {};
std::array<bool, 16> hw_enabled_attributes{};
std::array<SamplerInfo, 3> texture_samplers;
SamplerInfo texture_cube_sampler;
std::unordered_map<SamplerInfo, vk::Sampler> samplers;
std::unordered_map<FramebufferInfo, vk::Framebuffer> framebuffers;
StreamBuffer vertex_buffer;
StreamBuffer uniform_buffer;
StreamBuffer index_buffer;
StreamBuffer texture_buffer;
StreamBuffer texture_lf_buffer;
PipelineInfo pipeline_info;
std::size_t uniform_buffer_alignment;
std::size_t uniform_size_aligned_vs;
std::size_t uniform_size_aligned_fs;
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
lighting_lut_data{};
std::array<Common::Vec2f, 128> fog_lut_data{};
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
std::array<Common::Vec2f, 128> proctex_color_map_data{};
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
std::array<Common::Vec4f, 256> proctex_lut_data{};
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
};
} // namespace Vulkan

View File

@ -6,6 +6,7 @@
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
namespace Vulkan {
@ -30,7 +31,8 @@ vk::Format ToVkFormatDepth(u32 index) {
}
}
RenderpassCache::RenderpassCache(const Instance& instance) : instance{instance} {
RenderpassCache::RenderpassCache(const Instance& instance, TaskScheduler& scheduler)
: instance{instance}, scheduler{scheduler} {
// Pre-create all needed renderpasses by the renderer
for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
@ -75,6 +77,26 @@ RenderpassCache::~RenderpassCache() {
device.destroyRenderPass(present_renderpass);
}
/// Begins recording a renderpass instance on the render command buffer.
/// Does nothing (begin_info is ignored) when a renderpass is already active.
void RenderpassCache::EnterRenderpass(const vk::RenderPassBeginInfo begin_info) {
    if (!renderpass_active) {
        scheduler.GetRenderCommandBuffer().beginRenderPass(begin_info,
                                                           vk::SubpassContents::eInline);
        renderpass_active = true;
    }
}
void RenderpassCache::ExitRenderpass() {
if (!renderpass_active) {
return;
}
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.endRenderPass();
renderpass_active = false;
}
void RenderpassCache::CreatePresentRenderpass(vk::Format format) {
if (!present_renderpass) {
present_renderpass = CreateRenderPass(format, vk::Format::eUndefined,

View File

@ -10,18 +10,21 @@
namespace Vulkan {
class Instance;
class Swapchain;
class TaskScheduler;
constexpr u32 MAX_COLOR_FORMATS = 5;
constexpr u32 MAX_DEPTH_FORMATS = 3;
class RenderpassCache {
public:
RenderpassCache(const Instance& instance);
RenderpassCache(const Instance& instance, TaskScheduler& scheduler);
~RenderpassCache();
/// Creates the renderpass used when rendering to the swapchain
void CreatePresentRenderpass(vk::Format format);
/// Begins a new renderpass only when no other renderpass is currently active
void EnterRenderpass(const vk::RenderPassBeginInfo begin_info);
/// Exits from any currently active renderpass instance
void ExitRenderpass();
/// Returns the renderpass associated with the color-depth format pair
vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
@ -32,6 +35,9 @@ public:
return present_renderpass;
}
/// Creates the renderpass used when rendering to the swapchain
void CreatePresentRenderpass(vk::Format format);
private:
/// Creates a renderpass configured appropriately and stores it in cached_renderpasses
vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op,
@ -39,8 +45,11 @@ private:
private:
const Instance& instance;
TaskScheduler& scheduler;
bool renderpass_active = false;
vk::RenderPass present_renderpass{};
vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS+1][MAX_DEPTH_FORMATS+1][2];
};
} // namespace VideoCore::Vulkan
} // namespace Vulkan

View File

@ -1209,7 +1209,8 @@ float ProcTexNoiseCoef(vec2 x) {
std::string GenerateFragmentShader(const PicaFSConfig& config) {
const auto& state = config.state;
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n";
std::string out = "#version 450 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n\n";
out += GetVertexInterfaceDeclaration(false);
out += R"(
@ -1538,7 +1539,8 @@ do {
}
std::string GenerateTrivialVertexShader() {
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n";
std::string out = "#version 450 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n\n";
out +=
fmt::format("layout(location = {}) in vec4 vert_position;\n"
"layout(location = {}) in vec4 vert_color;\n"
@ -1723,7 +1725,8 @@ void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) {
};
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config) {
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
std::string out = "#version 450 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n\n";
out += R"(
layout(triangles) in;

View File

@ -68,13 +68,14 @@ StagingBuffer::~StagingBuffer() {
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
}
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info)
: instance{instance}, scheduler{scheduler}, info{info},
staging{instance, info.size, vk::BufferUsageFlagBits::eTransferSrc} {
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler,
u32 size, vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats)
: instance{instance}, scheduler{scheduler}, staging{instance, size, vk::BufferUsageFlagBits::eTransferSrc},
usage{usage}, total_size{size} {
const vk::BufferCreateInfo buffer_info = {
.size = info.size,
.usage = info.usage | vk::BufferUsageFlagBits::eTransferDst
.size = total_size,
.usage = usage | vk::BufferUsageFlagBits::eTransferDst
};
const VmaAllocationCreateInfo alloc_create_info = {
@ -91,37 +92,36 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, c
buffer = vk::Buffer{unsafe_buffer};
vk::Device device = instance.GetDevice();
for (u32 i = 0; i < info.views.size(); i++) {
if (info.views[i] == vk::Format::eUndefined) {
view_count = i;
break;
}
ASSERT(view_formats.size() < MAX_BUFFER_VIEWS);
vk::Device device = instance.GetDevice();
for (std::size_t i = 0; i < view_formats.size(); i++) {
const vk::BufferViewCreateInfo view_info = {
.buffer = buffer,
.format = info.views[i],
.range = info.size
.format = view_formats[i],
.offset = 0,
.range = total_size
};
views[i] = device.createBufferView(view_info);
}
available_size = info.size;
available_size = total_size;
view_count = view_formats.size();
}
StreamBuffer::~StreamBuffer() {
if (buffer) {
vk::Device device = instance.GetDevice();
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
for (u32 i = 0; i < view_count; i++) {
for (std::size_t i = 0; i < view_count; i++) {
device.destroyBufferView(views[i]);
}
}
}
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
ASSERT(size <= info.size && alignment <= info.size);
ASSERT(size <= total_size && alignment <= total_size);
if (alignment > 0) {
buffer_offset = Common::AlignUp(buffer_offset, alignment);
@ -134,7 +134,7 @@ std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
Flush();
// If we are at the end of the buffer, start over
if (buffer_offset + size > info.size) {
if (buffer_offset + size > total_size) {
Invalidate();
invalidate = true;
}
@ -145,7 +145,7 @@ std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
LOG_WARNING(Render_Vulkan, "Buffer GPU stall");
Invalidate();
regions.clear();
available_size = info.size;
available_size = total_size;
}
}
@ -156,7 +156,7 @@ std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
void StreamBuffer::Commit(u32 size) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
auto [access_mask, stage_mask] = ToVkAccessStageFlags(info.usage);
auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
const vk::BufferMemoryBarrier buffer_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = access_mask,

View File

@ -15,12 +15,6 @@ class TaskScheduler;
constexpr u32 MAX_BUFFER_VIEWS = 3;
struct BufferInfo {
u32 size = 0;
vk::BufferUsageFlagBits usage{};
std::array<vk::Format, MAX_BUFFER_VIEWS> views{};
};
struct LockedRegion {
u32 size = 0;
u64 fence_counter = 0;
@ -38,7 +32,8 @@ struct StagingBuffer {
class StreamBuffer {
public:
StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info);
StreamBuffer(const Instance& instance, TaskScheduler& scheduler,
u32 size, vk::BufferUsageFlagBits usage, std::span<const vk::Format> views);
~StreamBuffer();
std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
@ -54,6 +49,10 @@ public:
return buffer;
}
/// Returns the current value of buffer_offset
u32 GetBufferOffset() const {
return buffer_offset;
}
/// Returns an immutable reference to the requested buffer view
const vk::BufferView& GetView(u32 index = 0) const {
ASSERT(index < view_count);
@ -70,13 +69,14 @@ private:
private:
const Instance& instance;
TaskScheduler& scheduler;
BufferInfo info{};
StagingBuffer staging;
vk::Buffer buffer{};
VmaAllocation allocation{};
vk::BufferUsageFlagBits usage;
u32 total_size = 0;
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
u32 view_count = 0;
std::size_t view_count = 0;
u32 buffer_offset = 0;
u32 flush_start = 0;

View File

@ -11,9 +11,8 @@
namespace Vulkan {
Swapchain::Swapchain(const Instance& instance, CommandScheduler& scheduler,
RenderpassCache& renderpass_cache, vk::SurfaceKHR surface)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, surface{surface} {
Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache)
: instance{instance}, renderpass_cache{renderpass_cache}, surface{instance.GetSurface()} {
// Set the surface format early for RenderpassCache to create the present renderpass
Configure(0, 0);

View File

@ -11,13 +11,12 @@
namespace Vulkan {
class Instance;
class CommandScheduler;
class TaskScheduler;
class RenderpassCache;
class Swapchain {
public:
Swapchain(const Instance& instance, CommandScheduler& scheduler,
RenderpassCache& renderpass_cache,vk::SurfaceKHR surface);
Swapchain(const Instance& instance, RenderpassCache& renderpass_cache);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
@ -39,6 +38,11 @@ public:
return surface;
}
/// Returns the framebuffer of the swapchain image selected by current_image
vk::Framebuffer GetFramebuffer() const {
return swapchain_images[current_image].framebuffer;
}
/// Returns the swapchain format
vk::SurfaceFormatKHR GetSurfaceFormat() const {
return surface_format;
@ -69,7 +73,6 @@ private:
private:
const Instance& instance;
CommandScheduler& scheduler;
RenderpassCache& renderpass_cache;
vk::SwapchainKHR swapchain{};
vk::SurfaceKHR surface{};

View File

@ -24,6 +24,7 @@ TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024},
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024},
vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, 512},
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 1024}
};

View File

@ -5,6 +5,7 @@
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
@ -54,10 +55,27 @@ vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
return vk::ImageAspectFlagBits::eColor;
}
/// Maps a surface type to the Vulkan format feature its format is checked against:
/// depth and depth-stencil surfaces map to the depth-stencil attachment feature,
/// color, texture and fill surfaces to the color attachment feature.
/// Any other value trips UNREACHABLE_MSG and falls back to the color attachment feature.
vk::FormatFeatureFlagBits ToVkFormatFeatures(VideoCore::SurfaceType type) {
    const bool is_depth_like = type == VideoCore::SurfaceType::Depth ||
                               type == VideoCore::SurfaceType::DepthStencil;
    if (is_depth_like) {
        return vk::FormatFeatureFlagBits::eDepthStencilAttachment;
    }

    const bool is_color_like = type == VideoCore::SurfaceType::Color ||
                               type == VideoCore::SurfaceType::Texture ||
                               type == VideoCore::SurfaceType::Fill;
    if (!is_color_like) {
        UNREACHABLE_MSG("Invalid surface type!");
    }

    return vk::FormatFeatureFlagBits::eColorAttachment;
}
constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler)
: instance{instance}, scheduler{scheduler} {
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} {
for (auto& buffer : staging_buffers) {
buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
@ -66,6 +84,17 @@ TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& schedule
}
}
/// Destroys every image (and its view) still held by the texture recycler.
TextureRuntime::~TextureRuntime() {
    VmaAllocator vma = instance.GetAllocator();
    vk::Device logical_device = instance.GetDevice();

    for (auto& entry : texture_recycler) {
        auto& recycled = entry.second;
        // Same order as before: free the image and its memory, then the view
        vmaDestroyImage(vma, recycled.image, recycled.allocation);
        logical_device.destroyImageView(recycled.image_view);
    }

    texture_recycler.clear();
}
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
const u32 current_slot = scheduler.GetCurrentSlotIndex();
const u32 offset = staging_offsets[current_slot];
@ -77,6 +106,7 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
const auto& buffer = staging_buffers[current_slot];
return StagingData{
.buffer = buffer->buffer,
.size = size,
.mapped = buffer->mapped.subspan(offset, size),
.buffer_offset = offset
};
@ -108,6 +138,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format));
vk::ImageAspectFlags aspect = GetImageAspect(vk_format);
const u32 levels = std::bit_width(std::max(width, height));
const vk::ImageCreateInfo image_info = {
.flags = type == VideoCore::TextureType::CubeMap ?
vk::ImageCreateFlagBits::eCubeCompatible :
@ -115,7 +146,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
.imageType = vk::ImageType::e2D,
.format = vk_format,
.extent = {width, height, 1},
.mipLevels = std::bit_width(std::max(width, height)),
.mipLevels = levels,
.arrayLayers = layers,
.samples = vk::SampleCountFlagBits::e1,
.usage = GetImageUsage(aspect),
@ -160,13 +191,26 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
.image = image,
.image_view = image_view,
.allocation = allocation,
.levels = levels
};
}
/// Checks that the Vulkan format matching `format` supports the feature required by
/// its surface type (see ToVkFormatFeatures). No conversion is implemented: when the
/// format is unsupported a critical error is logged and UNREACHABLE() is hit.
/// `upload`, `source` and `dest` are currently unused; no pixel data is read or written.
void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
                                   std::span<std::byte> source, std::span<std::byte> dest) {
    const VideoCore::SurfaceType type = VideoCore::GetFormatType(format);
    const vk::FormatFeatureFlagBits feature = ToVkFormatFeatures(type);

    if (!instance.IsFormatSupported(ToVkFormat(format), feature)) {
        LOG_CRITICAL(Render_Vulkan, "Unimplemented format conversion!");
        UNREACHABLE();
    }
}
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
VideoCore::ClearValue value) {
renderpass_cache.ExitRenderpass();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1);
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1);
// For full clears we can use vkCmdClearColorImage/vkCmdClearDepthStencilImage
if (clear.texture_rect == surface.GetScaledRect()) {
@ -184,7 +228,7 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
.layerCount = 1
};
command_buffer.clearColorImage(surface.image, vk::ImageLayout::eTransferDstOptimal,
command_buffer.clearColorImage(surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
clear_color, range);
} else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) {
const vk::ClearDepthStencilValue clear_depth = {
@ -200,7 +244,7 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
.layerCount = 1
};
command_buffer.clearDepthStencilImage(surface.image, vk::ImageLayout::eTransferDstOptimal,
command_buffer.clearDepthStencilImage(surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
clear_depth, range);
}
} else {
@ -211,6 +255,8 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
}
bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) {
renderpass_cache.ExitRenderpass();
const vk::ImageCopy image_copy = {
.srcSubresource = {
.aspectMask = ToVkAspect(source.type),
@ -230,19 +276,21 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCor
};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1);
dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1);
Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1);
Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1);
command_buffer.copyImage(source.image, vk::ImageLayout::eTransferSrcOptimal,
dest.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
return true;
}
bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) {
renderpass_cache.ExitRenderpass();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1);
dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1);
Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1);
Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1);
const std::array source_offsets = {
vk::Offset3D{static_cast<s32>(blit.src_rect.left), static_cast<s32>(blit.src_rect.bottom), 0},
@ -271,14 +319,16 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCor
.dstOffsets = dest_offsets
};
command_buffer.blitImage(source.image, vk::ImageLayout::eTransferSrcOptimal,
dest.image, vk::ImageLayout::eTransferDstOptimal,
command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
return true;
}
void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
renderpass_cache.ExitRenderpass();
// TODO: Investigate AMD single pass downsampler
s32 current_width = surface.GetScaledWidth();
s32 current_height = surface.GetScaledHeight();
@ -287,8 +337,8 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
for (u32 i = 1; i < levels; i++) {
surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, i, 1);
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, i, 1);
const std::array source_offsets = {
vk::Offset3D{0, 0, 0},
@ -318,166 +368,15 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
.dstOffsets = dest_offsets
};
command_buffer.blitImage(surface.image, vk::ImageLayout::eTransferSrcOptimal,
surface.image, vk::ImageLayout::eTransferDstOptimal,
command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
}
}
/// Constructs a surface from `params` and allocates its host image through the runtime,
/// sized to the scaled width/height of the surface.
Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
: VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
scheduler{runtime.GetScheduler()} {
const ImageAlloc alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(),
params.pixel_format, texture_type);
// Keep the individual handles of the allocation as members
allocation = alloc.allocation;
image_view = alloc.image_view;
image = alloc.image;
}
MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64));
/// Records a copy of `upload.texture_rect` from the staging buffer into mip level
/// `upload.texture_level` of this surface's image, then invalidates all watchers.
/// When res_scale != 1 the work is delegated to ScaledUpload instead.
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
MICROPROFILE_SCOPE(Vulkan_Upload);
const bool is_scaled = res_scale != 1;
if (is_scaled) {
ScaledUpload(upload);
} else {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = upload.texture_rect;
// Source data starts at the staging offset; row length and image height are
// set to the rectangle's own dimensions
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {
.aspectMask = aspect,
.mipLevel = upload.texture_level,
.baseArrayLayer = 0,
.layerCount = 1
},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
};
// The destination level must be in the transfer-destination layout for the copy
TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1);
command_buffer.copyBufferToImage(staging.buffer, image,
vk::ImageLayout::eTransferDstOptimal,
copy_region);
}
// Runs on both the scaled and the unscaled path
InvalidateAllWatcher();
}
MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
/// Records a copy of `download.texture_rect` from mip level `download.texture_level`
/// of this surface's image into the staging buffer and submits the scheduler.
/// When res_scale != 1 the work is delegated to ScaledDownload instead.
///
/// Color images are copied with a single region. Depth images are copied with a
/// depth-only region, followed by a stencil-only region when the image also has a
/// stencil aspect.
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
    MICROPROFILE_SCOPE(Vulkan_Download);

    const bool is_scaled = res_scale != 1;
    if (is_scaled) {
        ScaledDownload(download);
    } else {
        u32 region_count = 0;
        std::array<vk::BufferImageCopy, 2> copy_regions;

        vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
        const VideoCore::Rect2D rect = download.texture_rect;
        vk::BufferImageCopy copy_region = {
            .bufferOffset = staging.buffer_offset,
            .bufferRowLength = rect.GetWidth(),
            .bufferImageHeight = rect.GetHeight(),
            .imageSubresource = {
                .aspectMask = aspect,
                .mipLevel = download.texture_level,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
            .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
        };

        if (aspect & vk::ImageAspectFlagBits::eColor) {
            copy_regions[region_count++] = copy_region;
        } else if (aspect & vk::ImageAspectFlagBits::eDepth) {
            copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
            copy_regions[region_count++] = copy_region;

            if (aspect & vk::ImageAspectFlagBits::eStencil) {
                // NOTE(review): the stencil data is placed staging.mapped.size() bytes
                // after the depth data — confirm the staging buffer has room for it.
                copy_region.bufferOffset += staging.mapped.size();
                // A buffer<->image copy region must name exactly one aspect, so the
                // mask is replaced here rather than OR-ed onto the depth bit.
                copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
                copy_regions[region_count++] = copy_region;
            }
        }

        // The source level must be in the transfer-source layout for the copy
        TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal,
                         download.texture_level, 1);

        // Copy pixel data to the staging buffer
        command_buffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal,
                                         staging.buffer, region_count, copy_regions.data());

        scheduler.Submit(true);
    }
}
/// Download path for surfaces with res_scale != 1.
/// Currently a no-op: the whole body is commented-out OpenGL code (blit the scaled
/// rectangle into an unscaled texture, then read it back) kept as a porting reference.
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
/*const u32 rect_width = download.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();
// Allocate an unscaled texture that fits the download rectangle to use as a blit destination
const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture);
// Blit the scaled rectangle to the unscaled texture
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top,
0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
const auto& tuple = runtime.GetFormatTuple(pixel_format);
if (driver.IsOpenGLES()) {
const auto& downloader_es = runtime.GetDownloaderES();
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect_height, rect_width,
reinterpret_cast<void*>(download.buffer_offset));
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
reinterpret_cast<void*>(download.buffer_offset));
}*/
}
/// Upload path for surfaces with res_scale != 1.
/// Currently a no-op: the whole body is commented-out OpenGL code (upload into an
/// unscaled texture, then filter or blit it into the scaled one) kept as a porting
/// reference.
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
/*const u32 rect_width = upload.texture_rect.GetWidth();
const u32 rect_height = upload.texture_rect.GetHeight();
OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));
const auto scaled_rect = upload.texture_rect * res_scale;
const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
const auto& filterer = runtime.GetFilterer();
if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) {
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture);
// If filtering fails, resort to normal blitting
glBlitFramebuffer(0, 0, rect_width, rect_height,
upload.texture_rect.left, upload.texture_rect.bottom,
upload.texture_rect.right, upload.texture_rect.top,
MakeBufferMask(type), GL_LINEAR);
}*/
}
void Surface::TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
u32 level, u32 level_count) {
if (new_layout == layout) {
void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
vk::ImageLayout new_layout, u32 level, u32 level_count) {
if (new_layout == alloc.layout || !alloc.image) {
return;
}
@ -540,23 +439,194 @@ void Surface::TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout
return info;
};
LayoutInfo source = GetLayoutInfo(layout);
LayoutInfo source = GetLayoutInfo(alloc.layout);
LayoutInfo dest = GetLayoutInfo(new_layout);
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = layout,
.oldLayout = alloc.layout,
.newLayout = new_layout,
.image = image,
.subresourceRange = {aspect, level, level_count, 0, 1}
.image = alloc.image,
.subresourceRange = {
.aspectMask = alloc.aspect,
.baseMipLevel = level,
.levelCount = level_count,
.baseArrayLayer = 0,
.layerCount = 1
}
};
command_buffer.pipelineBarrier(source.stage, dest.stage,
vk::DependencyFlagBits::eByRegion,
{}, {}, barrier);
layout = new_layout;
alloc.layout = new_layout;
}
/// Constructs a surface from `params` and asks the runtime for a host image
/// allocation matching the surface's scaled dimensions, pixel format and texture type.
Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
    : VideoCore::SurfaceBase<Surface>{params}, runtime{runtime},
      instance{runtime.GetInstance()}, scheduler{runtime.GetScheduler()} {
    const auto scaled_width = GetScaledWidth();
    const auto scaled_height = GetScaledHeight();
    alloc = runtime.Allocate(scaled_width, scaled_height, params.pixel_format, texture_type);
}
/// Hands the surface's image allocation to the runtime's texture recycler instead of
/// destroying it, keyed by format, scaled size and layer count (6 for cube maps, else 1).
Surface::~Surface() {
    const bool is_cube_map = texture_type == VideoCore::TextureType::CubeMap;

    const VideoCore::HostTextureTag recycle_key = {
        .format = pixel_format,
        .width = GetScaledWidth(),
        .height = GetScaledHeight(),
        .layers = is_cube_map ? 6u : 1u
    };

    runtime.texture_recycler.emplace(recycle_key, std::move(alloc));
}
MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64));
// Copies pixel data from the staging buffer into the rectangle/mip level described by `upload`.
// Surfaces with a resolution scale other than 1 are routed through ScaledUpload instead.
// Afterwards all watchers are invalidated and the staged bytes are accounted for in the
// current scheduler slot.
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
    MICROPROFILE_SCOPE(Vulkan_Upload);

    if (res_scale == 1) {
        const auto& dst_rect = upload.texture_rect;
        const auto width = dst_rect.GetWidth();
        const auto height = dst_rect.GetHeight();

        // NOTE(review): alloc.aspect is used as-is; for a combined depth+stencil image this
        // would put two aspect bits in one copy region - confirm this path is color-only.
        const vk::ImageSubresourceLayers subresource = {
            .aspectMask = alloc.aspect,
            .mipLevel = upload.texture_level,
            .baseArrayLayer = 0,
            .layerCount = 1
        };

        const vk::BufferImageCopy region = {
            .bufferOffset = staging.buffer_offset,
            .bufferRowLength = width,
            .bufferImageHeight = height,
            .imageSubresource = subresource,
            .imageOffset = {static_cast<s32>(dst_rect.left), static_cast<s32>(dst_rect.bottom), 0},
            .imageExtent = {width, height, 1}
        };

        // The target mip level must be in the transfer destination layout before the copy
        vk::CommandBuffer cmdbuf = scheduler.GetRenderCommandBuffer();
        runtime.Transition(cmdbuf, alloc, vk::ImageLayout::eTransferDstOptimal,
                           upload.texture_level, 1);
        cmdbuf.copyBufferToImage(staging.buffer, alloc.image,
                                 vk::ImageLayout::eTransferDstOptimal, region);
    } else {
        ScaledUpload(upload);
    }

    InvalidateAllWatcher();

    // Keep the staged bytes reserved until the scheduler moves on to another slot
    const u32 slot = scheduler.GetCurrentSlotIndex();
    runtime.staging_offsets[slot] += staging.size;
}
MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
// Copies the rectangle/mip level described by `download` from the surface image into the
// staging buffer and submits the recorded work.
//
// Color images are read back with a single copy region. Depth images use one region for the
// depth aspect and, when the image also has a stencil aspect, a second region for stencil.
// Surfaces with a resolution scale other than 1 are routed through ScaledDownload instead.
// In both cases the staged bytes are accounted for in the current scheduler slot.
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
    MICROPROFILE_SCOPE(Vulkan_Download);

    const bool is_scaled = res_scale != 1;
    if (is_scaled) {
        ScaledDownload(download);
    } else {
        u32 region_count = 0;
        std::array<vk::BufferImageCopy, 2> copy_regions;

        vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
        const VideoCore::Rect2D rect = download.texture_rect;
        vk::BufferImageCopy copy_region = {
            .bufferOffset = staging.buffer_offset,
            .bufferRowLength = rect.GetWidth(),
            .bufferImageHeight = rect.GetHeight(),
            .imageSubresource = {
                .aspectMask = alloc.aspect,
                .mipLevel = download.texture_level,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
            .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
        };

        if (alloc.aspect & vk::ImageAspectFlagBits::eColor) {
            copy_regions[region_count++] = copy_region;
        } else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) {
            // Each buffer<->image copy region may name exactly one aspect, so depth and
            // stencil are read back as two separate regions.
            copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
            copy_regions[region_count++] = copy_region;

            if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
                // NOTE(review): the stencil plane is placed after the whole mapped span;
                // confirm the staging buffer really reserves room beyond mapped.size().
                copy_region.bufferOffset += staging.mapped.size();
                // Assign (not OR) the aspect: Depth|Stencil in a single region is invalid
                copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
                copy_regions[region_count++] = copy_region;
            }
        }

        // The source mip level must be in the transfer source layout before the copy
        runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);

        // Copy pixel data to the staging buffer
        command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
                                         staging.buffer, region_count, copy_regions.data());

        // presumably the `true` argument makes the submit wait for completion so the data
        // can be read back - TODO confirm against TaskScheduler::Submit
        scheduler.Submit(true);
    }

    // Lock this data until the next scheduler switch
    const u32 current_slot = scheduler.GetCurrentSlotIndex();
    runtime.staging_offsets[current_slot] += staging.size;
}
// Download path for surfaces whose resolution scale is not 1.
// Currently a no-op: the whole body is commented out and still consists of the OpenGL
// implementation (framebuffer blit into an unscaled texture followed by glGetTexImage),
// kept as a reference. No Vulkan commands are recorded here.
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
/*const u32 rect_width = download.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();
// Allocate an unscaled texture that fits the download rectangle to use as a blit destination
const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture);
// Blit the scaled rectangle to the unscaled texture
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top,
0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
const auto& tuple = runtime.GetFormatTuple(pixel_format);
if (driver.IsOpenGLES()) {
const auto& downloader_es = runtime.GetDownloaderES();
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect_height, rect_width,
reinterpret_cast<void*>(download.buffer_offset));
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
reinterpret_cast<void*>(download.buffer_offset));
}*/
}
// Upload path for surfaces whose resolution scale is not 1.
// Currently a no-op: the whole body is commented out and still consists of the OpenGL
// implementation (upload into an unscaled texture, then filter or blit it into the scaled
// one), kept as a reference. No Vulkan commands are recorded here.
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
/*const u32 rect_width = upload.texture_rect.GetWidth();
const u32 rect_height = upload.texture_rect.GetHeight();
OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));
const auto scaled_rect = upload.texture_rect * res_scale;
const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
const auto& filterer = runtime.GetFilterer();
if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) {
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture);
// If filtering fails, resort to normal blitting
glBlitFramebuffer(0, 0, rect_width, rect_height,
upload.texture_rect.left, upload.texture_rect.bottom,
upload.texture_rect.right, upload.texture_rect.top,
MakeBufferMask(type), GL_LINEAR);
}*/
}
} // namespace Vulkan

View File

@ -15,6 +15,7 @@ namespace Vulkan {
/// Describes a region of a staging buffer used for a pixel upload or download
struct StagingData {
/// Buffer handle used as the source/destination of buffer<->image copies
vk::Buffer buffer;
/// Number of bytes this region accounts for in the per-slot staging offset
u32 size = 0;
/// View of the staging bytes (presumably host-visible mapped memory - TODO confirm)
std::span<std::byte> mapped{};
/// Offset passed as bufferOffset to buffer<->image copies
u32 buffer_offset = 0;
};
@ -23,9 +24,13 @@ struct ImageAlloc {
vk::Image image;
vk::ImageView image_view;
VmaAllocation allocation;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
u32 levels = 1;
};
class Instance;
class RenderpassCache;
class Surface;
/**
@ -35,12 +40,25 @@ class Surface;
class TextureRuntime {
friend class Surface;
public:
TextureRuntime(const Instance& instance, TaskScheduler& scheduler);
~TextureRuntime() = default;
TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache);
~TextureRuntime();
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
StagingData FindStaging(u32 size, bool upload);
/// Allocates a vulkan image, possibly reusing an existing one
ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type);
/// Performs required format conversions on the staging data
void FormatConvert(VideoCore::PixelFormat format, bool upload,
std::span<std::byte> source, std::span<std::byte> dest);
/// Transitions the mip level range of the surface to new_layout
void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
vk::ImageLayout new_layout, u32 level, u32 level_count);
/// Performs operations that need to be done on every scheduler slot switch
void OnSlotSwitch(u32 new_slot);
@ -58,10 +76,6 @@ public:
void GenerateMipmaps(Surface& surface, u32 max_level);
private:
/// Allocates a vulkan image, possibly reusing an existing one
ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type);
/// Returns the current Vulkan instance
const Instance& GetInstance() const {
return instance;
@ -75,6 +89,7 @@ private:
private:
const Instance& instance;
TaskScheduler& scheduler;
RenderpassCache& renderpass_cache;
std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
std::unordered_map<VideoCore::HostTextureTag, ImageAlloc> texture_recycler;
@ -82,9 +97,10 @@ private:
class Surface : public VideoCore::SurfaceBase<Surface> {
friend class TextureRuntime;
friend class RasterizerVulkan;
public:
Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime);
~Surface() override = default;
~Surface() override;
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
@ -102,21 +118,13 @@ private:
/// Overrides the image layout of the mip level range
void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1);
/// Transitions the mip level range of the surface to new_layout
void TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
u32 level, u32 level_count);
private:
TextureRuntime& runtime;
const Instance& instance;
TaskScheduler& scheduler;
vk::Image image{};
vk::ImageView image_view{};
VmaAllocation allocation = nullptr;
ImageAlloc alloc{};
vk::Format internal_format = vk::Format::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
};
struct Traits {

View File

@ -40,7 +40,7 @@ public:
shaders.emplace(key, std::move(shader));
}
private:
public:
std::unordered_map<KeyType, ShaderType> shaders;
};
@ -89,7 +89,7 @@ public:
shader_map.insert_or_assign(key, &cached_shader);
}
private:
public:
std::unordered_map<KeyType, ShaderType*> shader_map;
std::unordered_map<std::string, ShaderType> shader_cache;
};

View File

@ -0,0 +1,25 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_uniforms.h"
namespace Pica::Shader {
// Fills the uniform block from the PICA shader state:
//  - bools: one padded entry per boolean uniform in setup, stored as 0 or 1
//  - i:     the integer uniform registers from regs, widened to 4-component unsigned vectors
//  - f:     the float uniforms from setup, converted to 32-bit floats via ToFloat32()
// Each destination array is written front to back, one element per source element.
void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup) {
    auto bool_out = std::begin(bools);
    for (const bool flag : setup.uniforms.b) {
        *bool_out++ = BoolAligned{flag ? 1 : 0};
    }

    auto int_out = std::begin(i);
    for (const auto& vec : regs.int_uniforms) {
        *int_out++ = Common::Vec4u{vec.x.Value(), vec.y.Value(), vec.z.Value(), vec.w.Value()};
    }

    auto float_out = std::begin(f);
    for (const auto& vec : setup.uniforms.f) {
        *float_out++ = Common::Vec4f{vec.x.ToFloat32(), vec.y.ToFloat32(), vec.z.ToFloat32(),
                                     vec.w.ToFloat32()};
    }
}
} // namespace Pica::Shader

View File

@ -0,0 +1,98 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "video_core/regs_lighting.h"
namespace Pica {
struct ShaderRegs;
}
namespace Pica::Shader {
class ShaderSetup;
/// Identifies the uniform blocks used by the shaders
/// (presumably these values are the buffer binding indices - TODO confirm)
enum class UniformBindings : u32 {
    Common = 0,
    VS = 1,
    GS = 2,
};
/// Per-light entry of UniformData::light_src.
/// Every vector member is forced onto a 16-byte boundary with alignas(16); the two trailing
/// floats follow spot_direction without extra alignment. Member order defines the layout, which
/// is pinned by the size assertion on UniformData, so do not reorder.
struct LightSrc {
alignas(16) Common::Vec3f specular_0;
alignas(16) Common::Vec3f specular_1;
alignas(16) Common::Vec3f diffuse;
alignas(16) Common::Vec3f ambient;
alignas(16) Common::Vec3f position;
alignas(16) Common::Vec3f spot_direction; // negated
float dist_atten_bias;
float dist_atten_scale;
};
/**
 * Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
 * NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
 * the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
 * Not following that rule will cause problems on some AMD drivers.
 *
 * Member order and the alignas specifiers define the exact byte layout, which is checked
 * against the shader-side block by the size assertion below.
 */
struct UniformData {
// Scalar members, tightly packed at 4 bytes each
int framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1;
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
float proctex_bias;
int shadow_texture_bias;
// One offset per lighting sampler, packed four to an ivec4
alignas(16) Common::Vec4i lighting_lut_offset[LightingRegs::NumLightingSampler / 4];
alignas(16) Common::Vec3f fog_color;
// Two-component vectors only need 8-byte alignment
alignas(8) Common::Vec2f proctex_noise_f;
alignas(8) Common::Vec2f proctex_noise_a;
alignas(8) Common::Vec2f proctex_noise_p;
alignas(16) Common::Vec3f lighting_global_ambient;
LightSrc light_src[8];
alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) Common::Vec4f tev_combiner_buffer_color;
// Last member is a vec4, as required by the NOTE above
alignas(16) Common::Vec4f clip_coef;
};
// 0x4F0 == 1264 bytes
static_assert(sizeof(UniformData) == 0x4F0,
"The size of the UniformData does not match the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/**
 * Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
 * NOTE: the same rule from UniformData also applies here.
 */
struct PicaUniformsData {
/// Fills bools and f from the uniforms in setup, and i from the integer uniform registers in regs
void SetFromRegs(const ShaderRegs& regs, const ShaderSetup& setup);
/// A boolean uniform stored as an int and padded to 16 bytes, so each array element
/// occupies its own 16-byte slot
struct BoolAligned {
alignas(16) int b;
};
std::array<BoolAligned, 16> bools;
alignas(16) std::array<Common::Vec4u, 4> i;
alignas(16) std::array<Common::Vec4f, 96> f;
};
/// Uniform block contents for the vertex shader stage; wraps the PICA uniforms
struct VSUniformData {
PicaUniformsData uniforms;
};
// 1856 bytes == 16 * 16 (bools) + 4 * 16 (i) + 96 * 16 (f)
static_assert(sizeof(VSUniformData) == 1856,
"The size of the VSUniformData does not match the structure in the shader");
static_assert(sizeof(VSUniformData) < 16384,
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
} // namespace Pica::Shader

View File

@ -11,6 +11,7 @@
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/video_core.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -43,15 +44,26 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory)
g_memory = &memory;
Pica::Init();
OpenGL::GLES = Settings::values.graphics_api == Settings::GraphicsAPI::OpenGLES;
const Settings::GraphicsAPI graphics_api = Settings::values.graphics_api;
switch (graphics_api) {
case Settings::GraphicsAPI::OpenGL:
case Settings::GraphicsAPI::OpenGLES:
OpenGL::GLES = Settings::values.graphics_api == Settings::GraphicsAPI::OpenGLES;
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window);
break;
case Settings::GraphicsAPI::Vulkan:
g_renderer = std::make_unique<Vulkan::RendererVulkan>(emu_window);
break;
default:
LOG_CRITICAL(Render, "Invalid graphics API enum value {}", graphics_api);
UNREACHABLE();
}
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window);
ResultStatus result = g_renderer->Init();
if (result != ResultStatus::Success) {
LOG_ERROR(Render, "initialization failed !");
LOG_ERROR(Render, "Video core initialization failed");
} else {
LOG_DEBUG(Render, "initialized OK");
LOG_INFO(Render, "Video core initialization OK");
}
return result;