Compare commits
19 Commits
fs-fixes
...
blit-scree
Author | SHA1 | Date | |
---|---|---|---|
797d0e45ce | |||
37ce0540ed | |||
e978931574 | |||
facddbfc8c | |||
68a3217d1e | |||
ddfae0025c | |||
75b6c5b4c5 | |||
32c6e76ab9 | |||
479c7ed162 | |||
009dfa5265 | |||
b225239e1f | |||
f887621921 | |||
407ae3972a | |||
0f0ca03551 | |||
9ae84fb6e8 | |||
215255d415 | |||
f5f88101e1 | |||
e511e51491 | |||
242047744e |
7
.github/workflows/ci.yml
vendored
7
.github/workflows/ci.yml
vendored
@ -95,6 +95,13 @@ jobs:
|
||||
env:
|
||||
MACOSX_DEPLOYMENT_TARGET: "10.13"
|
||||
ENABLE_COMPATIBILITY_REPORTING: "ON"
|
||||
- name: Pack
|
||||
run: ./.ci/macos/upload.sh
|
||||
- name: Upload
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: macos
|
||||
path: artifacts/
|
||||
windows:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -67,3 +67,6 @@
|
||||
[submodule "glm"]
|
||||
path = externals/glm
|
||||
url = https://github.com/g-truc/glm
|
||||
[submodule "sirit"]
|
||||
path = externals/sirit
|
||||
url = https://github.com/GPUCode/sirit
|
||||
|
3
externals/CMakeLists.txt
vendored
3
externals/CMakeLists.txt
vendored
@ -67,6 +67,9 @@ set(ENABLE_SPVREMAPPER OFF)
|
||||
set(ENABLE_CTEST OFF)
|
||||
add_subdirectory(glslang)
|
||||
|
||||
# Sirit
|
||||
add_subdirectory(sirit)
|
||||
|
||||
# glm
|
||||
add_subdirectory(glm)
|
||||
|
||||
|
1
externals/sirit
vendored
Submodule
1
externals/sirit
vendored
Submodule
Submodule externals/sirit added at 297d820eeb
@ -7,7 +7,7 @@ buildscript {
|
||||
jcenter()
|
||||
}
|
||||
dependencies {
|
||||
classpath 'com.android.tools.build:gradle:7.2.0'
|
||||
classpath 'com.android.tools.build:gradle:7.3.1'
|
||||
|
||||
// NOTE: Do not place your application dependencies here; they belong
|
||||
// in the individual module build.gradle files
|
||||
|
@ -320,6 +320,8 @@ static Frontend::WindowSystemType GetWindowSystemType() {
|
||||
return Frontend::WindowSystemType::X11;
|
||||
else if (platform_name == QStringLiteral("wayland"))
|
||||
return Frontend::WindowSystemType::Wayland;
|
||||
else if (platform_name == QStringLiteral("cocoa"))
|
||||
return Frontend::WindowSystemType::MacOS;
|
||||
|
||||
LOG_CRITICAL(Frontend, "Unknown Qt platform!");
|
||||
return Frontend::WindowSystemType::Windows;
|
||||
|
@ -486,6 +486,7 @@ void Config::ReadRendererValues() {
|
||||
.toUInt());
|
||||
Settings::values.physical_device = ReadSetting(QStringLiteral("physical_device"), 0).toUInt();
|
||||
Settings::values.async_command_recording = ReadSetting(QStringLiteral("async_command_recording"), true).toBool();
|
||||
Settings::values.spirv_shader_gen = ReadSetting(QStringLiteral("spirv_shader_gen"), false).toBool();
|
||||
Settings::values.use_hw_renderer =
|
||||
ReadSetting(QStringLiteral("use_hw_renderer"), true).toBool();
|
||||
Settings::values.use_hw_shader = ReadSetting(QStringLiteral("use_hw_shader"), true).toBool();
|
||||
@ -1006,6 +1007,7 @@ void Config::SaveRendererValues() {
|
||||
static_cast<u32>(Settings::GraphicsAPI::OpenGL));
|
||||
WriteSetting(QStringLiteral("physical_device"), Settings::values.physical_device, 0);
|
||||
WriteSetting(QStringLiteral("async_command_recording"), Settings::values.async_command_recording, true);
|
||||
WriteSetting(QStringLiteral("spirv_shader_gen"), Settings::values.spirv_shader_gen, false);
|
||||
WriteSetting(QStringLiteral("use_hw_renderer"), Settings::values.use_hw_renderer, true);
|
||||
WriteSetting(QStringLiteral("use_hw_shader"), Settings::values.use_hw_shader, true);
|
||||
#ifdef __APPLE__
|
||||
|
@ -85,6 +85,7 @@ void ConfigureGraphics::SetConfiguration() {
|
||||
ui->graphics_api_combo->setCurrentIndex(static_cast<int>(Settings::values.graphics_api));
|
||||
ui->physical_device_combo->setCurrentIndex(static_cast<int>(Settings::values.physical_device));
|
||||
ui->toggle_async_recording->setChecked(Settings::values.async_command_recording);
|
||||
ui->spirv_shader_gen->setChecked(Settings::values.spirv_shader_gen);
|
||||
}
|
||||
|
||||
void ConfigureGraphics::ApplyConfiguration() {
|
||||
@ -99,6 +100,7 @@ void ConfigureGraphics::ApplyConfiguration() {
|
||||
static_cast<Settings::GraphicsAPI>(ui->graphics_api_combo->currentIndex());
|
||||
Settings::values.physical_device = static_cast<u16>(ui->physical_device_combo->currentIndex());
|
||||
Settings::values.async_command_recording = ui->toggle_async_recording->isChecked();
|
||||
Settings::values.spirv_shader_gen = ui->spirv_shader_gen->isChecked();
|
||||
}
|
||||
|
||||
void ConfigureGraphics::RetranslateUI() {
|
||||
@ -121,4 +123,5 @@ void ConfigureGraphics::SetPhysicalDeviceComboVisibility(int index) {
|
||||
const bool is_visible = graphics_api == Settings::GraphicsAPI::Vulkan;
|
||||
ui->physical_device_label->setVisible(is_visible);
|
||||
ui->physical_device_combo->setVisible(is_visible);
|
||||
ui->spirv_shader_gen->setVisible(is_visible);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>400</width>
|
||||
<height>430</height>
|
||||
<height>513</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="minimumSize">
|
||||
@ -70,6 +70,13 @@
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="spirv_shader_gen">
|
||||
<property name="text">
|
||||
<string>SPIR-V Shader Generation</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
|
@ -10,6 +10,8 @@
|
||||
|
||||
namespace Common {
|
||||
|
||||
constexpr float PI = 3.14159265f;
|
||||
|
||||
template <class T>
|
||||
struct Rectangle {
|
||||
T left{};
|
||||
|
@ -16,7 +16,9 @@ namespace Frontend {
|
||||
/// WindowInformation
|
||||
enum class WindowSystemType : u8 {
|
||||
Headless,
|
||||
Android,
|
||||
Windows,
|
||||
MacOS,
|
||||
X11,
|
||||
Wayland,
|
||||
};
|
||||
|
@ -167,6 +167,7 @@ struct Values {
|
||||
// Renderer
|
||||
GraphicsAPI graphics_api;
|
||||
u16 physical_device;
|
||||
bool spirv_shader_gen;
|
||||
bool renderer_debug;
|
||||
bool dump_command_buffers;
|
||||
bool async_command_recording;
|
||||
|
@ -47,7 +47,7 @@ public:
|
||||
} else {
|
||||
tilt_direction = mouse_move.Cast<float>();
|
||||
tilt_angle = std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f,
|
||||
std::numbers::pi_v<float> * this->tilt_clamp / 180.0f);
|
||||
Common::PI * this->tilt_clamp / 180.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -110,7 +110,7 @@ private:
|
||||
|
||||
// Find the angular rate vector in world space
|
||||
auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
|
||||
angular_rate *= 1000 / update_millisecond / std::numbers::pi_v<float> * 180;
|
||||
angular_rate *= 1000 / update_millisecond / Common::PI * 180;
|
||||
|
||||
// Transform the two vectors from world space to 3DS space
|
||||
gravity = QuaternionRotate(inv_q, gravity);
|
||||
|
@ -16,6 +16,8 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <SDL.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/math_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/param_package.h"
|
||||
#include "common/threadsafe_queue.h"
|
||||
@ -596,9 +598,9 @@ void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
|
||||
event.csensor.data[2] / SDL_STANDARD_GRAVITY);
|
||||
break;
|
||||
case SDL_SENSOR_GYRO:
|
||||
joystick->SetGyro(-event.csensor.data[0] * (180.0f / std::numbers::pi),
|
||||
event.csensor.data[1] * (180.0f / std::numbers::pi),
|
||||
-event.csensor.data[2] * (180.0f / std::numbers::pi));
|
||||
joystick->SetGyro(-event.csensor.data[0] * (180.0f / Common::PI),
|
||||
event.csensor.data[1] * (180.0f / Common::PI),
|
||||
-event.csensor.data[2] * (180.0f / Common::PI));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -86,6 +86,8 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/renderer_vulkan.h
|
||||
renderer_vulkan/vk_blit_helper.cpp
|
||||
renderer_vulkan/vk_blit_helper.h
|
||||
renderer_vulkan/vk_blit_screen.cpp
|
||||
renderer_vulkan/vk_blit_screen.h
|
||||
renderer_vulkan/vk_common.cpp
|
||||
renderer_vulkan/vk_common.h
|
||||
renderer_vulkan/vk_descriptor_manager.cpp
|
||||
@ -111,6 +113,8 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/vk_renderpass_cache.h
|
||||
renderer_vulkan/vk_shader_gen.cpp
|
||||
renderer_vulkan/vk_shader_gen.h
|
||||
renderer_vulkan/vk_shader_gen_spv.cpp
|
||||
renderer_vulkan/vk_shader_gen_spv.h
|
||||
renderer_vulkan/vk_shader_util.cpp
|
||||
renderer_vulkan/vk_shader_util.h
|
||||
renderer_vulkan/vk_stream_buffer.cpp
|
||||
@ -202,7 +206,8 @@ if (NOT MSVC)
|
||||
endif()
|
||||
|
||||
target_link_libraries(video_core PUBLIC common core)
|
||||
target_link_libraries(video_core PRIVATE glad vma vulkan-headers glm::glm SPIRV glslang nihstro-headers Boost::serialization)
|
||||
target_link_libraries(video_core PRIVATE nihstro-headers Boost::serialization glm::glm)
|
||||
target_link_libraries(video_core PRIVATE vulkan-headers vma sirit SPIRV glslang glad)
|
||||
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
|
||||
|
||||
if (ARCHITECTURE_x86_64)
|
||||
|
0
src/video_core/host_shaders/vulkan_present.frag
Normal file
0
src/video_core/host_shaders/vulkan_present.frag
Normal file
0
src/video_core/host_shaders/vulkan_present.vert
Normal file
0
src/video_core/host_shaders/vulkan_present.vert
Normal file
@ -91,8 +91,7 @@ public:
|
||||
u32 fill_size = 0;
|
||||
|
||||
public:
|
||||
u32 watcher_count = 0;
|
||||
std::array<std::weak_ptr<Watcher>, 8> watchers;
|
||||
std::vector<std::weak_ptr<Watcher>> watchers;
|
||||
};
|
||||
|
||||
template <class S>
|
||||
@ -190,7 +189,7 @@ template <class S>
|
||||
auto SurfaceBase<S>::CreateWatcher() -> std::shared_ptr<Watcher> {
|
||||
auto weak_ptr = reinterpret_cast<S*>(this)->weak_from_this();
|
||||
auto watcher = std::make_shared<Watcher>(std::move(weak_ptr));
|
||||
watchers[watcher_count++] = watcher;
|
||||
watchers.push_back(watcher);
|
||||
return watcher;
|
||||
}
|
||||
|
||||
@ -212,8 +211,7 @@ void SurfaceBase<S>::UnlinkAllWatcher() {
|
||||
}
|
||||
}
|
||||
|
||||
watchers = {};
|
||||
watcher_count = 0;
|
||||
watchers.clear();
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -10,7 +10,7 @@ namespace OpenGL {
|
||||
enum class Vendor { Unknown = 0, AMD = 1, Nvidia = 2, Intel = 3, Generic = 4 };
|
||||
|
||||
enum class DriverBug {
|
||||
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
|
||||
// AMD drivers sometimes freezes when one shader stage is changed but not the others.
|
||||
ShaderStageChangeFreeze = 1 << 0,
|
||||
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
|
||||
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
|
||||
|
@ -20,19 +20,14 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
static bool IsVendorAmd() {
|
||||
const std::string_view gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
|
||||
return gpu_vendor == "ATI Technologies Inc." || gpu_vendor == "Advanced Micro Devices, Inc.";
|
||||
}
|
||||
#ifdef __APPLE__
|
||||
static bool IsVendorIntel() {
|
||||
std::string gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
|
||||
return gpu_vendor == "Intel Inc.";
|
||||
}
|
||||
#endif
|
||||
constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||
constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driver)
|
||||
: driver{driver}, runtime{driver}, res_cache{*this, runtime}, is_amd(IsVendorAmd()),
|
||||
: driver{driver}, runtime{driver}, res_cache{*this, runtime},
|
||||
shader_program_manager{emu_window, driver, !driver.IsOpenGLES()},
|
||||
vertex_buffer{GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE},
|
||||
uniform_buffer{GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE},
|
||||
index_buffer{GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE},
|
||||
@ -44,8 +39,7 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
|
||||
|
||||
// Create a 1x1 clear texture to use in the NULL case,
|
||||
// instead of OpenGL's default of solid black
|
||||
glGenTextures(1, &default_texture);
|
||||
glBindTexture(GL_TEXTURE_2D, default_texture);
|
||||
default_texture.Create();
|
||||
// For some reason alpha 0 wraps around to 1.0, so use 1/255 instead
|
||||
u8 framebuffer_data[4] = {0, 0, 0, 1};
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
|
||||
@ -128,17 +122,6 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
|
||||
state.Apply();
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle());
|
||||
|
||||
#ifdef __APPLE__
|
||||
if (IsVendorIntel()) {
|
||||
shader_program_manager = std::make_unique<ShaderProgramManager>(
|
||||
emu_window, VideoCore::g_separable_shader_enabled, is_amd);
|
||||
} else {
|
||||
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, true, is_amd);
|
||||
}
|
||||
#else
|
||||
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, !GLES, is_amd);
|
||||
#endif
|
||||
|
||||
glEnable(GL_BLEND);
|
||||
|
||||
// Explicitly call the derived version to avoid warnings about calling virtual
|
||||
@ -150,7 +133,7 @@ RasterizerOpenGL::~RasterizerOpenGL() = default;
|
||||
|
||||
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||
shader_program_manager->LoadDiskCache(stop_loading, callback);
|
||||
shader_program_manager.LoadDiskCache(stop_loading, callback);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncEntireState() {
|
||||
@ -285,7 +268,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
|
||||
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||
bool RasterizerOpenGL::SetupVertexShader() {
|
||||
MICROPROFILE_SCOPE(OpenGL_VS);
|
||||
return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs,
|
||||
return shader_program_manager.UseProgrammableVertexShader(Pica::g_state.regs,
|
||||
Pica::g_state.vs);
|
||||
}
|
||||
|
||||
@ -299,7 +282,7 @@ bool RasterizerOpenGL::SetupGeometryShader() {
|
||||
return false;
|
||||
}
|
||||
|
||||
shader_program_manager->UseFixedGeometryShader(regs);
|
||||
shader_program_manager.UseFixedGeometryShader(regs);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -360,7 +343,7 @@ bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) {
|
||||
SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max);
|
||||
vertex_buffer.Unmap(vs_input_size);
|
||||
|
||||
shader_program_manager->ApplyTo(state);
|
||||
shader_program_manager.ApplyTo(state);
|
||||
state.Apply();
|
||||
|
||||
if (is_indexed) {
|
||||
@ -623,7 +606,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
|
||||
// the geometry in question.
|
||||
// For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
|
||||
// on the male character's face, which in the OpenGL default appear black.
|
||||
state.texture_units[texture_index].texture_2d = default_texture;
|
||||
state.texture_units[texture_index].texture_2d = default_texture.handle;
|
||||
}
|
||||
} else {
|
||||
state.texture_units[texture_index].texture_2d = 0;
|
||||
@ -687,9 +670,9 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
|
||||
} else {
|
||||
state.draw.vertex_array = sw_vao.handle;
|
||||
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||
shader_program_manager->UseTrivialVertexShader();
|
||||
shader_program_manager->UseTrivialGeometryShader();
|
||||
shader_program_manager->ApplyTo(state);
|
||||
shader_program_manager.UseTrivialVertexShader();
|
||||
shader_program_manager.UseTrivialGeometryShader();
|
||||
shader_program_manager.ApplyTo(state);
|
||||
state.Apply();
|
||||
|
||||
std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
|
||||
@ -784,7 +767,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
|
||||
|
||||
// Blending
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
|
||||
if (GLES) {
|
||||
if (driver.IsOpenGLES()) {
|
||||
// With GLES, we need this in the fragment shader to emulate logic operations
|
||||
shader_dirty = true;
|
||||
}
|
||||
@ -908,7 +891,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
|
||||
|
||||
// Logic op
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
|
||||
if (GLES) {
|
||||
if (driver.IsOpenGLES()) {
|
||||
// With GLES, we need this in the fragment shader to emulate logic operations
|
||||
shader_dirty = true;
|
||||
}
|
||||
@ -1519,7 +1502,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetShader() {
|
||||
shader_program_manager->UseFragmentShader(Pica::g_state.regs);
|
||||
shader_program_manager.UseFragmentShader(Pica::g_state.regs);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncClipEnabled() {
|
||||
@ -1595,7 +1578,7 @@ void RasterizerOpenGL::SyncLogicOp() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op);
|
||||
|
||||
if (GLES) {
|
||||
if (driver.IsOpenGLES()) {
|
||||
if (!regs.framebuffer.output_merger.alphablend_enable) {
|
||||
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
|
||||
// Color output is disabled by logic operation. We use color write mask to skip
|
||||
@ -1608,7 +1591,7 @@ void RasterizerOpenGL::SyncLogicOp() {
|
||||
|
||||
void RasterizerOpenGL::SyncColorWriteMask() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
if (GLES) {
|
||||
if (driver.IsOpenGLES()) {
|
||||
if (!regs.framebuffer.output_merger.alphablend_enable) {
|
||||
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
|
||||
// Color output is disabled by logic operation. We use color write mask to skip
|
||||
|
@ -138,28 +138,15 @@ private:
|
||||
private:
|
||||
Driver& driver;
|
||||
OpenGLState state;
|
||||
GLuint default_texture;
|
||||
|
||||
TextureRuntime runtime;
|
||||
RasterizerCache res_cache;
|
||||
|
||||
std::vector<HardwareVertex> vertex_batch;
|
||||
|
||||
bool is_amd;
|
||||
bool shader_dirty = true;
|
||||
|
||||
std::unique_ptr<ShaderProgramManager> shader_program_manager;
|
||||
|
||||
// They shall be big enough for about one frame.
|
||||
static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
ShaderProgramManager shader_program_manager;
|
||||
|
||||
OGLVertexArray sw_vao; // VAO for software shader draw
|
||||
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
|
||||
std::array<bool, 16> hw_vao_enabled_attributes{};
|
||||
|
||||
OGLTexture default_texture;
|
||||
std::array<SamplerInfo, 3> texture_samplers;
|
||||
OGLStreamBuffer vertex_buffer;
|
||||
OGLStreamBuffer uniform_buffer;
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_vars.h"
|
||||
#include "video_core/renderer_opengl/gl_driver.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace OpenGL {
|
||||
@ -327,12 +327,13 @@ using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FR
|
||||
|
||||
class ShaderProgramManager::Impl {
|
||||
public:
|
||||
explicit Impl(bool separable, bool is_amd)
|
||||
: is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable),
|
||||
explicit Impl(bool separable)
|
||||
: separable(separable), programmable_vertex_shaders(separable),
|
||||
trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
|
||||
fragment_shaders(separable), disk_cache(separable) {
|
||||
if (separable)
|
||||
if (separable) {
|
||||
pipeline.Create();
|
||||
}
|
||||
}
|
||||
|
||||
struct ShaderTuple {
|
||||
@ -361,25 +362,19 @@ public:
|
||||
static_assert(offsetof(ShaderTuple, fs_hash) == sizeof(std::size_t) * 2,
|
||||
"ShaderTuple layout changed!");
|
||||
|
||||
bool is_amd;
|
||||
bool separable;
|
||||
|
||||
ShaderTuple current;
|
||||
|
||||
ProgrammableVertexShaders programmable_vertex_shaders;
|
||||
TrivialVertexShader trivial_vertex_shader;
|
||||
|
||||
FixedGeometryShaders fixed_geometry_shaders;
|
||||
|
||||
FragmentShaders fragment_shaders;
|
||||
std::unordered_map<u64, OGLProgram> program_cache;
|
||||
OGLPipeline pipeline;
|
||||
ShaderDiskCache disk_cache;
|
||||
};
|
||||
|
||||
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
|
||||
bool is_amd)
|
||||
: impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
|
||||
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable)
|
||||
: impl(std::make_unique<Impl>(separable)), emu_window{emu_window_}, driver{driver} {}
|
||||
|
||||
ShaderProgramManager::~ShaderProgramManager() = default;
|
||||
|
||||
@ -441,10 +436,7 @@ void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
|
||||
|
||||
void ShaderProgramManager::ApplyTo(OpenGLState& state) {
|
||||
if (impl->separable) {
|
||||
if (impl->is_amd) {
|
||||
// Without this reseting, AMD sometimes freezes when one stage is changed but not
|
||||
// for the others. On the other hand, including this reset seems to introduce memory
|
||||
// leak in Intel Graphics.
|
||||
if (driver.HasBug(DriverBug::ShaderStageChangeFreeze)) {
|
||||
glUseProgramStages(
|
||||
impl->pipeline.handle,
|
||||
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);
|
||||
|
@ -107,12 +107,13 @@ static_assert(sizeof(VSUniformData) == 1856,
|
||||
static_assert(sizeof(VSUniformData) < 16384,
|
||||
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
class Driver;
|
||||
class OpenGLState;
|
||||
|
||||
/// A class that manage different shader stages and configures them with given config data.
|
||||
class ShaderProgramManager {
|
||||
public:
|
||||
ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd);
|
||||
ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable);
|
||||
~ShaderProgramManager();
|
||||
|
||||
void LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
@ -133,7 +134,7 @@ public:
|
||||
private:
|
||||
class Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
|
||||
Frontend::EmuWindow& emu_window;
|
||||
Driver& driver;
|
||||
};
|
||||
} // namespace OpenGL
|
||||
|
@ -121,12 +121,14 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::spa
|
||||
OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||
VideoCore::TextureType type) {
|
||||
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
|
||||
const u32 levels = std::log2(std::max(width, height)) + 1;
|
||||
const GLenum target =
|
||||
type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D;
|
||||
|
||||
// Attempt to recycle an unused texture
|
||||
const VideoCore::HostTextureTag key = {
|
||||
.format = format, .width = width, .height = height, .layers = layers};
|
||||
|
||||
// Attempt to recycle an unused texture
|
||||
if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
|
||||
OGLTexture texture = std::move(it->second);
|
||||
texture_recycler.erase(it);
|
||||
@ -144,8 +146,7 @@ OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(target, texture.handle);
|
||||
|
||||
glTexStorage2D(target, std::bit_width(std::max(width, height)), tuple.internal_format, width,
|
||||
height);
|
||||
glTexStorage2D(target, levels, tuple.internal_format, width, height);
|
||||
|
||||
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
|
@ -149,14 +149,12 @@ struct ScreenRectVertex {
|
||||
Common::Vec2f tex_coord;
|
||||
};
|
||||
|
||||
constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 8192;
|
||||
|
||||
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
|
||||
: RendererBase{window}, instance{window, Settings::values.physical_device}, scheduler{instance, *this},
|
||||
: RendererBase{window}, instance{window, Settings::values.physical_device},
|
||||
scheduler{instance, renderpass_cache, *this},
|
||||
renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
|
||||
runtime{instance, scheduler, renderpass_cache, desc_manager},
|
||||
swapchain{instance, scheduler, renderpass_cache},
|
||||
vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
|
||||
rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} {
|
||||
|
||||
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
|
||||
@ -890,7 +888,6 @@ void RendererVulkan::SwapBuffers() {
|
||||
PrepareRendertarget();
|
||||
|
||||
const auto RecreateSwapchain = [&] {
|
||||
renderpass_cache.ExitRenderpass();
|
||||
scheduler.Finish();
|
||||
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
|
||||
swapchain.Create(layout.width, layout.height);
|
||||
@ -900,6 +897,7 @@ void RendererVulkan::SwapBuffers() {
|
||||
if (swapchain.NeedsRecreation()) {
|
||||
RecreateSwapchain();
|
||||
}
|
||||
scheduler.WaitWorker();
|
||||
swapchain.AcquireNextImage();
|
||||
} while (swapchain.NeedsRecreation());
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "common/math_util.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
@ -23,42 +23,6 @@ struct FramebufferLayout;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Structure used for storing information about the textures for each 3DS screen
|
||||
struct TextureInfo {
|
||||
ImageAlloc alloc;
|
||||
u32 width;
|
||||
u32 height;
|
||||
GPU::Regs::PixelFormat format;
|
||||
};
|
||||
|
||||
/// Structure used for storing information about the display target for each 3DS screen
|
||||
struct ScreenInfo {
|
||||
ImageAlloc* display_texture = nullptr;
|
||||
Common::Rectangle<float> display_texcoords;
|
||||
TextureInfo texture;
|
||||
vk::Sampler sampler;
|
||||
};
|
||||
|
||||
// Uniform data used for presenting the 3DS screens
|
||||
struct PresentUniformData {
|
||||
glm::mat4 modelview;
|
||||
Common::Vec4f i_resolution;
|
||||
Common::Vec4f o_resolution;
|
||||
int screen_id_l = 0;
|
||||
int screen_id_r = 0;
|
||||
int layer = 0;
|
||||
int reverse_interlaced = 0;
|
||||
|
||||
// Returns an immutable byte view of the uniform data
|
||||
auto AsBytes() const {
|
||||
return std::as_bytes(std::span{this, 1});
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(sizeof(PresentUniformData) < 256, "PresentUniformData must be below 256 bytes!");
|
||||
|
||||
constexpr u32 PRESENT_PIPELINES = 3;
|
||||
|
||||
class RasterizerVulkan;
|
||||
|
||||
class RendererVulkan : public RendererBase {
|
||||
@ -109,25 +73,10 @@ private:
|
||||
DescriptorManager desc_manager;
|
||||
TextureRuntime runtime;
|
||||
Swapchain swapchain;
|
||||
StreamBuffer vertex_buffer;
|
||||
RasterizerVulkan rasterizer;
|
||||
|
||||
// Present pipelines (Normal, Anaglyph, Interlaced)
|
||||
vk::PipelineLayout present_pipeline_layout;
|
||||
vk::DescriptorSetLayout present_descriptor_layout;
|
||||
vk::DescriptorUpdateTemplate present_update_template;
|
||||
std::array<vk::Pipeline, PRESENT_PIPELINES> present_pipelines;
|
||||
std::array<vk::DescriptorSet, PRESENT_PIPELINES> present_descriptor_sets;
|
||||
std::array<vk::ShaderModule, PRESENT_PIPELINES> present_shaders;
|
||||
std::array<vk::Sampler, 2> present_samplers;
|
||||
vk::ShaderModule present_vertex_shader;
|
||||
u32 current_pipeline = 0;
|
||||
u32 current_sampler = 0;
|
||||
|
||||
/// Display information for top and bottom screens respectively
|
||||
// Display information for top and bottom screens respectively
|
||||
std::array<ScreenInfo, 3> screen_infos{};
|
||||
PresentUniformData draw_info{};
|
||||
vk::ClearColorValue clear_color{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
1015
src/video_core/renderer_vulkan/vk_blit_screen.cpp
Normal file
1015
src/video_core/renderer_vulkan/vk_blit_screen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
152
src/video_core/renderer_vulkan/vk_blit_screen.h
Normal file
152
src/video_core/renderer_vulkan/vk_blit_screen.h
Normal file
@ -0,0 +1,152 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <glm/glm.hpp>
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Memory {
|
||||
class MemorySystem;
|
||||
}
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ScreenInfo;
|
||||
|
||||
class Instance;
|
||||
class RasterizerVulkan;
|
||||
class Scheduler;
|
||||
class Swapchain;
|
||||
class RenderpassCache;
|
||||
class DescriptorManager;
|
||||
|
||||
struct ScreenInfo {
|
||||
vk::ImageView image_view{};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
Common::Rectangle<f32> texcoords;
|
||||
};
|
||||
|
||||
using Images = std::array<vk::Image, 3>;
|
||||
|
||||
struct PresentUniformData {
|
||||
glm::mat4 modelview;
|
||||
Common::Vec4f i_resolution;
|
||||
Common::Vec4f o_resolution;
|
||||
int screen_id_l = 0;
|
||||
int screen_id_r = 0;
|
||||
int layer = 0;
|
||||
int reverse_interlaced = 0;
|
||||
|
||||
// Returns an immutable byte view of the uniform data
|
||||
auto AsBytes() const {
|
||||
return std::as_bytes(std::span{this, 1});
|
||||
}
|
||||
};
|
||||
|
||||
constexpr u32 PRESENT_PIPELINES = 3;
|
||||
|
||||
class BlitScreen {
|
||||
public:
|
||||
explicit BlitScreen(Frontend::EmuWindow& render_window, const Instance& instance,
|
||||
Scheduler& scheduler, Swapchain& swapchain, RenderpassCache& renderpass_cache,
|
||||
DescriptorManager& desc_manager, std::array<ScreenInfo, 3>& screen_infos);
|
||||
~BlitScreen();
|
||||
|
||||
void Recreate();
|
||||
|
||||
[[nodiscard]] vk::Semaphore Draw(const GPU::Regs::FramebufferConfig& framebuffer,
|
||||
const vk::Framebuffer& host_framebuffer,
|
||||
const Layout::FramebufferLayout layout, vk::Extent2D render_area,
|
||||
bool use_accelerated, u32 screen);
|
||||
|
||||
[[nodiscard]] vk::Semaphore DrawToSwapchain(const GPU::Regs::FramebufferConfig& framebuffer,
|
||||
bool use_accelerated);
|
||||
|
||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const vk::ImageView& image_view,
|
||||
vk::Extent2D extent);
|
||||
|
||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const vk::ImageView& image_view,
|
||||
vk::Extent2D extent, vk::RenderPass& rd);
|
||||
|
||||
private:
|
||||
void CreateStaticResources();
|
||||
void CreateShaders();
|
||||
void CreateSemaphores();
|
||||
void CreateDescriptorPool();
|
||||
void CreateRenderPass();
|
||||
vk::RenderPass CreateRenderPassImpl(vk::Format format, bool is_present = true);
|
||||
void CreateDescriptorSetLayout();
|
||||
void CreateDescriptorSets();
|
||||
void CreatePipelineLayout();
|
||||
void CreateGraphicsPipeline();
|
||||
void CreateSampler();
|
||||
|
||||
void CreateDynamicResources();
|
||||
void CreateFramebuffers();
|
||||
|
||||
void RefreshResources(const GPU::Regs::FramebufferConfig& framebuffer);
|
||||
void ReleaseRawImages();
|
||||
void CreateStagingBuffer(const GPU::Regs::FramebufferConfig& framebuffer);
|
||||
void CreateRawImages(const GPU::Regs::FramebufferConfig& framebuffer);
|
||||
|
||||
struct BufferData;
|
||||
|
||||
void UpdateDescriptorSet(std::size_t image_index, bool use_accelerated) const;
|
||||
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
||||
void SetVertexData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
||||
|
||||
private:
|
||||
Frontend::EmuWindow& render_window;
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
Swapchain& swapchain;
|
||||
RenderpassCache& renderpass_cache;
|
||||
DescriptorManager& desc_manager;
|
||||
Memory::MemorySystem& memory;
|
||||
std::array<ScreenInfo, 3>& screen_infos;
|
||||
std::size_t image_count;
|
||||
PresentUniformData draw_info{};
|
||||
StreamBuffer vertex_buffer;
|
||||
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
vk::DescriptorUpdateTemplate update_template;
|
||||
std::array<vk::Pipeline, PRESENT_PIPELINES> pipelines;
|
||||
std::array<vk::DescriptorSet, PRESENT_PIPELINES> descriptor_sets;
|
||||
std::array<vk::ShaderModule, PRESENT_PIPELINES> shaders;
|
||||
std::array<vk::Sampler, 2> samplers;
|
||||
vk::ShaderModule vertex_shader;
|
||||
u32 current_pipeline = 0;
|
||||
u32 current_sampler = 0;
|
||||
|
||||
vk::RenderPass renderpass;
|
||||
std::vector<vk::Framebuffer> framebuffers;
|
||||
std::vector<u64> resource_ticks;
|
||||
std::vector<vk::Semaphore> semaphores;
|
||||
std::vector<Images> raw_images;
|
||||
GPU::Regs::PixelFormat pixel_format;
|
||||
u32 raw_width;
|
||||
u32 raw_height;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
@ -5,7 +5,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include "common/common_types.h"
|
||||
|
||||
// Include vulkan-hpp header
|
||||
#define VK_NO_PROTOTYPES 1
|
||||
|
@ -13,6 +13,8 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
vk::DynamicLoader Instance::dl;
|
||||
|
||||
vk::Format ToVkFormat(VideoCore::PixelFormat format) {
|
||||
switch (format) {
|
||||
case VideoCore::PixelFormat::RGBA8:
|
||||
@ -253,6 +255,7 @@ bool Instance::CreateDevice() {
|
||||
|
||||
// Not having geometry shaders will cause issues with accelerated rendering.
|
||||
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
|
||||
device_features = available;
|
||||
if (!available.geometryShader) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Geometry shaders not availabe! Accelerated rendering not possible!");
|
||||
@ -284,7 +287,6 @@ bool Instance::CreateDevice() {
|
||||
};
|
||||
|
||||
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
AddExtension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
|
||||
AddExtension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
|
||||
timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
|
||||
extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
|
@ -85,6 +85,11 @@ public:
|
||||
return present_queue;
|
||||
}
|
||||
|
||||
/// Returns true if logic operations need shader emulation
|
||||
bool NeedsLogicOpEmulation() const {
|
||||
return !device_features.logicOp;
|
||||
}
|
||||
|
||||
/// Returns true when VK_KHR_timeline_semaphore is supported
|
||||
bool IsTimelineSemaphoreSupported() const {
|
||||
return timeline_semaphores;
|
||||
@ -139,12 +144,13 @@ private:
|
||||
void CreateAllocator();
|
||||
|
||||
private:
|
||||
vk::DynamicLoader dl;
|
||||
static vk::DynamicLoader dl;
|
||||
vk::Device device;
|
||||
vk::PhysicalDevice physical_device;
|
||||
vk::Instance instance;
|
||||
vk::SurfaceKHR surface;
|
||||
vk::PhysicalDeviceProperties device_properties;
|
||||
vk::PhysicalDeviceFeatures device_features;
|
||||
VmaAllocator allocator;
|
||||
vk::Queue present_queue;
|
||||
vk::Queue graphics_queue;
|
||||
|
@ -5,8 +5,8 @@
|
||||
#include <filesystem>
|
||||
#include "common/common_paths.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
@ -17,21 +17,21 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/**
 * Computes the size in bytes of a vertex attribute.
 * @param format Component type of the attribute
 * @param size Number of components
 * @return Component size multiplied by `size`, or 0 when `format` matches no known case
 */
u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
    switch (format) {
    case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
        return sizeof(float) * size;
    case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
        return sizeof(u16) * size;
    case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
    case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
        return sizeof(u8) * size;
    }

    return 0;
}
|
||||
|
||||
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
|
||||
vk::Format ToVkAttributeFormat(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
|
||||
constexpr std::array attribute_formats = {
|
||||
std::array{vk::Format::eR8Sint, vk::Format::eR8G8Sint, vk::Format::eR8G8B8Sint,
|
||||
vk::Format::eR8G8B8A8Sint},
|
||||
@ -42,8 +42,8 @@ vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
|
||||
std::array{vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32Sfloat,
|
||||
vk::Format::eR32G32B32A32Sfloat}};
|
||||
|
||||
ASSERT(attrib.size <= 4);
|
||||
return attribute_formats[static_cast<u32>(attrib.type.Value())][attrib.size.Value() - 1];
|
||||
ASSERT(size <= 4);
|
||||
return attribute_formats[static_cast<u32>(format)][size - 1];
|
||||
}
|
||||
|
||||
vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
|
||||
@ -62,6 +62,21 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
|
||||
return vk::ShaderStageFlagBits::eVertex;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsAttribFormatSupported(const VertexAttribute& attrib, const Instance& instance) {
|
||||
static std::unordered_map<vk::Format, bool> format_support_cache;
|
||||
|
||||
vk::PhysicalDevice physical_device = instance.GetPhysicalDevice();
|
||||
const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);
|
||||
auto [it, new_format] = format_support_cache.try_emplace(format, false);
|
||||
if (new_format) {
|
||||
LOG_INFO(Render_Vulkan, "Quering support for format {}", vk::to_string(format));
|
||||
const vk::FormatFeatureFlags features = physical_device.getFormatProperties(format).bufferFeatures;
|
||||
it->second = (features & vk::FormatFeatureFlagBits::eVertexBuffer) == vk::FormatFeatureFlagBits::eVertexBuffer;
|
||||
}
|
||||
|
||||
return it->second;
|
||||
};
|
||||
|
||||
PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler,
|
||||
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
|
||||
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager} {
|
||||
@ -85,7 +100,11 @@ PipelineCache::~PipelineCache() {
|
||||
device.destroyShaderModule(module);
|
||||
}
|
||||
|
||||
for (auto& [key, module] : fragment_shaders.shaders) {
|
||||
for (auto& [key, module] : fragment_shaders_glsl.shaders) {
|
||||
device.destroyShaderModule(module);
|
||||
}
|
||||
|
||||
for (auto& [key, module] : fragment_shaders_spv.shaders) {
|
||||
device.destroyShaderModule(module);
|
||||
}
|
||||
|
||||
@ -179,21 +198,26 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) {
|
||||
desc_manager.BindDescriptorSets();
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
|
||||
Pica::Shader::ShaderSetup& setup,
|
||||
const VertexLayout& layout) {
|
||||
MICROPROFILE_SCOPE(Vulkan_VS);
|
||||
|
||||
PicaVSConfig config{regs.vs, setup};
|
||||
|
||||
u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
|
||||
for (u32 i = 0; i < layout.attribute_count; i++) {
|
||||
const auto& attrib = layout.attributes[i];
|
||||
config.state.attrib_types[attrib.location.Value()] = attrib.type.Value();
|
||||
const u32 location = attrib.location.Value();
|
||||
const bool is_supported = IsAttribFormatSupported(attrib, instance);
|
||||
ASSERT(is_supported || attrib.size == 3);
|
||||
|
||||
config.state.attrib_types[location] = attrib.type.Value();
|
||||
config.state.emulated_attrib_locations[location] =
|
||||
is_supported ? 0 : emulated_attrib_loc++;
|
||||
}
|
||||
|
||||
auto [handle, result] =
|
||||
programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
|
||||
instance.GetDevice(), ShaderOptimization::Debug);
|
||||
instance.GetDevice(), ShaderOptimization::High);
|
||||
if (!handle) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
|
||||
return false;
|
||||
@ -218,8 +242,8 @@ void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
|
||||
const PicaFixedGSConfig gs_config{regs};
|
||||
|
||||
scheduler.Record([this, gs_config](vk::CommandBuffer, vk::CommandBuffer) {
|
||||
auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
|
||||
instance.GetDevice(), ShaderOptimization::High);
|
||||
vk::ShaderModule handle = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
|
||||
instance.GetDevice(), ShaderOptimization::High);
|
||||
current_shaders[ProgramType::GS] = handle;
|
||||
shader_hashes[ProgramType::GS] = gs_config.Hash();
|
||||
});
|
||||
@ -232,12 +256,21 @@ void PipelineCache::UseTrivialGeometryShader() {
|
||||
});
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_FragmentGeneration, "Vulkan", "Fragment Shader Compilation", MP_RGB(255, 100, 100));
|
||||
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
|
||||
const PicaFSConfig config{regs};
|
||||
const PicaFSConfig config{regs, instance};
|
||||
|
||||
scheduler.Record([this, config](vk::CommandBuffer, vk::CommandBuffer) {
|
||||
auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment,
|
||||
instance.GetDevice(), ShaderOptimization::High);
|
||||
MICROPROFILE_SCOPE(Vulkan_FragmentGeneration);
|
||||
|
||||
vk::ShaderModule handle{};
|
||||
if (Settings::values.spirv_shader_gen) {
|
||||
handle = fragment_shaders_spv.Get(config, instance.GetDevice());
|
||||
} else {
|
||||
handle = fragment_shaders_glsl.Get(config, vk::ShaderStageFlagBits::eFragment,
|
||||
instance.GetDevice(), ShaderOptimization::High);
|
||||
}
|
||||
|
||||
current_shaders[ProgramType::FS] = handle;
|
||||
shader_hashes[ProgramType::FS] = config.Hash();
|
||||
});
|
||||
@ -272,27 +305,17 @@ void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) {
|
||||
}
|
||||
|
||||
void PipelineCache::SetViewport(float x, float y, float width, float height) {
|
||||
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
|
||||
const vk::Viewport viewport{x, y, width, height, 0.f, 1.f};
|
||||
|
||||
if (viewport != current_viewport || is_dirty) {
|
||||
scheduler.Record([viewport](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
render_cmdbuf.setViewport(0, viewport);
|
||||
});
|
||||
current_viewport = viewport;
|
||||
}
|
||||
scheduler.Record([viewport](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
render_cmdbuf.setViewport(0, viewport);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Records a dynamic scissor update on the render command buffer.
 * The command is recorded unconditionally, exactly once per call; no cached scissor state
 * is consulted or updated.
 */
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
    const vk::Rect2D scissor{{x, y}, {width, height}};
    // Capture by value: the lambda runs later, after this stack frame is gone
    scheduler.Record([scissor](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
        render_cmdbuf.setScissor(0, scissor);
    });
}
|
||||
|
||||
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
|
||||
@ -396,10 +419,12 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
|
||||
.stage = ToVkShaderStage(i), .module = shader, .pName = "main"};
|
||||
}
|
||||
|
||||
// Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and
|
||||
// increasing data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE
|
||||
// as the input rate. Since one instance is all we render, the shader will always read the
|
||||
// single attribute.
|
||||
/**
|
||||
* Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and
|
||||
* increasing data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE
|
||||
* as the input rate. Since one instance is all we render, the shader will always read the
|
||||
* single attribute.
|
||||
**/
|
||||
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
|
||||
for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
|
||||
const auto& binding = info.vertex_layout.bindings[i];
|
||||
@ -410,20 +435,37 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
|
||||
: vk::VertexInputRate::eVertex};
|
||||
}
|
||||
|
||||
// Populate vertex attribute structures
|
||||
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
|
||||
u32 emulated_attrib_count = 0;
|
||||
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES * 2> attributes;
|
||||
for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
|
||||
const auto& attr = info.vertex_layout.attributes[i];
|
||||
attributes[i] = vk::VertexInputAttributeDescription{.location = attr.location,
|
||||
.binding = attr.binding,
|
||||
.format = ToVkAttributeFormat(attr),
|
||||
.offset = attr.offset};
|
||||
const VertexAttribute& attrib = info.vertex_layout.attributes[i];
|
||||
const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);
|
||||
const bool is_supported = IsAttribFormatSupported(attrib, instance);
|
||||
ASSERT_MSG(is_supported || attrib.size == 3);
|
||||
|
||||
attributes[i] = vk::VertexInputAttributeDescription{.location = attrib.location,
|
||||
.binding = attrib.binding,
|
||||
.format = is_supported ? format
|
||||
: ToVkAttributeFormat(attrib.type, 2),
|
||||
.offset = attrib.offset};
|
||||
|
||||
// When the requested 3-component vertex format is unsupported by the hardware
|
||||
// it is emulated by breaking it into a vec2 + vec1. These are combined into a vec3
|
||||
// by the vertex shader.
|
||||
if (!is_supported) {
|
||||
const u32 location = MAX_VERTEX_ATTRIBUTES + emulated_attrib_count++;
|
||||
LOG_WARNING(Render_Vulkan, "\nEmulating attrib {} at location {}\n", attrib.location, location);
|
||||
attributes[location] = vk::VertexInputAttributeDescription{.location = location,
|
||||
.binding = attrib.binding,
|
||||
.format = ToVkAttributeFormat(attrib.type, 1),
|
||||
.offset = attrib.offset + AttribBytes(attrib.type, 2)};
|
||||
}
|
||||
}
|
||||
|
||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
||||
.vertexBindingDescriptionCount = info.vertex_layout.binding_count,
|
||||
.pVertexBindingDescriptions = bindings.data(),
|
||||
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
|
||||
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count + emulated_attrib_count,
|
||||
.pVertexAttributeDescriptions = attributes.data()};
|
||||
|
||||
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
|
||||
@ -452,7 +494,7 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
|
||||
.colorWriteMask = static_cast<vk::ColorComponentFlags>(info.blending.color_write_mask)};
|
||||
|
||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||
.logicOpEnable = !info.blending.blend_enable.Value(),
|
||||
.logicOpEnable = !info.blending.blend_enable.Value() && !instance.NeedsLogicOpEmulation(),
|
||||
.logicOp = PicaToVK::LogicOp(info.blending.logic_op.Value()),
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = &colorblend_attachment,
|
||||
@ -463,11 +505,7 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
|
||||
|
||||
const vk::Rect2D scissor = {.offset = {0, 0}, .extent = {1, 1}};
|
||||
|
||||
vk::PipelineViewportDepthClipControlCreateInfoEXT depth_clip_control = {.negativeOneToOne =
|
||||
true};
|
||||
|
||||
const vk::PipelineViewportStateCreateInfo viewport_info = {
|
||||
.pNext = &depth_clip_control,
|
||||
.viewportCount = 1,
|
||||
.pViewports = &viewport,
|
||||
.scissorCount = 1,
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
|
||||
#include "video_core/shader/shader_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
@ -116,9 +116,12 @@ using ProgrammableVertexShaders = Pica::Shader::ShaderDoubleCache<PicaVSConfig,
|
||||
using FixedGeometryShaders = Pica::Shader::ShaderCache<PicaFixedGSConfig, vk::ShaderModule,
|
||||
&Compile, &GenerateFixedGeometryShader>;
|
||||
|
||||
using FragmentShaders =
|
||||
using FragmentShadersGLSL =
|
||||
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;
|
||||
|
||||
using FragmentShadersSPV =
|
||||
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &CompileSPV, &GenerateFragmentShaderSPV>;
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
class RenderpassCache;
|
||||
@ -126,7 +129,6 @@ class DescriptorManager;
|
||||
|
||||
/**
|
||||
* Stores a collection of rasterizer pipelines used during rendering.
|
||||
* In addition handles descriptor set management.
|
||||
*/
|
||||
class PipelineCache {
|
||||
public:
|
||||
@ -210,8 +212,6 @@ private:
|
||||
std::unordered_map<u64, vk::Pipeline, Common::IdentityHash<u64>> graphics_pipelines;
|
||||
vk::Pipeline current_pipeline{};
|
||||
PipelineInfo current_info{};
|
||||
vk::Viewport current_viewport{};
|
||||
vk::Rect2D current_scissor{};
|
||||
|
||||
// Bound shader modules
|
||||
enum ProgramType : u32 { VS = 0, GS = 2, FS = 1 };
|
||||
@ -220,7 +220,8 @@ private:
|
||||
std::array<u64, MAX_SHADER_STAGES> shader_hashes;
|
||||
ProgrammableVertexShaders programmable_vertex_shaders;
|
||||
FixedGeometryShaders fixed_geometry_shaders;
|
||||
FragmentShaders fragment_shaders;
|
||||
FragmentShadersGLSL fragment_shaders_glsl;
|
||||
FragmentShadersSPV fragment_shaders_spv;
|
||||
vk::ShaderModule trivial_vertex_shader;
|
||||
};
|
||||
|
||||
|
@ -8,7 +8,6 @@
|
||||
#elif defined(_WIN32)
|
||||
#define VK_USE_PLATFORM_WIN32_KHR
|
||||
#elif defined(__APPLE__)
|
||||
#define VK_USE_PLATFORM_MACOS_MVK
|
||||
#define VK_USE_PLATFORM_METAL_EXT
|
||||
#else
|
||||
#define VK_USE_PLATFORM_WAYLAND_KHR
|
||||
@ -50,9 +49,7 @@ vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& e
|
||||
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
if (window_info.type == Frontend::WindowSystemType::Wayland) {
|
||||
} else if (window_info.type == Frontend::WindowSystemType::Wayland) {
|
||||
const vk::WaylandSurfaceCreateInfoKHR wayland_ci = {
|
||||
.display = static_cast<wl_display*>(window_info.display_connection),
|
||||
.surface = static_cast<wl_surface*>(window_info.render_surface)};
|
||||
@ -63,10 +60,33 @@ vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& e
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#elif defined(VK_USE_PLATFORM_METAL_EXT)
|
||||
if (window_info.type == Frontend::WindowSystemType::MacOS) {
|
||||
const vk::MetalSurfaceCreateInfoEXT macos_ci = {
|
||||
.pLayer = static_cast<const CAMetalLayer*>(window_info.render_surface)
|
||||
};
|
||||
|
||||
if (instance.createMetalSurfaceEXT(&macos_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize MacOS surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#elif defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
if (window_info.type == Frontend::WindowSystemType::Android) {
|
||||
vk::AndroidSurfaceCreateInfoKHR android_ci = {
|
||||
.window = reinterpret_cast<ANativeWindow*>(window_info.render_surface)
|
||||
};
|
||||
|
||||
if (instance.createAndroidSurfaceKHR(&android_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize Android surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!surface) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return surface;
|
||||
@ -98,6 +118,14 @@ std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window
|
||||
case Frontend::WindowSystemType::Wayland:
|
||||
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#elif defined(VK_USE_PLATFORM_METAL_EXT)
|
||||
case Frontend::WindowSystemType::MacOS:
|
||||
extensions.push_back(VK_EXT_METAL_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#elif defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
case Frontend::WindowSystemType::Android:
|
||||
extensions.push_back(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
|
||||
|
@ -109,7 +109,6 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
||||
}
|
||||
|
||||
RasterizerVulkan::~RasterizerVulkan() {
|
||||
renderpass_cache.ExitRenderpass();
|
||||
scheduler.Finish();
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
@ -178,21 +177,24 @@ void RasterizerVulkan::SyncFixedState() {
|
||||
|
||||
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
|
||||
u32 vs_input_index_max) {
|
||||
const u32 vertex_size = vs_input_size + sizeof(Common::Vec4f) * 16;
|
||||
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vertex_size, 4);
|
||||
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);
|
||||
|
||||
// The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
|
||||
// how to interpret vertex data. The program first sets GPUREG_ATTR_BUF_BASE to the base
|
||||
// address containing the vertex array data. The data for each attribute loader (i) can be found
|
||||
// by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought
|
||||
// as something analogous to Vulkan bindings. The user can store attributes in separate loaders
|
||||
// or interleave them in the same loader.
|
||||
/**
|
||||
* The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
|
||||
* how to interpret vertex data. The program first sets GPUREG_ATTR_BUF_BASE to the base
|
||||
* address containing the vertex array data. The data for each attribute loader (i) can be found
|
||||
* by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought
|
||||
* as something analogous to Vulkan bindings. The user can store attributes in separate loaders
|
||||
* or interleave them in the same loader.
|
||||
**/
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE
|
||||
|
||||
std::array<bool, 16> enable_attributes{};
|
||||
VertexLayout layout{};
|
||||
VertexLayout& layout = pipeline_info.vertex_layout;
|
||||
layout.attribute_count = 0;
|
||||
layout.binding_count = 0;
|
||||
enable_attributes.fill(false);
|
||||
|
||||
u32 buffer_offset = 0;
|
||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||
@ -250,12 +252,33 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
buffer_offset += Common::AlignUp(data_size, 16);
|
||||
}
|
||||
|
||||
array_ptr += buffer_offset;
|
||||
binding_offsets[layout.binding_count] = array_offset + buffer_offset;
|
||||
vertex_buffer.Commit(buffer_offset);
|
||||
|
||||
// Assign the rest of the attributes to the last binding
|
||||
SetupFixedAttribs();
|
||||
|
||||
// Bind the generated bindings
|
||||
scheduler.Record([this, layout = pipeline_info.vertex_layout,
|
||||
offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
std::array<vk::Buffer, 16> buffers;
|
||||
buffers.fill(vertex_buffer.GetHandle());
|
||||
render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
|
||||
offsets.data());
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupFixedAttribs() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
VertexLayout& layout = pipeline_info.vertex_layout;
|
||||
|
||||
auto [fixed_ptr, fixed_offset, _] = vertex_buffer.Map(16 * sizeof(Common::Vec4f));
|
||||
|
||||
// Reserve the last binding for fixed and default attributes
|
||||
// Place the default attrib at offset zero for easy access
|
||||
const Common::Vec4f default_attrib = Common::MakeVec(0.f, 0.f, 0.f, 1.f);
|
||||
std::memcpy(array_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
|
||||
static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f};
|
||||
std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
|
||||
|
||||
// Find all fixed attributes and assign them to the last binding
|
||||
u32 offset = sizeof(Common::Vec4f);
|
||||
@ -268,7 +291,7 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
attr.w.ToFloat32()};
|
||||
|
||||
const u32 data_size = sizeof(float) * static_cast<u32>(data.size());
|
||||
std::memcpy(array_ptr + offset, data.data(), data_size);
|
||||
std::memcpy(fixed_ptr + offset, data.data(), data_size);
|
||||
|
||||
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
|
||||
attribute.binding.Assign(layout.binding_count);
|
||||
@ -299,26 +322,16 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
|
||||
// Define the fixed+default binding
|
||||
VertexBinding& binding = layout.bindings[layout.binding_count];
|
||||
binding.binding.Assign(layout.binding_count);
|
||||
binding.binding.Assign(layout.binding_count++);
|
||||
binding.fixed.Assign(1);
|
||||
binding.stride.Assign(offset);
|
||||
|
||||
binding_offsets[layout.binding_count++] = array_offset + buffer_offset;
|
||||
ASSERT(buffer_offset + offset <= vertex_size);
|
||||
vertex_buffer.Commit(buffer_offset + offset);
|
||||
|
||||
// Update the pipeline vertex layout
|
||||
pipeline_info.vertex_layout = layout;
|
||||
|
||||
scheduler.Record([this, layout, offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
std::array<vk::Buffer, 16> buffers;
|
||||
buffers.fill(vertex_buffer.GetHandle());
|
||||
render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
|
||||
offsets.data());
|
||||
});
|
||||
vertex_buffer.Commit(offset);
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||
bool RasterizerVulkan::SetupVertexShader() {
|
||||
MICROPROFILE_SCOPE(Vulkan_VS);
|
||||
return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs,
|
||||
pipeline_info.vertex_layout);
|
||||
}
|
||||
@ -354,7 +367,7 @@ bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
|
||||
bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
|
||||
auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
|
||||
const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
|
||||
|
||||
if (vs_input_size > VERTEX_BUFFER_SIZE) {
|
||||
LOG_WARNING(Render_Vulkan, "Too large vertex input size {}", vs_input_size);
|
||||
@ -448,6 +461,10 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
auto [color_surface, depth_surface, surfaces_rect] =
|
||||
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled);
|
||||
|
||||
if (!color_surface && shadow_rendering) {
|
||||
return true;
|
||||
}
|
||||
|
||||
pipeline_info.color_attachment =
|
||||
color_surface ? color_surface->pixel_format : VideoCore::PixelFormat::Invalid;
|
||||
pipeline_info.depth_attachment =
|
||||
@ -654,7 +671,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
|
||||
// Sync and bind the shader
|
||||
if (shader_dirty) {
|
||||
SetShader();
|
||||
pipeline_cache.UseFragmentShader(regs);
|
||||
shader_dirty = false;
|
||||
}
|
||||
|
||||
@ -805,6 +822,9 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) {
|
||||
|
||||
// Blending
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
|
||||
if (instance.NeedsLogicOpEmulation()) {
|
||||
shader_dirty = true;
|
||||
}
|
||||
SyncBlendEnabled();
|
||||
break;
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
|
||||
@ -925,6 +945,9 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) {
|
||||
|
||||
// Logic op
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
|
||||
if (instance.NeedsLogicOpEmulation()) {
|
||||
shader_dirty = true;
|
||||
}
|
||||
SyncLogicOp();
|
||||
break;
|
||||
|
||||
@ -1542,10 +1565,6 @@ void RasterizerVulkan::FlushBuffers() {
|
||||
texture_lf_buffer.Flush();
|
||||
}
|
||||
|
||||
// Forwards the current PICA register state to PipelineCache::UseFragmentShader.
void RasterizerVulkan::SetShader() {
|
||||
pipeline_cache.UseFragmentShader(Pica::g_state.regs);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncClipEnabled() {
|
||||
uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0;
|
||||
}
|
||||
@ -1594,12 +1613,34 @@ void RasterizerVulkan::SyncBlendColor() {
|
||||
|
||||
void RasterizerVulkan::SyncLogicOp() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
pipeline_info.blending.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
|
||||
|
||||
const bool is_logic_op_emulated =
|
||||
instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
|
||||
const bool is_logic_op_noop =
|
||||
regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
|
||||
if (is_logic_op_emulated && is_logic_op_noop) {
|
||||
// Color output is disabled by logic operation. We use color write mask to skip
|
||||
// color but allow depth write.
|
||||
pipeline_info.blending.color_write_mask.Assign(0);
|
||||
} else {
|
||||
pipeline_info.blending.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncColorWriteMask() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const u32 color_mask = (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF;
|
||||
|
||||
const bool is_logic_op_emulated =
|
||||
instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
|
||||
const bool is_logic_op_noop =
|
||||
regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
|
||||
if (is_logic_op_emulated && is_logic_op_noop) {
|
||||
// Color output is disabled by logic operation. We use color write mask to skip
|
||||
// color but allow depth write. Return early to avoid overwriting this.
|
||||
return;
|
||||
}
|
||||
|
||||
pipeline_info.blending.color_write_mask.Assign(color_mask);
|
||||
}
|
||||
|
||||
|
@ -112,9 +112,6 @@ private:
|
||||
/// Syncs the clip coefficients to match the PICA register
|
||||
void SyncClipCoef();
|
||||
|
||||
/// Sets the OpenGL shader in accordance with the current PICA register state
|
||||
void SetShader();
|
||||
|
||||
/// Syncs the cull mode to match the PICA register
|
||||
void SyncCullMode();
|
||||
|
||||
@ -158,12 +155,12 @@ private:
|
||||
/// Internal implementation for AccelerateDrawBatch
|
||||
bool AccelerateDrawBatchInternal(bool is_indexed);
|
||||
|
||||
/// Copies vertex data performing needed conversions and casts
|
||||
void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data);
|
||||
|
||||
/// Setup vertex array for AccelerateDrawBatch
|
||||
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
|
||||
|
||||
/// Setup the fixed attribute emulation in vulkan
|
||||
void SetupFixedAttribs();
|
||||
|
||||
/// Setup vertex shader for AccelerateDrawBatch
|
||||
bool SetupVertexShader();
|
||||
|
||||
@ -190,6 +187,7 @@ private:
|
||||
|
||||
VertexLayout software_layout;
|
||||
std::array<u64, 16> binding_offsets{};
|
||||
std::array<bool, 16> enable_attributes{};
|
||||
vk::Sampler default_sampler;
|
||||
Surface null_surface;
|
||||
Surface null_storage_surface;
|
||||
|
@ -121,11 +121,12 @@ void DescriptorPool::RefreshTick() {
|
||||
}
|
||||
|
||||
void DescriptorPool::Allocate(std::size_t begin, std::size_t end) {
|
||||
LOG_INFO(Render_Vulkan, "Allocating new descriptor pool");
|
||||
vk::DescriptorPool& pool = pools.emplace_back();
|
||||
|
||||
// Choose a sane pool size good for most games
|
||||
static constexpr std::array<vk::DescriptorPoolSize, 5> pool_sizes = {{
|
||||
{vk::DescriptorType::eUniformBuffer, 2048},
|
||||
{vk::DescriptorType::eUniformBuffer, 4096},
|
||||
{vk::DescriptorType::eSampledImage, 4096},
|
||||
{vk::DescriptorType::eSampler, 4096},
|
||||
{vk::DescriptorType::eUniformTexelBuffer, 2048},
|
||||
|
@ -16,7 +16,7 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::Co
|
||||
while (command != nullptr) {
|
||||
auto next = command->GetNext();
|
||||
command->Execute(render_cmdbuf, upload_cmdbuf);
|
||||
std::destroy_at(command);
|
||||
command->~Command();
|
||||
command = next;
|
||||
}
|
||||
submit = false;
|
||||
@ -25,17 +25,29 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::Co
|
||||
last = nullptr;
|
||||
}
|
||||
|
||||
Scheduler::Scheduler(const Instance& instance, RendererVulkan& renderer)
|
||||
: instance{instance}, renderer{renderer}, master_semaphore{instance}, command_pool{instance, master_semaphore},
|
||||
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache, RendererVulkan& renderer)
|
||||
: instance{instance}, renderpass_cache{renderpass_cache}, renderer{renderer}, master_semaphore{instance},
|
||||
command_pool{instance, master_semaphore}, stop_requested{false},
|
||||
use_worker_thread{Settings::values.async_command_recording} {
|
||||
AllocateWorkerCommandBuffers();
|
||||
if (use_worker_thread) {
|
||||
AcquireNewChunk();
|
||||
worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
|
||||
worker_thread = std::thread([this]() { WorkerThread(); });
|
||||
}
|
||||
}
|
||||
|
||||
Scheduler::~Scheduler() = default;
|
||||
/// Stops the worker thread (if one was started) and waits for it to exit.
///
/// The worker thread is only created when asynchronous command recording is enabled.
/// Joining a std::thread that was never started throws std::system_error, which would
/// terminate the process when it escapes a destructor, so the shutdown handshake is
/// skipped entirely when there is no joinable thread.
Scheduler::~Scheduler() {
    stop_requested = true;

    if (!worker_thread.joinable()) {
        return;
    }

    // Push a dummy chunk to unblock the thread. Doing this under the work mutex
    // guarantees the worker observes the stop request when its wait predicate is
    // re-evaluated after the notification.
    {
        std::scoped_lock lock{work_mutex};
        work_queue.push(std::move(chunk));
    }

    work_cv.notify_one();
    worker_thread.join();
}
|
||||
|
||||
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
|
||||
SubmitExecution(signal, wait);
|
||||
@ -75,7 +87,7 @@ void Scheduler::DispatchWork() {
|
||||
AcquireNewChunk();
|
||||
}
|
||||
|
||||
void Scheduler::WorkerThread(std::stop_token stop_token) {
|
||||
void Scheduler::WorkerThread() {
|
||||
do {
|
||||
std::unique_ptr<CommandChunk> work;
|
||||
bool has_submit{false};
|
||||
@ -84,8 +96,8 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
|
||||
if (work_queue.empty()) {
|
||||
wait_cv.notify_all();
|
||||
}
|
||||
work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
|
||||
if (stop_token.stop_requested()) {
|
||||
work_cv.wait(lock, [this] { return !work_queue.empty() || stop_requested; });
|
||||
if (stop_requested) {
|
||||
continue;
|
||||
}
|
||||
work = std::move(work_queue.front());
|
||||
@ -99,7 +111,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
|
||||
}
|
||||
std::scoped_lock reserve_lock{reserve_mutex};
|
||||
chunk_reserve.push_back(std::move(work));
|
||||
} while (!stop_token.stop_requested());
|
||||
} while (!stop_requested);
|
||||
}
|
||||
|
||||
void Scheduler::AllocateWorkerCommandBuffers() {
|
||||
@ -120,6 +132,7 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
|
||||
const u64 signal_value = master_semaphore.NextTick();
|
||||
state = StateFlags::AllDirty;
|
||||
|
||||
renderpass_cache.ExitRenderpass();
|
||||
Record([signal_semaphore, wait_semaphore, signal_value, this]
|
||||
(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Submit);
|
||||
|
@ -27,13 +27,15 @@ enum class StateFlags {
|
||||
DECLARE_ENUM_FLAG_OPERATORS(StateFlags)
|
||||
|
||||
class Instance;
|
||||
class RenderpassCache;
|
||||
class RendererVulkan;
|
||||
|
||||
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
||||
/// OpenGL-like operations on Vulkan command buffers.
|
||||
class Scheduler {
|
||||
public:
|
||||
explicit Scheduler(const Instance& instance, RendererVulkan& renderer);
|
||||
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
|
||||
RendererVulkan& renderer);
|
||||
~Scheduler();
|
||||
|
||||
/// Sends the current execution context to the GPU.
|
||||
@ -149,7 +151,7 @@ private:
|
||||
return false;
|
||||
}
|
||||
Command* const current_last = last;
|
||||
last = std::construct_at(reinterpret_cast<FuncType*>(data.data() + command_offset), std::move(command));
|
||||
last = new (data.data() + command_offset) FuncType(std::move(command));
|
||||
|
||||
if (current_last) {
|
||||
current_last->SetNext(last);
|
||||
@ -183,7 +185,7 @@ private:
|
||||
};
|
||||
|
||||
private:
|
||||
void WorkerThread(std::stop_token stop_token);
|
||||
void WorkerThread();
|
||||
|
||||
void AllocateWorkerCommandBuffers();
|
||||
|
||||
@ -193,6 +195,7 @@ private:
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
RenderpassCache& renderpass_cache;
|
||||
RendererVulkan& renderer;
|
||||
MasterSemaphore master_semaphore;
|
||||
CommandPool command_pool;
|
||||
@ -206,7 +209,8 @@ private:
|
||||
std::mutex work_mutex;
|
||||
std::condition_variable_any work_cv;
|
||||
std::condition_variable wait_cv;
|
||||
std::jthread worker_thread;
|
||||
std::thread worker_thread;
|
||||
std::atomic_bool stop_requested;
|
||||
bool use_worker_thread;
|
||||
};
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
using Pica::FramebufferRegs;
|
||||
@ -100,7 +101,7 @@ out gl_PerVertex {
|
||||
return out;
|
||||
}
|
||||
|
||||
PicaFSConfig::PicaFSConfig(const Pica::Regs& regs) {
|
||||
PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
|
||||
state.scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode);
|
||||
|
||||
state.depthmap_enable.Assign(regs.rasterizer.depthmap_enable);
|
||||
@ -113,8 +114,16 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs) {
|
||||
|
||||
state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0);
|
||||
|
||||
state.alphablend_enable.Assign(0);
|
||||
state.logic_op.Assign(Pica::FramebufferRegs::LogicOp::Clear);
|
||||
// Emulate logic op in the shader if not supported. This is mostly for mobile GPUs
|
||||
const bool emulate_logic_op = instance.NeedsLogicOpEmulation() &&
|
||||
!Pica::g_state.regs.framebuffer.output_merger.alphablend_enable;
|
||||
|
||||
state.emulate_logic_op.Assign(emulate_logic_op);
|
||||
if (emulate_logic_op) {
|
||||
state.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
|
||||
} else {
|
||||
state.logic_op.Assign(Pica::FramebufferRegs::LogicOp::NoOp);
|
||||
}
|
||||
|
||||
// Copy relevant tev stages fields.
|
||||
// We don't sync const_color here because of the high variance, it is a
|
||||
@ -489,33 +498,33 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
|
||||
using Operation = TevStageConfig::Operation;
|
||||
switch (operation) {
|
||||
case Operation::Replace:
|
||||
out += fmt::format("{}[0]", variable_name);
|
||||
out += "color_results_1";
|
||||
break;
|
||||
case Operation::Modulate:
|
||||
out += fmt::format("{0}[0] * {0}[1]", variable_name);
|
||||
out += "color_results_1 * color_results_2";
|
||||
break;
|
||||
case Operation::Add:
|
||||
out += fmt::format("{0}[0] + {0}[1]", variable_name);
|
||||
out += "color_results_1 + color_results_2";
|
||||
break;
|
||||
case Operation::AddSigned:
|
||||
out += fmt::format("{0}[0] + {0}[1] - vec3(0.5)", variable_name);
|
||||
out += "color_results_1 + color_results_2 - vec3(0.5)";
|
||||
break;
|
||||
case Operation::Lerp:
|
||||
out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (vec3(1.0) - {0}[2])", variable_name);
|
||||
out += "color_results_1 * color_results_3 + color_results_2 * (vec3(1.0) - color_results_3)";
|
||||
break;
|
||||
case Operation::Subtract:
|
||||
out += fmt::format("{0}[0] - {0}[1]", variable_name);
|
||||
out += "color_results_1 - color_results_2";
|
||||
break;
|
||||
case Operation::MultiplyThenAdd:
|
||||
out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
|
||||
out += "color_results_1 * color_results_2 + color_results_3";
|
||||
break;
|
||||
case Operation::AddThenMultiply:
|
||||
out += fmt::format("min({0}[0] + {0}[1], vec3(1.0)) * {0}[2]", variable_name);
|
||||
out += "min(color_results_1 + color_results_2, vec3(1.0)) * color_results_3";
|
||||
break;
|
||||
case Operation::Dot3_RGB:
|
||||
case Operation::Dot3_RGBA:
|
||||
out +=
|
||||
fmt::format("vec3(dot({0}[0] - vec3(0.5), {0}[1] - vec3(0.5)) * 4.0)", variable_name);
|
||||
"vec3(dot(color_results_1 - vec3(0.5), color_results_2 - vec3(0.5)) * 4.0)";
|
||||
break;
|
||||
default:
|
||||
out += "vec3(0.0)";
|
||||
@ -532,28 +541,28 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
|
||||
using Operation = TevStageConfig::Operation;
|
||||
switch (operation) {
|
||||
case Operation::Replace:
|
||||
out += fmt::format("{}[0]", variable_name);
|
||||
out += "alpha_results_1";
|
||||
break;
|
||||
case Operation::Modulate:
|
||||
out += fmt::format("{0}[0] * {0}[1]", variable_name);
|
||||
out += "alpha_results_1 * alpha_results_2";
|
||||
break;
|
||||
case Operation::Add:
|
||||
out += fmt::format("{0}[0] + {0}[1]", variable_name);
|
||||
out += "alpha_results_1 + alpha_results_2";
|
||||
break;
|
||||
case Operation::AddSigned:
|
||||
out += fmt::format("{0}[0] + {0}[1] - 0.5", variable_name);
|
||||
out += "alpha_results_1 + alpha_results_2 - 0.5";
|
||||
break;
|
||||
case Operation::Lerp:
|
||||
out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (1.0 - {0}[2])", variable_name);
|
||||
out += "alpha_results_1 * alpha_results_3 + alpha_results_2 * (1.0 - alpha_results_3)";
|
||||
break;
|
||||
case Operation::Subtract:
|
||||
out += fmt::format("{0}[0] - {0}[1]", variable_name);
|
||||
out += "alpha_results_1 - alpha_results_2";
|
||||
break;
|
||||
case Operation::MultiplyThenAdd:
|
||||
out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
|
||||
out += "alpha_results_1 * alpha_results_2 + alpha_results_3";
|
||||
break;
|
||||
case Operation::AddThenMultiply:
|
||||
out += fmt::format("min({0}[0] + {0}[1], 1.0) * {0}[2]", variable_name);
|
||||
out += "min(alpha_results_1 + alpha_results_2, 1.0) * alpha_results_3";
|
||||
break;
|
||||
default:
|
||||
out += "0.0";
|
||||
@ -599,38 +608,34 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned
|
||||
if (!IsPassThroughTevStage(stage)) {
|
||||
const std::string index_name = std::to_string(index);
|
||||
|
||||
out += fmt::format("vec3 color_results_{}_1 = ", index_name);
|
||||
out += fmt::format("color_results_1 = ", index_name);
|
||||
AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
|
||||
out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name);
|
||||
out += fmt::format(";\ncolor_results_2 = ", index_name);
|
||||
AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
|
||||
out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name);
|
||||
out += fmt::format(";\ncolor_results_3 = ", index_name);
|
||||
AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
|
||||
out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, "
|
||||
"color_results_{}_2, color_results_{}_3);\n",
|
||||
index_name, index_name, index_name, index_name);
|
||||
|
||||
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
|
||||
out += fmt::format("vec3 color_output_{} = byteround(", index_name);
|
||||
AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);
|
||||
out += fmt::format(";\nvec3 color_output_{} = byteround(", index_name);
|
||||
AppendColorCombiner(out, stage.color_op, "color_results");
|
||||
out += ");\n";
|
||||
|
||||
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
|
||||
// result of Dot3_RGBA operation is also placed to the alpha component
|
||||
out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name);
|
||||
} else {
|
||||
out += fmt::format("float alpha_results_{}[3] = float[3](", index_name);
|
||||
out += fmt::format("alpha_results_1 = ", index_name);
|
||||
AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1,
|
||||
index_name);
|
||||
out += ", ";
|
||||
out += fmt::format(";\nalpha_results_2 = ", index_name);
|
||||
AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2,
|
||||
index_name);
|
||||
out += ", ";
|
||||
out += fmt::format(";\nalpha_results_3 = ", index_name);
|
||||
AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3,
|
||||
index_name);
|
||||
out += ");\n";
|
||||
|
||||
out += fmt::format("float alpha_output_{} = byteround(", index_name);
|
||||
AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
|
||||
out += fmt::format(";\nfloat alpha_output_{} = byteround(", index_name);
|
||||
AppendAlphaCombiner(out, stage.alpha_op, "alpha_results");
|
||||
out += ");\n";
|
||||
}
|
||||
|
||||
@ -1466,6 +1471,14 @@ vec4 secondary_fragment_color = vec4(0.0);
|
||||
"vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"
|
||||
"vec4 last_tex_env_out = vec4(0.0);\n";
|
||||
|
||||
out += "vec3 color_results_1 = vec3(0.0);\n"
|
||||
"vec3 color_results_2 = vec3(0.0);\n"
|
||||
"vec3 color_results_3 = vec3(0.0);\n";
|
||||
|
||||
out += "float alpha_results_1 = 0.0;\n"
|
||||
"float alpha_results_2 = 0.0;\n"
|
||||
"float alpha_results_3 = 0.0;\n";
|
||||
|
||||
for (std::size_t index = 0; index < state.tev_stages.size(); ++index) {
|
||||
WriteTevStage(out, config, static_cast<u32>(index));
|
||||
}
|
||||
@ -1534,6 +1547,30 @@ do {
|
||||
out += "color = byteround(last_tex_env_out);\n";
|
||||
}
|
||||
|
||||
if (state.emulate_logic_op) {
|
||||
switch (state.logic_op) {
|
||||
case FramebufferRegs::LogicOp::Clear:
|
||||
out += "color = vec4(0);\n";
|
||||
break;
|
||||
case FramebufferRegs::LogicOp::Set:
|
||||
out += "color = vec4(1);\n";
|
||||
break;
|
||||
case FramebufferRegs::LogicOp::Copy:
|
||||
// Take the color output as-is
|
||||
break;
|
||||
case FramebufferRegs::LogicOp::CopyInverted:
|
||||
out += "color = ~color;\n";
|
||||
break;
|
||||
case FramebufferRegs::LogicOp::NoOp:
|
||||
// We need to discard the color, but not necessarily the depth. This is not possible
|
||||
// with fragment shader alone, so we emulate this behavior with the color mask.
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<u32>(state.logic_op.Value()));
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
|
||||
out += '}';
|
||||
return out;
|
||||
}
|
||||
@ -1568,6 +1605,7 @@ void main() {
|
||||
normquat = vert_normquat;
|
||||
view = vert_view;
|
||||
gl_Position = vert_position;
|
||||
gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
|
||||
|
||||
gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
|
||||
if (enable_clip1) {
|
||||
@ -1634,7 +1672,7 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
|
||||
prefix = "u";
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown attrib type {}", config.state.attrib_types[i]);
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
@ -1642,12 +1680,42 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
|
||||
fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, prefix);
|
||||
}
|
||||
}
|
||||
|
||||
// Some 3-component attributes might be emulated by breaking them to vec2 + scalar.
|
||||
// Define them here and combine them below
|
||||
for (std::size_t i = 0; i < used_regs.size(); ++i) {
|
||||
if (const u32 location = config.state.emulated_attrib_locations[i]; location != 0 && used_regs[i]) {
|
||||
std::string_view type;
|
||||
switch (config.state.attrib_types[i]) {
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
|
||||
type = "float";
|
||||
break;
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
|
||||
type = "int";
|
||||
break;
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
|
||||
type = "uint";
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
out += fmt::format("layout(location = {}) in {} vs_in_typed_reg{}_part2;\n", location, type, i);
|
||||
}
|
||||
}
|
||||
|
||||
out += '\n';
|
||||
|
||||
// cast input registers to float to avoid computational errors
|
||||
for (std::size_t i = 0; i < used_regs.size(); ++i) {
|
||||
if (used_regs[i]) {
|
||||
out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
|
||||
if (config.state.emulated_attrib_locations[i] != 0) {
|
||||
out += fmt::format("vec4 vs_in_reg{0} = vec4(vec2(vs_in_typed_reg{0}), float(vs_in_typed_reg{0}_part2), 0.f);\n", i);
|
||||
} else {
|
||||
out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
out += '\n';
|
||||
@ -1707,6 +1775,7 @@ struct Vertex {
|
||||
semantic(VSOutputAttributes::POSITION_Z) + ", " +
|
||||
semantic(VSOutputAttributes::POSITION_W) + ");\n";
|
||||
out += " gl_Position = vtx_pos;\n";
|
||||
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
|
||||
out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n";
|
||||
out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
|
||||
out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n";
|
||||
|
@ -13,6 +13,8 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
|
||||
enum Attributes {
|
||||
ATTRIBUTE_POSITION,
|
||||
ATTRIBUTE_COLOR,
|
||||
@ -51,7 +53,7 @@ struct PicaFSConfigState {
|
||||
BitField<17, 1, Pica::RasterizerRegs::DepthBuffering> depthmap_enable;
|
||||
BitField<18, 3, Pica::TexturingRegs::FogMode> fog_mode;
|
||||
BitField<21, 1, u32> fog_flip;
|
||||
BitField<22, 1, u32> alphablend_enable;
|
||||
BitField<22, 1, u32> emulate_logic_op;
|
||||
BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op;
|
||||
BitField<27, 1, u32> shadow_rendering;
|
||||
BitField<28, 1, u32> shadow_texture_orthographic;
|
||||
@ -132,7 +134,7 @@ struct PicaFSConfigState {
|
||||
* two separate shaders sharing the same key.
|
||||
*/
|
||||
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
|
||||
PicaFSConfig(const Pica::Regs& regs);
|
||||
PicaFSConfig(const Pica::Regs& regs, const Instance& instance);
|
||||
|
||||
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
|
||||
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
|
||||
@ -155,6 +157,7 @@ struct PicaShaderConfigCommon {
|
||||
u32 main_offset;
|
||||
bool sanitize_mul;
|
||||
std::array<Pica::PipelineRegs::VertexAttributeFormat, 16> attrib_types;
|
||||
std::array<u8, 16> emulated_attrib_locations;
|
||||
|
||||
u32 num_outputs;
|
||||
|
||||
|
958
src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp
Normal file
958
src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp
Normal file
@ -0,0 +1,958 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
|
||||
#include "video_core/shader/shader_uniforms.h"
|
||||
|
||||
using Pica::FramebufferRegs;
|
||||
using Pica::LightingRegs;
|
||||
using Pica::RasterizerRegs;
|
||||
using Pica::TexturingRegs;
|
||||
using TevStageConfig = TexturingRegs::TevStageConfig;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Creates the module for the given fragment shader configuration and declares everything
/// that has to exist before the shader body is generated: arithmetic types, uniform
/// structs, the shader interface and the entry point.
FragmentModule::FragmentModule(const PicaFSConfig& config)
    : Sirit::Module{0x00010300}, // NOTE(review): presumably the SPIR-V 1.3 version word - confirm
      config{config} {
    DefineArithmeticTypes();
    DefineUniformStructs();
    DefineInterface();
    DefineEntryPoint();
}
|
||||
|
||||
/// Defaulted destructor, defined out of line in this translation unit.
FragmentModule::~FragmentModule() = default;
|
||||
|
||||
void FragmentModule::Generate() {
|
||||
AddLabel(OpLabel());
|
||||
|
||||
rounded_primary_color = Byteround(OpLoad(vec_ids.Get(4), primary_color_id), 4);
|
||||
primary_fragment_color = ConstF32(0.f, 0.f, 0.f, 0.f);
|
||||
secondary_fragment_color = ConstF32(0.f, 0.f, 0.f, 0.f);
|
||||
|
||||
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
|
||||
if (config.state.alpha_test_func == Pica::FramebufferRegs::CompareFunc::Never) {
|
||||
OpKill();
|
||||
OpFunctionEnd();
|
||||
return;
|
||||
}
|
||||
|
||||
// Write shader bytecode to emulate all enabled PICA lights
|
||||
if (config.state.lighting.enable) {
|
||||
WriteLighting();
|
||||
}
|
||||
|
||||
combiner_buffer = ConstF32(0.f, 0.f, 0.f, 0.f);
|
||||
next_combiner_buffer = GetShaderDataMember(vec_ids.Get(4), ConstS32(27));
|
||||
last_tex_env_out = ConstF32(0.f, 0.f, 0.f, 0.f);
|
||||
|
||||
// Write shader bytecode to emulate PICA TEV stages
|
||||
for (std::size_t index = 0; index < config.state.tev_stages.size(); ++index) {
|
||||
WriteTevStage(static_cast<s32>(index));
|
||||
}
|
||||
|
||||
if (WriteAlphaTestCondition(config.state.alpha_test_func)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use
|
||||
// default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then
|
||||
// do our own transformation according to PICA specification.
|
||||
WriteDepth();
|
||||
|
||||
// Write output color
|
||||
OpStore(color_id, Byteround(last_tex_env_out, 4));
|
||||
OpReturn();
|
||||
OpFunctionEnd();
|
||||
}
|
||||
|
||||
void FragmentModule::WriteDepth() {
|
||||
const Id input_pointer_id{TypePointer(spv::StorageClass::Input, f32_id)};
|
||||
const Id gl_frag_coord_z{OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(2u)))};
|
||||
const Id z_over_w{OpFma(f32_id, ConstF32(2.f), gl_frag_coord_z, ConstF32(-1.f))};
|
||||
const Id depth_scale{GetShaderDataMember(f32_id, ConstS32(2))};
|
||||
const Id depth_offset{GetShaderDataMember(f32_id, ConstS32(3))};
|
||||
const Id depth{OpFma(f32_id, z_over_w, depth_scale, depth_offset)};
|
||||
if (config.state.depthmap_enable == Pica::RasterizerRegs::DepthBuffering::WBuffering) {
|
||||
const Id gl_frag_coord_w{OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(3u)))};
|
||||
const Id depth_over_w{OpFDiv(f32_id, depth, gl_frag_coord_w)};
|
||||
OpStore(gl_frag_depth_id, depth_over_w);
|
||||
} else {
|
||||
OpStore(gl_frag_depth_id, depth);
|
||||
}
|
||||
}
|
||||
|
||||
void FragmentModule::WriteLighting() {
|
||||
const auto& lighting = config.state.lighting;
|
||||
|
||||
// Define lighting globals
|
||||
Id diffuse_sum{ConstF32(0.f, 0.f, 0.f, 1.f)};
|
||||
Id specular_sum{ConstF32(0.f, 0.f, 0.f, 1.f)};
|
||||
Id light_vector{ConstF32(0.f, 0.f, 0.f)};
|
||||
Id spot_dir{ConstF32(0.f, 0.f, 0.f)};
|
||||
Id half_vector{ConstF32(0.f, 0.f, 0.f)};
|
||||
Id dot_product{ConstF32(0.f)};
|
||||
Id clamp_highlights{ConstF32(1.f)};
|
||||
Id geo_factor{ConstF32(1.f)};
|
||||
Id surface_normal{};
|
||||
Id surface_tangent{};
|
||||
|
||||
// Compute fragment normals and tangents
|
||||
const auto Perturbation = [&]() -> Id {
|
||||
const Id texel{SampleTexture(lighting.bump_selector)};
|
||||
const Id texel_rgb{OpVectorShuffle(vec_ids.Get(3), texel, texel, 0, 1, 2)};
|
||||
const Id rgb_mul_two{OpVectorTimesScalar(vec_ids.Get(3), texel_rgb, ConstF32(2.f))};
|
||||
return OpFSub(vec_ids.Get(3), rgb_mul_two, ConstF32(1.f, 1.f, 1.f));
|
||||
};
|
||||
|
||||
if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
|
||||
// Bump mapping is enabled using a normal map
|
||||
surface_normal = Perturbation();
|
||||
|
||||
// Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
|
||||
// precision result
|
||||
if (lighting.bump_renorm) {
|
||||
const Id normal_x{OpCompositeExtract(f32_id, surface_normal, 0)};
|
||||
const Id normal_y{OpCompositeExtract(f32_id, surface_normal, 1)};
|
||||
const Id y_mul_y{OpFMul(f32_id, normal_y, normal_y)};
|
||||
const Id val{OpFSub(f32_id, ConstF32(1.f), OpFma(f32_id, normal_x, normal_x, y_mul_y))};
|
||||
const Id normal_z{OpSqrt(f32_id, OpFMax(f32_id, val, ConstF32(0.f)))};
|
||||
surface_normal = OpCompositeConstruct(vec_ids.Get(3), normal_x, normal_y, normal_z);
|
||||
}
|
||||
|
||||
// The tangent vector is not perturbed by the normal map and is just a unit vector.
|
||||
surface_tangent = ConstF32(1.f, 0.f, 0.f);
|
||||
} else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
|
||||
// Bump mapping is enabled using a tangent map
|
||||
surface_tangent = Perturbation();
|
||||
|
||||
// Mathematically, recomputing Z-component of the tangent vector won't affect the relevant
|
||||
// computation below, which is also confirmed on 3DS. So we don't bother recomputing here
|
||||
// even if 'renorm' is enabled.
|
||||
|
||||
// The normal vector is not perturbed by the tangent map and is just a unit vector.
|
||||
surface_normal = ConstF32(0.f, 0.f, 1.f);
|
||||
} else {
|
||||
// No bump mapping - surface local normal and tangent are just unit vectors
|
||||
surface_normal = ConstF32(0.f, 0.f, 1.f);
|
||||
surface_tangent = ConstF32(1.f, 0.f, 0.f);
|
||||
}
|
||||
|
||||
// Rotate the vector v by the quaternion q
|
||||
const auto QuaternionRotate = [this](Id q, Id v) -> Id {
|
||||
const Id q_xyz{OpVectorShuffle(vec_ids.Get(3), q, q, 0, 1, 2)};
|
||||
const Id q_xyz_cross_v{OpCross(vec_ids.Get(3), q_xyz, v)};
|
||||
const Id q_w{OpCompositeExtract(f32_id, q, 3)};
|
||||
const Id val1{OpFAdd(vec_ids.Get(3), q_xyz_cross_v, OpVectorTimesScalar(vec_ids.Get(3), v, q_w))};
|
||||
const Id val2{OpVectorTimesScalar(vec_ids.Get(3), OpCross(vec_ids.Get(3), q_xyz, val1), ConstF32(2.f))};
|
||||
return OpFAdd(vec_ids.Get(3), v, val2);
|
||||
};
|
||||
|
||||
// Rotate the surface-local normal by the interpolated normal quaternion to convert it to
|
||||
// eyespace.
|
||||
const Id normalized_normquat{OpNormalize(vec_ids.Get(4), OpLoad(vec_ids.Get(4), normquat_id))};
|
||||
const Id normal{QuaternionRotate(normalized_normquat, surface_normal)};
|
||||
const Id tangent{QuaternionRotate(normalized_normquat, surface_tangent)};
|
||||
|
||||
Id shadow{ConstF32(1.f, 1.f, 1.f, 1.f)};
|
||||
if (lighting.enable_shadow) {
|
||||
shadow = SampleTexture(lighting.shadow_selector);
|
||||
if (lighting.shadow_invert) {
|
||||
shadow = OpFSub(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f, 1.f), shadow);
|
||||
}
|
||||
}
|
||||
|
||||
const auto LookupLightingLUTUnsigned = [this](Id lut_index, Id pos) -> Id {
|
||||
const Id pos_int{OpConvertFToS(i32_id, OpFMul(f32_id, pos, ConstF32(256.f)))};
|
||||
const Id index{OpSClamp(i32_id, pos_int, ConstS32(0), ConstS32(255))};
|
||||
const Id neg_index{OpFNegate(f32_id, OpConvertSToF(f32_id, index))};
|
||||
const Id delta{OpFma(f32_id, pos, ConstF32(256.f), neg_index)};
|
||||
return LookupLightingLUT(lut_index, index, delta);
|
||||
};
|
||||
|
||||
const auto LookupLightingLUTSigned = [this](Id lut_index, Id pos) -> Id {
|
||||
const Id pos_int{OpConvertFToS(i32_id, OpFMul(f32_id, pos, ConstF32(128.f)))};
|
||||
const Id index{OpSClamp(i32_id, pos_int, ConstS32(-128), ConstS32(127))};
|
||||
const Id neg_index{OpFNegate(f32_id, OpConvertSToF(f32_id, index))};
|
||||
const Id delta{OpFma(f32_id, pos, ConstF32(128.f), neg_index)};
|
||||
const Id increment{OpSelect(i32_id, OpSLessThan(bool_id, index, ConstS32(0)), ConstS32(256), ConstS32(0))};
|
||||
return LookupLightingLUT(lut_index, OpIAdd(i32_id, index, increment), delta);
|
||||
};
|
||||
|
||||
// Samples the specified lookup table for specular lighting
|
||||
const Id view{OpLoad(vec_ids.Get(3), view_id)};
|
||||
const auto GetLutValue = [&](LightingRegs::LightingSampler sampler, u32 light_num,
|
||||
LightingRegs::LightingLutInput input, bool abs) -> Id {
|
||||
Id index{};
|
||||
switch (input) {
|
||||
case LightingRegs::LightingLutInput::NH:
|
||||
index = OpDot(f32_id, normal, OpNormalize(vec_ids.Get(3), half_vector));
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::VH:
|
||||
index = OpDot(f32_id, OpNormalize(vec_ids.Get(3), view), OpNormalize(vec_ids.Get(3), half_vector));
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::NV:
|
||||
index = OpDot(f32_id, normal, OpNormalize(vec_ids.Get(3), view));
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::LN:
|
||||
index = OpDot(f32_id, light_vector, normal);
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::SP:
|
||||
index = OpDot(f32_id, light_vector, spot_dir);
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::CP:
|
||||
// CP input is only available with configuration 7
|
||||
if (lighting.config == LightingRegs::LightingConfig::Config7) {
|
||||
// Note: even if the normal vector is modified by normal map, which is not the
|
||||
// normal of the tangent plane anymore, the half angle vector is still projected
|
||||
// using the modified normal vector.
|
||||
const Id normalized_half_vector{OpNormalize(vec_ids.Get(3), half_vector)};
|
||||
const Id normal_dot_half_vector{OpDot(f32_id, normal, normalized_half_vector)};
|
||||
const Id normal_mul_dot{OpVectorTimesScalar(vec_ids.Get(3), normal, normal_dot_half_vector)};
|
||||
const Id half_angle_proj{OpFSub(vec_ids.Get(3), normalized_half_vector, normal_mul_dot)};
|
||||
|
||||
// Note: the half angle vector projection is confirmed not normalized before the dot
|
||||
// product. The result is in fact not cos(phi) as the name suggested.
|
||||
index = OpDot(f32_id, half_angle_proj, tangent);
|
||||
} else {
|
||||
index = ConstF32(0.f);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", (int)input);
|
||||
UNIMPLEMENTED();
|
||||
index = ConstF32(0.f);
|
||||
break;
|
||||
}
|
||||
|
||||
const Id sampler_index{ConstU32(static_cast<u32>(sampler))};
|
||||
if (abs) {
|
||||
// LUT index is in the range of (0.0, 1.0)
|
||||
index = lighting.light[light_num].two_sided_diffuse
|
||||
? OpFAbs(f32_id, index)
|
||||
: OpFMax(f32_id, index, ConstF32(0.f));
|
||||
return LookupLightingLUTUnsigned(sampler_index, index);
|
||||
} else {
|
||||
// LUT index is in the range of (-1.0, 1.0)
|
||||
return LookupLightingLUTSigned(sampler_index, index);
|
||||
}
|
||||
};
|
||||
|
||||
// Write the code to emulate each enabled light
|
||||
for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) {
|
||||
const auto& light_config = lighting.light[light_index];
|
||||
|
||||
const auto GetLightMember = [&](s32 member) -> Id {
|
||||
const Id member_type = member < 6 ? vec_ids.Get(3) : f32_id;
|
||||
const Id light_num{ConstS32(static_cast<s32>(lighting.light[light_index].num.Value()))};
|
||||
return GetShaderDataMember(member_type, ConstS32(25), light_num, ConstS32(member));
|
||||
};
|
||||
|
||||
// Compute light vector (directional or positional)
|
||||
const Id light_position{GetLightMember(4)};
|
||||
if (light_config.directional) {
|
||||
light_vector = OpNormalize(vec_ids.Get(3), light_position);
|
||||
} else {
|
||||
light_vector = OpNormalize(vec_ids.Get(3), OpFAdd(vec_ids.Get(3), light_position, view));
|
||||
}
|
||||
|
||||
spot_dir = GetLightMember(5);
|
||||
half_vector = OpFAdd(vec_ids.Get(3), OpNormalize(vec_ids.Get(3), view), light_vector);
|
||||
|
||||
// Compute dot product of light_vector and normal, adjust if lighting is one-sided or
|
||||
// two-sided
|
||||
if (light_config.two_sided_diffuse) {
|
||||
dot_product = OpFAbs(f32_id, OpDot(f32_id, light_vector, normal));
|
||||
} else {
|
||||
dot_product = OpFMax(f32_id, OpDot(f32_id, light_vector, normal), ConstF32(0.f));
|
||||
}
|
||||
|
||||
// If enabled, clamp specular component if lighting result is zero
|
||||
if (lighting.clamp_highlights) {
|
||||
clamp_highlights = OpFSign(f32_id, dot_product);
|
||||
}
|
||||
|
||||
// If enabled, compute spot light attenuation value
|
||||
Id spot_atten{ConstF32(1.f)};
|
||||
if (light_config.spot_atten_enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(
|
||||
lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) {
|
||||
const Id value{GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num),
|
||||
light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input)};
|
||||
spot_atten = OpFMul(f32_id, ConstF32(lighting.lut_sp.scale), value);
|
||||
}
|
||||
|
||||
// If enabled, compute distance attenuation value
|
||||
Id dist_atten{ConstF32(1.f)};
|
||||
if (light_config.dist_atten_enable) {
|
||||
const Id dist_atten_scale{GetLightMember(7)};
|
||||
const Id dist_atten_bias{GetLightMember(6)};
|
||||
const Id min_view_min_pos{OpFSub(vec_ids.Get(3), OpFNegate(vec_ids.Get(3), view), light_position)};
|
||||
const Id index{OpFma(f32_id, dist_atten_scale, OpLength(f32_id, min_view_min_pos), dist_atten_bias)};
|
||||
const Id clamped_index{OpFClamp(f32_id, index, ConstF32(0.f), ConstF32(1.f))};
|
||||
const Id sampler{ConstS32(static_cast<s32>(LightingRegs::DistanceAttenuationSampler(light_config.num)))};
|
||||
dist_atten = LookupLightingLUTUnsigned(sampler, clamped_index);
|
||||
}
|
||||
|
||||
if (light_config.geometric_factor_0 || light_config.geometric_factor_1) {
|
||||
geo_factor = OpDot(f32_id, half_vector, half_vector);
|
||||
const Id dot_div_geo{OpFMin(f32_id, OpFDiv(f32_id, dot_product, geo_factor), ConstF32(1.f))};
|
||||
const Id is_geo_factor_zero{OpFOrdEqual(bool_id, geo_factor, ConstF32(0.f))};
|
||||
geo_factor = OpSelect(f32_id, is_geo_factor_zero, ConstF32(0.f), dot_div_geo);
|
||||
}
|
||||
|
||||
// Specular 0 component
|
||||
Id d0_lut_value{ConstF32(1.f)};
|
||||
if (lighting.lut_d0.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(
|
||||
lighting.config, LightingRegs::LightingSampler::Distribution0)) {
|
||||
// Lookup specular "distribution 0" LUT value
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num,
|
||||
lighting.lut_d0.type, lighting.lut_d0.abs_input)};
|
||||
d0_lut_value = OpFMul(f32_id, ConstF32(lighting.lut_d0.scale), value);
|
||||
}
|
||||
|
||||
Id specular_0{OpVectorTimesScalar(vec_ids.Get(3), GetLightMember(0), d0_lut_value)};
|
||||
if (light_config.geometric_factor_0) {
|
||||
specular_0 = OpVectorTimesScalar(vec_ids.Get(3), specular_0, geo_factor);
|
||||
}
|
||||
|
||||
// If enabled, lookup ReflectRed value, otherwise, 1.0 is used
|
||||
Id refl_value_r{ConstF32(1.f)};
|
||||
if (lighting.lut_rr.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(lighting.config,
|
||||
LightingRegs::LightingSampler::ReflectRed)) {
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num,
|
||||
lighting.lut_rr.type, lighting.lut_rr.abs_input)};
|
||||
|
||||
refl_value_r = OpFMul(f32_id, ConstF32(lighting.lut_rr.scale), value);
|
||||
}
|
||||
|
||||
// If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
|
||||
Id refl_value_g{refl_value_r};
|
||||
if (lighting.lut_rg.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(lighting.config,
|
||||
LightingRegs::LightingSampler::ReflectGreen)) {
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num,
|
||||
lighting.lut_rg.type, lighting.lut_rg.abs_input)};
|
||||
|
||||
refl_value_g = OpFMul(f32_id, ConstF32(lighting.lut_rg.scale), value);
|
||||
}
|
||||
|
||||
// If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
|
||||
Id refl_value_b{refl_value_r};
|
||||
if (lighting.lut_rb.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(lighting.config,
|
||||
LightingRegs::LightingSampler::ReflectBlue)) {
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num,
|
||||
lighting.lut_rb.type, lighting.lut_rb.abs_input)};
|
||||
refl_value_b = OpFMul(f32_id, ConstF32(lighting.lut_rb.scale), value);
|
||||
}
|
||||
|
||||
// Specular 1 component
|
||||
Id d1_lut_value{ConstF32(1.f)};
|
||||
if (lighting.lut_d1.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(
|
||||
lighting.config, LightingRegs::LightingSampler::Distribution1)) {
|
||||
// Lookup specular "distribution 1" LUT value
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num,
|
||||
lighting.lut_d1.type, lighting.lut_d1.abs_input)};
|
||||
d1_lut_value = OpFMul(f32_id, ConstF32(lighting.lut_d1.scale), value);
|
||||
}
|
||||
|
||||
const Id refl_value{OpCompositeConstruct(vec_ids.Get(3), refl_value_r, refl_value_g, refl_value_b)};
|
||||
const Id light_specular_1{GetLightMember(1)};
|
||||
Id specular_1{OpFMul(vec_ids.Get(3), OpVectorTimesScalar(vec_ids.Get(3), refl_value, d1_lut_value), light_specular_1)};
|
||||
if (light_config.geometric_factor_1) {
|
||||
specular_1 = OpVectorTimesScalar(vec_ids.Get(3), specular_1, geo_factor);
|
||||
}
|
||||
|
||||
// Fresnel
|
||||
// Note: only the last entry in the light slots applies the Fresnel factor
|
||||
if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(lighting.config,
|
||||
LightingRegs::LightingSampler::Fresnel)) {
|
||||
// Lookup fresnel LUT value
|
||||
Id value{GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num,
|
||||
lighting.lut_fr.type, lighting.lut_fr.abs_input)};
|
||||
value = OpFMul(f32_id, ConstF32(lighting.lut_fr.scale), value);
|
||||
|
||||
// Enabled for diffuse lighting alpha component
|
||||
if (lighting.enable_primary_alpha) {
|
||||
diffuse_sum = OpCompositeInsert(vec_ids.Get(4), value, diffuse_sum, 3);
|
||||
}
|
||||
|
||||
// Enabled for the specular lighting alpha component
|
||||
if (lighting.enable_secondary_alpha) {
|
||||
specular_sum = OpCompositeInsert(vec_ids.Get(4), value, specular_sum, 3);
|
||||
}
|
||||
}
|
||||
|
||||
const bool shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable;
|
||||
const bool shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable;
|
||||
const Id shadow_rgb{OpVectorShuffle(vec_ids.Get(3), shadow, shadow, 0, 1, 2)};
|
||||
|
||||
const Id light_diffuse{GetLightMember(2)};
|
||||
const Id light_ambient{GetLightMember(3)};
|
||||
const Id diffuse_mul_dot{OpVectorTimesScalar(vec_ids.Get(3),light_diffuse, dot_product)};
|
||||
|
||||
// Compute primary fragment color (diffuse lighting) function
|
||||
Id diffuse_sum_rgb{OpFAdd(vec_ids.Get(3), diffuse_mul_dot, light_ambient)};
|
||||
diffuse_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), diffuse_sum_rgb, dist_atten);
|
||||
diffuse_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), diffuse_sum_rgb, spot_atten);
|
||||
if (shadow_primary_enable) {
|
||||
diffuse_sum_rgb = OpFMul(vec_ids.Get(3), diffuse_sum_rgb, shadow_rgb);
|
||||
}
|
||||
|
||||
// Compute secondary fragment color (specular lighting) function
|
||||
const Id specular_01{OpFAdd(vec_ids.Get(3), specular_0, specular_1)};
|
||||
Id specular_sum_rgb{OpVectorTimesScalar(vec_ids.Get(3), specular_01, clamp_highlights)};
|
||||
specular_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), specular_sum_rgb, dist_atten);
|
||||
specular_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), specular_sum_rgb, spot_atten);
|
||||
if (shadow_secondary_enable) {
|
||||
specular_sum_rgb = OpFMul(vec_ids.Get(3), specular_sum_rgb, shadow_rgb);
|
||||
}
|
||||
|
||||
// Accumulate the fragment colors
|
||||
const Id diffuse_sum_rgba{PadVectorF32(diffuse_sum_rgb, vec_ids.Get(4), 0.f)};
|
||||
const Id specular_sum_rgba{PadVectorF32(specular_sum_rgb, vec_ids.Get(4), 0.f)};
|
||||
diffuse_sum = OpFAdd(vec_ids.Get(4), diffuse_sum, diffuse_sum_rgba);
|
||||
specular_sum = OpFAdd(vec_ids.Get(4), specular_sum, specular_sum_rgba);
|
||||
}
|
||||
|
||||
// Apply shadow attenuation to alpha components if enabled
|
||||
if (lighting.shadow_alpha) {
|
||||
const Id shadow_a{OpCompositeExtract(f32_id, shadow, 3)};
|
||||
const Id shadow_a_vec{OpCompositeConstruct(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f), shadow_a)};
|
||||
if (lighting.enable_primary_alpha) {
|
||||
diffuse_sum = OpFMul(vec_ids.Get(4), diffuse_sum, shadow_a_vec);
|
||||
}
|
||||
if (lighting.enable_secondary_alpha) {
|
||||
specular_sum = OpFMul(vec_ids.Get(4), specular_sum, shadow_a_vec);
|
||||
}
|
||||
}
|
||||
|
||||
// Sum final lighting result
|
||||
const Id lighting_global_ambient{GetShaderDataMember(vec_ids.Get(3), ConstS32(24))};
|
||||
const Id lighting_global_ambient_rgba{PadVectorF32(lighting_global_ambient, vec_ids.Get(4), 0.f)};
|
||||
const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
|
||||
const Id one_vec{ConstF32(1.f, 1.f, 1.f, 1.f)};
|
||||
diffuse_sum = OpFAdd(vec_ids.Get(4), diffuse_sum, lighting_global_ambient_rgba);
|
||||
primary_fragment_color = OpFClamp(vec_ids.Get(4), diffuse_sum, zero_vec, one_vec);
|
||||
secondary_fragment_color = OpFClamp(vec_ids.Get(4), specular_sum, zero_vec, one_vec);
|
||||
}
|
||||
|
||||
/// Emits the SPIR-V for TEV stage `index`.
///
/// Unless the stage merely forwards the previous stage's output, the colour and alpha
/// modifiers/combiners are evaluated, scaled by the stage multipliers, clamped to [0, 1] and
/// stored in `last_tex_env_out`. The combiner buffer is advanced afterwards in every case.
void FragmentModule::WriteTevStage(s32 index) {
    const TexturingRegs::TevStageConfig stage =
        static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);

    // A stage that only replaces with the unmodified previous result at multiplier 1 is a
    // no-op, so no code is generated for it.
    const bool forwards_previous =
        stage.color_op == TevStageConfig::Operation::Replace &&
        stage.alpha_op == TevStageConfig::Operation::Replace &&
        stage.color_source1 == TevStageConfig::Source::Previous &&
        stage.alpha_source1 == TevStageConfig::Source::Previous &&
        stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
        stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
        stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1;

    if (!forwards_previous) {
        color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
        color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
        color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index);

        // Round the output of each TEV stage to maintain the PICA's 8 bits of precision
        const Id combined_color{Byteround(AppendColorCombiner(stage.color_op), 3)};

        Id combined_alpha{};
        if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
            // The Dot3_RGBA result is replicated into the alpha channel as well
            combined_alpha = OpCompositeExtract(f32_id, combined_color, 0);
        } else {
            alpha_results_1 =
                AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
            alpha_results_2 =
                AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
            alpha_results_3 =
                AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);

            combined_alpha = Byteround(AppendAlphaCombiner(stage.alpha_op));
        }

        // Apply the per-stage multipliers and saturate
        const Id scaled_color{
            OpVectorTimesScalar(vec_ids.Get(3), combined_color,
                                ConstF32(static_cast<float>(stage.GetColorMultiplier())))};
        const Id final_color{OpFClamp(vec_ids.Get(3), scaled_color, ConstF32(0.f, 0.f, 0.f),
                                      ConstF32(1.f, 1.f, 1.f))};
        const Id scaled_alpha{
            OpFMul(f32_id, combined_alpha,
                   ConstF32(static_cast<float>(stage.GetAlphaMultiplier())))};
        const Id final_alpha{OpFClamp(f32_id, scaled_alpha, ConstF32(0.f), ConstF32(1.f))};

        last_tex_env_out = OpCompositeConstruct(vec_ids.Get(4), final_color, final_alpha);
    }

    // The buffer visible to the next stage is whatever was pending before this stage ran
    combiner_buffer = next_combiner_buffer;

    if (config.TevStageUpdatesCombinerBufferColor(index)) {
        // Lanes 0-2 come from the stage output, lane 7 is .w of the pending buffer
        next_combiner_buffer = OpVectorShuffle(vec_ids.Get(4), last_tex_env_out,
                                               next_combiner_buffer, 0, 1, 2, 7);
    }

    if (config.TevStageUpdatesCombinerBufferAlpha(index)) {
        // Lanes 0-2 stay from the pending buffer, lane 7 is .w of the stage output
        next_combiner_buffer = OpVectorShuffle(vec_ids.Get(4), next_combiner_buffer,
                                               last_tex_env_out, 0, 1, 2, 7);
    }
}
|
||||
|
||||
/// Emits the alpha test for the given compare function.
///
/// The alpha of `last_tex_env_out` is scaled by 255, converted to a signed integer and
/// compared against the reference value read from shader data member 1. Fragments failing the
/// test are discarded with OpKill.
///
/// @param func Alpha test compare function.
/// @return true when the function body was terminated here (CompareFunc::Never emits OpKill
///         followed by OpFunctionEnd); false in every other case.
bool FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) {
    using CompareFunc = FramebufferRegs::CompareFunc;

    // Builds the *kill* condition, i.e. the negation of the pass condition, because the
    // result selects the branch that executes OpKill.
    const auto Compare = [this, func](Id alpha, Id alphatest_ref) {
        switch (func) {
        case CompareFunc::Equal:
            return OpINotEqual(bool_id, alpha, alphatest_ref);
        case CompareFunc::NotEqual:
            return OpIEqual(bool_id, alpha, alphatest_ref);
        case CompareFunc::LessThan:
            return OpSGreaterThanEqual(bool_id, alpha, alphatest_ref);
        case CompareFunc::LessThanOrEqual:
            return OpSGreaterThan(bool_id, alpha, alphatest_ref);
        case CompareFunc::GreaterThan:
            return OpSLessThanEqual(bool_id, alpha, alphatest_ref);
        case CompareFunc::GreaterThanOrEqual:
            return OpSLessThan(bool_id, alpha, alphatest_ref);
        default:
            return Id{};
        }
    };

    switch (func) {
    case CompareFunc::Never: // Kill the fragment
        OpKill();
        OpFunctionEnd();
        return true;
    case CompareFunc::Always: // Do nothing
        return false;
    case CompareFunc::Equal:
    case CompareFunc::NotEqual:
    case CompareFunc::LessThan:
    case CompareFunc::LessThanOrEqual:
    case CompareFunc::GreaterThan:
    case CompareFunc::GreaterThanOrEqual: {
        const Id alpha_scaled{
            OpFMul(f32_id, OpCompositeExtract(f32_id, last_tex_env_out, 3), ConstF32(255.f))};
        const Id alpha_int{OpConvertFToS(i32_id, alpha_scaled)};
        const Id alphatest_ref{GetShaderDataMember(i32_id, ConstS32(1))};
        const Id alpha_comp_ref{Compare(alpha_int, alphatest_ref)};

        // Structured selection: the kill block discards, the keep block is the merge point
        const Id kill_label{OpLabel()};
        const Id keep_label{OpLabel()};
        OpSelectionMerge(keep_label, spv::SelectionControlMask::MaskNone);
        OpBranchConditional(alpha_comp_ref, kill_label, keep_label);
        AddLabel(kill_label);
        OpKill();
        AddLabel(keep_label);
        return false;
    }
    default:
        // Report the unexpected value before bailing out; previously the return preceded the
        // log statement, which made the diagnostic unreachable.
        LOG_CRITICAL(Render_Vulkan, "Unknown alpha test condition {}", func);
        return false;
    }
}
|
||||
|
||||
/// Emits the code that samples the given texture unit and returns the id of the vec4 result.
///
/// Unit 0 honours `texture0_type`; units 1 and 2 are always sampled as 2D textures with an
/// explicit LOD; unit 3 always yields a zero vector here.
Id FragmentModule::SampleTexture(u32 texture_unit) {
    const PicaFSConfigState& state = config.state;
    const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};

    // PICA's LOD formula for 2D textures.
    // This LOD formula is the same as the LOD lower limit defined in OpenGL.
    // f(x, y) >= max{m_u, m_v, m_w}
    // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
    const auto SampleLod = [this](Id tex_id, Id tex_sampler_id, Id texcoord_id) {
        const Id image{OpLoad(image2d_id, tex_id)};
        const Id sampler{OpLoad(sampler_id, tex_sampler_id)};
        const Id combined{OpSampledImage(TypeSampledImage(image2d_id), image, sampler)};
        const Id raw_image{OpImage(image2d_id, combined)};
        const Id size_texels{OpImageQuerySizeLod(ivec_ids.Get(2), raw_image, ConstS32(0))};
        const Id uv{OpLoad(vec_ids.Get(2), texcoord_id)};

        // Texel-space coordinate and the magnitude of its screen-space derivatives
        const Id size_f{OpConvertSToF(vec_ids.Get(2), size_texels)};
        const Id texel_coord{OpFMul(vec_ids.Get(2), uv, size_f)};
        const Id ddx{OpDPdx(vec_ids.Get(2), texel_coord)};
        const Id abs_ddx{OpFAbs(vec_ids.Get(2), ddx)};
        const Id ddy{OpDPdy(vec_ids.Get(2), texel_coord)};
        const Id abs_ddy{OpFAbs(vec_ids.Get(2), ddy)};
        const Id max_deriv{OpFMax(vec_ids.Get(2), abs_ddx, abs_ddy)};

        const Id max_x{OpCompositeExtract(f32_id, max_deriv, 0)};
        const Id max_y{OpCompositeExtract(f32_id, max_deriv, 1)};
        const Id level{OpLog2(f32_id, OpFMax(f32_id, max_x, max_y))};
        return OpImageSampleExplicitLod(vec_ids.Get(4), combined, uv,
                                        spv::ImageOperandsMask::Lod, level);
    };

    // Implicit-LOD sample using (texcoord0.xy, texcoord0_w) as the coordinate
    const auto Sample = [this](Id tex_id, Id tex_sampler_id, bool projection) {
        const Id image{OpLoad(image2d_id, tex_id)};
        const Id sampler{OpLoad(sampler_id, tex_sampler_id)};
        const Id combined{OpSampledImage(TypeSampledImage(image2d_id), image, sampler)};
        const Id uv{OpLoad(vec_ids.Get(2), texcoord0_id)};
        const Id w{OpLoad(f32_id, texcoord0_w_id)};
        const Id u{OpCompositeExtract(f32_id, uv, 0)};
        const Id v{OpCompositeExtract(f32_id, uv, 1)};
        const Id uvw{OpCompositeConstruct(vec_ids.Get(3), u, v, w)};
        return projection ? OpImageSampleProjImplicitLod(vec_ids.Get(4), combined, uvw)
                          : OpImageSampleImplicitLod(vec_ids.Get(4), combined, uvw);
    };

    switch (texture_unit) {
    case 0:
        // Only unit 0 respects the texturing type
        switch (state.texture0_type) {
        case Pica::TexturingRegs::TextureConfig::Texture2D:
            return SampleLod(tex0_id, tex0_sampler_id, texcoord0_id);
        case Pica::TexturingRegs::TextureConfig::Projection2D:
            return Sample(tex0_id, tex0_sampler_id, true);
        case Pica::TexturingRegs::TextureConfig::TextureCube:
            return Sample(tex_cube_id, tex_cube_sampler_id, false);
        // Shadow2D and ShadowCube have no dedicated case and fall into the default branch
        case Pica::TexturingRegs::TextureConfig::Disabled:
            return zero_vec;
        default:
            LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
            UNIMPLEMENTED();
            return zero_vec;
        }
    case 1:
        return SampleLod(tex1_id, tex1_sampler_id, texcoord1_id);
    case 2:
        // Unit 2 may be configured to reuse the coordinates of unit 1
        return SampleLod(tex2_id, tex2_sampler_id,
                         state.texture2_use_coord1 ? texcoord1_id : texcoord2_id);
    case 3:
        // Procedural texturing is not emitted by this function, so unit 3 reads as zero
        LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
        return zero_vec;
    default:
        UNREACHABLE();
        return void_id;
    }
}
|
||||
|
||||
/// Quantises a value to 1/255 steps: round(x * 255) * (1 / 255).
///
/// @param variable_id Id of the f32 scalar (size <= 1) or f32 vector (size > 1) to round.
/// @param size Number of components of `variable_id`.
/// @return Id of the rounded value, with the same type as the input.
Id FragmentModule::Byteround(Id variable_id, u32 size) {
    if (size <= 1) {
        // Scalar path
        const Id scaled{OpFMul(f32_id, variable_id, ConstF32(255.f))};
        const Id rounded{OpRound(f32_id, scaled)};
        return OpFMul(f32_id, rounded, ConstF32(1.f / 255.f));
    }

    // Vector path
    const Id vec_type{vec_ids.Get(size)};
    const Id scaled{OpVectorTimesScalar(vec_type, variable_id, ConstF32(255.f))};
    const Id rounded{OpRound(vec_type, scaled)};
    return OpVectorTimesScalar(vec_type, rounded, ConstF32(1.f / 255.f));
}
|
||||
|
||||
/// Fetches one entry of a lighting LUT from the texel buffer and evaluates
/// `entry.r + entry.g * delta`.
///
/// @param lut_index Id of the integer LUT selector; its offset is read from shader data
///                  member 19 at [lut_index >> 2][lut_index & 3].
/// @param index Id of the integer entry index added to that offset.
/// @param delta Id of the f32 factor multiplied with the entry's green channel.
/// @return Id of the resulting f32 value.
Id FragmentModule::LookupLightingLUT(Id lut_index, Id index, Id delta) {
    // The texel buffer is loaded lazily and the loaded id is reused by later lookups
    if (!Sirit::ValidId(texture_buffer_lut_lf)) {
        texture_buffer_lut_lf =
            OpLoad(TypeSampledImage(image_buffer_id), texture_buffer_lut_lf_id);
    }

    // Offsets are packed four per ivec4: pick the vector first, then the component
    const Id vector_slot{OpShiftRightArithmetic(i32_id, lut_index, ConstS32(2))};
    const Id component_slot{OpBitwiseAnd(i32_id, lut_index, ConstS32(3))};
    const Id base_offset{
        GetShaderDataMember(i32_id, ConstS32(19), vector_slot, component_slot)};
    const Id texel_pos{OpIAdd(i32_id, base_offset, index)};

    const Id lut_image{OpImage(image_buffer_id, texture_buffer_lut_lf)};
    const Id texel{OpImageFetch(vec_ids.Get(4), lut_image, texel_pos)};
    const Id texel_r{OpCompositeExtract(f32_id, texel, 0)};
    const Id texel_g{OpCompositeExtract(f32_id, texel, 1)};
    return OpFma(f32_id, texel_g, delta, texel_r);
}
|
||||
|
||||
/// Resolves a TEV source selector to the id of the vec4 value it refers to.
///
/// @param source Source selector of the TEV stage.
/// @param index TEV stage index, used to pick the per-stage constant colour.
/// @return Id of the selected value; a zero vec4 constant for unknown selectors.
Id FragmentModule::AppendSource(TevStageConfig::Source source, s32 index) {
    switch (source) {
    // Values computed earlier in the shader
    case TevStageConfig::Source::PrimaryColor:
        return rounded_primary_color;
    case TevStageConfig::Source::PrimaryFragmentColor:
        return primary_fragment_color;
    case TevStageConfig::Source::SecondaryFragmentColor:
        return secondary_fragment_color;

    // Texture units, sampled on demand
    case TevStageConfig::Source::Texture0:
        return SampleTexture(0);
    case TevStageConfig::Source::Texture1:
        return SampleTexture(1);
    case TevStageConfig::Source::Texture2:
        return SampleTexture(2);
    case TevStageConfig::Source::Texture3:
        return SampleTexture(3);

    // Combiner state and per-stage constant (shader data member 26)
    case TevStageConfig::Source::PreviousBuffer:
        return combiner_buffer;
    case TevStageConfig::Source::Constant:
        return GetShaderDataMember(vec_ids.Get(4), ConstS32(26), ConstS32(index));
    case TevStageConfig::Source::Previous:
        return last_tex_env_out;

    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown source op {}", source);
        return ConstF32(0.f, 0.f, 0.f, 0.f);
    }
}
|
||||
|
||||
/// Applies a TEV colour modifier to a source and returns the id of the resulting vec3.
///
/// @param modifier Which channel(s) of the source to use, optionally as (1 - value).
/// @param source Source selector forwarded to AppendSource.
/// @param index TEV stage index forwarded to AppendSource.
/// @return Id of the modified colour; a vec3 of ones for unknown modifiers.
Id FragmentModule::AppendColorModifier(TevStageConfig::ColorModifier modifier,
                                       TevStageConfig::Source source, s32 index) {
    using ColorModifier = TevStageConfig::ColorModifier;
    const Id input{AppendSource(source, index)};
    const Id ones{ConstF32(1.f, 1.f, 1.f)};

    // Builds a vec3 from the chosen lanes of the vec4 source
    const auto Swizzle = [&](s32 x, s32 y, s32 z) -> Id {
        return OpVectorShuffle(vec_ids.Get(3), input, input, x, y, z);
    };
    // Computes 1 - value per component
    const auto Complement = [&](Id value) -> Id {
        return OpFSub(vec_ids.Get(3), ones, value);
    };

    switch (modifier) {
    case ColorModifier::SourceColor:
        return Swizzle(0, 1, 2);
    case ColorModifier::OneMinusSourceColor:
        return Complement(Swizzle(0, 1, 2));
    case ColorModifier::SourceRed:
        return Swizzle(0, 0, 0);
    case ColorModifier::OneMinusSourceRed:
        return Complement(Swizzle(0, 0, 0));
    case ColorModifier::SourceGreen:
        return Swizzle(1, 1, 1);
    case ColorModifier::OneMinusSourceGreen:
        return Complement(Swizzle(1, 1, 1));
    case ColorModifier::SourceBlue:
        return Swizzle(2, 2, 2);
    case ColorModifier::OneMinusSourceBlue:
        return Complement(Swizzle(2, 2, 2));
    case ColorModifier::SourceAlpha:
        return Swizzle(3, 3, 3);
    case ColorModifier::OneMinusSourceAlpha:
        return Complement(Swizzle(3, 3, 3));
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown color modifier op {}", modifier);
        return ones;
    }
}
|
||||
|
||||
/// Applies a TEV alpha modifier to a source and returns the id of the resulting f32.
///
/// @param modifier Which channel of the source to use, optionally as (1 - value).
/// @param source Source selector forwarded to AppendSource.
/// @param index TEV stage index forwarded to AppendSource.
/// @return Id of the modified alpha; the constant 1.0 for unknown modifiers.
Id FragmentModule::AppendAlphaModifier(TevStageConfig::AlphaModifier modifier,
                                       TevStageConfig::Source source, s32 index) {
    using AlphaModifier = TevStageConfig::AlphaModifier;
    const Id input{AppendSource(source, index)};
    const Id one{ConstF32(1.f)};

    // Extracts a single channel of the vec4 source
    const auto Channel = [&](s32 lane) -> Id {
        return OpCompositeExtract(f32_id, input, lane);
    };
    // Computes 1 - value
    const auto Complement = [&](Id value) -> Id { return OpFSub(f32_id, one, value); };

    switch (modifier) {
    case AlphaModifier::SourceAlpha:
        return Channel(3);
    case AlphaModifier::OneMinusSourceAlpha:
        return Complement(Channel(3));
    case AlphaModifier::SourceRed:
        return Channel(0);
    case AlphaModifier::OneMinusSourceRed:
        return Complement(Channel(0));
    case AlphaModifier::SourceGreen:
        return Channel(1);
    case AlphaModifier::OneMinusSourceGreen:
        return Complement(Channel(1));
    case AlphaModifier::SourceBlue:
        return Channel(2);
    case AlphaModifier::OneMinusSourceBlue:
        return Complement(Channel(2));
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown alpha modifier op {}", modifier);
        return one;
    }
}
|
||||
|
||||
/// Combines `color_results_1..3` with the given TEV operation and clamps the result to
/// [0, 1].
///
/// @param operation TEV colour combiner operation.
/// @return Id of the clamped vec3 result; unknown operations produce a zero vector.
Id FragmentModule::AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation) {
    using Operation = TevStageConfig::Operation;
    const Id half_vec{ConstF32(0.5f, 0.5f, 0.5f)};
    const Id one_vec{ConstF32(1.f, 1.f, 1.f)};
    const Id zero_vec{ConstF32(0.f, 0.f, 0.f)};
    const Id vec3_type{vec_ids.Get(3)};

    const Id combined = [&]() -> Id {
        switch (operation) {
        case Operation::Replace:
            return color_results_1;
        case Operation::Modulate:
            return OpFMul(vec3_type, color_results_1, color_results_2);
        case Operation::Add:
            return OpFAdd(vec3_type, color_results_1, color_results_2);
        case Operation::AddSigned: {
            const Id sum{OpFAdd(vec3_type, color_results_1, color_results_2)};
            return OpFSub(vec3_type, sum, half_vec);
        }
        case Operation::Lerp:
            // mix(b, a, t): result 3 weights result 1 against result 2
            return OpFMix(vec3_type, color_results_2, color_results_1, color_results_3);
        case Operation::Subtract:
            return OpFSub(vec3_type, color_results_1, color_results_2);
        case Operation::MultiplyThenAdd:
            return OpFma(vec3_type, color_results_1, color_results_2, color_results_3);
        case Operation::AddThenMultiply: {
            const Id sum{OpFAdd(vec3_type, color_results_1, color_results_2)};
            const Id saturated{OpFMin(vec3_type, sum, one_vec)};
            return OpFMul(vec3_type, saturated, color_results_3);
        }
        case Operation::Dot3_RGB:
        case Operation::Dot3_RGBA: {
            // 4 * dot(a - 0.5, b - 0.5), replicated into all three channels
            const Id lhs{OpFSub(vec3_type, color_results_1, half_vec)};
            const Id rhs{OpFSub(vec3_type, color_results_2, half_vec)};
            const Id dot{OpDot(f32_id, lhs, rhs)};
            const Id scaled{OpFMul(f32_id, dot, ConstF32(4.f))};
            return OpCompositeConstruct(vec3_type, scaled, scaled, scaled);
        }
        default:
            LOG_CRITICAL(Render_Vulkan, "Unknown color combiner operation: {}", operation);
            return zero_vec;
        }
    }();

    // Clamp result to 0.0, 1.0
    return OpFClamp(vec3_type, combined, zero_vec, one_vec);
}
|
||||
|
||||
/// Combines `alpha_results_1..3` with the given TEV operation and clamps the result to
/// [0, 1].
///
/// @param operation TEV alpha combiner operation.
/// @return Id of the clamped f32 result; operations without a case here produce 0.0.
Id FragmentModule::AppendAlphaCombiner(TevStageConfig::Operation operation) {
    using Operation = TevStageConfig::Operation;

    const Id combined = [&]() -> Id {
        switch (operation) {
        case Operation::Replace:
            return alpha_results_1;
        case Operation::Modulate:
            return OpFMul(f32_id, alpha_results_1, alpha_results_2);
        case Operation::Add:
            return OpFAdd(f32_id, alpha_results_1, alpha_results_2);
        case Operation::AddSigned: {
            const Id sum{OpFAdd(f32_id, alpha_results_1, alpha_results_2)};
            return OpFSub(f32_id, sum, ConstF32(0.5f));
        }
        case Operation::Lerp:
            // mix(b, a, t): result 3 weights result 1 against result 2
            return OpFMix(f32_id, alpha_results_2, alpha_results_1, alpha_results_3);
        case Operation::Subtract:
            return OpFSub(f32_id, alpha_results_1, alpha_results_2);
        case Operation::MultiplyThenAdd:
            return OpFma(f32_id, alpha_results_1, alpha_results_2, alpha_results_3);
        case Operation::AddThenMultiply: {
            const Id sum{OpFAdd(f32_id, alpha_results_1, alpha_results_2)};
            const Id saturated{OpFMin(f32_id, sum, ConstF32(1.f))};
            return OpFMul(f32_id, saturated, alpha_results_3);
        }
        default: {
            const Id fallback{ConstF32(0.f)};
            LOG_CRITICAL(Render_Vulkan, "Unknown alpha combiner operation: {}", operation);
            return fallback;
        }
        }
    }();

    return OpFClamp(f32_id, combined, ConstF32(0.f), ConstF32(1.f));
}
|
||||
|
||||
void FragmentModule::DefineArithmeticTypes() {
|
||||
void_id = Name(TypeVoid(), "void_id");
|
||||
bool_id = Name(TypeBool(), "bool_id");
|
||||
f32_id = Name(TypeFloat(32), "f32_id");
|
||||
i32_id = Name(TypeSInt(32), "i32_id");
|
||||
u32_id = Name(TypeUInt(32), "u32_id");
|
||||
|
||||
for (u32 size = 2; size <= 4; size++) {
|
||||
const u32 i = size - 2;
|
||||
vec_ids.ids[i] = Name(TypeVector(f32_id, size), fmt::format("vec{}_id", size));
|
||||
ivec_ids.ids[i] = Name(TypeVector(i32_id, size), fmt::format("ivec{}_id", size));
|
||||
uvec_ids.ids[i] = Name(TypeVector(u32_id, size), fmt::format("uvec{}_id", size));
|
||||
}
|
||||
}
|
||||
|
||||
void FragmentModule::DefineEntryPoint() {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
AddCapability(spv::Capability::SampledBuffer);
|
||||
AddCapability(spv::Capability::ImageQuery);
|
||||
SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
|
||||
|
||||
const Id main_type{TypeFunction(TypeVoid())};
|
||||
const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)};
|
||||
AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, texcoord0_id,
|
||||
texcoord1_id, texcoord2_id, texcoord0_w_id, normquat_id, view_id, color_id,
|
||||
gl_frag_coord_id, gl_frag_depth_id);
|
||||
AddExecutionMode(main_func, spv::ExecutionMode::OriginUpperLeft);
|
||||
AddExecutionMode(main_func, spv::ExecutionMode::DepthReplacing);
|
||||
}
|
||||
|
||||
void FragmentModule::DefineUniformStructs() {
|
||||
const Id light_src_struct_id{TypeStruct(vec_ids.Get(3), vec_ids.Get(3), vec_ids.Get(3), vec_ids.Get(3),
|
||||
vec_ids.Get(3), vec_ids.Get(3), f32_id, f32_id)};
|
||||
|
||||
const Id light_src_array_id{TypeArray(light_src_struct_id, ConstU32(NUM_LIGHTS))};
|
||||
const Id lighting_lut_array_id{TypeArray(ivec_ids.Get(4), ConstU32(NUM_LIGHTING_SAMPLERS / 4))};
|
||||
const Id const_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_TEV_STAGES))};
|
||||
|
||||
const Id shader_data_struct_id{TypeStruct(i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id,
|
||||
i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, i32_id,
|
||||
i32_id, f32_id, i32_id, u32_id, lighting_lut_array_id, vec_ids.Get(3),
|
||||
vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(3),
|
||||
light_src_array_id, const_color_array_id, vec_ids.Get(4), vec_ids.Get(4))};
|
||||
|
||||
constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u};
|
||||
constexpr std::array shader_data_offsets{0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u, 32u, 36u, 40u, 44u, 48u,
|
||||
52u, 56u, 60u, 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u,
|
||||
224u, 240u, 1136u, 1232u, 1248u};
|
||||
|
||||
Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u);
|
||||
Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u);
|
||||
Decorate(const_color_array_id, spv::Decoration::ArrayStride, 16u);
|
||||
for (u32 i = 0; i < static_cast<u32>(light_src_offsets.size()); i++) {
|
||||
MemberDecorate(light_src_struct_id, i, spv::Decoration::Offset, light_src_offsets[i]);
|
||||
}
|
||||
for (u32 i = 0; i < static_cast<u32>(shader_data_offsets.size()); i++) {
|
||||
MemberDecorate(shader_data_struct_id, i, spv::Decoration::Offset, shader_data_offsets[i]);
|
||||
}
|
||||
Decorate(shader_data_struct_id, spv::Decoration::Block);
|
||||
|
||||
shader_data_id = AddGlobalVariable(TypePointer(spv::StorageClass::Uniform, shader_data_struct_id),
|
||||
spv::StorageClass::Uniform);
|
||||
Decorate(shader_data_id, spv::Decoration::DescriptorSet, 0);
|
||||
Decorate(shader_data_id, spv::Decoration::Binding, 1);
|
||||
}
|
||||
|
||||
void FragmentModule::DefineInterface() {
|
||||
// Define interface block
|
||||
primary_color_id = DefineInput(vec_ids.Get(4), 1);
|
||||
texcoord0_id = DefineInput(vec_ids.Get(2), 2);
|
||||
texcoord1_id = DefineInput(vec_ids.Get(2), 3);
|
||||
texcoord2_id = DefineInput(vec_ids.Get(2), 4);
|
||||
texcoord0_w_id = DefineInput(f32_id, 5);
|
||||
normquat_id = DefineInput(vec_ids.Get(4), 6);
|
||||
view_id = DefineInput(vec_ids.Get(3), 7);
|
||||
color_id = DefineOutput(vec_ids.Get(4), 0);
|
||||
|
||||
// Define the texture unit samplers/uniforms
|
||||
image_buffer_id = TypeImage(f32_id, spv::Dim::Buffer, 0, 0, 0, 1, spv::ImageFormat::Unknown);
|
||||
image2d_id = TypeImage(f32_id, spv::Dim::Dim2D, 0, 0, 0, 1, spv::ImageFormat::Unknown);
|
||||
image_cube_id = TypeImage(f32_id, spv::Dim::Cube, 0, 0, 0, 1, spv::ImageFormat::Unknown);
|
||||
sampler_id = TypeSampler();
|
||||
|
||||
texture_buffer_lut_lf_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 2);
|
||||
texture_buffer_lut_rg_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 3);
|
||||
texture_buffer_lut_rgba_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 4);
|
||||
tex0_id = DefineUniformConst(image2d_id, 1, 0);
|
||||
tex1_id = DefineUniformConst(image2d_id, 1, 1);
|
||||
tex2_id = DefineUniformConst(image2d_id, 1, 2);
|
||||
tex_cube_id = DefineUniformConst(image_cube_id, 1, 3);
|
||||
tex0_sampler_id = DefineUniformConst(sampler_id, 2, 0);
|
||||
tex1_sampler_id = DefineUniformConst(sampler_id, 2, 1);
|
||||
tex2_sampler_id = DefineUniformConst(sampler_id, 2, 2);
|
||||
tex_cube_sampler_id = DefineUniformConst(sampler_id, 2, 3);
|
||||
|
||||
// Define built-ins
|
||||
gl_frag_coord_id = DefineVar(vec_ids.Get(4), spv::StorageClass::Input);
|
||||
gl_frag_depth_id = DefineVar(f32_id, spv::StorageClass::Output);
|
||||
Decorate(gl_frag_coord_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragCoord);
|
||||
Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
|
||||
}
|
||||
|
||||
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config) {
|
||||
FragmentModule module{config};
|
||||
module.Generate();
|
||||
return module.Assemble();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
227
src/video_core/renderer_vulkan/vk_shader_gen_spv.h
Normal file
227
src/video_core/renderer_vulkan/vk_shader_gen_spv.h
Normal file
@ -0,0 +1,227 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <sirit/sirit.h>
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Sirit::Id;
|
||||
|
||||
struct VectorIds {
|
||||
/// Returns the type id of the vector with the provided size
|
||||
[[nodiscard]] constexpr Id Get(u32 size) const {
|
||||
return ids[size - 2];
|
||||
}
|
||||
|
||||
std::array<Id, 3> ids;
|
||||
};
|
||||
|
||||
class FragmentModule : public Sirit::Module {
|
||||
static constexpr u32 NUM_TEV_STAGES = 6;
|
||||
static constexpr u32 NUM_LIGHTS = 8;
|
||||
static constexpr u32 NUM_LIGHTING_SAMPLERS = 24;
|
||||
public:
|
||||
FragmentModule(const PicaFSConfig& config);
|
||||
~FragmentModule();
|
||||
|
||||
/// Emits SPIR-V bytecode corresponding to the provided pica fragment configuration
|
||||
void Generate();
|
||||
|
||||
/// Undos the vulkan perspective transformation and applies the pica one
|
||||
void WriteDepth();
|
||||
|
||||
/// Writes the code to emulate fragment lighting
|
||||
void WriteLighting();
|
||||
|
||||
/// Writes the code to emulate the specified TEV stage
|
||||
void WriteTevStage(s32 index);
|
||||
|
||||
/// Writes the if-statement condition used to evaluate alpha testing.
|
||||
/// Returns true if the fragment was discarded
|
||||
[[nodiscard]] bool WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
|
||||
|
||||
/// Samples the current fragment texel from the provided texture unit
|
||||
[[nodiscard]] Id SampleTexture(u32 texture_unit);
|
||||
|
||||
/// Rounds the provided variable to the nearest 1/255th
|
||||
[[nodiscard]] Id Byteround(Id variable_id, u32 size = 1);
|
||||
|
||||
/// Lookups the lighting LUT at the provided lut_index
|
||||
[[nodiscard]] Id LookupLightingLUT(Id lut_index, Id index, Id delta);
|
||||
|
||||
/// Writes the specified TEV stage source component(s)
|
||||
[[nodiscard]] Id AppendSource(Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
|
||||
|
||||
/// Writes the color components to use for the specified TEV stage color modifier
|
||||
[[nodiscard]] Id AppendColorModifier(Pica::TexturingRegs::TevStageConfig::ColorModifier modifier,
|
||||
Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
|
||||
|
||||
/// Writes the alpha component to use for the specified TEV stage alpha modifier
|
||||
[[nodiscard]] Id AppendAlphaModifier(Pica::TexturingRegs::TevStageConfig::AlphaModifier modifier,
|
||||
Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
|
||||
|
||||
/// Writes the combiner function for the color components for the specified TEV stage operation
|
||||
[[nodiscard]] Id AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
|
||||
|
||||
/// Writes the combiner function for the alpha component for the specified TEV stage operation
|
||||
[[nodiscard]] Id AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
|
||||
|
||||
/// Loads the member specified from the shader_data uniform struct
|
||||
template <typename... Ids>
|
||||
[[nodiscard]] Id GetShaderDataMember(Id type, Ids... ids) {
|
||||
const Id uniform_ptr{TypePointer(spv::StorageClass::Uniform, type)};
|
||||
return OpLoad(type, OpAccessChain(uniform_ptr, shader_data_id, ids...));
|
||||
}
|
||||
|
||||
/// Pads the provided vector by inserting args at the end
|
||||
template <typename... Args>
|
||||
[[nodiscard]] Id PadVectorF32(Id vector, Id pad_type_id, Args&&... args) {
|
||||
return OpCompositeConstruct(pad_type_id, vector, ConstF32(args...));
|
||||
}
|
||||
|
||||
/// Defines a input variable
|
||||
[[nodiscard]] Id DefineInput(Id type, u32 location) {
|
||||
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
|
||||
Decorate(input_id, spv::Decoration::Location, location);
|
||||
return input_id;
|
||||
}
|
||||
|
||||
/// Defines a input variable
|
||||
[[nodiscard]] Id DefineOutput(Id type, u32 location) {
|
||||
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
|
||||
Decorate(output_id, spv::Decoration::Location, location);
|
||||
return output_id;
|
||||
}
|
||||
|
||||
/// Defines a uniform constant variable
|
||||
[[nodiscard]] Id DefineUniformConst(Id type, u32 set, u32 binding) {
|
||||
const Id uniform_id{DefineVar(type, spv::StorageClass::UniformConstant)};
|
||||
Decorate(uniform_id, spv::Decoration::DescriptorSet, set);
|
||||
Decorate(uniform_id, spv::Decoration::Binding, binding);
|
||||
return uniform_id;
|
||||
}
|
||||
|
||||
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
|
||||
const Id pointer_type_id{TypePointer(storage_class, type)};
|
||||
return AddGlobalVariable(pointer_type_id, storage_class);
|
||||
}
|
||||
|
||||
/// Returns the id of a signed integer constant of value
|
||||
[[nodiscard]] Id ConstU32(u32 value) {
|
||||
return Constant(u32_id, value);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
[[nodiscard]] Id ConstU32(Args&&... values) {
|
||||
constexpr auto size = sizeof...(values);
|
||||
static_assert(size >= 2 && size <= 4);
|
||||
const std::array constituents{Constant(u32_id, values)...};
|
||||
return ConstantComposite(uvec_ids.Get(size), constituents);
|
||||
}
|
||||
|
||||
/// Returns the id of a signed integer constant of value
|
||||
[[nodiscard]] Id ConstS32(s32 value) {
|
||||
return Constant(i32_id, value);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
[[nodiscard]] Id ConstS32(Args&&... values) {
|
||||
constexpr auto size = sizeof...(values);
|
||||
static_assert(size >= 2 && size <= 4);
|
||||
const std::array constituents{Constant(i32_id, values)...};
|
||||
return ConstantComposite(ivec_ids.Get(size), constituents);
|
||||
}
|
||||
|
||||
/// Returns the id of a float constant of value
|
||||
[[nodiscard]] Id ConstF32(float value) {
|
||||
return Constant(f32_id, value);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
[[nodiscard]] Id ConstF32(Args... values) {
|
||||
constexpr auto size = sizeof...(values);
|
||||
static_assert(size >= 2 && size <= 4);
|
||||
const std::array constituents{Constant(f32_id, values)...};
|
||||
return ConstantComposite(vec_ids.Get(size), constituents);
|
||||
}
|
||||
|
||||
private:
|
||||
void DefineArithmeticTypes();
|
||||
void DefineEntryPoint();
|
||||
void DefineUniformStructs();
|
||||
void DefineInterface();
|
||||
|
||||
private:
|
||||
PicaFSConfig config;
|
||||
Id void_id{};
|
||||
Id bool_id{};
|
||||
Id f32_id{};
|
||||
Id i32_id{};
|
||||
Id u32_id{};
|
||||
|
||||
VectorIds vec_ids{};
|
||||
VectorIds ivec_ids{};
|
||||
VectorIds uvec_ids{};
|
||||
|
||||
Id image2d_id{};
|
||||
Id image_cube_id{};
|
||||
Id image_buffer_id{};
|
||||
Id sampler_id{};
|
||||
Id shader_data_id{};
|
||||
|
||||
Id primary_color_id{};
|
||||
Id texcoord0_id{};
|
||||
Id texcoord1_id{};
|
||||
Id texcoord2_id{};
|
||||
Id texcoord0_w_id{};
|
||||
Id normquat_id{};
|
||||
Id view_id{};
|
||||
Id color_id{};
|
||||
|
||||
Id gl_frag_coord_id{};
|
||||
Id gl_frag_depth_id{};
|
||||
|
||||
Id tex0_id{};
|
||||
Id tex1_id{};
|
||||
Id tex2_id{};
|
||||
Id tex_cube_id{};
|
||||
Id tex0_sampler_id{};
|
||||
Id tex1_sampler_id{};
|
||||
Id tex2_sampler_id{};
|
||||
Id tex_cube_sampler_id{};
|
||||
Id texture_buffer_lut_lf_id{};
|
||||
Id texture_buffer_lut_rg_id{};
|
||||
Id texture_buffer_lut_rgba_id{};
|
||||
|
||||
Id texture_buffer_lut_lf{};
|
||||
|
||||
Id rounded_primary_color{};
|
||||
Id primary_fragment_color{};
|
||||
Id secondary_fragment_color{};
|
||||
Id combiner_buffer{};
|
||||
Id next_combiner_buffer{};
|
||||
Id last_tex_env_out{};
|
||||
|
||||
Id color_results_1{};
|
||||
Id color_results_2{};
|
||||
Id color_results_3{};
|
||||
Id alpha_results_1{};
|
||||
Id alpha_results_2{};
|
||||
Id alpha_results_3{};
|
||||
};
|
||||
|
||||
/**
 * Generates the SPIR-V fragment shader binary for the current Pica state
 * @param config ShaderCacheKey object generated for the current Pica state, used for the shader
 * configuration (NOTE: Use state in this struct only, not the Pica registers!)
 * @returns The generated SPIR-V module as a vector of 32-bit words
 */
|
||||
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config);
|
||||
|
||||
} // namespace Vulkan
|
@ -6,6 +6,7 @@
|
||||
#include <glslang/Include/ResourceLimits.h>
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
|
||||
@ -178,6 +179,8 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
|
||||
includer)) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
|
||||
shader->getInfoDebugLog());
|
||||
LOG_CRITICAL(Render_Vulkan, "{}", code);
|
||||
ASSERT(false);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
@ -215,10 +218,22 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
|
||||
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
|
||||
}
|
||||
|
||||
const vk::ShaderModuleCreateInfo shader_info = {.codeSize = out_code.size() * sizeof(u32),
|
||||
.pCode = out_code.data()};
|
||||
return CompileSPV(out_code, device);
|
||||
}
|
||||
|
||||
return device.createShaderModule(shader_info);
|
||||
MICROPROFILE_DEFINE(Vulkan_SPVCompilation, "Vulkan", "SPIR-V Shader Compilation", MP_RGB(100, 255, 52));
|
||||
/// Creates a Vulkan shader module from the provided SPIR-V words.
/// @param code SPIR-V words; the byte size passed to Vulkan is code.size() * sizeof(u32)
/// @param device Device used to create the shader module
/// @returns The created module. If creation throws vk::SystemError the error is logged and
/// UNREACHABLE() is hit; VK_NULL_HANDLE is returned should execution continue past it.
vk::ShaderModule CompileSPV(std::vector<u32> code, vk::Device device) {
    MICROPROFILE_SCOPE(Vulkan_SPVCompilation);

    const vk::ShaderModuleCreateInfo shader_info = {.codeSize = code.size() * sizeof(u32),
                                                    .pCode = code.data()};

    try {
        return device.createShaderModule(shader_info);
    } catch (const vk::SystemError& err) {
        // The exception is only inspected, so it is caught by const reference
        LOG_CRITICAL(Render_Vulkan, "{}", err.what());
        UNREACHABLE();
    }

    return VK_NULL_HANDLE;
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -13,4 +13,6 @@ enum class ShaderOptimization { High = 0, Debug = 1 };
|
||||
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device,
|
||||
ShaderOptimization level);
|
||||
|
||||
vk::ShaderModule CompileSPV(std::vector<u32> code, vk::Device device);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <algorithm>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
@ -114,7 +114,7 @@ void Swapchain::Present() {
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &swapchain,
|
||||
.pImageIndices = &index};
|
||||
|
||||
MICROPROFILE_SCOPE(Vulkan_Present);
|
||||
vk::Queue present_queue = instance.GetPresentQueue();
|
||||
try {
|
||||
[[maybe_unused]] vk::Result result = present_queue.presentKHR(present_info);
|
||||
|
@ -31,40 +31,48 @@ public:
|
||||
void Present();
|
||||
|
||||
/// Returns true when the swapchain should be recreated
|
||||
[[nodiscard]] bool NeedsRecreation() const {
|
||||
bool NeedsRecreation() const {
|
||||
return is_suboptimal || is_outdated;
|
||||
}
|
||||
|
||||
/// Returns the image_count member (presumably the number of swapchain images - TODO confirm)
std::size_t GetImageCount() const {
|
||||
return image_count;
|
||||
}
|
||||
|
||||
/// Returns the image_index member, the same index GetFramebuffer() uses to pick a framebuffer
std::size_t GetImageIndex() const {
|
||||
return image_index;
|
||||
}
|
||||
|
||||
/// Returns current swapchain state
|
||||
[[nodiscard]] vk::Extent2D GetExtent() const {
|
||||
vk::Extent2D GetExtent() const {
|
||||
return extent;
|
||||
}
|
||||
|
||||
/// Returns the swapchain surface
|
||||
[[nodiscard]] vk::SurfaceKHR GetSurface() const {
|
||||
vk::SurfaceKHR GetSurface() const {
|
||||
return surface;
|
||||
}
|
||||
|
||||
/// Returns the current framebuffer
|
||||
[[nodiscard]] vk::Framebuffer GetFramebuffer() const {
|
||||
vk::Framebuffer GetFramebuffer() const {
|
||||
return framebuffers[image_index];
|
||||
}
|
||||
|
||||
/// Returns the swapchain format
|
||||
[[nodiscard]] vk::SurfaceFormatKHR GetSurfaceFormat() const {
|
||||
return surface_format;
|
||||
vk::Format GetImageFormat() const {
|
||||
return surface_format.format;
|
||||
}
|
||||
|
||||
/// Returns the Vulkan swapchain handle
|
||||
[[nodiscard]] vk::SwapchainKHR GetHandle() const {
|
||||
vk::SwapchainKHR GetHandle() const {
|
||||
return swapchain;
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::Semaphore GetImageAcquiredSemaphore() const {
|
||||
vk::Semaphore GetImageAcquiredSemaphore() const {
|
||||
return image_acquired[frame_index];
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::Semaphore GetPresentReadySemaphore() const {
|
||||
vk::Semaphore GetPresentReadySemaphore() const {
|
||||
return present_ready[image_index];
|
||||
}
|
||||
|
||||
@ -100,9 +108,9 @@ private:
|
||||
std::vector<u64> resource_ticks;
|
||||
std::vector<vk::Semaphore> image_acquired;
|
||||
std::vector<vk::Semaphore> present_ready;
|
||||
u32 image_count = 0;
|
||||
u32 image_index = 0;
|
||||
u32 frame_index = 0;
|
||||
std::size_t image_count = 0;
|
||||
std::size_t image_index = 0;
|
||||
std::size_t frame_index = 0;
|
||||
bool is_outdated = true;
|
||||
bool is_suboptimal = true;
|
||||
};
|
||||
|
@ -60,7 +60,7 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
|
||||
return depth_offset;
|
||||
}
|
||||
|
||||
constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||
constexpr u32 UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
|
||||
constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||
|
||||
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
|
||||
@ -124,7 +124,6 @@ void TextureRuntime::FlushBuffers() {
|
||||
MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192, 235));
|
||||
void TextureRuntime::Finish() {
|
||||
MICROPROFILE_SCOPE(Vulkan_Finish);
|
||||
renderpass_cache.ExitRenderpass();
|
||||
scheduler.Finish();
|
||||
download_buffer.Invalidate();
|
||||
}
|
||||
@ -151,7 +150,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
|
||||
|
||||
ImageAlloc alloc{};
|
||||
alloc.format = format;
|
||||
alloc.levels = std::bit_width(std::max(width, height));
|
||||
alloc.levels = std::log2(std::max(width, height)) + 1;
|
||||
alloc.layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
|
||||
alloc.aspect = GetImageAspect(format);
|
||||
|
||||
@ -190,7 +189,14 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
|
||||
.samples = vk::SampleCountFlagBits::e1,
|
||||
.usage = usage};
|
||||
|
||||
const VmaAllocationCreateInfo alloc_info = {.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE};
|
||||
const VmaAllocationCreateInfo alloc_info = {
|
||||
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
|
||||
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
|
||||
.requiredFlags = 0,
|
||||
.preferredFlags = 0,
|
||||
.pool = VK_NULL_HANDLE,
|
||||
.pUserData = nullptr,
|
||||
};
|
||||
|
||||
VkImage unsafe_image{};
|
||||
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
|
||||
|
@ -14,8 +14,7 @@ namespace Pica::Shader {
|
||||
template <typename ShaderType>
|
||||
using ShaderCacheResult = std::pair<ShaderType, std::optional<std::string>>;
|
||||
|
||||
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
|
||||
std::string (*CodeGenerator)(const KeyType&)>
|
||||
template <typename KeyType, typename ShaderType, auto ModuleCompiler, auto CodeGenerator>
|
||||
class ShaderCache {
|
||||
public:
|
||||
ShaderCache() {}
|
||||
@ -23,17 +22,17 @@ public:
|
||||
|
||||
/// Returns a shader handle generated from the provided config
|
||||
template <typename... Args>
|
||||
auto Get(const KeyType& config, Args&&... args) -> ShaderCacheResult<ShaderType> {
|
||||
auto Get(const KeyType& config, Args&&... args) {
|
||||
auto [iter, new_shader] = shaders.emplace(config, ShaderType{});
|
||||
auto& shader = iter->second;
|
||||
|
||||
if (new_shader) {
|
||||
std::string code = CodeGenerator(config);
|
||||
const auto code = CodeGenerator(config);
|
||||
shader = ModuleCompiler(code, args...);
|
||||
return std::make_pair(shader, code);
|
||||
return shader;
|
||||
}
|
||||
|
||||
return std::make_pair(shader, std::nullopt);
|
||||
return shader;
|
||||
}
|
||||
|
||||
void Inject(const KeyType& key, ShaderType&& shader) {
|
||||
@ -51,9 +50,7 @@ public:
|
||||
* program buffer from the previous shader, which is hashed into the config, resulting several
|
||||
* different config values from the same shader program.
|
||||
*/
|
||||
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
|
||||
std::optional<std::string> (*CodeGenerator)(const Pica::Shader::ShaderSetup&,
|
||||
const KeyType&)>
|
||||
template <typename KeyType, typename ShaderType, auto ModuleCompiler, auto CodeGenerator>
|
||||
class ShaderDoubleCache {
|
||||
public:
|
||||
ShaderDoubleCache() = default;
|
||||
|
Reference in New Issue
Block a user