Compare commits
123 Commits
blit-scree
...
vulkan-sch
Author | SHA1 | Date | |
---|---|---|---|
cfd7bfbb20 | |||
fbe471693d | |||
4670114077 | |||
b5d4473bbb | |||
97455ae5f8 | |||
69cfff9022 | |||
7dd5049ae1 | |||
1396d39cf7 | |||
55220d70df | |||
aebb7f2d06 | |||
c3956ee207 | |||
9b582f2ba5 | |||
1f16ecad1a | |||
2d5e588d89 | |||
d551e2adc3 | |||
561398bbcd | |||
23cff45251 | |||
0223fa756c | |||
b7fa091db0 | |||
46ae192c05 | |||
e059fc5f4f | |||
0fe4225b22 | |||
3a60a6687d | |||
74b8081114 | |||
69db7d9d0d | |||
caf596e5eb | |||
9950e2aab5 | |||
13fe59ae55 | |||
d700e2c4cc | |||
59aeeca8ca | |||
54414d5a8f | |||
6086bfabca | |||
069df7741d | |||
8ec86d07d7 | |||
c625a5a0b4 | |||
04a188c96d | |||
8152881f06 | |||
0f4dc90acc | |||
d77574100d | |||
c10cf4414f | |||
4d7a00f324 | |||
e56d069ed5 | |||
e33adc1b11 | |||
11de7700aa | |||
b0fc94f155 | |||
0d4e530805 | |||
9a1cf869f9 | |||
523120e03d | |||
7f26562dce | |||
f750da1508 | |||
711b699689 | |||
7f7408b81e | |||
2da4c9ca90 | |||
20ccb995b1 | |||
601aac2a26 | |||
924257b2cc | |||
9e34ff40ed | |||
ba3c84168a | |||
77d6be6bde | |||
a6e2bcd986 | |||
2c30889fdc | |||
67a76bec5c | |||
98b7c33f62 | |||
dbfa06c6b1 | |||
88d0b7de13 | |||
de149e3ee9 | |||
99877f9465 | |||
075090569f | |||
f6af97fc16 | |||
5fa4a32cf6 | |||
fd77483a5f | |||
1bf1217a18 | |||
40db7b90fa | |||
8e1a23d971 | |||
9e8c403793 | |||
0e047a7a6e | |||
66158841cb | |||
634e6427a8 | |||
c1f46ed710 | |||
4776e21dd9 | |||
9e7b3bfa16 | |||
62d561c004 | |||
34a0571dc3 | |||
e46970a84a | |||
ccb1872604 | |||
1cd0b04399 | |||
ab3a228e5e | |||
7ae0d0ef27 | |||
19c82a76a3 | |||
366cdc854f | |||
d2fd8030dd | |||
a932a9f662 | |||
ca81c5a5f3 | |||
5d62b033df | |||
36d584cf3c | |||
7eb590153b | |||
e99ef32c6b | |||
066bdcfc40 | |||
dd1c06a55b | |||
fa7edc4a9c | |||
994b27ab5b | |||
77a99506cb | |||
5f8a884c2c | |||
a7cfe99ca1 | |||
424ed2df04 | |||
e22e641736 | |||
c080ed35c2 | |||
6dacd66f40 | |||
25a6da50ef | |||
38a5cc634f | |||
f26d00fbb4 | |||
b03c3b0d7d | |||
1f450d6d1d | |||
e464507b7a | |||
7158952ae7 | |||
841dee8ed8 | |||
01e53fe9d2 | |||
9762e24696 | |||
937c7e67a4 | |||
1d4f8db60d | |||
bc0c9f6eb7 | |||
667d978480 | |||
389d1862bb |
7
.github/workflows/ci.yml
vendored
7
.github/workflows/ci.yml
vendored
@ -95,13 +95,6 @@ jobs:
|
||||
env:
|
||||
MACOSX_DEPLOYMENT_TARGET: "10.13"
|
||||
ENABLE_COMPATIBILITY_REPORTING: "ON"
|
||||
- name: Pack
|
||||
run: ./.ci/macos/upload.sh
|
||||
- name: Upload
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: macos
|
||||
path: artifacts/
|
||||
windows:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -67,6 +67,3 @@
|
||||
[submodule "glm"]
|
||||
path = externals/glm
|
||||
url = https://github.com/g-truc/glm
|
||||
[submodule "sirit"]
|
||||
path = externals/sirit
|
||||
url = https://github.com/GPUCode/sirit
|
||||
|
@ -138,13 +138,13 @@ if (NOT ENABLE_GENERIC)
|
||||
if (MSVC)
|
||||
detect_architecture("_M_AMD64" x86_64)
|
||||
detect_architecture("_M_IX86" x86)
|
||||
detect_architecture("_M_ARM" arm)
|
||||
detect_architecture("_M_ARM64" arm64)
|
||||
detect_architecture("_M_ARM" ARM)
|
||||
detect_architecture("_M_ARM64" ARM64)
|
||||
else()
|
||||
detect_architecture("__x86_64__" x86_64)
|
||||
detect_architecture("__i386__" x86)
|
||||
detect_architecture("__arm__" arm)
|
||||
detect_architecture("__aarch64__" arm64)
|
||||
detect_architecture("__arm__" ARM)
|
||||
detect_architecture("__aarch64__" ARM64)
|
||||
endif()
|
||||
endif()
|
||||
if (NOT DEFINED ARCHITECTURE)
|
||||
|
16
externals/CMakeLists.txt
vendored
16
externals/CMakeLists.txt
vendored
@ -31,27 +31,24 @@ add_subdirectory(catch2)
|
||||
# Crypto++
|
||||
add_subdirectory(cryptopp)
|
||||
|
||||
# fmt and Xbyak need to be added before dynarmic
|
||||
# libfmt
|
||||
add_subdirectory(fmt)
|
||||
|
||||
# Xbyak
|
||||
if (ARCHITECTURE_x86_64)
|
||||
add_library(xbyak INTERFACE)
|
||||
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/xbyak/xbyak DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
|
||||
target_include_directories(xbyak SYSTEM INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
|
||||
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
|
||||
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
|
||||
endif()
|
||||
|
||||
# Dynarmic
|
||||
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
|
||||
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_ARM64)
|
||||
set(DYNARMIC_TESTS OFF)
|
||||
set(DYNARMIC_NO_BUNDLED_FMT ON)
|
||||
set(DYNARMIC_FRONTENDS "A32")
|
||||
add_subdirectory(dynarmic)
|
||||
endif()
|
||||
|
||||
# libfmt
|
||||
add_subdirectory(fmt)
|
||||
|
||||
# getopt
|
||||
if (MSVC)
|
||||
add_subdirectory(getopt)
|
||||
@ -67,9 +64,6 @@ set(ENABLE_SPVREMAPPER OFF)
|
||||
set(ENABLE_CTEST OFF)
|
||||
add_subdirectory(glslang)
|
||||
|
||||
# Sirit
|
||||
add_subdirectory(sirit)
|
||||
|
||||
# glm
|
||||
add_subdirectory(glm)
|
||||
|
||||
|
2
externals/dynarmic
vendored
2
externals/dynarmic
vendored
Submodule externals/dynarmic updated: 3946dcf005...4606179019
1
externals/sirit
vendored
1
externals/sirit
vendored
Submodule externals/sirit deleted from 297d820eeb
2
externals/xbyak
vendored
2
externals/xbyak
vendored
Submodule externals/xbyak updated: 48457bfa0d...c306b8e578
@ -16,7 +16,6 @@
|
||||
android:required="false" />
|
||||
|
||||
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
|
||||
<uses-permission android:name="android.permission.INTERNET" />
|
||||
<uses-permission android:name="android.permission.CAMERA" />
|
||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
|
||||
|
@ -7,7 +7,7 @@ buildscript {
|
||||
jcenter()
|
||||
}
|
||||
dependencies {
|
||||
classpath 'com.android.tools.build:gradle:7.3.1'
|
||||
classpath 'com.android.tools.build:gradle:7.2.0'
|
||||
|
||||
// NOTE: Do not place your application dependencies here; they belong
|
||||
// in the individual module build.gradle files
|
||||
|
@ -204,15 +204,14 @@ void Config::ReadValues() {
|
||||
Settings::values.use_virtual_sd =
|
||||
sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true);
|
||||
|
||||
Settings::values.use_custom_storage =
|
||||
sdl2_config->GetBoolean("Data Storage", "use_custom_storage", false);
|
||||
|
||||
if (Settings::values.use_custom_storage) {
|
||||
FileUtil::UpdateUserPath(FileUtil::UserPath::NANDDir,
|
||||
sdl2_config->GetString("Data Storage", "nand_directory", ""));
|
||||
FileUtil::UpdateUserPath(FileUtil::UserPath::SDMCDir,
|
||||
sdl2_config->GetString("Data Storage", "sdmc_directory", ""));
|
||||
}
|
||||
const std::string default_nand_dir = FileUtil::GetDefaultUserPath(FileUtil::UserPath::NANDDir);
|
||||
FileUtil::UpdateUserPath(
|
||||
FileUtil::UserPath::NANDDir,
|
||||
sdl2_config->GetString("Data Storage", "nand_directory", default_nand_dir));
|
||||
const std::string default_sdmc_dir = FileUtil::GetDefaultUserPath(FileUtil::UserPath::SDMCDir);
|
||||
FileUtil::UpdateUserPath(
|
||||
FileUtil::UserPath::SDMCDir,
|
||||
sdl2_config->GetString("Data Storage", "sdmc_directory", default_sdmc_dir));
|
||||
|
||||
// System
|
||||
Settings::values.is_new_3ds = sdl2_config->GetBoolean("System", "is_new_3ds", true);
|
||||
|
@ -250,10 +250,6 @@ volume =
|
||||
# 1 (default): Yes, 0: No
|
||||
use_virtual_sd =
|
||||
|
||||
# Whether to use custom storage locations
|
||||
# 1: Yes, 0 (default): No
|
||||
use_custom_storage =
|
||||
|
||||
# The path of the virtual SD card directory.
|
||||
# empty (default) will use the user_path
|
||||
sdmc_directory =
|
||||
|
@ -320,8 +320,6 @@ static Frontend::WindowSystemType GetWindowSystemType() {
|
||||
return Frontend::WindowSystemType::X11;
|
||||
else if (platform_name == QStringLiteral("wayland"))
|
||||
return Frontend::WindowSystemType::Wayland;
|
||||
else if (platform_name == QStringLiteral("cocoa"))
|
||||
return Frontend::WindowSystemType::MacOS;
|
||||
|
||||
LOG_CRITICAL(Frontend, "Unknown Qt platform!");
|
||||
return Frontend::WindowSystemType::Windows;
|
||||
|
@ -304,17 +304,21 @@ void Config::ReadDataStorageValues() {
|
||||
|
||||
Settings::values.use_virtual_sd = ReadSetting(QStringLiteral("use_virtual_sd"), true).toBool();
|
||||
|
||||
Settings::values.use_custom_storage =
|
||||
ReadSetting(QStringLiteral("use_custom_storage"), false).toBool();
|
||||
const std::string nand_dir =
|
||||
ReadSetting(QStringLiteral("nand_directory"), QStringLiteral("")).toString().toStdString();
|
||||
ReadSetting(
|
||||
QStringLiteral("nand_directory"),
|
||||
QString::fromStdString(FileUtil::GetDefaultUserPath(FileUtil::UserPath::NANDDir)))
|
||||
.toString()
|
||||
.toStdString();
|
||||
const std::string sdmc_dir =
|
||||
ReadSetting(QStringLiteral("sdmc_directory"), QStringLiteral("")).toString().toStdString();
|
||||
ReadSetting(
|
||||
QStringLiteral("sdmc_directory"),
|
||||
QString::fromStdString(FileUtil::GetDefaultUserPath(FileUtil::UserPath::SDMCDir)))
|
||||
.toString()
|
||||
.toStdString();
|
||||
|
||||
if (Settings::values.use_custom_storage) {
|
||||
FileUtil::UpdateUserPath(FileUtil::UserPath::NANDDir, nand_dir);
|
||||
FileUtil::UpdateUserPath(FileUtil::UserPath::SDMCDir, sdmc_dir);
|
||||
}
|
||||
FileUtil::UpdateUserPath(FileUtil::UserPath::NANDDir, nand_dir);
|
||||
FileUtil::UpdateUserPath(FileUtil::UserPath::SDMCDir, sdmc_dir);
|
||||
|
||||
qt_config->endGroup();
|
||||
}
|
||||
@ -485,8 +489,6 @@ void Config::ReadRendererValues() {
|
||||
ReadSetting(QStringLiteral("graphics_api"), static_cast<u32>(Settings::GraphicsAPI::OpenGL))
|
||||
.toUInt());
|
||||
Settings::values.physical_device = ReadSetting(QStringLiteral("physical_device"), 0).toUInt();
|
||||
Settings::values.async_command_recording = ReadSetting(QStringLiteral("async_command_recording"), true).toBool();
|
||||
Settings::values.spirv_shader_gen = ReadSetting(QStringLiteral("spirv_shader_gen"), false).toBool();
|
||||
Settings::values.use_hw_renderer =
|
||||
ReadSetting(QStringLiteral("use_hw_renderer"), true).toBool();
|
||||
Settings::values.use_hw_shader = ReadSetting(QStringLiteral("use_hw_shader"), true).toBool();
|
||||
@ -874,13 +876,12 @@ void Config::SaveDataStorageValues() {
|
||||
qt_config->beginGroup(QStringLiteral("Data Storage"));
|
||||
|
||||
WriteSetting(QStringLiteral("use_virtual_sd"), Settings::values.use_virtual_sd, true);
|
||||
WriteSetting(QStringLiteral("use_custom_storage"), Settings::values.use_custom_storage, false);
|
||||
WriteSetting(QStringLiteral("nand_directory"),
|
||||
QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)),
|
||||
QStringLiteral(""));
|
||||
QString::fromStdString(FileUtil::GetDefaultUserPath(FileUtil::UserPath::NANDDir)));
|
||||
WriteSetting(QStringLiteral("sdmc_directory"),
|
||||
QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)),
|
||||
QStringLiteral(""));
|
||||
QString::fromStdString(FileUtil::GetDefaultUserPath(FileUtil::UserPath::SDMCDir)));
|
||||
|
||||
qt_config->endGroup();
|
||||
}
|
||||
@ -1006,8 +1007,6 @@ void Config::SaveRendererValues() {
|
||||
WriteSetting(QStringLiteral("graphics_api"), static_cast<u32>(Settings::values.graphics_api),
|
||||
static_cast<u32>(Settings::GraphicsAPI::OpenGL));
|
||||
WriteSetting(QStringLiteral("physical_device"), Settings::values.physical_device, 0);
|
||||
WriteSetting(QStringLiteral("async_command_recording"), Settings::values.async_command_recording, true);
|
||||
WriteSetting(QStringLiteral("spirv_shader_gen"), Settings::values.spirv_shader_gen, false);
|
||||
WriteSetting(QStringLiteral("use_hw_renderer"), Settings::values.use_hw_renderer, true);
|
||||
WriteSetting(QStringLiteral("use_hw_shader"), Settings::values.use_hw_shader, true);
|
||||
#ifdef __APPLE__
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
#include <QDesktopServices>
|
||||
#include <QUrl>
|
||||
#include <QMessageBox>
|
||||
#include "citra_qt/configuration/configure_debug.h"
|
||||
#include "citra_qt/debugger/console.h"
|
||||
#include "citra_qt/uisettings.h"
|
||||
@ -12,9 +11,7 @@
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "core/settings.h"
|
||||
#include "qcheckbox.h"
|
||||
#include "ui_configure_debug.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
ConfigureDebug::ConfigureDebug(QWidget* parent)
|
||||
: QWidget(parent), ui(std::make_unique<Ui::ConfigureDebug>()) {
|
||||
@ -26,36 +23,6 @@ ConfigureDebug::ConfigureDebug(QWidget* parent)
|
||||
QDesktopServices::openUrl(QUrl::fromLocalFile(path));
|
||||
});
|
||||
|
||||
connect(ui->toggle_renderer_debug, &QCheckBox::clicked, this, [this](bool checked) {
|
||||
if (checked && Settings::values.graphics_api == Settings::GraphicsAPI::Vulkan) {
|
||||
try {
|
||||
Vulkan::Instance debug_inst{true};
|
||||
} catch (vk::LayerNotPresentError& err) {
|
||||
ui->toggle_renderer_debug->toggle();
|
||||
QMessageBox::warning(
|
||||
this, tr("Validation layer not available"),
|
||||
tr("Unable to enable debug renderer because the layer "
|
||||
"<strong>VK_LAYER_KHRONOS_validation</strong> is missing. "
|
||||
"Please install the Vulkan SDK or the appropriate package of your distribution"));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
connect(ui->toggle_dump_command_buffers, &QCheckBox::clicked, this, [this](bool checked) {
|
||||
if (checked && Settings::values.graphics_api == Settings::GraphicsAPI::Vulkan) {
|
||||
try {
|
||||
Vulkan::Instance debug_inst{false, true};
|
||||
} catch (vk::LayerNotPresentError& err) {
|
||||
ui->toggle_dump_command_buffers->toggle();
|
||||
QMessageBox::warning(
|
||||
this, tr("Command buffer dumping not available"),
|
||||
tr("Unable to enable command buffer dumping because the layer "
|
||||
"<strong>VK_LAYER_LUNARG_api_dump</strong> is missing. "
|
||||
"Please install the Vulkan SDK or the appropriate package of your distribution"));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const bool is_powered_on = Core::System::GetInstance().IsPoweredOn();
|
||||
ui->toggle_cpu_jit->setEnabled(!is_powered_on);
|
||||
ui->toggle_renderer_debug->setEnabled(!is_powered_on);
|
||||
|
@ -22,6 +22,5 @@ public:
|
||||
void RetranslateUI();
|
||||
void SetConfiguration();
|
||||
|
||||
private:
|
||||
std::unique_ptr<Ui::ConfigureDebug> ui;
|
||||
};
|
||||
|
@ -26,7 +26,6 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
|
||||
ui->graphics_api_combo->setEnabled(not_running);
|
||||
ui->toggle_shader_jit->setEnabled(not_running);
|
||||
ui->toggle_disk_shader_cache->setEnabled(hw_renderer_enabled && not_running);
|
||||
ui->toggle_async_recording->setEnabled(hw_renderer_enabled && not_running);
|
||||
ui->physical_device_combo->setEnabled(not_running);
|
||||
SetPhysicalDeviceComboVisibility(ui->graphics_api_combo->currentIndex());
|
||||
|
||||
@ -84,8 +83,6 @@ void ConfigureGraphics::SetConfiguration() {
|
||||
ui->toggle_vsync_new->setChecked(Settings::values.use_vsync_new);
|
||||
ui->graphics_api_combo->setCurrentIndex(static_cast<int>(Settings::values.graphics_api));
|
||||
ui->physical_device_combo->setCurrentIndex(static_cast<int>(Settings::values.physical_device));
|
||||
ui->toggle_async_recording->setChecked(Settings::values.async_command_recording);
|
||||
ui->spirv_shader_gen->setChecked(Settings::values.spirv_shader_gen);
|
||||
}
|
||||
|
||||
void ConfigureGraphics::ApplyConfiguration() {
|
||||
@ -99,8 +96,6 @@ void ConfigureGraphics::ApplyConfiguration() {
|
||||
Settings::values.graphics_api =
|
||||
static_cast<Settings::GraphicsAPI>(ui->graphics_api_combo->currentIndex());
|
||||
Settings::values.physical_device = static_cast<u16>(ui->physical_device_combo->currentIndex());
|
||||
Settings::values.async_command_recording = ui->toggle_async_recording->isChecked();
|
||||
Settings::values.spirv_shader_gen = ui->spirv_shader_gen->isChecked();
|
||||
}
|
||||
|
||||
void ConfigureGraphics::RetranslateUI() {
|
||||
@ -123,5 +118,4 @@ void ConfigureGraphics::SetPhysicalDeviceComboVisibility(int index) {
|
||||
const bool is_visible = graphics_api == Settings::GraphicsAPI::Vulkan;
|
||||
ui->physical_device_label->setVisible(is_visible);
|
||||
ui->physical_device_combo->setVisible(is_visible);
|
||||
ui->spirv_shader_gen->setVisible(is_visible);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>400</width>
|
||||
<height>513</height>
|
||||
<height>430</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="minimumSize">
|
||||
@ -70,13 +70,6 @@
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="spirv_shader_gen">
|
||||
<property name="text">
|
||||
<string>SPIR-V Shader Generation</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
@ -178,16 +171,6 @@
|
||||
<string>Advanced</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_2">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="toggle_async_recording">
|
||||
<property name="toolTip">
|
||||
<string><html><head/><body><p>Offloads command buffer recording and fragment shader generation to a worker thread. Can improve performance especially on weaker systems. Disable if you notice better performance. If unsure leave it enabled,</p></body></html></string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Async Command Recording</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="toggle_disk_shader_cache">
|
||||
<property name="toolTip">
|
||||
|
@ -51,42 +51,28 @@ ConfigureStorage::ConfigureStorage(QWidget* parent)
|
||||
ApplyConfiguration();
|
||||
SetConfiguration();
|
||||
});
|
||||
connect(ui->toggle_custom_storage, &QCheckBox::clicked, this, [this]() {
|
||||
ApplyConfiguration();
|
||||
SetConfiguration();
|
||||
});
|
||||
}
|
||||
|
||||
ConfigureStorage::~ConfigureStorage() = default;
|
||||
|
||||
void ConfigureStorage::SetConfiguration() {
|
||||
ui->nand_group->setVisible(Settings::values.use_custom_storage);
|
||||
ui->nand_group->setVisible(Settings::values.use_virtual_sd);
|
||||
QString nand_path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir));
|
||||
ui->nand_dir_path->setText(nand_path);
|
||||
ui->open_nand_dir->setEnabled(!nand_path.isEmpty());
|
||||
|
||||
ui->sdmc_group->setVisible(Settings::values.use_virtual_sd &&
|
||||
Settings::values.use_custom_storage);
|
||||
ui->sdmc_group->setVisible(Settings::values.use_virtual_sd);
|
||||
QString sdmc_path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir));
|
||||
ui->sdmc_dir_path->setText(sdmc_path);
|
||||
ui->open_sdmc_dir->setEnabled(!sdmc_path.isEmpty());
|
||||
|
||||
ui->toggle_virtual_sd->setChecked(Settings::values.use_virtual_sd);
|
||||
ui->toggle_custom_storage->setChecked(Settings::values.use_custom_storage);
|
||||
|
||||
ui->storage_group->setEnabled(!Core::System::GetInstance().IsPoweredOn());
|
||||
}
|
||||
|
||||
void ConfigureStorage::ApplyConfiguration() {
|
||||
Settings::values.use_virtual_sd = ui->toggle_virtual_sd->isChecked();
|
||||
Settings::values.use_custom_storage = ui->toggle_custom_storage->isChecked();
|
||||
|
||||
if (!Settings::values.use_custom_storage) {
|
||||
FileUtil::UpdateUserPath(FileUtil::UserPath::NANDDir,
|
||||
GetDefaultUserPath(FileUtil::UserPath::NANDDir));
|
||||
FileUtil::UpdateUserPath(FileUtil::UserPath::SDMCDir,
|
||||
GetDefaultUserPath(FileUtil::UserPath::SDMCDir));
|
||||
}
|
||||
}
|
||||
|
||||
void ConfigureStorage::RetranslateUI() {
|
||||
|
@ -34,147 +34,131 @@
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="custom_storage_group">
|
||||
<widget class="QGroupBox" name="nand_group">
|
||||
<property name="title">
|
||||
<string>Custom Storage</string>
|
||||
<string/>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_6">
|
||||
<layout class="QVBoxLayout" name="verticalLayout">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="toggle_custom_storage">
|
||||
<property name="text">
|
||||
<string>Use Custom Storage</string>
|
||||
</property>
|
||||
</widget>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout">
|
||||
<item>
|
||||
<widget class="QLabel" name="label">
|
||||
<property name="text">
|
||||
<string>NAND Directory</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLineEdit" name="nand_dir_path">
|
||||
<property name="enabled">
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="open_nand_dir">
|
||||
<property name="text">
|
||||
<string>Open</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="nand_group">
|
||||
<property name="title">
|
||||
<string/>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="verticalLayout">
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout">
|
||||
<item>
|
||||
<widget class="QLabel" name="label">
|
||||
<property name="text">
|
||||
<string>NAND Directory</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLineEdit" name="nand_dir_path">
|
||||
<property name="enabled">
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="open_nand_dir">
|
||||
<property name="text">
|
||||
<string>Open</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
||||
<item>
|
||||
<widget class="QLabel" name="label_4">
|
||||
<property name="text">
|
||||
<string>NOTE: This does not move the contents of the previous directory to the new one.</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="horizontalSpacer_3">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Horizontal</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>40</width>
|
||||
<height>20</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="change_nand_dir">
|
||||
<property name="text">
|
||||
<string>Change</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
||||
<item>
|
||||
<widget class="QLabel" name="label_4">
|
||||
<property name="text">
|
||||
<string>NOTE: This does not move the contents of the previous directory to the new one.</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="horizontalSpacer_3">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Horizontal</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>40</width>
|
||||
<height>20</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="change_nand_dir">
|
||||
<property name="text">
|
||||
<string>Change</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="sdmc_group">
|
||||
<property name="title">
|
||||
<string/>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_4">
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_3">
|
||||
<item>
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="text">
|
||||
<string>SDMC Directory</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLineEdit" name="sdmc_dir_path">
|
||||
<property name="enabled">
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="open_sdmc_dir">
|
||||
<property name="text">
|
||||
<string>Open</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="sdmc_group">
|
||||
<property name="title">
|
||||
<string/>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_4">
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_3">
|
||||
<item>
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="text">
|
||||
<string>SDMC Directory</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLineEdit" name="sdmc_dir_path">
|
||||
<property name="enabled">
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="open_sdmc_dir">
|
||||
<property name="text">
|
||||
<string>Open</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_4">
|
||||
<item>
|
||||
<widget class="QLabel" name="label_3">
|
||||
<property name="text">
|
||||
<string>NOTE: This does not move the contents of the previous directory to the new one.</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="horizontalSpacer_4">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Horizontal</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>40</width>
|
||||
<height>20</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="change_sdmc_dir">
|
||||
<property name="text">
|
||||
<string>Change</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_4">
|
||||
<item>
|
||||
<widget class="QLabel" name="label_3">
|
||||
<property name="text">
|
||||
<string>NOTE: This does not move the contents of the previous directory to the new one.</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="horizontalSpacer_4">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Horizontal</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>40</width>
|
||||
<height>20</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="change_sdmc_dir">
|
||||
<property name="text">
|
||||
<string>Change</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
|
@ -3,7 +3,6 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <QApplication>
|
||||
#include <QDir>
|
||||
#include <QFileInfo>
|
||||
#include <QFileSystemWatcher>
|
||||
#include <QHBoxLayout>
|
||||
@ -32,8 +31,6 @@
|
||||
#include "core/file_sys/archive_extsavedata.h"
|
||||
#include "core/file_sys/archive_source_sd_savedata.h"
|
||||
#include "core/hle/service/fs/archive.h"
|
||||
#include "core/settings.h"
|
||||
#include "qcursor.h"
|
||||
|
||||
GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist, QObject* parent)
|
||||
: QObject(parent), gamelist{gamelist} {}
|
||||
@ -465,7 +462,6 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
context_menu.exec(tree_view->viewport()->mapToGlobal(menu_location));
|
||||
}
|
||||
|
||||
@ -479,27 +475,19 @@ void GameList::AddGamePopup(QMenu& context_menu, const QString& path, u64 progra
|
||||
QAction* open_texture_load_location =
|
||||
context_menu.addAction(tr("Open Custom Texture Location"));
|
||||
QAction* open_mods_location = context_menu.addAction(tr("Open Mods Location"));
|
||||
QMenu* shader_menu = context_menu.addMenu(tr("Disk Shader Cache"));
|
||||
QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
|
||||
QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry"));
|
||||
|
||||
QAction* open_shader_cache_location = shader_menu->addAction(tr("Open Shader Cache Location"));
|
||||
shader_menu->addSeparator();
|
||||
QAction* delete_opengl_disk_shader_cache =
|
||||
shader_menu->addAction(tr("Delete OpenGL Shader Cache"));
|
||||
QAction* delete_vulkan_disk_shader_cache =
|
||||
shader_menu->addAction(tr("Delete Vulkan Shader Cache"));
|
||||
|
||||
const bool is_application =
|
||||
0x0004000000000000 <= program_id && program_id <= 0x00040000FFFFFFFF;
|
||||
|
||||
std::string sdmc_dir = FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir);
|
||||
open_save_location->setEnabled(
|
||||
open_save_location->setVisible(
|
||||
is_application && FileUtil::Exists(FileSys::ArchiveSource_SDSaveData::GetSaveDataPathFor(
|
||||
sdmc_dir, program_id)));
|
||||
|
||||
if (extdata_id) {
|
||||
open_extdata_location->setEnabled(
|
||||
open_extdata_location->setVisible(
|
||||
is_application &&
|
||||
FileUtil::Exists(FileSys::GetExtDataPathFromId(sdmc_dir, extdata_id)));
|
||||
} else {
|
||||
@ -507,9 +495,9 @@ void GameList::AddGamePopup(QMenu& context_menu, const QString& path, u64 progra
|
||||
}
|
||||
|
||||
auto media_type = Service::AM::GetTitleMediaType(program_id);
|
||||
open_application_location->setEnabled(path.toStdString() ==
|
||||
open_application_location->setVisible(path.toStdString() ==
|
||||
Service::AM::GetTitleContentPath(media_type, program_id));
|
||||
open_update_location->setEnabled(
|
||||
open_update_location->setVisible(
|
||||
is_application && FileUtil::Exists(Service::AM::GetTitlePath(Service::FS::MediaType::SDMC,
|
||||
program_id + 0xe00000000) +
|
||||
"content/"));
|
||||
@ -548,13 +536,6 @@ void GameList::AddGamePopup(QMenu& context_menu, const QString& path, u64 progra
|
||||
emit OpenFolderRequested(program_id, GameListOpenTarget::TEXTURE_LOAD);
|
||||
}
|
||||
});
|
||||
connect(open_texture_load_location, &QAction::triggered, this, [this, program_id] {
|
||||
if (FileUtil::CreateFullPath(fmt::format("{}textures/{:016X}/",
|
||||
FileUtil::GetUserPath(FileUtil::UserPath::LoadDir),
|
||||
program_id))) {
|
||||
emit OpenFolderRequested(program_id, GameListOpenTarget::TEXTURE_LOAD);
|
||||
}
|
||||
});
|
||||
connect(open_mods_location, &QAction::triggered, this, [this, program_id] {
|
||||
if (FileUtil::CreateFullPath(fmt::format("{}mods/{:016X}/",
|
||||
FileUtil::GetUserPath(FileUtil::UserPath::LoadDir),
|
||||
@ -567,26 +548,6 @@ void GameList::AddGamePopup(QMenu& context_menu, const QString& path, u64 progra
|
||||
connect(navigate_to_gamedb_entry, &QAction::triggered, this, [this, program_id]() {
|
||||
emit NavigateToGamedbEntryRequested(program_id, compatibility_list);
|
||||
});
|
||||
connect(open_shader_cache_location, &QAction::triggered, this, [this, program_id] {
|
||||
if (FileUtil::CreateFullPath(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir))) {
|
||||
emit OpenFolderRequested(program_id, GameListOpenTarget::SHADER_CACHE);
|
||||
}
|
||||
});
|
||||
connect(delete_opengl_disk_shader_cache, &QAction::triggered, this, [program_id] {
|
||||
const std::string_view cache_type =
|
||||
Settings::values.separable_shader ? "separable" : "conventional";
|
||||
const std::string path = fmt::format("{}opengl/precompiled/{}/{:016X}.bin",
|
||||
FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir),
|
||||
cache_type, program_id);
|
||||
QFile file{QString::fromStdString(path)};
|
||||
file.remove();
|
||||
});
|
||||
connect(delete_vulkan_disk_shader_cache, &QAction::triggered, this, [] {
|
||||
const std::string path =
|
||||
fmt::format("{}vulkan", FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir));
|
||||
QDir dir{QString::fromStdString(path)};
|
||||
dir.removeRecursively();
|
||||
});
|
||||
};
|
||||
|
||||
void GameList::AddCustomDirPopup(QMenu& context_menu, QModelIndex selected) {
|
||||
|
@ -37,7 +37,6 @@ enum class GameListOpenTarget {
|
||||
TEXTURE_DUMP = 4,
|
||||
TEXTURE_LOAD = 5,
|
||||
MODS = 6,
|
||||
SHADER_CACHE = 7
|
||||
};
|
||||
|
||||
class GameList : public QWidget {
|
||||
|
@ -1340,35 +1340,26 @@ void GMainWindow::OnGameListOpenFolder(u64 data_id, GameListOpenTarget target) {
|
||||
path = Service::AM::GetTitlePath(media_type, data_id) + "content/";
|
||||
break;
|
||||
}
|
||||
case GameListOpenTarget::UPDATE_DATA: {
|
||||
case GameListOpenTarget::UPDATE_DATA:
|
||||
open_target = "Update Data";
|
||||
path = Service::AM::GetTitlePath(Service::FS::MediaType::SDMC, data_id + 0xe00000000) +
|
||||
"content/";
|
||||
break;
|
||||
}
|
||||
case GameListOpenTarget::TEXTURE_DUMP: {
|
||||
case GameListOpenTarget::TEXTURE_DUMP:
|
||||
open_target = "Dumped Textures";
|
||||
path = fmt::format("{}textures/{:016X}/",
|
||||
FileUtil::GetUserPath(FileUtil::UserPath::DumpDir), data_id);
|
||||
break;
|
||||
}
|
||||
case GameListOpenTarget::TEXTURE_LOAD: {
|
||||
case GameListOpenTarget::TEXTURE_LOAD:
|
||||
open_target = "Custom Textures";
|
||||
path = fmt::format("{}textures/{:016X}/",
|
||||
FileUtil::GetUserPath(FileUtil::UserPath::LoadDir), data_id);
|
||||
break;
|
||||
}
|
||||
case GameListOpenTarget::MODS: {
|
||||
case GameListOpenTarget::MODS:
|
||||
open_target = "Mods";
|
||||
path = fmt::format("{}mods/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::LoadDir),
|
||||
data_id);
|
||||
break;
|
||||
}
|
||||
case GameListOpenTarget::SHADER_CACHE: {
|
||||
open_target = "Shader Cache";
|
||||
path = FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(Frontend, "Unexpected target {}", static_cast<int>(target));
|
||||
return;
|
||||
|
@ -58,7 +58,6 @@ add_library(common STATIC
|
||||
announce_multiplayer_room.h
|
||||
archives.h
|
||||
assert.h
|
||||
atomic_ops.h
|
||||
detached_tasks.cpp
|
||||
detached_tasks.h
|
||||
bit_field.h
|
||||
@ -129,7 +128,7 @@ if(ARCHITECTURE_x86_64)
|
||||
x64/xbyak_abi.h
|
||||
x64/xbyak_util.h
|
||||
)
|
||||
elseif(ARCHITECTURE_arm64)
|
||||
elseif(ARCHITECTURE_ARM64)
|
||||
target_sources(common
|
||||
PRIVATE
|
||||
aarch64/cpu_detect.cpp
|
||||
|
@ -1,166 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#if _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <cstring>
|
||||
#endif
|
||||
|
||||
namespace Common {
|
||||
|
||||
#if _MSC_VER
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
|
||||
const u8 result =
|
||||
_InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
|
||||
return result == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
|
||||
const u16 result =
|
||||
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
|
||||
return result == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
|
||||
const u32 result =
|
||||
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
|
||||
return result == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
|
||||
const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer),
|
||||
value, expected);
|
||||
return result == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
|
||||
return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
|
||||
value[0],
|
||||
reinterpret_cast<__int64*>(expected.data())) != 0;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
|
||||
u8& actual) {
|
||||
actual =
|
||||
_InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
|
||||
return actual == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
|
||||
u16& actual) {
|
||||
actual =
|
||||
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
|
||||
return actual == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
|
||||
u32& actual) {
|
||||
actual =
|
||||
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
|
||||
return actual == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
|
||||
u64& actual) {
|
||||
actual = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), value,
|
||||
expected);
|
||||
return actual == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
|
||||
u128& actual) {
|
||||
const bool result =
|
||||
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
|
||||
value[0], reinterpret_cast<__int64*>(expected.data())) != 0;
|
||||
actual = expected;
|
||||
return result;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
|
||||
u128 result{};
|
||||
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
|
||||
result[0], reinterpret_cast<__int64*>(result.data()));
|
||||
return result;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
|
||||
return __sync_bool_compare_and_swap(pointer, expected, value);
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
|
||||
return __sync_bool_compare_and_swap(pointer, expected, value);
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
|
||||
return __sync_bool_compare_and_swap(pointer, expected, value);
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
|
||||
return __sync_bool_compare_and_swap(pointer, expected, value);
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
|
||||
unsigned __int128 value_a;
|
||||
unsigned __int128 expected_a;
|
||||
std::memcpy(&value_a, value.data(), sizeof(u128));
|
||||
std::memcpy(&expected_a, expected.data(), sizeof(u128));
|
||||
return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
|
||||
u8& actual) {
|
||||
actual = __sync_val_compare_and_swap(pointer, expected, value);
|
||||
return actual == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
|
||||
u16& actual) {
|
||||
actual = __sync_val_compare_and_swap(pointer, expected, value);
|
||||
return actual == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
|
||||
u32& actual) {
|
||||
actual = __sync_val_compare_and_swap(pointer, expected, value);
|
||||
return actual == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
|
||||
u64& actual) {
|
||||
actual = __sync_val_compare_and_swap(pointer, expected, value);
|
||||
return actual == expected;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
|
||||
u128& actual) {
|
||||
unsigned __int128 value_a;
|
||||
unsigned __int128 expected_a;
|
||||
unsigned __int128 actual_a;
|
||||
std::memcpy(&value_a, value.data(), sizeof(u128));
|
||||
std::memcpy(&expected_a, expected.data(), sizeof(u128));
|
||||
actual_a = __sync_val_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
|
||||
std::memcpy(actual.data(), &actual_a, sizeof(u128));
|
||||
return actual_a == expected_a;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
|
||||
unsigned __int128 zeros_a = 0;
|
||||
unsigned __int128 result_a =
|
||||
__sync_val_compare_and_swap((unsigned __int128*)pointer, zeros_a, zeros_a);
|
||||
|
||||
u128 result;
|
||||
std::memcpy(result.data(), &result_a, sizeof(u128));
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace Common
|
@ -24,7 +24,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
@ -51,9 +50,6 @@ typedef double f64; ///< 64-bit floating point
|
||||
typedef u32 VAddr; ///< Represents a pointer in the userspace virtual address space.
|
||||
typedef u32 PAddr; ///< Represents a pointer in the ARM11 physical address space.
|
||||
|
||||
using u128 = std::array<std::uint64_t, 2>;
|
||||
static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
|
||||
|
||||
// An inheritable class to disallow the copy constructor and operator= functions
|
||||
class NonCopyable {
|
||||
protected:
|
||||
|
@ -776,9 +776,6 @@ const std::string& GetDefaultUserPath(UserPath path) {
|
||||
}
|
||||
|
||||
const void UpdateUserPath(UserPath path, const std::string& filename) {
|
||||
if (filename.empty()) {
|
||||
return;
|
||||
}
|
||||
if (!FileUtil::IsDirectory(filename)) {
|
||||
LOG_ERROR(Common_Filesystem, "Path is not a directory. UserPath: {} filename: {}", path,
|
||||
filename);
|
||||
|
@ -10,8 +10,6 @@
|
||||
|
||||
namespace Common {
|
||||
|
||||
constexpr float PI = 3.14159265f;
|
||||
|
||||
template <class T>
|
||||
struct Rectangle {
|
||||
T left{};
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include <bitset>
|
||||
#include <initializer_list>
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <xbyak.h>
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Common::X64 {
|
||||
|
@ -5,7 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <xbyak.h>
|
||||
#include "common/x64/xbyak_abi.h"
|
||||
|
||||
namespace Common::X64 {
|
||||
|
@ -12,8 +12,6 @@ add_library(core STATIC
|
||||
arm/dyncom/arm_dyncom_thumb.h
|
||||
arm/dyncom/arm_dyncom_trans.cpp
|
||||
arm/dyncom/arm_dyncom_trans.h
|
||||
arm/exclusive_monitor.cpp
|
||||
arm/exclusive_monitor.h
|
||||
arm/skyeye_common/arm_regformat.h
|
||||
arm/skyeye_common/armstate.cpp
|
||||
arm/skyeye_common/armstate.h
|
||||
@ -482,14 +480,12 @@ if (ENABLE_WEB_SERVICE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
|
||||
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_ARM64)
|
||||
target_sources(core PRIVATE
|
||||
arm/dynarmic/arm_dynarmic.cpp
|
||||
arm/dynarmic/arm_dynarmic.h
|
||||
arm/dynarmic/arm_dynarmic_cp15.cpp
|
||||
arm/dynarmic/arm_dynarmic_cp15.h
|
||||
arm/dynarmic/arm_exclusive_monitor.cpp
|
||||
arm/dynarmic/arm_exclusive_monitor.h
|
||||
)
|
||||
target_link_libraries(core PRIVATE dynarmic)
|
||||
endif()
|
||||
|
@ -122,9 +122,6 @@ public:
|
||||
*/
|
||||
virtual void InvalidateCacheRange(u32 start_address, std::size_t length) = 0;
|
||||
|
||||
/// Clears the exclusive monitor's state.
|
||||
virtual void ClearExclusiveState() = 0;
|
||||
|
||||
/// Notify CPU emulation that page tables have changed
|
||||
virtual void SetPageTable(const std::shared_ptr<Memory::PageTable>& page_table) = 0;
|
||||
|
||||
|
@ -3,14 +3,12 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <dynarmic/interface/A32/a32.h>
|
||||
#include <dynarmic/interface/A32/context.h>
|
||||
#include <dynarmic/interface/optimization_flags.h>
|
||||
#include <dynarmic/A32/a32.h>
|
||||
#include <dynarmic/A32/context.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "core/arm/dynarmic/arm_dynarmic.h"
|
||||
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
|
||||
#include "core/arm/dynarmic/arm_exclusive_monitor.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/gdbstub/gdbstub.h"
|
||||
@ -102,23 +100,10 @@ public:
|
||||
memory.Write64(vaddr, value);
|
||||
}
|
||||
|
||||
bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override {
|
||||
return memory.WriteExclusive8(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override {
|
||||
return memory.WriteExclusive16(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override {
|
||||
return memory.WriteExclusive32(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override {
|
||||
return memory.WriteExclusive64(vaddr, value, expected);
|
||||
}
|
||||
|
||||
void InterpreterFallback(VAddr pc, std::size_t num_instructions) override {
|
||||
// Should never happen.
|
||||
UNREACHABLE_MSG("InterpeterFallback reached with pc = 0x{:08x}, code = 0x{:08x}, num = {}",
|
||||
pc, MemoryReadCode(pc).value(), num_instructions);
|
||||
pc, MemoryReadCode(pc), num_instructions);
|
||||
}
|
||||
|
||||
void CallSVC(std::uint32_t swi) override {
|
||||
@ -129,8 +114,6 @@ public:
|
||||
switch (exception) {
|
||||
case Dynarmic::A32::Exception::UndefinedInstruction:
|
||||
case Dynarmic::A32::Exception::UnpredictableInstruction:
|
||||
case Dynarmic::A32::Exception::DecodeError:
|
||||
case Dynarmic::A32::Exception::NoExecuteFault:
|
||||
break;
|
||||
case Dynarmic::A32::Exception::Breakpoint:
|
||||
if (GDBStub::IsConnected()) {
|
||||
@ -147,11 +130,10 @@ public:
|
||||
case Dynarmic::A32::Exception::Yield:
|
||||
case Dynarmic::A32::Exception::PreloadData:
|
||||
case Dynarmic::A32::Exception::PreloadDataWithIntentToWrite:
|
||||
case Dynarmic::A32::Exception::PreloadInstruction:
|
||||
return;
|
||||
}
|
||||
ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", exception,
|
||||
pc, MemoryReadCode(pc).value());
|
||||
pc, MemoryReadCode(pc));
|
||||
}
|
||||
|
||||
void AddTicks(std::uint64_t ticks) override {
|
||||
@ -167,12 +149,10 @@ public:
|
||||
Memory::MemorySystem& memory;
|
||||
};
|
||||
|
||||
ARM_Dynarmic::ARM_Dynarmic(Core::System* system_, Memory::MemorySystem& memory_, u32 core_id_,
|
||||
std::shared_ptr<Core::Timing::Timer> timer_,
|
||||
Core::ExclusiveMonitor& exclusive_monitor_)
|
||||
: ARM_Interface(core_id_, timer_), system(*system_), memory(memory_),
|
||||
cb(std::make_unique<DynarmicUserCallbacks>(*this)),
|
||||
exclusive_monitor{dynamic_cast<Core::DynarmicExclusiveMonitor&>(exclusive_monitor_)} {
|
||||
ARM_Dynarmic::ARM_Dynarmic(Core::System* system, Memory::MemorySystem& memory, u32 id,
|
||||
std::shared_ptr<Core::Timing::Timer> timer)
|
||||
: ARM_Interface(id, timer), system(*system), memory(memory),
|
||||
cb(std::make_unique<DynarmicUserCallbacks>(*this)) {
|
||||
SetPageTable(memory.GetCurrentPageTable());
|
||||
}
|
||||
|
||||
@ -228,7 +208,8 @@ u32 ARM_Dynarmic::GetVFPSystemReg(VFPSystemRegister reg) const {
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown VFP system register: {}", reg);
|
||||
}
|
||||
return UINT_MAX;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ARM_Dynarmic::SetVFPSystemReg(VFPSystemRegister reg, u32 value) {
|
||||
@ -314,10 +295,6 @@ void ARM_Dynarmic::InvalidateCacheRange(u32 start_address, std::size_t length) {
|
||||
jit->InvalidateCacheRange(start_address, length);
|
||||
}
|
||||
|
||||
void ARM_Dynarmic::ClearExclusiveState() {
|
||||
jit->ClearExclusiveState();
|
||||
}
|
||||
|
||||
std::shared_ptr<Memory::PageTable> ARM_Dynarmic::GetPageTable() const {
|
||||
return current_page_table;
|
||||
}
|
||||
@ -355,11 +332,6 @@ std::unique_ptr<Dynarmic::A32::Jit> ARM_Dynarmic::MakeJit() {
|
||||
config.page_table = ¤t_page_table->GetPointerArray();
|
||||
config.coprocessors[15] = std::make_shared<DynarmicCP15>(cp15_state);
|
||||
config.define_unpredictable_behaviour = true;
|
||||
|
||||
// Multi-process state
|
||||
config.processor_id = GetID();
|
||||
config.global_monitor = &exclusive_monitor.monitor;
|
||||
|
||||
return std::make_unique<Dynarmic::A32::Jit>(config);
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <dynarmic/interface/A32/a32.h>
|
||||
#include <dynarmic/A32/a32.h>
|
||||
#include "common/common_types.h"
|
||||
#include "core/arm/arm_interface.h"
|
||||
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
|
||||
@ -17,18 +17,15 @@ class MemorySystem;
|
||||
} // namespace Memory
|
||||
|
||||
namespace Core {
|
||||
class DynarmicExclusiveMonitor;
|
||||
class ExclusiveMonitor;
|
||||
class System;
|
||||
} // namespace Core
|
||||
}
|
||||
|
||||
class DynarmicUserCallbacks;
|
||||
|
||||
class ARM_Dynarmic final : public ARM_Interface {
|
||||
public:
|
||||
explicit ARM_Dynarmic(Core::System* system_, Memory::MemorySystem& memory_, u32 core_id_,
|
||||
std::shared_ptr<Core::Timing::Timer> timer,
|
||||
Core::ExclusiveMonitor& exclusive_monitor_);
|
||||
ARM_Dynarmic(Core::System* system, Memory::MemorySystem& memory, u32 id,
|
||||
std::shared_ptr<Core::Timing::Timer> timer);
|
||||
~ARM_Dynarmic() override;
|
||||
|
||||
void Run() override;
|
||||
@ -55,7 +52,6 @@ public:
|
||||
|
||||
void ClearInstructionCache() override;
|
||||
void InvalidateCacheRange(u32 start_address, std::size_t length) override;
|
||||
void ClearExclusiveState() override;
|
||||
void SetPageTable(const std::shared_ptr<Memory::PageTable>& page_table) override;
|
||||
void PurgeState() override;
|
||||
|
||||
@ -73,7 +69,6 @@ private:
|
||||
|
||||
u32 fpexc = 0;
|
||||
CP15State cp15_state;
|
||||
Core::DynarmicExclusiveMonitor& exclusive_monitor;
|
||||
|
||||
Dynarmic::A32::Jit* jit = nullptr;
|
||||
std::shared_ptr<Memory::PageTable> current_page_table = nullptr;
|
||||
|
@ -5,7 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <dynarmic/interface/A32/coprocessor.h>
|
||||
#include <dynarmic/A32/coprocessor.h>
|
||||
#include "common/common_types.h"
|
||||
|
||||
struct CP15State {
|
||||
|
@ -1,59 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "core/arm/dynarmic/arm_exclusive_monitor.h"
|
||||
#include "core/memory.h"
|
||||
|
||||
namespace Core {
|
||||
|
||||
DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::MemorySystem& memory_,
|
||||
std::size_t core_count_)
|
||||
: monitor{core_count_}, memory{memory_} {}
|
||||
|
||||
DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
|
||||
|
||||
u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) {
|
||||
return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); });
|
||||
}
|
||||
|
||||
u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) {
|
||||
return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); });
|
||||
}
|
||||
|
||||
u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) {
|
||||
return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); });
|
||||
}
|
||||
|
||||
u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) {
|
||||
return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); });
|
||||
}
|
||||
|
||||
void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
|
||||
monitor.ClearProcessor(core_index);
|
||||
}
|
||||
|
||||
bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
|
||||
return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool {
|
||||
return memory.WriteExclusive8(vaddr, value, expected);
|
||||
});
|
||||
}
|
||||
|
||||
bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
|
||||
return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool {
|
||||
return memory.WriteExclusive16(vaddr, value, expected);
|
||||
});
|
||||
}
|
||||
|
||||
bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
|
||||
return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool {
|
||||
return memory.WriteExclusive32(vaddr, value, expected);
|
||||
});
|
||||
}
|
||||
|
||||
bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
|
||||
return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool {
|
||||
return memory.WriteExclusive64(vaddr, value, expected);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Core
|
@ -1,40 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <dynarmic/interface/exclusive_monitor.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/arm/dynarmic/arm_dynarmic.h"
|
||||
#include "core/arm/exclusive_monitor.h"
|
||||
|
||||
namespace Memory {
|
||||
class MemorySystem;
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
|
||||
class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
|
||||
public:
|
||||
explicit DynarmicExclusiveMonitor(Memory::MemorySystem& memory_, std::size_t core_count_);
|
||||
~DynarmicExclusiveMonitor() override;
|
||||
|
||||
u8 ExclusiveRead8(std::size_t core_index, VAddr addr) override;
|
||||
u16 ExclusiveRead16(std::size_t core_index, VAddr addr) override;
|
||||
u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
|
||||
u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
|
||||
void ClearExclusive(std::size_t core_index) override;
|
||||
|
||||
bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
|
||||
bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
|
||||
bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override;
|
||||
bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
|
||||
|
||||
private:
|
||||
friend class ::ARM_Dynarmic;
|
||||
Dynarmic::ExclusiveMonitor monitor;
|
||||
Memory::MemorySystem& memory;
|
||||
};
|
||||
|
||||
} // namespace Core
|
@ -30,7 +30,6 @@ public:
|
||||
|
||||
void ClearInstructionCache() override;
|
||||
void InvalidateCacheRange(u32 start_address, std::size_t length) override;
|
||||
void ClearExclusiveState() override{};
|
||||
|
||||
void SetPC(u32 pc) override;
|
||||
u32 GetPC() const override;
|
||||
|
@ -1,26 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
|
||||
#include "core/arm/dynarmic/arm_exclusive_monitor.h"
|
||||
#endif
|
||||
#include "core/arm/exclusive_monitor.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/settings.h"
|
||||
|
||||
namespace Core {
|
||||
|
||||
ExclusiveMonitor::~ExclusiveMonitor() = default;
|
||||
|
||||
std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::MemorySystem& memory,
|
||||
std::size_t num_cores) {
|
||||
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
|
||||
if (Settings::values.use_cpu_jit) {
|
||||
return std::make_unique<Core::DynarmicExclusiveMonitor>(memory, num_cores);
|
||||
}
|
||||
#endif
|
||||
// TODO(merry): Passthrough exclusive monitor
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace Core
|
@ -1,35 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Memory {
|
||||
class MemorySystem;
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
|
||||
class ExclusiveMonitor {
|
||||
public:
|
||||
virtual ~ExclusiveMonitor();
|
||||
|
||||
virtual u8 ExclusiveRead8(std::size_t core_index, VAddr addr) = 0;
|
||||
virtual u16 ExclusiveRead16(std::size_t core_index, VAddr addr) = 0;
|
||||
virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
|
||||
virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
|
||||
virtual void ClearExclusive(std::size_t core_index) = 0;
|
||||
|
||||
virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
|
||||
virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;
|
||||
virtual bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) = 0;
|
||||
virtual bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) = 0;
|
||||
};
|
||||
|
||||
std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::MemorySystem& memory,
|
||||
std::size_t num_cores);
|
||||
|
||||
} // namespace Core
|
@ -13,8 +13,7 @@
|
||||
#include "common/logging/log.h"
|
||||
#include "common/texture.h"
|
||||
#include "core/arm/arm_interface.h"
|
||||
#include "core/arm/exclusive_monitor.h"
|
||||
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
|
||||
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_ARM64)
|
||||
#include "core/arm/dynarmic/arm_dynarmic.h"
|
||||
#endif
|
||||
#include "core/arm/dyncom/arm_dyncom.h"
|
||||
@ -365,12 +364,11 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, u32 system_mo
|
||||
kernel = std::make_unique<Kernel::KernelSystem>(
|
||||
*memory, *timing, [this] { PrepareReschedule(); }, system_mode, num_cores, n3ds_mode);
|
||||
|
||||
exclusive_monitor = MakeExclusiveMonitor(*memory, num_cores);
|
||||
if (Settings::values.use_cpu_jit) {
|
||||
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
|
||||
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_ARM64)
|
||||
for (u32 i = 0; i < num_cores; ++i) {
|
||||
cpu_cores.push_back(std::make_shared<ARM_Dynarmic>(
|
||||
this, *memory, i, timing->GetTimer(i), *exclusive_monitor));
|
||||
cpu_cores.push_back(
|
||||
std::make_shared<ARM_Dynarmic>(this, *memory, i, timing->GetTimer(i)));
|
||||
}
|
||||
#else
|
||||
for (u32 i = 0; i < num_cores; ++i) {
|
||||
@ -543,7 +541,6 @@ void System::Shutdown(bool is_deserializing) {
|
||||
dsp_core.reset();
|
||||
kernel.reset();
|
||||
cpu_cores.clear();
|
||||
exclusive_monitor.reset();
|
||||
timing.reset();
|
||||
|
||||
if (video_dumper && video_dumper->IsDumping()) {
|
||||
|
@ -61,7 +61,6 @@ class RendererBase;
|
||||
|
||||
namespace Core {
|
||||
|
||||
class ExclusiveMonitor;
|
||||
class Timing;
|
||||
|
||||
class System {
|
||||
@ -362,8 +361,6 @@ private:
|
||||
std::unique_ptr<Kernel::KernelSystem> kernel;
|
||||
std::unique_ptr<Timing> timing;
|
||||
|
||||
std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
|
||||
|
||||
private:
|
||||
static System s_instance;
|
||||
|
||||
|
@ -174,22 +174,6 @@ void Timing::Timer::MoveEvents() {
|
||||
}
|
||||
}
|
||||
|
||||
u32 Timing::Timer::StartAdjust() {
|
||||
ASSERT((adjust_value_curr_handle & 1) == 0); // Should always be even
|
||||
adjust_value_last = std::chrono::steady_clock::now();
|
||||
return ++adjust_value_curr_handle;
|
||||
}
|
||||
|
||||
void Timing::Timer::EndAdjust(u32 start_adjust_handle) {
|
||||
std::chrono::time_point<std::chrono::steady_clock> new_timer = std::chrono::steady_clock::now();
|
||||
ASSERT(new_timer >= adjust_value_last && start_adjust_handle == adjust_value_curr_handle);
|
||||
AddTicks(nsToCycles(static_cast<float>(
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(new_timer - adjust_value_last)
|
||||
.count() /
|
||||
cpu_clock_scale)));
|
||||
++adjust_value_curr_handle;
|
||||
}
|
||||
|
||||
s64 Timing::Timer::GetMaxSliceLength() const {
|
||||
const auto& next_event = event_queue.begin();
|
||||
if (next_event != event_queue.end()) {
|
||||
|
@ -203,11 +203,6 @@ public:
|
||||
|
||||
void MoveEvents();
|
||||
|
||||
// Use these two functions to adjust the guest system tick on host blocking operations, so
|
||||
// that the guest can tell how much time passed during the host call.
|
||||
u32 StartAdjust();
|
||||
void EndAdjust(u32 start_adjust_handle);
|
||||
|
||||
private:
|
||||
friend class Timing;
|
||||
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
|
||||
@ -232,9 +227,6 @@ public:
|
||||
s64 downcount = MAX_SLICE_LENGTH;
|
||||
s64 executed_ticks = 0;
|
||||
u64 idled_cycles = 0;
|
||||
|
||||
std::chrono::time_point<std::chrono::steady_clock> adjust_value_last;
|
||||
u32 adjust_value_curr_handle = 0;
|
||||
// Stores a scaling for the internal clockspeed. Changing this number results in
|
||||
// under/overclocking the guest cpu
|
||||
double cpu_clock_scale = 1.0;
|
||||
|
@ -16,9 +16,7 @@ namespace Frontend {
|
||||
/// WindowInformation
|
||||
enum class WindowSystemType : u8 {
|
||||
Headless,
|
||||
Android,
|
||||
Windows,
|
||||
MacOS,
|
||||
X11,
|
||||
Wayland,
|
||||
};
|
||||
|
@ -849,14 +849,14 @@ static void ReadMemory() {
|
||||
SendReply("E01");
|
||||
}
|
||||
|
||||
if (!Memory::IsValidVirtualAddress(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
|
||||
addr)) {
|
||||
auto& memory = Core::System::GetInstance().Memory();
|
||||
if (!memory.IsValidVirtualAddress(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
|
||||
addr)) {
|
||||
return SendReply("E00");
|
||||
}
|
||||
|
||||
std::vector<u8> data(len);
|
||||
Core::System::GetInstance().Memory().ReadBlock(
|
||||
*Core::System::GetInstance().Kernel().GetCurrentProcess(), addr, data.data(), len);
|
||||
memory.ReadBlock(addr, data.data(), len);
|
||||
|
||||
MemToGdbHex(reply, data.data(), len);
|
||||
reply[len * 2] = '\0';
|
||||
@ -873,16 +873,16 @@ static void WriteMemory() {
|
||||
auto len_pos = std::find(start_offset, command_buffer + command_length, ':');
|
||||
u32 len = HexToInt(start_offset, static_cast<u32>(len_pos - start_offset));
|
||||
|
||||
if (!Memory::IsValidVirtualAddress(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
|
||||
addr)) {
|
||||
auto& memory = Core::System::GetInstance().Memory();
|
||||
if (!memory.IsValidVirtualAddress(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
|
||||
addr)) {
|
||||
return SendReply("E00");
|
||||
}
|
||||
|
||||
std::vector<u8> data(len);
|
||||
|
||||
GdbHexToMem(data.data(), len_pos + 1, len);
|
||||
Core::System::GetInstance().Memory().WriteBlock(
|
||||
*Core::System::GetInstance().Kernel().GetCurrentProcess(), addr, data.data(), len);
|
||||
memory.WriteBlock(addr, data.data(), len);
|
||||
Core::GetRunningCore().ClearInstructionCache();
|
||||
SendReply("OK");
|
||||
}
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/scm_rev.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "core/arm/arm_interface.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
@ -39,7 +38,6 @@
|
||||
#include "core/hle/kernel/wait_object.h"
|
||||
#include "core/hle/lock.h"
|
||||
#include "core/hle/result.h"
|
||||
#include "core/hle/service/service.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
@ -374,7 +372,7 @@ ResultCode SVC::UnmapMemoryBlock(Handle handle, u32 addr) {
|
||||
|
||||
/// Connect to an OS service given the port name, returns the handle to the port to out
|
||||
ResultCode SVC::ConnectToPort(Handle* out_handle, VAddr port_name_address) {
|
||||
if (!Memory::IsValidVirtualAddress(*kernel.GetCurrentProcess(), port_name_address))
|
||||
if (!memory.IsValidVirtualAddress(*kernel.GetCurrentProcess(), port_name_address))
|
||||
return ERR_NOT_FOUND;
|
||||
|
||||
static constexpr std::size_t PortNameMaxLength = 11;
|
||||
@ -541,7 +539,7 @@ ResultCode SVC::WaitSynchronizationN(s32* out, VAddr handles_address, s32 handle
|
||||
bool wait_all, s64 nano_seconds) {
|
||||
Thread* thread = kernel.GetCurrentThreadManager().GetCurrentThread();
|
||||
|
||||
if (!Memory::IsValidVirtualAddress(*kernel.GetCurrentProcess(), handles_address))
|
||||
if (!memory.IsValidVirtualAddress(*kernel.GetCurrentProcess(), handles_address))
|
||||
return ERR_INVALID_POINTER;
|
||||
|
||||
// NOTE: on real hardware, there is no nullptr check for 'out' (tested with firmware 4.4). If
|
||||
@ -687,7 +685,7 @@ static ResultCode ReceiveIPCRequest(Kernel::KernelSystem& kernel, Memory::Memory
|
||||
/// In a single operation, sends a IPC reply and waits for a new request.
|
||||
ResultCode SVC::ReplyAndReceive(s32* index, VAddr handles_address, s32 handle_count,
|
||||
Handle reply_target) {
|
||||
if (!Memory::IsValidVirtualAddress(*kernel.GetCurrentProcess(), handles_address))
|
||||
if (!memory.IsValidVirtualAddress(*kernel.GetCurrentProcess(), handles_address))
|
||||
return ERR_INVALID_POINTER;
|
||||
|
||||
// Check if 'handle_count' is invalid
|
||||
|
@ -337,8 +337,7 @@ ResultVal<std::shared_ptr<Thread>> KernelSystem::CreateThread(
|
||||
}
|
||||
|
||||
// TODO(yuriks): Other checks, returning 0xD9001BEA
|
||||
|
||||
if (!Memory::IsValidVirtualAddress(*owner_process, entry_point)) {
|
||||
if (!memory.IsValidVirtualAddress(*owner_process, entry_point)) {
|
||||
LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:08x}", name, entry_point);
|
||||
// TODO: Verify error
|
||||
return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
|
||||
|
@ -492,10 +492,7 @@ void NWM_UDS::HandleDeauthenticationFrame(const Network::WifiPacket& packet) {
|
||||
auto node_it = std::find_if(node_info.begin(), node_info.end(), [&node](const NodeInfo& info) {
|
||||
return info.network_node_id == node.node_id;
|
||||
});
|
||||
if (node_it == node_info.end()) {
|
||||
LOG_ERROR(Service_NWM, "node_it is last node of node_info");
|
||||
return;
|
||||
}
|
||||
ASSERT(node_it != node_info.end());
|
||||
|
||||
connection_status.node_bitmask &= ~(1 << (node.node_id - 1));
|
||||
connection_status.changed_nodes |= 1 << (node.node_id - 1);
|
||||
@ -1100,6 +1097,9 @@ void NWM_UDS::SendTo(Kernel::HLERequestContext& ctx) {
|
||||
u32 data_size = rp.Pop<u32>();
|
||||
u8 flags = rp.Pop<u8>();
|
||||
|
||||
// There should never be a dest_node_id of 0
|
||||
ASSERT(dest_node_id != 0);
|
||||
|
||||
std::vector<u8> input_buffer = rp.PopStaticBuffer();
|
||||
ASSERT(input_buffer.size() >= data_size);
|
||||
input_buffer.resize(data_size);
|
||||
@ -1114,14 +1114,6 @@ void NWM_UDS::SendTo(Kernel::HLERequestContext& ctx) {
|
||||
return;
|
||||
}
|
||||
|
||||
// There should never be a dest_node_id of 0
|
||||
if (dest_node_id == 0) {
|
||||
rb.Push(ResultCode(ErrorDescription::NotFound, ErrorModule::UDS,
|
||||
ErrorSummary::WrongArgument, ErrorLevel::Status));
|
||||
LOG_ERROR(Service_NWM, "dest_node_id is 0");
|
||||
return;
|
||||
}
|
||||
|
||||
if (dest_node_id == connection_status.network_node_id) {
|
||||
LOG_ERROR(Service_NWM, "tried to send packet to itself");
|
||||
rb.Push(ResultCode(ErrorDescription::NotFound, ErrorModule::UDS,
|
||||
|
@ -212,25 +212,19 @@ struct CTRPollFD {
|
||||
|
||||
/// Translates the resulting events of a Poll operation from 3ds specific to platform
|
||||
/// specific
|
||||
static u32 TranslateToPlatform(Events input_event, bool isOutput) {
|
||||
#if _WIN32
|
||||
constexpr bool isWin = true;
|
||||
#else
|
||||
constexpr bool isWin = false;
|
||||
#endif
|
||||
|
||||
static u32 TranslateToPlatform(Events input_event) {
|
||||
u32 ret = 0;
|
||||
if (input_event.pollin)
|
||||
ret |= POLLIN;
|
||||
if (input_event.pollpri && !isWin)
|
||||
if (input_event.pollpri)
|
||||
ret |= POLLPRI;
|
||||
if (input_event.pollhup && (!isWin || isOutput))
|
||||
if (input_event.pollhup)
|
||||
ret |= POLLHUP;
|
||||
if (input_event.pollerr && (!isWin || isOutput))
|
||||
if (input_event.pollerr)
|
||||
ret |= POLLERR;
|
||||
if (input_event.pollout)
|
||||
ret |= POLLOUT;
|
||||
if (input_event.pollnval && (isWin && isOutput))
|
||||
if (input_event.pollnval)
|
||||
ret |= POLLNVAL;
|
||||
return ret;
|
||||
}
|
||||
@ -239,26 +233,20 @@ struct CTRPollFD {
|
||||
Events revents; ///< Events received (output)
|
||||
|
||||
/// Converts a platform-specific pollfd to a 3ds specific structure
|
||||
static CTRPollFD FromPlatform(SOC::SOC_U& socu, pollfd const& fd) {
|
||||
static CTRPollFD FromPlatform(pollfd const& fd) {
|
||||
CTRPollFD result;
|
||||
result.events.hex = Events::TranslateTo3DS(fd.events).hex;
|
||||
result.revents.hex = Events::TranslateTo3DS(fd.revents).hex;
|
||||
for (auto iter = socu.open_sockets.begin(); iter != socu.open_sockets.end(); ++iter) {
|
||||
if (iter->second.socket_fd == fd.fd) {
|
||||
result.fd = iter->first;
|
||||
break;
|
||||
}
|
||||
}
|
||||
result.fd = static_cast<u32>(fd.fd);
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Converts a 3ds specific pollfd to a platform-specific structure
|
||||
static pollfd ToPlatform(SOC::SOC_U& socu, CTRPollFD const& fd) {
|
||||
static pollfd ToPlatform(CTRPollFD const& fd) {
|
||||
pollfd result;
|
||||
result.events = Events::TranslateToPlatform(fd.events, false);
|
||||
result.revents = Events::TranslateToPlatform(fd.revents, true);
|
||||
auto iter = socu.open_sockets.find(fd.fd);
|
||||
result.fd = (iter != socu.open_sockets.end()) ? iter->second.socket_fd : 0;
|
||||
result.events = Events::TranslateToPlatform(fd.events);
|
||||
result.revents = Events::TranslateToPlatform(fd.revents);
|
||||
result.fd = fd.fd;
|
||||
return result;
|
||||
}
|
||||
};
|
||||
@ -354,14 +342,6 @@ struct CTRAddrInfo {
|
||||
|
||||
static_assert(sizeof(CTRAddrInfo) == 0x130, "Size of CTRAddrInfo is not correct");
|
||||
|
||||
void SOC_U::PreTimerAdjust() {
|
||||
timer_adjust_handle = Core::System::GetInstance().GetRunningCore().GetTimer().StartAdjust();
|
||||
}
|
||||
|
||||
void SOC_U::PostTimerAdjust() {
|
||||
Core::System::GetInstance().GetRunningCore().GetTimer().EndAdjust(timer_adjust_handle);
|
||||
}
|
||||
|
||||
void SOC_U::CleanupSockets() {
|
||||
for (auto sock : open_sockets)
|
||||
closesocket(sock.second.socket_fd);
|
||||
@ -396,28 +376,21 @@ void SOC_U::Socket(Kernel::HLERequestContext& ctx) {
|
||||
return;
|
||||
}
|
||||
|
||||
u64 ret = static_cast<u64>(::socket(domain, type, protocol));
|
||||
u32 socketHandle = GetNextSocketID();
|
||||
u32 ret = static_cast<u32>(::socket(domain, type, protocol));
|
||||
|
||||
if ((s64)ret != SOCKET_ERROR_VALUE)
|
||||
open_sockets[socketHandle] = {static_cast<decltype(SocketHolder::socket_fd)>(ret), true};
|
||||
if ((s32)ret != SOCKET_ERROR_VALUE)
|
||||
open_sockets[ret] = {ret, true};
|
||||
|
||||
if ((s64)ret == SOCKET_ERROR_VALUE)
|
||||
if ((s32)ret == SOCKET_ERROR_VALUE)
|
||||
ret = TranslateError(GET_ERRNO);
|
||||
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push(socketHandle);
|
||||
rb.Push(ret);
|
||||
}
|
||||
|
||||
void SOC_U::Bind(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x05, 2, 4);
|
||||
u32 socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
u32 len = rp.Pop<u32>();
|
||||
rp.PopPID();
|
||||
auto sock_addr_buf = rp.PopStaticBuffer();
|
||||
@ -427,7 +400,7 @@ void SOC_U::Bind(Kernel::HLERequestContext& ctx) {
|
||||
|
||||
sockaddr sock_addr = CTRSockAddr::ToPlatform(ctr_sock_addr);
|
||||
|
||||
s32 ret = ::bind(fd_info->second.socket_fd, &sock_addr, std::max<u32>(sizeof(sock_addr), len));
|
||||
s32 ret = ::bind(socket_handle, &sock_addr, std::max<u32>(sizeof(sock_addr), len));
|
||||
|
||||
if (ret != 0)
|
||||
ret = TranslateError(GET_ERRNO);
|
||||
@ -440,12 +413,6 @@ void SOC_U::Bind(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x13, 3, 2);
|
||||
u32 socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
u32 ctr_cmd = rp.Pop<u32>();
|
||||
u32 ctr_arg = rp.Pop<u32>();
|
||||
rp.PopPID();
|
||||
@ -460,10 +427,11 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
|
||||
if (ctr_cmd == 3) { // F_GETFL
|
||||
#ifdef _WIN32
|
||||
posix_ret = 0;
|
||||
if (fd_info->second.blocking == false)
|
||||
auto iter = open_sockets.find(socket_handle);
|
||||
if (iter != open_sockets.end() && iter->second.blocking == false)
|
||||
posix_ret |= 4; // O_NONBLOCK
|
||||
#else
|
||||
int ret = ::fcntl(fd_info->second.socket_fd, F_GETFL, 0);
|
||||
int ret = ::fcntl(socket_handle, F_GETFL, 0);
|
||||
if (ret == SOCKET_ERROR_VALUE) {
|
||||
posix_ret = TranslateError(GET_ERRNO);
|
||||
return;
|
||||
@ -475,7 +443,7 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
|
||||
} else if (ctr_cmd == 4) { // F_SETFL
|
||||
#ifdef _WIN32
|
||||
unsigned long tmp = (ctr_arg & 4 /* O_NONBLOCK */) ? 1 : 0;
|
||||
int ret = ioctlsocket(fd_info->second.socket_fd, FIONBIO, &tmp);
|
||||
int ret = ioctlsocket(socket_handle, FIONBIO, &tmp);
|
||||
if (ret == SOCKET_ERROR_VALUE) {
|
||||
posix_ret = TranslateError(GET_ERRNO);
|
||||
return;
|
||||
@ -484,7 +452,7 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
|
||||
if (iter != open_sockets.end())
|
||||
iter->second.blocking = (tmp == 0);
|
||||
#else
|
||||
int flags = ::fcntl(fd_info->second.socket_fd, F_GETFL, 0);
|
||||
int flags = ::fcntl(socket_handle, F_GETFL, 0);
|
||||
if (flags == SOCKET_ERROR_VALUE) {
|
||||
posix_ret = TranslateError(GET_ERRNO);
|
||||
return;
|
||||
@ -494,7 +462,7 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
|
||||
if (ctr_arg & 4) // O_NONBLOCK
|
||||
flags |= O_NONBLOCK;
|
||||
|
||||
int ret = ::fcntl(fd_info->second.socket_fd, F_SETFL, flags);
|
||||
int ret = ::fcntl(socket_handle, F_SETFL, flags);
|
||||
if (ret == SOCKET_ERROR_VALUE) {
|
||||
posix_ret = TranslateError(GET_ERRNO);
|
||||
return;
|
||||
@ -510,16 +478,10 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::Listen(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x03, 2, 2);
|
||||
u32 socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
u32 backlog = rp.Pop<u32>();
|
||||
rp.PopPID();
|
||||
|
||||
s32 ret = ::listen(fd_info->second.socket_fd, backlog);
|
||||
s32 ret = ::listen(socket_handle, backlog);
|
||||
if (ret != 0)
|
||||
ret = TranslateError(GET_ERRNO);
|
||||
|
||||
@ -534,19 +496,11 @@ void SOC_U::Accept(Kernel::HLERequestContext& ctx) {
|
||||
// performing nonblocking operations and spinlock until the data is available
|
||||
IPC::RequestParser rp(ctx, 0x04, 2, 2);
|
||||
const auto socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
[[maybe_unused]] const auto max_addr_len = static_cast<socklen_t>(rp.Pop<u32>());
|
||||
rp.PopPID();
|
||||
sockaddr addr;
|
||||
socklen_t addr_len = sizeof(addr);
|
||||
PreTimerAdjust();
|
||||
u32 ret = static_cast<u32>(::accept(fd_info->second.socket_fd, &addr, &addr_len));
|
||||
PostTimerAdjust();
|
||||
u32 ret = static_cast<u32>(::accept(socket_handle, &addr, &addr_len));
|
||||
|
||||
if (static_cast<s32>(ret) != SOCKET_ERROR_VALUE) {
|
||||
open_sockets[ret] = {ret, true};
|
||||
@ -589,22 +543,13 @@ void SOC_U::GetHostId(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::Close(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x0B, 1, 2);
|
||||
u32 socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
rp.PopPID();
|
||||
|
||||
s32 ret = 0;
|
||||
|
||||
PreTimerAdjust();
|
||||
ret = closesocket(fd_info->second.socket_fd);
|
||||
PostTimerAdjust();
|
||||
|
||||
open_sockets.erase(socket_handle);
|
||||
|
||||
ret = closesocket(socket_handle);
|
||||
|
||||
if (ret != 0)
|
||||
ret = TranslateError(GET_ERRNO);
|
||||
|
||||
@ -616,12 +561,6 @@ void SOC_U::Close(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::SendTo(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x0A, 4, 6);
|
||||
u32 socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
u32 len = rp.Pop<u32>();
|
||||
u32 flags = rp.Pop<u32>();
|
||||
u32 addr_len = rp.Pop<u32>();
|
||||
@ -630,18 +569,16 @@ void SOC_U::SendTo(Kernel::HLERequestContext& ctx) {
|
||||
auto dest_addr_buff = rp.PopStaticBuffer();
|
||||
|
||||
s32 ret = -1;
|
||||
PreTimerAdjust();
|
||||
if (addr_len > 0) {
|
||||
CTRSockAddr ctr_dest_addr;
|
||||
std::memcpy(&ctr_dest_addr, dest_addr_buff.data(), sizeof(ctr_dest_addr));
|
||||
sockaddr dest_addr = CTRSockAddr::ToPlatform(ctr_dest_addr);
|
||||
ret = ::sendto(fd_info->second.socket_fd, reinterpret_cast<const char*>(input_buff.data()),
|
||||
len, flags, &dest_addr, sizeof(dest_addr));
|
||||
ret = ::sendto(socket_handle, reinterpret_cast<const char*>(input_buff.data()), len, flags,
|
||||
&dest_addr, sizeof(dest_addr));
|
||||
} else {
|
||||
ret = ::sendto(fd_info->second.socket_fd, reinterpret_cast<const char*>(input_buff.data()),
|
||||
len, flags, nullptr, 0);
|
||||
ret = ::sendto(socket_handle, reinterpret_cast<const char*>(input_buff.data()), len, flags,
|
||||
nullptr, 0);
|
||||
}
|
||||
PostTimerAdjust();
|
||||
|
||||
if (ret == SOCKET_ERROR_VALUE)
|
||||
ret = TranslateError(GET_ERRNO);
|
||||
@ -654,12 +591,6 @@ void SOC_U::SendTo(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::RecvFromOther(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x7, 4, 4);
|
||||
u32 socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
u32 len = rp.Pop<u32>();
|
||||
u32 flags = rp.Pop<u32>();
|
||||
u32 addr_len = rp.Pop<u32>();
|
||||
@ -673,20 +604,19 @@ void SOC_U::RecvFromOther(Kernel::HLERequestContext& ctx) {
|
||||
socklen_t src_addr_len = sizeof(src_addr);
|
||||
|
||||
s32 ret = -1;
|
||||
PreTimerAdjust();
|
||||
if (addr_len > 0) {
|
||||
ret = ::recvfrom(fd_info->second.socket_fd, reinterpret_cast<char*>(output_buff.data()),
|
||||
len, flags, &src_addr, &src_addr_len);
|
||||
ret = ::recvfrom(socket_handle, reinterpret_cast<char*>(output_buff.data()), len, flags,
|
||||
&src_addr, &src_addr_len);
|
||||
if (ret >= 0 && src_addr_len > 0) {
|
||||
ctr_src_addr = CTRSockAddr::FromPlatform(src_addr);
|
||||
std::memcpy(addr_buff.data(), &ctr_src_addr, sizeof(ctr_src_addr));
|
||||
}
|
||||
} else {
|
||||
ret = ::recvfrom(fd_info->second.socket_fd, reinterpret_cast<char*>(output_buff.data()),
|
||||
len, flags, NULL, 0);
|
||||
ret = ::recvfrom(socket_handle, reinterpret_cast<char*>(output_buff.data()), len, flags,
|
||||
NULL, 0);
|
||||
addr_buff.resize(0);
|
||||
}
|
||||
PostTimerAdjust();
|
||||
|
||||
if (ret == SOCKET_ERROR_VALUE) {
|
||||
ret = TranslateError(GET_ERRNO);
|
||||
} else {
|
||||
@ -706,12 +636,6 @@ void SOC_U::RecvFrom(Kernel::HLERequestContext& ctx) {
|
||||
// performing nonblocking operations and spinlock until the data is available
|
||||
IPC::RequestParser rp(ctx, 0x08, 4, 2);
|
||||
u32 socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
u32 len = rp.Pop<u32>();
|
||||
u32 flags = rp.Pop<u32>();
|
||||
u32 addr_len = rp.Pop<u32>();
|
||||
@ -724,21 +648,19 @@ void SOC_U::RecvFrom(Kernel::HLERequestContext& ctx) {
|
||||
socklen_t src_addr_len = sizeof(src_addr);
|
||||
|
||||
s32 ret = -1;
|
||||
PreTimerAdjust();
|
||||
if (addr_len > 0) {
|
||||
// Only get src adr if input adr available
|
||||
ret = ::recvfrom(fd_info->second.socket_fd, reinterpret_cast<char*>(output_buff.data()),
|
||||
len, flags, &src_addr, &src_addr_len);
|
||||
ret = ::recvfrom(socket_handle, reinterpret_cast<char*>(output_buff.data()), len, flags,
|
||||
&src_addr, &src_addr_len);
|
||||
if (ret >= 0 && src_addr_len > 0) {
|
||||
ctr_src_addr = CTRSockAddr::FromPlatform(src_addr);
|
||||
std::memcpy(addr_buff.data(), &ctr_src_addr, sizeof(ctr_src_addr));
|
||||
}
|
||||
} else {
|
||||
ret = ::recvfrom(fd_info->second.socket_fd, reinterpret_cast<char*>(output_buff.data()),
|
||||
len, flags, NULL, 0);
|
||||
ret = ::recvfrom(socket_handle, reinterpret_cast<char*>(output_buff.data()), len, flags,
|
||||
NULL, 0);
|
||||
addr_buff.resize(0);
|
||||
}
|
||||
PostTimerAdjust();
|
||||
|
||||
s32 total_received = ret;
|
||||
if (ret == SOCKET_ERROR_VALUE) {
|
||||
@ -769,32 +691,21 @@ void SOC_U::Poll(Kernel::HLERequestContext& ctx) {
|
||||
|
||||
// The 3ds_pollfd and the pollfd structures may be different (Windows/Linux have different
|
||||
// sizes)
|
||||
// so we have to copy the data in order
|
||||
// so we have to copy the data
|
||||
std::vector<pollfd> platform_pollfd(nfds);
|
||||
for (u32 i = 0; i < nfds; i++) {
|
||||
platform_pollfd[i] = CTRPollFD::ToPlatform(*this, ctr_fds[i]);
|
||||
}
|
||||
std::transform(ctr_fds.begin(), ctr_fds.end(), platform_pollfd.begin(), CTRPollFD::ToPlatform);
|
||||
|
||||
PreTimerAdjust();
|
||||
s32 ret = ::poll(platform_pollfd.data(), nfds, timeout);
|
||||
PostTimerAdjust();
|
||||
|
||||
// Now update the output 3ds_pollfd structure
|
||||
for (u32 i = 0; i < nfds; i++) {
|
||||
ctr_fds[i] = CTRPollFD::FromPlatform(*this, platform_pollfd[i]);
|
||||
}
|
||||
// Now update the output pollfd structure
|
||||
std::transform(platform_pollfd.begin(), platform_pollfd.end(), ctr_fds.begin(),
|
||||
CTRPollFD::FromPlatform);
|
||||
|
||||
std::vector<u8> output_fds(nfds * sizeof(CTRPollFD));
|
||||
std::memcpy(output_fds.data(), ctr_fds.data(), nfds * sizeof(CTRPollFD));
|
||||
|
||||
if (ret == SOCKET_ERROR_VALUE) {
|
||||
int err = GET_ERRNO;
|
||||
LOG_ERROR(Service_SOC, "Socket error: {}", err);
|
||||
|
||||
if (ret == SOCKET_ERROR_VALUE)
|
||||
ret = TranslateError(GET_ERRNO);
|
||||
}
|
||||
|
||||
size_t test = platform_pollfd.size();
|
||||
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(2, 2);
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
@ -805,18 +716,12 @@ void SOC_U::Poll(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::GetSockName(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x17, 2, 2);
|
||||
const auto socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
[[maybe_unused]] const auto max_addr_len = rp.Pop<u32>();
|
||||
rp.PopPID();
|
||||
|
||||
sockaddr dest_addr;
|
||||
socklen_t dest_addr_len = sizeof(dest_addr);
|
||||
s32 ret = ::getsockname(fd_info->second.socket_fd, &dest_addr, &dest_addr_len);
|
||||
s32 ret = ::getsockname(socket_handle, &dest_addr, &dest_addr_len);
|
||||
|
||||
CTRSockAddr ctr_dest_addr = CTRSockAddr::FromPlatform(dest_addr);
|
||||
std::vector<u8> dest_addr_buff(sizeof(ctr_dest_addr));
|
||||
@ -834,16 +739,10 @@ void SOC_U::GetSockName(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::Shutdown(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x0C, 2, 2);
|
||||
u32 socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
s32 how = rp.Pop<s32>();
|
||||
rp.PopPID();
|
||||
|
||||
s32 ret = ::shutdown(fd_info->second.socket_fd, how);
|
||||
s32 ret = ::shutdown(socket_handle, how);
|
||||
if (ret != 0)
|
||||
ret = TranslateError(GET_ERRNO);
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(2, 0);
|
||||
@ -854,18 +753,12 @@ void SOC_U::Shutdown(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::GetPeerName(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x18, 2, 2);
|
||||
const auto socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
[[maybe_unused]] const auto max_addr_len = rp.Pop<u32>();
|
||||
rp.PopPID();
|
||||
|
||||
sockaddr dest_addr;
|
||||
socklen_t dest_addr_len = sizeof(dest_addr);
|
||||
const int ret = ::getpeername(fd_info->second.socket_fd, &dest_addr, &dest_addr_len);
|
||||
const int ret = ::getpeername(socket_handle, &dest_addr, &dest_addr_len);
|
||||
|
||||
CTRSockAddr ctr_dest_addr = CTRSockAddr::FromPlatform(dest_addr);
|
||||
std::vector<u8> dest_addr_buff(sizeof(ctr_dest_addr));
|
||||
@ -888,12 +781,6 @@ void SOC_U::Connect(Kernel::HLERequestContext& ctx) {
|
||||
// performing nonblocking operations and spinlock until the data is available
|
||||
IPC::RequestParser rp(ctx, 0x06, 2, 4);
|
||||
const auto socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
[[maybe_unused]] const auto input_addr_len = rp.Pop<u32>();
|
||||
rp.PopPID();
|
||||
auto input_addr_buf = rp.PopStaticBuffer();
|
||||
@ -902,9 +789,7 @@ void SOC_U::Connect(Kernel::HLERequestContext& ctx) {
|
||||
std::memcpy(&ctr_input_addr, input_addr_buf.data(), sizeof(ctr_input_addr));
|
||||
|
||||
sockaddr input_addr = CTRSockAddr::ToPlatform(ctr_input_addr);
|
||||
PreTimerAdjust();
|
||||
s32 ret = ::connect(fd_info->second.socket_fd, &input_addr, sizeof(input_addr));
|
||||
PostTimerAdjust();
|
||||
s32 ret = ::connect(socket_handle, &input_addr, sizeof(input_addr));
|
||||
if (ret != 0)
|
||||
ret = TranslateError(GET_ERRNO);
|
||||
|
||||
@ -936,12 +821,6 @@ void SOC_U::ShutdownSockets(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::GetSockOpt(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x11, 4, 2);
|
||||
u32 socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
u32 level = rp.Pop<u32>();
|
||||
s32 optname = rp.Pop<s32>();
|
||||
socklen_t optlen = static_cast<socklen_t>(rp.Pop<u32>());
|
||||
@ -959,7 +838,7 @@ void SOC_U::GetSockOpt(Kernel::HLERequestContext& ctx) {
|
||||
#endif
|
||||
} else {
|
||||
char* optval_data = reinterpret_cast<char*>(optval.data());
|
||||
err = ::getsockopt(fd_info->second.socket_fd, level, optname, optval_data, &optlen);
|
||||
err = ::getsockopt(socket_handle, level, optname, optval_data, &optlen);
|
||||
if (err == SOCKET_ERROR_VALUE) {
|
||||
err = TranslateError(GET_ERRNO);
|
||||
}
|
||||
@ -975,12 +854,6 @@ void SOC_U::GetSockOpt(Kernel::HLERequestContext& ctx) {
|
||||
void SOC_U::SetSockOpt(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp(ctx, 0x12, 4, 4);
|
||||
const auto socket_handle = rp.Pop<u32>();
|
||||
auto fd_info = open_sockets.find(socket_handle);
|
||||
if (fd_info == open_sockets.end()) {
|
||||
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
|
||||
rb.Push(ERR_INVALID_HANDLE);
|
||||
return;
|
||||
}
|
||||
const auto level = rp.Pop<u32>();
|
||||
const auto optname = rp.Pop<s32>();
|
||||
[[maybe_unused]] const auto optlen = static_cast<socklen_t>(rp.Pop<u32>());
|
||||
@ -997,7 +870,7 @@ void SOC_U::SetSockOpt(Kernel::HLERequestContext& ctx) {
|
||||
#endif
|
||||
} else {
|
||||
const char* optval_data = reinterpret_cast<const char*>(optval.data());
|
||||
err = static_cast<u32>(::setsockopt(fd_info->second.socket_fd, level, optname, optval_data,
|
||||
err = static_cast<u32>(::setsockopt(socket_handle, level, optname, optval_data,
|
||||
static_cast<socklen_t>(optval.size())));
|
||||
if (err == SOCKET_ERROR_VALUE) {
|
||||
err = TranslateError(GET_ERRNO);
|
||||
|
@ -6,7 +6,6 @@
|
||||
|
||||
#include <unordered_map>
|
||||
#include <boost/serialization/unordered_map.hpp>
|
||||
#include "core/hle/result.h"
|
||||
#include "core/hle/service/service.h"
|
||||
|
||||
namespace Core {
|
||||
@ -17,13 +16,7 @@ namespace Service::SOC {
|
||||
|
||||
/// Holds information about a particular socket
|
||||
struct SocketHolder {
|
||||
#ifdef _WIN32
|
||||
using SOCKET = unsigned long long;
|
||||
SOCKET socket_fd; ///< The socket descriptor
|
||||
#else
|
||||
u32 socket_fd; ///< The socket descriptor
|
||||
#endif // _WIN32
|
||||
|
||||
bool blocking; ///< Whether the socket is blocking or not, it is only read on Windows.
|
||||
|
||||
private:
|
||||
@ -41,10 +34,6 @@ public:
|
||||
~SOC_U();
|
||||
|
||||
private:
|
||||
static constexpr ResultCode ERR_INVALID_HANDLE =
|
||||
ResultCode(ErrorDescription::InvalidHandle, ErrorModule::SOC, ErrorSummary::InvalidArgument,
|
||||
ErrorLevel::Permanent);
|
||||
|
||||
void Socket(Kernel::HLERequestContext& ctx);
|
||||
void Bind(Kernel::HLERequestContext& ctx);
|
||||
void Fcntl(Kernel::HLERequestContext& ctx);
|
||||
@ -70,29 +59,16 @@ private:
|
||||
void GetAddrInfoImpl(Kernel::HLERequestContext& ctx);
|
||||
void GetNameInfoImpl(Kernel::HLERequestContext& ctx);
|
||||
|
||||
// Socked ids
|
||||
u32 next_socket_id = 3;
|
||||
u32 GetNextSocketID() {
|
||||
return next_socket_id++;
|
||||
}
|
||||
|
||||
// System timer adjust
|
||||
u32 timer_adjust_handle;
|
||||
void PreTimerAdjust();
|
||||
void PostTimerAdjust();
|
||||
|
||||
/// Close all open sockets
|
||||
void CleanupSockets();
|
||||
|
||||
/// Holds info about the currently open sockets
|
||||
friend struct CTRPollFD;
|
||||
std::unordered_map<u32, SocketHolder> open_sockets;
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive& ar, const unsigned int) {
|
||||
ar& boost::serialization::base_object<Kernel::SessionRequestHandler>(*this);
|
||||
ar& open_sockets;
|
||||
ar& timer_adjust_handle;
|
||||
}
|
||||
friend class boost::serialization::access;
|
||||
};
|
||||
|
@ -2,23 +2,19 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <boost/serialization/array.hpp>
|
||||
#include <boost/serialization/binary_object.hpp>
|
||||
#include "audio_core/dsp_interface.h"
|
||||
#include "common/archives.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/atomic_ops.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/swap.h"
|
||||
#include "core/arm/arm_interface.h"
|
||||
#include "core/core.h"
|
||||
#include "core/global.h"
|
||||
#include "core/hle/kernel/memory.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/hle/lock.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
@ -146,6 +142,144 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void WalkBlock(const Kernel::Process& process, const VAddr src_addr, const std::size_t size,
|
||||
auto on_unmapped, auto on_memory, auto on_special, auto on_rasterizer,
|
||||
auto increment) {
|
||||
auto& page_table = *process.vm_manager.page_table;
|
||||
|
||||
std::size_t remaining_size = size;
|
||||
std::size_t page_index = src_addr >> CITRA_PAGE_BITS;
|
||||
std::size_t page_offset = src_addr & CITRA_PAGE_MASK;
|
||||
|
||||
while (remaining_size > 0) {
|
||||
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
|
||||
const VAddr current_vaddr =
|
||||
static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
|
||||
|
||||
switch (page_table.attributes[page_index]) {
|
||||
case PageType::Unmapped: {
|
||||
on_unmapped(copy_amount, current_vaddr);
|
||||
break;
|
||||
}
|
||||
case PageType::Memory: {
|
||||
DEBUG_ASSERT(page_table.pointers[page_index]);
|
||||
u8* const src_ptr = page_table.pointers[page_index] + page_offset;
|
||||
on_memory(copy_amount, src_ptr);
|
||||
break;
|
||||
}
|
||||
case PageType::Special: {
|
||||
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
|
||||
DEBUG_ASSERT(handler);
|
||||
on_special(handler, copy_amount, current_vaddr);
|
||||
break;
|
||||
}
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
u8* const rasterizer_ptr = GetPointerForRasterizerCache(current_vaddr);
|
||||
on_rasterizer(current_vaddr, copy_amount, rasterizer_ptr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
page_index++;
|
||||
page_offset = 0;
|
||||
increment(copy_amount);
|
||||
remaining_size -= copy_amount;
|
||||
}
|
||||
}
|
||||
|
||||
template <bool UNSAFE>
|
||||
void ReadBlockImpl(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
|
||||
const std::size_t size) {
|
||||
WalkBlock(
|
||||
process, src_addr, size,
|
||||
[src_addr, size, &dest_buffer](const std::size_t copy_amount,
|
||||
const VAddr current_vaddr) {
|
||||
LOG_ERROR(HW_Memory,
|
||||
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
|
||||
current_vaddr, src_addr, size);
|
||||
std::memset(dest_buffer, 0, copy_amount);
|
||||
},
|
||||
[&dest_buffer](const std::size_t copy_amount, const u8* const src_ptr) {
|
||||
std::memcpy(dest_buffer, src_ptr, copy_amount);
|
||||
},
|
||||
[&dest_buffer](MMIORegionPointer& handler, const std::size_t copy_amount,
|
||||
const VAddr current_vaddr) {
|
||||
handler->ReadBlock(current_vaddr, dest_buffer, copy_amount);
|
||||
},
|
||||
[&dest_buffer](const VAddr current_vaddr, const std::size_t copy_amount,
|
||||
const u8* const rasterizer_ptr) {
|
||||
if constexpr (!UNSAFE) {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::Flush);
|
||||
}
|
||||
std::memcpy(dest_buffer, rasterizer_ptr, copy_amount);
|
||||
},
|
||||
[&dest_buffer](const std::size_t copy_amount) {
|
||||
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
||||
});
|
||||
}
|
||||
|
||||
template <bool UNSAFE>
|
||||
void WriteBlockImpl(const Kernel::Process& process, const VAddr dest_addr,
|
||||
const void* src_buffer, const std::size_t size) {
|
||||
WalkBlock(
|
||||
process, dest_addr, size,
|
||||
[dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
|
||||
LOG_ERROR(HW_Memory,
|
||||
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
|
||||
current_vaddr, dest_addr, size);
|
||||
},
|
||||
[&src_buffer](const std::size_t copy_amount, u8* const dest_ptr) {
|
||||
std::memcpy(dest_ptr, src_buffer, copy_amount);
|
||||
},
|
||||
[&src_buffer](MMIORegionPointer& handler, const std::size_t copy_amount,
|
||||
const VAddr current_vaddr) {
|
||||
handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
|
||||
},
|
||||
[&src_buffer](const VAddr current_vaddr, const std::size_t copy_amount,
|
||||
u8* const host_ptr) {
|
||||
if constexpr (!UNSAFE) {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::Invalidate);
|
||||
}
|
||||
std::memcpy(host_ptr, src_buffer, copy_amount);
|
||||
},
|
||||
[&src_buffer](const std::size_t copy_amount) {
|
||||
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
|
||||
});
|
||||
}
|
||||
|
||||
MemoryRef GetPointerForRasterizerCache(VAddr addr) const {
|
||||
if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
|
||||
return {fcram_mem, addr - LINEAR_HEAP_VADDR};
|
||||
}
|
||||
if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) {
|
||||
return {fcram_mem, addr - NEW_LINEAR_HEAP_VADDR};
|
||||
}
|
||||
if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
|
||||
return {vram_mem, addr - VRAM_VADDR};
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
return MemoryRef{};
|
||||
}
|
||||
|
||||
/**
|
||||
* This function should only be called for virtual addreses with attribute `PageType::Special`.
|
||||
*/
|
||||
MMIORegionPointer GetMMIOHandler(const PageTable& page_table, VAddr vaddr) {
|
||||
for (const auto& region : page_table.special_regions) {
|
||||
if (vaddr >= region.base && vaddr < (region.base + region.size)) {
|
||||
return region.handler;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_MSG(false, "Mapped IO page without a handler @ {:08X}", vaddr);
|
||||
return nullptr; // Should never happen
|
||||
}
|
||||
|
||||
private:
|
||||
friend class boost::serialization::access;
|
||||
template <class Archive>
|
||||
@ -270,16 +404,7 @@ void MemorySystem::UnmapRegion(PageTable& page_table, VAddr base, u32 size) {
|
||||
}
|
||||
|
||||
MemoryRef MemorySystem::GetPointerForRasterizerCache(VAddr addr) const {
|
||||
if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
|
||||
return {impl->fcram_mem, addr - LINEAR_HEAP_VADDR};
|
||||
}
|
||||
if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) {
|
||||
return {impl->fcram_mem, addr - NEW_LINEAR_HEAP_VADDR};
|
||||
}
|
||||
if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
|
||||
return {impl->vram_mem, addr - VRAM_VADDR};
|
||||
}
|
||||
UNREACHABLE();
|
||||
return impl->GetPointerForRasterizerCache(addr);
|
||||
}
|
||||
|
||||
void MemorySystem::RegisterPageTable(std::shared_ptr<PageTable> page_table) {
|
||||
@ -293,19 +418,6 @@ void MemorySystem::UnregisterPageTable(std::shared_ptr<PageTable> page_table) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This function should only be called for virtual addreses with attribute `PageType::Special`.
|
||||
*/
|
||||
static MMIORegionPointer GetMMIOHandler(const PageTable& page_table, VAddr vaddr) {
|
||||
for (const auto& region : page_table.special_regions) {
|
||||
if (vaddr >= region.base && vaddr < (region.base + region.size)) {
|
||||
return region.handler;
|
||||
}
|
||||
}
|
||||
ASSERT_MSG(false, "Mapped IO page without a handler @ {:08X}", vaddr);
|
||||
return nullptr; // Should never happen
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T ReadMMIO(MMIORegionPointer mmio_handler, VAddr addr);
|
||||
|
||||
@ -336,10 +448,12 @@ T MemorySystem::Read(const VAddr vaddr) {
|
||||
return value;
|
||||
}
|
||||
case PageType::Special:
|
||||
return ReadMMIO<T>(GetMMIOHandler(*impl->current_page_table, vaddr), vaddr);
|
||||
return ReadMMIO<T>(impl->GetMMIOHandler(*impl->current_page_table, vaddr), vaddr);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return T{};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -369,48 +483,14 @@ void MemorySystem::Write(const VAddr vaddr, const T data) {
|
||||
break;
|
||||
}
|
||||
case PageType::Special:
|
||||
WriteMMIO<T>(GetMMIOHandler(*impl->current_page_table, vaddr), vaddr, data);
|
||||
WriteMMIO<T>(impl->GetMMIOHandler(*impl->current_page_table, vaddr), vaddr, data);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MemorySystem::WriteExclusive(const VAddr vaddr, const T data, const T expected) {
|
||||
u8* page_pointer = impl->current_page_table->pointers[vaddr >> CITRA_PAGE_BITS];
|
||||
|
||||
if (page_pointer) {
|
||||
const auto volatile_pointer =
|
||||
reinterpret_cast<volatile T*>(&page_pointer[vaddr & CITRA_PAGE_MASK]);
|
||||
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
||||
}
|
||||
|
||||
PageType type = impl->current_page_table->attributes[vaddr >> CITRA_PAGE_BITS];
|
||||
switch (type) {
|
||||
case PageType::Unmapped:
|
||||
LOG_ERROR(HW_Memory, "unmapped Write{} 0x{:08X} @ 0x{:08X} at PC 0x{:08X}",
|
||||
sizeof(data) * 8, (u32)data, vaddr, Core::GetRunningCore().GetPC());
|
||||
return true;
|
||||
case PageType::Memory:
|
||||
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:08X}", vaddr);
|
||||
return true;
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
|
||||
const auto volatile_pointer =
|
||||
reinterpret_cast<volatile T*>(GetPointerForRasterizerCache(vaddr).GetPtr());
|
||||
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
||||
}
|
||||
case PageType::Special:
|
||||
WriteMMIO<T>(GetMMIOHandler(*impl->current_page_table, vaddr), vaddr, data);
|
||||
return false;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
|
||||
bool MemorySystem::IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
|
||||
auto& page_table = *process.vm_manager.page_table;
|
||||
|
||||
auto page_pointer = page_table.pointers[vaddr >> CITRA_PAGE_BITS];
|
||||
@ -423,7 +503,7 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
|
||||
if (page_table.attributes[vaddr >> CITRA_PAGE_BITS] != PageType::Special)
|
||||
return false;
|
||||
|
||||
MMIORegionPointer mmio_region = GetMMIOHandler(page_table, vaddr);
|
||||
MMIORegionPointer mmio_region = impl->GetMMIOHandler(page_table, vaddr);
|
||||
if (mmio_region) {
|
||||
return mmio_region->IsValidAddress(vaddr);
|
||||
}
|
||||
@ -432,7 +512,7 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
|
||||
}
|
||||
|
||||
bool MemorySystem::IsValidPhysicalAddress(const PAddr paddr) const {
|
||||
return GetPhysicalPointer(paddr) != nullptr;
|
||||
return GetPhysicalRef(paddr);
|
||||
}
|
||||
|
||||
u8* MemorySystem::GetPointer(const VAddr vaddr) {
|
||||
@ -471,11 +551,14 @@ std::string MemorySystem::ReadCString(VAddr vaddr, std::size_t max_length) {
|
||||
string.reserve(max_length);
|
||||
for (std::size_t i = 0; i < max_length; ++i) {
|
||||
char c = Read8(vaddr);
|
||||
if (c == '\0')
|
||||
if (c == '\0') {
|
||||
break;
|
||||
}
|
||||
|
||||
string.push_back(c);
|
||||
++vaddr;
|
||||
}
|
||||
|
||||
string.shrink_to_fit();
|
||||
return string;
|
||||
}
|
||||
@ -484,10 +567,6 @@ u8* MemorySystem::GetPhysicalPointer(PAddr address) {
|
||||
return GetPhysicalRef(address);
|
||||
}
|
||||
|
||||
const u8* MemorySystem::GetPhysicalPointer(PAddr address) const {
|
||||
return GetPhysicalRef(address);
|
||||
}
|
||||
|
||||
MemoryRef MemorySystem::GetPhysicalRef(PAddr address) const {
|
||||
constexpr std::array memory_areas = {
|
||||
std::make_pair(VRAM_PADDR, VRAM_SIZE),
|
||||
@ -698,53 +777,12 @@ u64 MemorySystem::Read64(const VAddr addr) {
|
||||
|
||||
void MemorySystem::ReadBlock(const Kernel::Process& process, const VAddr src_addr,
|
||||
void* dest_buffer, const std::size_t size) {
|
||||
auto& page_table = *process.vm_manager.page_table;
|
||||
return impl->ReadBlockImpl<false>(process, src_addr, dest_buffer, size);
|
||||
}
|
||||
|
||||
std::size_t remaining_size = size;
|
||||
std::size_t page_index = src_addr >> CITRA_PAGE_BITS;
|
||||
std::size_t page_offset = src_addr & CITRA_PAGE_MASK;
|
||||
|
||||
while (remaining_size > 0) {
|
||||
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
|
||||
const VAddr current_vaddr = static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
|
||||
|
||||
switch (page_table.attributes[page_index]) {
|
||||
case PageType::Unmapped: {
|
||||
LOG_ERROR(HW_Memory,
|
||||
"unmapped ReadBlock @ 0x{:08X} (start address = 0x{:08X}, size = {}) at PC "
|
||||
"0x{:08X}",
|
||||
current_vaddr, src_addr, size, Core::GetRunningCore().GetPC());
|
||||
std::memset(dest_buffer, 0, copy_amount);
|
||||
break;
|
||||
}
|
||||
case PageType::Memory: {
|
||||
DEBUG_ASSERT(page_table.pointers[page_index]);
|
||||
|
||||
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
|
||||
std::memcpy(dest_buffer, src_ptr, copy_amount);
|
||||
break;
|
||||
}
|
||||
case PageType::Special: {
|
||||
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
|
||||
DEBUG_ASSERT(handler);
|
||||
handler->ReadBlock(current_vaddr, dest_buffer, copy_amount);
|
||||
break;
|
||||
}
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::Flush);
|
||||
std::memcpy(dest_buffer, GetPointerForRasterizerCache(current_vaddr), copy_amount);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
page_index++;
|
||||
page_offset = 0;
|
||||
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
||||
remaining_size -= copy_amount;
|
||||
}
|
||||
void MemorySystem::ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) {
|
||||
const auto& process = *Core::System::GetInstance().Kernel().GetCurrentProcess();
|
||||
return impl->ReadBlockImpl<false>(process, src_addr, dest_buffer, size);
|
||||
}
|
||||
|
||||
void MemorySystem::Write8(const VAddr addr, const u8 data) {
|
||||
@ -763,119 +801,41 @@ void MemorySystem::Write64(const VAddr addr, const u64 data) {
|
||||
Write<u64_le>(addr, data);
|
||||
}
|
||||
|
||||
bool MemorySystem::WriteExclusive8(const VAddr addr, const u8 data, const u8 expected) {
|
||||
return WriteExclusive<u8>(addr, data, expected);
|
||||
}
|
||||
|
||||
bool MemorySystem::WriteExclusive16(const VAddr addr, const u16 data, const u16 expected) {
|
||||
return WriteExclusive<u16_le>(addr, data, expected);
|
||||
}
|
||||
|
||||
bool MemorySystem::WriteExclusive32(const VAddr addr, const u32 data, const u32 expected) {
|
||||
return WriteExclusive<u32_le>(addr, data, expected);
|
||||
}
|
||||
|
||||
bool MemorySystem::WriteExclusive64(const VAddr addr, const u64 data, const u64 expected) {
|
||||
return WriteExclusive<u64_le>(addr, data, expected);
|
||||
}
|
||||
|
||||
void MemorySystem::WriteBlock(const Kernel::Process& process, const VAddr dest_addr,
|
||||
const void* src_buffer, const std::size_t size) {
|
||||
auto& page_table = *process.vm_manager.page_table;
|
||||
std::size_t remaining_size = size;
|
||||
std::size_t page_index = dest_addr >> CITRA_PAGE_BITS;
|
||||
std::size_t page_offset = dest_addr & CITRA_PAGE_MASK;
|
||||
return impl->WriteBlockImpl<false>(process, dest_addr, src_buffer, size);
|
||||
}
|
||||
|
||||
while (remaining_size > 0) {
|
||||
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
|
||||
const VAddr current_vaddr = static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
|
||||
|
||||
switch (page_table.attributes[page_index]) {
|
||||
case PageType::Unmapped: {
|
||||
LOG_ERROR(HW_Memory,
|
||||
"unmapped WriteBlock @ 0x{:08X} (start address = 0x{:08X}, size = {}) at PC "
|
||||
"0x{:08X}",
|
||||
current_vaddr, dest_addr, size, Core::GetRunningCore().GetPC());
|
||||
break;
|
||||
}
|
||||
case PageType::Memory: {
|
||||
DEBUG_ASSERT(page_table.pointers[page_index]);
|
||||
|
||||
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
|
||||
std::memcpy(dest_ptr, src_buffer, copy_amount);
|
||||
break;
|
||||
}
|
||||
case PageType::Special: {
|
||||
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
|
||||
DEBUG_ASSERT(handler);
|
||||
handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
|
||||
break;
|
||||
}
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::Invalidate);
|
||||
std::memcpy(GetPointerForRasterizerCache(current_vaddr), src_buffer, copy_amount);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
page_index++;
|
||||
page_offset = 0;
|
||||
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
|
||||
remaining_size -= copy_amount;
|
||||
}
|
||||
void MemorySystem::WriteBlock(const VAddr dest_addr, const void* src_buffer,
|
||||
const std::size_t size) {
|
||||
auto& process = *Core::System::GetInstance().Kernel().GetCurrentProcess();
|
||||
return impl->WriteBlockImpl<false>(process, dest_addr, src_buffer, size);
|
||||
}
|
||||
|
||||
void MemorySystem::ZeroBlock(const Kernel::Process& process, const VAddr dest_addr,
|
||||
const std::size_t size) {
|
||||
auto& page_table = *process.vm_manager.page_table;
|
||||
std::size_t remaining_size = size;
|
||||
std::size_t page_index = dest_addr >> CITRA_PAGE_BITS;
|
||||
std::size_t page_offset = dest_addr & CITRA_PAGE_MASK;
|
||||
static const std::array<u8, CITRA_PAGE_SIZE> zeros{0};
|
||||
|
||||
static const std::array<u8, CITRA_PAGE_SIZE> zeros = {};
|
||||
|
||||
while (remaining_size > 0) {
|
||||
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
|
||||
const VAddr current_vaddr = static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
|
||||
|
||||
switch (page_table.attributes[page_index]) {
|
||||
case PageType::Unmapped: {
|
||||
impl->WalkBlock(
|
||||
process, dest_addr, size,
|
||||
[dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
|
||||
LOG_ERROR(HW_Memory,
|
||||
"unmapped ZeroBlock @ 0x{:08X} (start address = 0x{:08X}, size = {}) at PC "
|
||||
"0x{:08X}",
|
||||
current_vaddr, dest_addr, size, Core::GetRunningCore().GetPC());
|
||||
break;
|
||||
}
|
||||
case PageType::Memory: {
|
||||
DEBUG_ASSERT(page_table.pointers[page_index]);
|
||||
|
||||
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
|
||||
"Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
|
||||
current_vaddr, dest_addr, size);
|
||||
},
|
||||
[](const std::size_t copy_amount, u8* const dest_ptr) {
|
||||
std::memset(dest_ptr, 0, copy_amount);
|
||||
break;
|
||||
}
|
||||
case PageType::Special: {
|
||||
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
|
||||
DEBUG_ASSERT(handler);
|
||||
},
|
||||
[&zeros = zeros](MMIORegionPointer& handler, const std::size_t copy_amount,
|
||||
const VAddr current_vaddr) {
|
||||
handler->WriteBlock(current_vaddr, zeros.data(), copy_amount);
|
||||
break;
|
||||
}
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
},
|
||||
[](const VAddr current_vaddr, const std::size_t copy_amount, u8* const rasterizer_ptr) {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::Invalidate);
|
||||
std::memset(GetPointerForRasterizerCache(current_vaddr), 0, copy_amount);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
page_index++;
|
||||
page_offset = 0;
|
||||
remaining_size -= copy_amount;
|
||||
}
|
||||
std::memset(rasterizer_ptr, 0, copy_amount);
|
||||
},
|
||||
[](const std::size_t copy_amount) {});
|
||||
}
|
||||
|
||||
void MemorySystem::CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
|
||||
@ -886,55 +846,35 @@ void MemorySystem::CopyBlock(const Kernel::Process& process, VAddr dest_addr, VA
|
||||
void MemorySystem::CopyBlock(const Kernel::Process& dest_process,
|
||||
const Kernel::Process& src_process, VAddr dest_addr, VAddr src_addr,
|
||||
std::size_t size) {
|
||||
auto& page_table = *src_process.vm_manager.page_table;
|
||||
std::size_t remaining_size = size;
|
||||
std::size_t page_index = src_addr >> CITRA_PAGE_BITS;
|
||||
std::size_t page_offset = src_addr & CITRA_PAGE_MASK;
|
||||
std::array<u8, CITRA_PAGE_SIZE> copy_buffer{};
|
||||
|
||||
while (remaining_size > 0) {
|
||||
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
|
||||
const VAddr current_vaddr = static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
|
||||
|
||||
switch (page_table.attributes[page_index]) {
|
||||
case PageType::Unmapped: {
|
||||
impl->WalkBlock(
|
||||
src_process, src_addr, size,
|
||||
[this, &dest_process, &dest_addr, &src_addr, size](const std::size_t copy_amount,
|
||||
const VAddr current_vaddr) {
|
||||
LOG_ERROR(HW_Memory,
|
||||
"unmapped CopyBlock @ 0x{:08X} (start address = 0x{:08X}, size = {}) at PC "
|
||||
"0x{:08X}",
|
||||
current_vaddr, src_addr, size, Core::GetRunningCore().GetPC());
|
||||
"unmapped CopyBlock @ 0x{:08X} (start address = 0x{:08X}, size = {})",
|
||||
current_vaddr, src_addr, size);
|
||||
ZeroBlock(dest_process, dest_addr, copy_amount);
|
||||
break;
|
||||
}
|
||||
case PageType::Memory: {
|
||||
DEBUG_ASSERT(page_table.pointers[page_index]);
|
||||
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
|
||||
WriteBlock(dest_process, dest_addr, src_ptr, copy_amount);
|
||||
break;
|
||||
}
|
||||
case PageType::Special: {
|
||||
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
|
||||
DEBUG_ASSERT(handler);
|
||||
std::vector<u8> buffer(copy_amount);
|
||||
handler->ReadBlock(current_vaddr, buffer.data(), buffer.size());
|
||||
WriteBlock(dest_process, dest_addr, buffer.data(), buffer.size());
|
||||
break;
|
||||
}
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
},
|
||||
[this, &dest_process, &dest_addr](const std::size_t copy_amount, const u8* const src_ptr) {
|
||||
impl->WriteBlockImpl<false>(dest_process, dest_addr, src_ptr, copy_amount);
|
||||
},
|
||||
[this, &dest_process, &dest_addr, ©_buffer](
|
||||
MMIORegionPointer& handler, const std::size_t copy_amount, const VAddr current_vaddr) {
|
||||
handler->ReadBlock(current_vaddr, copy_buffer.data(), copy_amount);
|
||||
impl->WriteBlockImpl<false>(dest_process, dest_addr, copy_buffer.data(), copy_amount);
|
||||
},
|
||||
[this, &dest_process, &dest_addr](const VAddr current_vaddr, const std::size_t copy_amount,
|
||||
u8* const rasterizer_ptr) {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::Flush);
|
||||
WriteBlock(dest_process, dest_addr, GetPointerForRasterizerCache(current_vaddr),
|
||||
copy_amount);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
page_index++;
|
||||
page_offset = 0;
|
||||
dest_addr += static_cast<VAddr>(copy_amount);
|
||||
src_addr += static_cast<VAddr>(copy_amount);
|
||||
remaining_size -= copy_amount;
|
||||
}
|
||||
impl->WriteBlockImpl<false>(dest_process, dest_addr, rasterizer_ptr, copy_amount);
|
||||
},
|
||||
[&dest_addr, &src_addr](const std::size_t copy_amount) {
|
||||
dest_addr += static_cast<VAddr>(copy_amount);
|
||||
src_addr += static_cast<VAddr>(copy_amount);
|
||||
});
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -5,12 +5,9 @@
|
||||
#pragma once
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <boost/serialization/array.hpp>
|
||||
#include <boost/serialization/vector.hpp>
|
||||
#include "common/common_types.h"
|
||||
#include "common/memory_ref.h"
|
||||
#include "core/mmio.h"
|
||||
|
||||
@ -102,11 +99,10 @@ struct PageTable {
|
||||
|
||||
private:
|
||||
std::array<u8*, PAGE_TABLE_NUM_ENTRIES> raw;
|
||||
|
||||
std::array<MemoryRef, PAGE_TABLE_NUM_ENTRIES> refs;
|
||||
|
||||
friend struct PageTable;
|
||||
};
|
||||
|
||||
Pointers pointers;
|
||||
|
||||
/**
|
||||
@ -313,55 +309,257 @@ public:
|
||||
void SetCurrentPageTable(std::shared_ptr<PageTable> page_table);
|
||||
std::shared_ptr<PageTable> GetCurrentPageTable() const;
|
||||
|
||||
u8 Read8(VAddr addr);
|
||||
u16 Read16(VAddr addr);
|
||||
u32 Read32(VAddr addr);
|
||||
u64 Read64(VAddr addr);
|
||||
|
||||
void Write8(VAddr addr, u8 data);
|
||||
void Write16(VAddr addr, u16 data);
|
||||
void Write32(VAddr addr, u32 data);
|
||||
void Write64(VAddr addr, u64 data);
|
||||
/**
|
||||
* Gets a pointer to the given address.
|
||||
*
|
||||
* @param vaddr Virtual address to retrieve a pointer to.
|
||||
*
|
||||
* @returns The pointer to the given address, if the address is valid.
|
||||
* If the address is not valid, nullptr will be returned.
|
||||
*/
|
||||
u8* GetPointer(VAddr vaddr);
|
||||
|
||||
/**
|
||||
* Writes a {8, 16, 32, 64}-bit unsigned integer to the given virtual address in
|
||||
* the current process' address space if and only if the address contains
|
||||
* the expected value. This operation is atomic.
|
||||
* Gets a pointer to the given address.
|
||||
*
|
||||
* @param addr The virtual address to write the X-bit unsigned integer to.
|
||||
* @param data The X-bit unsigned integer to write to the given virtual address.
|
||||
* @param expected The X-bit unsigned integer to check against the given virtual address.
|
||||
* @returns true if the operation failed
|
||||
* @param vaddr Virtual address to retrieve a pointer to.
|
||||
*
|
||||
* @returns The pointer to the given address, if the address is valid.
|
||||
* If the address is not valid, nullptr will be returned.
|
||||
*/
|
||||
const u8* GetPointer(VAddr vaddr) const;
|
||||
|
||||
/**
|
||||
* Reads an 8-bit unsigned value from the current process' address space
|
||||
* at the given virtual address.
|
||||
*
|
||||
* @param addr The virtual address to read the 8-bit value from.
|
||||
*
|
||||
* @returns the read 8-bit unsigned value.
|
||||
*/
|
||||
u8 Read8(VAddr addr);
|
||||
|
||||
/**
|
||||
* Reads a 16-bit unsigned value from the current process' address space
|
||||
* at the given virtual address.
|
||||
*
|
||||
* @param addr The virtual address to read the 16-bit value from.
|
||||
*
|
||||
* @returns the read 16-bit unsigned value.
|
||||
*/
|
||||
u16 Read16(VAddr addr);
|
||||
|
||||
/**
|
||||
* Reads a 32-bit unsigned value from the current process' address space
|
||||
* at the given virtual address.
|
||||
*
|
||||
* @param addr The virtual address to read the 32-bit value from.
|
||||
*
|
||||
* @returns the read 32-bit unsigned value.
|
||||
*/
|
||||
u32 Read32(VAddr addr);
|
||||
|
||||
/**
|
||||
* Reads a 64-bit unsigned value from the current process' address space
|
||||
* at the given virtual address.
|
||||
*
|
||||
* @param addr The virtual address to read the 64-bit value from.
|
||||
*
|
||||
* @returns the read 64-bit value.
|
||||
*/
|
||||
u64 Read64(VAddr addr);
|
||||
|
||||
/**
|
||||
* Writes an 8-bit unsigned integer to the given virtual address in
|
||||
* the current process' address space.
|
||||
*
|
||||
* @param addr The virtual address to write the 8-bit unsigned integer to.
|
||||
* @param data The 8-bit unsigned integer to write to the given virtual address.
|
||||
*
|
||||
* @post The memory at the given virtual address contains the specified data value.
|
||||
*/
|
||||
void Write8(VAddr addr, u8 data);
|
||||
|
||||
/**
|
||||
* Writes a 16-bit unsigned integer to the given virtual address in
|
||||
* the current process' address space.
|
||||
*
|
||||
* @param addr The virtual address to write the 16-bit unsigned integer to.
|
||||
* @param data The 16-bit unsigned integer to write to the given virtual address.
|
||||
*
|
||||
* @post The memory range [addr, sizeof(data)) contains the given data value.
|
||||
*/
|
||||
bool WriteExclusive8(const VAddr addr, const u8 data, const u8 expected);
|
||||
bool WriteExclusive16(const VAddr addr, const u16 data, const u16 expected);
|
||||
bool WriteExclusive32(const VAddr addr, const u32 data, const u32 expected);
|
||||
bool WriteExclusive64(const VAddr addr, const u64 data, const u64 expected);
|
||||
void Write16(VAddr addr, u16 data);
|
||||
|
||||
/**
|
||||
* Writes a 32-bit unsigned integer to the given virtual address in
|
||||
* the current process' address space.
|
||||
*
|
||||
* @param addr The virtual address to write the 32-bit unsigned integer to.
|
||||
* @param data The 32-bit unsigned integer to write to the given virtual address.
|
||||
*
|
||||
* @post The memory range [addr, sizeof(data)) contains the given data value.
|
||||
*/
|
||||
void Write32(VAddr addr, u32 data);
|
||||
|
||||
/**
|
||||
* Writes a 64-bit unsigned integer to the given virtual address in
|
||||
* the current process' address space.
|
||||
*
|
||||
* @param addr The virtual address to write the 64-bit unsigned integer to.
|
||||
* @param data The 64-bit unsigned integer to write to the given virtual address.
|
||||
*
|
||||
* @post The memory range [addr, sizeof(data)) contains the given data value.
|
||||
*/
|
||||
void Write64(VAddr addr, u64 data);
|
||||
|
||||
/**
|
||||
* Reads a null-terminated string from the given virtual address.
|
||||
* This function will continually read characters until either:
|
||||
*
|
||||
* - A null character ('\0') is reached.
|
||||
* - max_length characters have been read.
|
||||
*
|
||||
* @note The final null-terminating character (if found) is not included
|
||||
* in the returned string.
|
||||
*
|
||||
* @param vaddr The address to begin reading the string from.
|
||||
* @param max_length The maximum length of the string to read in characters.
|
||||
*
|
||||
* @returns The read string.
|
||||
*/
|
||||
std::string ReadCString(VAddr vaddr, std::size_t max_length);
|
||||
|
||||
/**
|
||||
* Reads a contiguous block of bytes from a specified process' address space.
|
||||
*
|
||||
* @param process The process to read the data from.
|
||||
* @param src_addr The virtual address to begin reading from.
|
||||
* @param dest_buffer The buffer to place the read bytes into.
|
||||
* @param size The amount of data to read, in bytes.
|
||||
*
|
||||
* @note If a size of 0 is specified, then this function reads nothing and
|
||||
* no attempts to access memory are made at all.
|
||||
*
|
||||
* @pre dest_buffer must be at least size bytes in length, otherwise a
|
||||
* buffer overrun will occur.
|
||||
*
|
||||
* @post The range [dest_buffer, size) contains the read bytes from the
|
||||
* process' address space.
|
||||
*/
|
||||
void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
|
||||
std::size_t size);
|
||||
|
||||
/**
|
||||
* Reads a contiguous block of bytes from the current process' address space.
|
||||
*
|
||||
* @param src_addr The virtual address to begin reading from.
|
||||
* @param dest_buffer The buffer to place the read bytes into.
|
||||
* @param size The amount of data to read, in bytes.
|
||||
*
|
||||
* @note If a size of 0 is specified, then this function reads nothing and
|
||||
* no attempts to access memory are made at all.
|
||||
*
|
||||
* @pre dest_buffer must be at least size bytes in length, otherwise a
|
||||
* buffer overrun will occur.
|
||||
*
|
||||
* @post The range [dest_buffer, size) contains the read bytes from the
|
||||
* current process' address space.
|
||||
*/
|
||||
void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
|
||||
|
||||
/**
|
||||
* Writes a range of bytes into a given process' address space at the specified
|
||||
* virtual address.
|
||||
*
|
||||
* @param process The process to write data into the address space of.
|
||||
* @param dest_addr The destination virtual address to begin writing the data at.
|
||||
* @param src_buffer The data to write into the process' address space.
|
||||
* @param size The size of the data to write, in bytes.
|
||||
*
|
||||
* @post The address range [dest_addr, size) in the process' address space
|
||||
* contains the data that was within src_buffer.
|
||||
*
|
||||
* @post If an attempt is made to write into an unmapped region of memory, the writes
|
||||
* will be ignored and an error will be logged.
|
||||
*
|
||||
* @post If a write is performed into a region of memory that is considered cached
|
||||
* rasterizer memory, will cause the currently active rasterizer to be notified
|
||||
* and will mark that region as invalidated to caches that the active
|
||||
* graphics backend may be maintaining over the course of execution.
|
||||
*/
|
||||
void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
|
||||
std::size_t size);
|
||||
|
||||
/**
|
||||
* Writes a range of bytes into a given process' address space at the specified
|
||||
* virtual address.
|
||||
*
|
||||
* @param dest_addr The destination virtual address to begin writing the data at.
|
||||
* @param src_buffer The data to write into the process' address space.
|
||||
* @param size The size of the data to write, in bytes.
|
||||
*
|
||||
* @post The address range [dest_addr, size) in the process' address space
|
||||
* contains the data that was within src_buffer.
|
||||
*
|
||||
* @post If an attempt is made to write into an unmapped region of memory, the writes
|
||||
* will be ignored and an error will be logged.
|
||||
*
|
||||
* @post If a write is performed into a region of memory that is considered cached
|
||||
* rasterizer memory, will cause the currently active rasterizer to be notified
|
||||
* and will mark that region as invalidated to caches that the active
|
||||
* graphics backend may be maintaining over the course of execution.
|
||||
*/
|
||||
void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
|
||||
|
||||
/**
|
||||
* Zeros a range of bytes within the current process' address space at the specified
|
||||
* virtual address.
|
||||
*
|
||||
* @param process The process that will have data zeroed within its address space.
|
||||
* @param dest_addr The destination virtual address to zero the data from.
|
||||
* @param size The size of the range to zero out, in bytes.
|
||||
*
|
||||
* @post The range [dest_addr, size) within the process' address space contains the
|
||||
* value 0.
|
||||
*/
|
||||
void ZeroBlock(const Kernel::Process& process, VAddr dest_addr, const std::size_t size);
|
||||
|
||||
/**
|
||||
* Copies data within a process' address space to another location within the
|
||||
* same address space.
|
||||
*
|
||||
* @param process The process that will have data copied within its address space.
|
||||
* @param dest_addr The destination virtual address to begin copying the data into.
|
||||
* @param src_addr The source virtual address to begin copying the data from.
|
||||
* @param size The size of the data to copy, in bytes.
|
||||
*
|
||||
* @post The range [dest_addr, size) within the process' address space contains the
|
||||
* same data within the range [src_addr, size).
|
||||
*/
|
||||
void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
|
||||
std::size_t size);
|
||||
void CopyBlock(const Kernel::Process& dest_process, const Kernel::Process& src_process,
|
||||
VAddr dest_addr, VAddr src_addr, std::size_t size);
|
||||
|
||||
std::string ReadCString(VAddr vaddr, std::size_t max_length);
|
||||
/**
|
||||
* Marks each page within the specified address range as cached or uncached.
|
||||
*
|
||||
* @param vaddr The virtual address indicating the start of the address range.
|
||||
* @param size The size of the address range in bytes.
|
||||
* @param cached Whether or not any pages within the address range should be
|
||||
* marked as cached or uncached.
|
||||
*/
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached);
|
||||
|
||||
/// Gets a pointer to the memory region beginning at the specified physical address.
|
||||
u8* GetPhysicalPointer(PAddr address);
|
||||
|
||||
/// Gets a pointer to the memory region beginning at the specified physical address.
|
||||
const u8* GetPhysicalPointer(PAddr address) const;
|
||||
|
||||
/// Returns a reference to the memory region beginning at the specified physical address
|
||||
MemoryRef GetPhysicalRef(PAddr address) const;
|
||||
|
||||
u8* GetPointer(VAddr vaddr);
|
||||
const u8* GetPointer(VAddr vaddr) const;
|
||||
/// Determines if the given VAddr is valid for the specified process.
|
||||
bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
|
||||
|
||||
/// Returns true if the address refers to a valid memory region
|
||||
bool IsValidPhysicalAddress(PAddr paddr) const;
|
||||
@ -378,11 +576,6 @@ public:
|
||||
/// Gets a serializable ref to FCRAM with the given offset
|
||||
MemoryRef GetFCRAMRef(std::size_t offset) const;
|
||||
|
||||
/**
|
||||
* Mark each page touching the region as cached.
|
||||
*/
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached);
|
||||
|
||||
/// Registers page table for rasterizer cache marking
|
||||
void RegisterPageTable(std::shared_ptr<PageTable> page_table);
|
||||
|
||||
@ -398,9 +591,6 @@ private:
|
||||
template <typename T>
|
||||
void Write(const VAddr vaddr, const T data);
|
||||
|
||||
template <typename T>
|
||||
bool WriteExclusive(const VAddr vaddr, const T data, const T expected);
|
||||
|
||||
/**
|
||||
* Gets the pointer for virtual memory where the page is marked as RasterizerCachedMemory.
|
||||
* This is used to access the memory where the page pointer is nullptr due to rasterizer cache.
|
||||
@ -412,7 +602,6 @@ private:
|
||||
void MapPages(PageTable& page_table, u32 base, u32 size, MemoryRef memory, PageType type);
|
||||
|
||||
class Impl;
|
||||
|
||||
std::unique_ptr<Impl> impl;
|
||||
|
||||
friend class boost::serialization::access;
|
||||
@ -424,9 +613,6 @@ public:
|
||||
class BackingMemImpl;
|
||||
};
|
||||
|
||||
/// Determines if the given VAddr is valid for the specified process.
|
||||
bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
|
||||
|
||||
} // namespace Memory
|
||||
|
||||
BOOST_CLASS_EXPORT_KEY(Memory::MemorySystem::BackingMemImpl<Memory::Region::FCRAM>)
|
||||
|
@ -90,7 +90,6 @@ void LogSettings() {
|
||||
LogSetting("Core_UseCpuJit", values.use_cpu_jit);
|
||||
LogSetting("Core_CPUClockPercentage", values.cpu_clock_percentage);
|
||||
LogSetting("Renderer_GraphicsAPI", GetAPIName(values.graphics_api));
|
||||
LogSetting("Renderer_AsyncRecording", values.async_command_recording);
|
||||
LogSetting("Renderer_UseHwRenderer", values.use_hw_renderer);
|
||||
LogSetting("Renderer_UseHwShader", values.use_hw_shader);
|
||||
LogSetting("Renderer_SeparableShader", values.separable_shader);
|
||||
@ -129,11 +128,8 @@ void LogSettings() {
|
||||
LogSetting("Camera_OuterLeftConfig", values.camera_config[Service::CAM::OuterLeftCamera]);
|
||||
LogSetting("Camera_OuterLeftFlip", values.camera_flip[Service::CAM::OuterLeftCamera]);
|
||||
LogSetting("DataStorage_UseVirtualSd", values.use_virtual_sd);
|
||||
LogSetting("DataStorage_UseCustomStorage", values.use_custom_storage);
|
||||
if (values.use_custom_storage) {
|
||||
LogSetting("DataStorage_SdmcDir", FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir));
|
||||
LogSetting("DataStorage_NandDir", FileUtil::GetUserPath(FileUtil::UserPath::NANDDir));
|
||||
}
|
||||
LogSetting("DataStorage_SdmcDir", FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir));
|
||||
LogSetting("DataStorage_NandDir", FileUtil::GetUserPath(FileUtil::UserPath::NANDDir));
|
||||
LogSetting("System_IsNew3ds", values.is_new_3ds);
|
||||
LogSetting("System_RegionValue", values.region_value);
|
||||
LogSetting("Debugging_UseGdbstub", values.use_gdbstub);
|
||||
|
@ -157,7 +157,6 @@ struct Values {
|
||||
|
||||
// Data Storage
|
||||
bool use_virtual_sd;
|
||||
bool use_custom_storage;
|
||||
|
||||
// System
|
||||
int region_value;
|
||||
@ -167,10 +166,8 @@ struct Values {
|
||||
// Renderer
|
||||
GraphicsAPI graphics_api;
|
||||
u16 physical_device;
|
||||
bool spirv_shader_gen;
|
||||
bool renderer_debug;
|
||||
bool dump_command_buffers;
|
||||
bool async_command_recording;
|
||||
bool use_hw_renderer;
|
||||
bool use_hw_shader;
|
||||
bool separable_shader;
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
|
@ -47,7 +47,7 @@ public:
|
||||
} else {
|
||||
tilt_direction = mouse_move.Cast<float>();
|
||||
tilt_angle = std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f,
|
||||
Common::PI * this->tilt_clamp / 180.0f);
|
||||
std::numbers::pi_v<float> * this->tilt_clamp / 180.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -110,7 +110,7 @@ private:
|
||||
|
||||
// Find the angular rate vector in world space
|
||||
auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
|
||||
angular_rate *= 1000 / update_millisecond / Common::PI * 180;
|
||||
angular_rate *= 1000 / update_millisecond / std::numbers::pi_v<float> * 180;
|
||||
|
||||
// Transform the two vectors from world space to 3DS space
|
||||
gravity = QuaternionRotate(inv_q, gravity);
|
||||
|
@ -16,8 +16,6 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <SDL.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/math_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/param_package.h"
|
||||
#include "common/threadsafe_queue.h"
|
||||
@ -598,9 +596,9 @@ void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
|
||||
event.csensor.data[2] / SDL_STANDARD_GRAVITY);
|
||||
break;
|
||||
case SDL_SENSOR_GYRO:
|
||||
joystick->SetGyro(-event.csensor.data[0] * (180.0f / Common::PI),
|
||||
event.csensor.data[1] * (180.0f / Common::PI),
|
||||
-event.csensor.data[2] * (180.0f / Common::PI));
|
||||
joystick->SetGyro(-event.csensor.data[0] * (180.0f / std::numbers::pi),
|
||||
event.csensor.data[1] * (180.0f / std::numbers::pi),
|
||||
-event.csensor.data[2] * (180.0f / std::numbers::pi));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -3,34 +3,31 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/hle/kernel/memory.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/hle/kernel/shared_page.h"
|
||||
#include "core/memory.h"
|
||||
|
||||
TEST_CASE("Memory::IsValidVirtualAddress", "[core][memory]") {
|
||||
TEST_CASE("memory.IsValidVirtualAddress", "[core][memory]") {
|
||||
Core::Timing timing(1, 100);
|
||||
Memory::MemorySystem memory;
|
||||
Kernel::KernelSystem kernel(
|
||||
memory, timing, [] {}, 0, 1, 0);
|
||||
SECTION("these regions should not be mapped on an empty process") {
|
||||
auto process = kernel.CreateProcess(kernel.CreateCodeSet("", 0));
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::PROCESS_IMAGE_VADDR) == false);
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::HEAP_VADDR) == false);
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::LINEAR_HEAP_VADDR) == false);
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == false);
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false);
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == false);
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::TLS_AREA_VADDR) == false);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::PROCESS_IMAGE_VADDR) == false);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::HEAP_VADDR) == false);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::LINEAR_HEAP_VADDR) == false);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == false);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == false);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::TLS_AREA_VADDR) == false);
|
||||
}
|
||||
|
||||
SECTION("CONFIG_MEMORY_VADDR and SHARED_PAGE_VADDR should be valid after mapping them") {
|
||||
auto process = kernel.CreateProcess(kernel.CreateCodeSet("", 0));
|
||||
kernel.MapSharedPages(process->vm_manager);
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == true);
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == true);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == true);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == true);
|
||||
}
|
||||
|
||||
SECTION("special regions should be valid after mapping them") {
|
||||
@ -38,13 +35,13 @@ TEST_CASE("Memory::IsValidVirtualAddress", "[core][memory]") {
|
||||
SECTION("VRAM") {
|
||||
kernel.HandleSpecialMapping(process->vm_manager,
|
||||
{Memory::VRAM_VADDR, Memory::VRAM_SIZE, false, false});
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == true);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == true);
|
||||
}
|
||||
|
||||
SECTION("IO (Not yet implemented)") {
|
||||
kernel.HandleSpecialMapping(
|
||||
process->vm_manager, {Memory::IO_AREA_VADDR, Memory::IO_AREA_SIZE, false, false});
|
||||
CHECK_FALSE(Memory::IsValidVirtualAddress(*process, Memory::IO_AREA_VADDR) == true);
|
||||
CHECK_FALSE(memory.IsValidVirtualAddress(*process, Memory::IO_AREA_VADDR) == true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,6 +49,6 @@ TEST_CASE("Memory::IsValidVirtualAddress", "[core][memory]") {
|
||||
auto process = kernel.CreateProcess(kernel.CreateCodeSet("", 0));
|
||||
kernel.MapSharedPages(process->vm_manager);
|
||||
process->vm_manager.UnmapRange(Memory::CONFIG_MEMORY_VADDR, Memory::CONFIG_MEMORY_SIZE);
|
||||
CHECK(Memory::IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false);
|
||||
CHECK(memory.IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false);
|
||||
}
|
||||
}
|
||||
|
@ -86,8 +86,6 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/renderer_vulkan.h
|
||||
renderer_vulkan/vk_blit_helper.cpp
|
||||
renderer_vulkan/vk_blit_helper.h
|
||||
renderer_vulkan/vk_blit_screen.cpp
|
||||
renderer_vulkan/vk_blit_screen.h
|
||||
renderer_vulkan/vk_common.cpp
|
||||
renderer_vulkan/vk_common.h
|
||||
renderer_vulkan/vk_descriptor_manager.cpp
|
||||
@ -113,8 +111,6 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/vk_renderpass_cache.h
|
||||
renderer_vulkan/vk_shader_gen.cpp
|
||||
renderer_vulkan/vk_shader_gen.h
|
||||
renderer_vulkan/vk_shader_gen_spv.cpp
|
||||
renderer_vulkan/vk_shader_gen_spv.h
|
||||
renderer_vulkan/vk_shader_util.cpp
|
||||
renderer_vulkan/vk_shader_util.h
|
||||
renderer_vulkan/vk_stream_buffer.cpp
|
||||
@ -206,8 +202,7 @@ if (NOT MSVC)
|
||||
endif()
|
||||
|
||||
target_link_libraries(video_core PUBLIC common core)
|
||||
target_link_libraries(video_core PRIVATE nihstro-headers Boost::serialization glm::glm)
|
||||
target_link_libraries(video_core PRIVATE vulkan-headers vma sirit SPIRV glslang glad)
|
||||
target_link_libraries(video_core PRIVATE glad vma vulkan-headers glm::glm SPIRV glslang nihstro-headers Boost::serialization)
|
||||
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
|
||||
|
||||
if (ARCHITECTURE_x86_64)
|
||||
|
@ -5,88 +5,10 @@
|
||||
#include <limits>
|
||||
#include "core/memory.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/pica_state.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
static Common::Vec4f ColorRGBA8(const u32 color) {
|
||||
const auto rgba =
|
||||
Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF};
|
||||
return rgba / 255.0f;
|
||||
}
|
||||
|
||||
static Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) {
|
||||
return Common::Vec3u{color.r, color.g, color.b} / 255.0f;
|
||||
}
|
||||
|
||||
RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v,
|
||||
bool flip_quaternion) {
|
||||
position[0] = v.pos.x.ToFloat32();
|
||||
position[1] = v.pos.y.ToFloat32();
|
||||
position[2] = v.pos.z.ToFloat32();
|
||||
position[3] = v.pos.w.ToFloat32();
|
||||
color[0] = v.color.x.ToFloat32();
|
||||
color[1] = v.color.y.ToFloat32();
|
||||
color[2] = v.color.z.ToFloat32();
|
||||
color[3] = v.color.w.ToFloat32();
|
||||
tex_coord0[0] = v.tc0.x.ToFloat32();
|
||||
tex_coord0[1] = v.tc0.y.ToFloat32();
|
||||
tex_coord1[0] = v.tc1.x.ToFloat32();
|
||||
tex_coord1[1] = v.tc1.y.ToFloat32();
|
||||
tex_coord2[0] = v.tc2.x.ToFloat32();
|
||||
tex_coord2[1] = v.tc2.y.ToFloat32();
|
||||
tex_coord0_w = v.tc0_w.ToFloat32();
|
||||
normquat[0] = v.quat.x.ToFloat32();
|
||||
normquat[1] = v.quat.y.ToFloat32();
|
||||
normquat[2] = v.quat.z.ToFloat32();
|
||||
normquat[3] = v.quat.w.ToFloat32();
|
||||
view[0] = v.view.x.ToFloat32();
|
||||
view[1] = v.view.y.ToFloat32();
|
||||
view[2] = v.view.z.ToFloat32();
|
||||
|
||||
if (flip_quaternion) {
|
||||
normquat = -normquat;
|
||||
}
|
||||
}
|
||||
|
||||
RasterizerAccelerated::RasterizerAccelerated() {
|
||||
uniform_block_data.lighting_lut_dirty.fill(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
|
||||
* for a detailed description of this issue (yuriks):
|
||||
*
|
||||
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
||||
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
||||
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
||||
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
|
||||
* making Dot(Q1, -Q2) positive.
|
||||
*
|
||||
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
|
||||
* correct for most cases but can still rotate around the long way sometimes. An implementation
|
||||
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
|
||||
* between each step would work for those cases at the cost of being more complex to implement.
|
||||
*
|
||||
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
|
||||
* these issues, making this basic implementation actually more accurate to the hardware.
|
||||
*/
|
||||
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
|
||||
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
|
||||
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
|
||||
|
||||
return (Common::Dot(a, b) < 0.f);
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) {
|
||||
vertex_batch.emplace_back(v0, false);
|
||||
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
|
||||
const u32 page_start = addr >> Memory::CITRA_PAGE_BITS;
|
||||
const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) + 1;
|
||||
@ -179,233 +101,4 @@ void RasterizerAccelerated::ClearAll(bool flush) {
|
||||
cached_pages = {};
|
||||
}
|
||||
|
||||
RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray(bool is_indexed) {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
|
||||
u32 vertex_min;
|
||||
u32 vertex_max;
|
||||
if (is_indexed) {
|
||||
const auto& index_info = regs.pipeline.index_array;
|
||||
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
||||
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
|
||||
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
||||
const bool index_u16 = index_info.format != 0;
|
||||
|
||||
vertex_min = 0xFFFF;
|
||||
vertex_max = 0;
|
||||
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||
FlushRegion(address, size);
|
||||
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
||||
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
||||
vertex_min = std::min(vertex_min, vertex);
|
||||
vertex_max = std::max(vertex_max, vertex);
|
||||
}
|
||||
} else {
|
||||
vertex_min = regs.pipeline.vertex_offset;
|
||||
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
||||
}
|
||||
|
||||
const u32 vertex_num = vertex_max - vertex_min + 1;
|
||||
u32 vs_input_size = 0;
|
||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||
if (loader.component_count != 0) {
|
||||
vs_input_size += loader.byte_count * vertex_num;
|
||||
}
|
||||
}
|
||||
|
||||
return {vertex_min, vertex_max, vs_input_size};
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncDepthScale() {
|
||||
float depth_scale =
|
||||
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
|
||||
|
||||
if (depth_scale != uniform_block_data.data.depth_scale) {
|
||||
uniform_block_data.data.depth_scale = depth_scale;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncDepthOffset() {
|
||||
float depth_offset =
|
||||
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32();
|
||||
|
||||
if (depth_offset != uniform_block_data.data.depth_offset) {
|
||||
uniform_block_data.data.depth_offset = depth_offset;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncFogColor() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
uniform_block_data.data.fog_color = {
|
||||
regs.texturing.fog_color.r.Value() / 255.0f,
|
||||
regs.texturing.fog_color.g.Value() / 255.0f,
|
||||
regs.texturing.fog_color.b.Value() / 255.0f,
|
||||
};
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncProcTexNoise() {
|
||||
const auto& regs = Pica::g_state.regs.texturing;
|
||||
uniform_block_data.data.proctex_noise_f = {
|
||||
Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(),
|
||||
Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(),
|
||||
};
|
||||
uniform_block_data.data.proctex_noise_a = {
|
||||
regs.proctex_noise_u.amplitude / 4095.0f,
|
||||
regs.proctex_noise_v.amplitude / 4095.0f,
|
||||
};
|
||||
uniform_block_data.data.proctex_noise_p = {
|
||||
Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(),
|
||||
Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(),
|
||||
};
|
||||
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncProcTexBias() {
|
||||
const auto& regs = Pica::g_state.regs.texturing;
|
||||
uniform_block_data.data.proctex_bias =
|
||||
Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8))
|
||||
.ToFloat32();
|
||||
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncAlphaTest() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
|
||||
uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncCombinerColor() {
|
||||
auto combiner_color =
|
||||
ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw);
|
||||
if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) {
|
||||
uniform_block_data.data.tev_combiner_buffer_color = combiner_color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncTevConstColor(std::size_t stage_index,
|
||||
const Pica::TexturingRegs::TevStageConfig& tev_stage) {
|
||||
const auto const_color = ColorRGBA8(tev_stage.const_color);
|
||||
|
||||
if (const_color == uniform_block_data.data.const_color[stage_index]) {
|
||||
return;
|
||||
}
|
||||
|
||||
uniform_block_data.data.const_color[stage_index] = const_color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncGlobalAmbient() {
|
||||
auto color = LightColor(Pica::g_state.regs.lighting.global_ambient);
|
||||
if (color != uniform_block_data.data.lighting_global_ambient) {
|
||||
uniform_block_data.data.lighting_global_ambient = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncLightSpecular0(int light_index) {
|
||||
auto color = LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
|
||||
if (color != uniform_block_data.data.light_src[light_index].specular_0) {
|
||||
uniform_block_data.data.light_src[light_index].specular_0 = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncLightSpecular1(int light_index) {
|
||||
auto color = LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
|
||||
if (color != uniform_block_data.data.light_src[light_index].specular_1) {
|
||||
uniform_block_data.data.light_src[light_index].specular_1 = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncLightDiffuse(int light_index) {
|
||||
auto color = LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
|
||||
if (color != uniform_block_data.data.light_src[light_index].diffuse) {
|
||||
uniform_block_data.data.light_src[light_index].diffuse = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncLightAmbient(int light_index) {
|
||||
auto color = LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
|
||||
if (color != uniform_block_data.data.light_src[light_index].ambient) {
|
||||
uniform_block_data.data.light_src[light_index].ambient = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncLightPosition(int light_index) {
|
||||
const Common::Vec3f position = {
|
||||
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
|
||||
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
|
||||
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()};
|
||||
|
||||
if (position != uniform_block_data.data.light_src[light_index].position) {
|
||||
uniform_block_data.data.light_src[light_index].position = position;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncLightSpotDirection(int light_index) {
|
||||
const auto& light = Pica::g_state.regs.lighting.light[light_index];
|
||||
const auto spot_direction = Common::Vec3f{light.spot_x / 2047.0f, light.spot_y / 2047.0f, light.spot_z / 2047.0f};
|
||||
|
||||
if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) {
|
||||
uniform_block_data.data.light_src[light_index].spot_direction = spot_direction;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncLightDistanceAttenuationBias(int light_index) {
|
||||
float dist_atten_bias =
|
||||
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
|
||||
.ToFloat32();
|
||||
|
||||
if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
|
||||
uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncLightDistanceAttenuationScale(int light_index) {
|
||||
float dist_atten_scale =
|
||||
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
|
||||
.ToFloat32();
|
||||
|
||||
if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
|
||||
uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncShadowBias() {
|
||||
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
|
||||
float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
|
||||
float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
|
||||
|
||||
if (constant != uniform_block_data.data.shadow_bias_constant ||
|
||||
linear != uniform_block_data.data.shadow_bias_linear) {
|
||||
uniform_block_data.data.shadow_bias_constant = constant;
|
||||
uniform_block_data.data.shadow_bias_linear = linear;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncShadowTextureBias() {
|
||||
int bias = Pica::g_state.regs.texturing.shadow.bias << 1;
|
||||
if (bias != uniform_block_data.data.shadow_texture_bias) {
|
||||
uniform_block_data.data.shadow_texture_bias = bias;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -3,137 +3,19 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/shader/shader_uniforms.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class RasterizerAccelerated : public RasterizerInterface {
|
||||
public:
|
||||
RasterizerAccelerated();
|
||||
virtual ~RasterizerAccelerated() = default;
|
||||
|
||||
void AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) override;
|
||||
|
||||
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override;
|
||||
|
||||
void ClearAll(bool flush) override;
|
||||
|
||||
protected:
|
||||
/// Syncs the depth scale to match the PICA register
|
||||
void SyncDepthScale();
|
||||
|
||||
/// Syncs the depth offset to match the PICA register
|
||||
void SyncDepthOffset();
|
||||
|
||||
/// Syncs the fog states to match the PICA register
|
||||
void SyncFogColor();
|
||||
|
||||
/// Sync the procedural texture noise configuration to match the PICA register
|
||||
void SyncProcTexNoise();
|
||||
|
||||
/// Sync the procedural texture bias configuration to match the PICA register
|
||||
void SyncProcTexBias();
|
||||
|
||||
/// Syncs the alpha test states to match the PICA register
|
||||
void SyncAlphaTest();
|
||||
|
||||
/// Syncs the TEV combiner color buffer to match the PICA register
|
||||
void SyncCombinerColor();
|
||||
|
||||
/// Syncs the TEV constant color to match the PICA register
|
||||
void SyncTevConstColor(std::size_t tev_index,
|
||||
const Pica::TexturingRegs::TevStageConfig& tev_stage);
|
||||
|
||||
/// Syncs the lighting global ambient color to match the PICA register
|
||||
void SyncGlobalAmbient();
|
||||
|
||||
/// Syncs the specified light's specular 0 color to match the PICA register
|
||||
void SyncLightSpecular0(int light_index);
|
||||
|
||||
/// Syncs the specified light's specular 1 color to match the PICA register
|
||||
void SyncLightSpecular1(int light_index);
|
||||
|
||||
/// Syncs the specified light's diffuse color to match the PICA register
|
||||
void SyncLightDiffuse(int light_index);
|
||||
|
||||
/// Syncs the specified light's ambient color to match the PICA register
|
||||
void SyncLightAmbient(int light_index);
|
||||
|
||||
/// Syncs the specified light's position to match the PICA register
|
||||
void SyncLightPosition(int light_index);
|
||||
|
||||
/// Syncs the specified spot light direcition to match the PICA register
|
||||
void SyncLightSpotDirection(int light_index);
|
||||
|
||||
/// Syncs the specified light's distance attenuation bias to match the PICA register
|
||||
void SyncLightDistanceAttenuationBias(int light_index);
|
||||
|
||||
/// Syncs the specified light's distance attenuation scale to match the PICA register
|
||||
void SyncLightDistanceAttenuationScale(int light_index);
|
||||
|
||||
/// Syncs the shadow rendering bias to match the PICA register
|
||||
void SyncShadowBias();
|
||||
|
||||
/// Syncs the shadow texture bias to match the PICA register
|
||||
void SyncShadowTextureBias();
|
||||
|
||||
protected:
|
||||
/// Structure that keeps tracks of the uniform state
|
||||
struct UniformBlockData {
|
||||
Pica::Shader::UniformData data{};
|
||||
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
|
||||
bool lighting_lut_dirty_any = true;
|
||||
bool fog_lut_dirty = true;
|
||||
bool proctex_noise_lut_dirty = true;
|
||||
bool proctex_color_map_dirty = true;
|
||||
bool proctex_alpha_map_dirty = true;
|
||||
bool proctex_lut_dirty = true;
|
||||
bool proctex_diff_lut_dirty = true;
|
||||
bool dirty = true;
|
||||
};
|
||||
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex {
|
||||
HardwareVertex() = default;
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
|
||||
|
||||
Common::Vec4f position;
|
||||
Common::Vec4f color;
|
||||
Common::Vec2f tex_coord0;
|
||||
Common::Vec2f tex_coord1;
|
||||
Common::Vec2f tex_coord2;
|
||||
float tex_coord0_w;
|
||||
Common::Vec4f normquat;
|
||||
Common::Vec3f view;
|
||||
};
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
/// Retrieve the range and the size of the input vertex
|
||||
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
||||
|
||||
protected:
|
||||
private:
|
||||
std::array<u16, 0x30000> cached_pages{};
|
||||
std::vector<HardwareVertex> vertex_batch;
|
||||
bool shader_dirty = true;
|
||||
|
||||
UniformBlockData uniform_block_data{};
|
||||
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
|
||||
lighting_lut_data{};
|
||||
std::array<Common::Vec2f, 128> fog_lut_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_color_map_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
|
||||
std::array<Common::Vec4f, 256> proctex_lut_data{};
|
||||
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
|
||||
};
|
||||
} // namespace VideoCore
|
||||
|
@ -908,7 +908,7 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
|
||||
|
||||
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
|
||||
|
||||
const auto staging = runtime.FindStaging(
|
||||
const auto& staging = runtime.FindStaging(
|
||||
load_info.width * load_info.height * surface->GetInternalBytesPerPixel(), true);
|
||||
MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(load_info.addr);
|
||||
if (!source_ptr) [[unlikely]] {
|
||||
@ -939,7 +939,7 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
|
||||
const u32 flush_end = boost::icl::last_next(interval);
|
||||
ASSERT(flush_start >= surface->addr && flush_end <= surface->end);
|
||||
|
||||
const auto staging = runtime.FindStaging(
|
||||
const auto& staging = runtime.FindStaging(
|
||||
flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(), false);
|
||||
const BufferTextureCopy download = {.buffer_offset = 0,
|
||||
.buffer_size = staging.size,
|
||||
|
@ -91,7 +91,8 @@ public:
|
||||
u32 fill_size = 0;
|
||||
|
||||
public:
|
||||
std::vector<std::weak_ptr<Watcher>> watchers;
|
||||
u32 watcher_count = 0;
|
||||
std::array<std::weak_ptr<Watcher>, 8> watchers;
|
||||
};
|
||||
|
||||
template <class S>
|
||||
@ -189,7 +190,7 @@ template <class S>
|
||||
auto SurfaceBase<S>::CreateWatcher() -> std::shared_ptr<Watcher> {
|
||||
auto weak_ptr = reinterpret_cast<S*>(this)->weak_from_this();
|
||||
auto watcher = std::make_shared<Watcher>(std::move(weak_ptr));
|
||||
watchers.push_back(watcher);
|
||||
watchers[watcher_count++] = watcher;
|
||||
return watcher;
|
||||
}
|
||||
|
||||
@ -211,7 +212,8 @@ void SurfaceBase<S>::UnlinkAllWatcher() {
|
||||
}
|
||||
}
|
||||
|
||||
watchers.clear();
|
||||
watchers = {};
|
||||
watcher_count = 0;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -13,11 +13,6 @@ FrameDumperOpenGL::FrameDumperOpenGL(VideoDumper::Backend& video_dumper_,
|
||||
Frontend::EmuWindow& emu_window)
|
||||
: video_dumper(video_dumper_), context(emu_window.CreateSharedContext()) {}
|
||||
|
||||
FrameDumperOpenGL::~FrameDumperOpenGL() {
|
||||
if (present_thread.joinable())
|
||||
present_thread.join();
|
||||
}
|
||||
|
||||
bool FrameDumperOpenGL::IsDumping() const {
|
||||
return video_dumper.IsDumping();
|
||||
}
|
||||
@ -27,22 +22,19 @@ Layout::FramebufferLayout FrameDumperOpenGL::GetLayout() const {
|
||||
}
|
||||
|
||||
void FrameDumperOpenGL::StartDumping() {
|
||||
if (present_thread.joinable())
|
||||
present_thread.join();
|
||||
|
||||
present_thread = std::thread(&FrameDumperOpenGL::PresentLoop, this);
|
||||
present_thread = std::jthread([&](std::stop_token stop_token) { PresentLoop(stop_token); });
|
||||
}
|
||||
|
||||
void FrameDumperOpenGL::StopDumping() {
|
||||
stop_requested.store(true, std::memory_order_relaxed);
|
||||
present_thread.request_stop();
|
||||
}
|
||||
|
||||
void FrameDumperOpenGL::PresentLoop() {
|
||||
void FrameDumperOpenGL::PresentLoop(std::stop_token stop_token) {
|
||||
const auto scope = context->Acquire();
|
||||
InitializeOpenGLObjects();
|
||||
|
||||
const auto& layout = GetLayout();
|
||||
while (!stop_requested.exchange(false)) {
|
||||
while (!stop_token.stop_requested()) {
|
||||
auto frame = mailbox->TryGetPresentFrame(200);
|
||||
if (!frame) {
|
||||
continue;
|
||||
|
@ -29,7 +29,7 @@ class RendererOpenGL;
|
||||
class FrameDumperOpenGL {
|
||||
public:
|
||||
explicit FrameDumperOpenGL(VideoDumper::Backend& video_dumper, Frontend::EmuWindow& emu_window);
|
||||
~FrameDumperOpenGL();
|
||||
~FrameDumperOpenGL() = default;
|
||||
|
||||
bool IsDumping() const;
|
||||
Layout::FramebufferLayout GetLayout() const;
|
||||
@ -41,12 +41,11 @@ public:
|
||||
private:
|
||||
void InitializeOpenGLObjects();
|
||||
void CleanupOpenGLObjects();
|
||||
void PresentLoop();
|
||||
void PresentLoop(std::stop_token stop_token);
|
||||
|
||||
VideoDumper::Backend& video_dumper;
|
||||
std::unique_ptr<Frontend::GraphicsContext> context;
|
||||
std::thread present_thread;
|
||||
std::atomic_bool stop_requested{false};
|
||||
std::jthread present_thread;
|
||||
|
||||
// PBOs used to dump frames faster
|
||||
std::array<OGLBuffer, 2> pbos;
|
||||
|
@ -10,7 +10,7 @@ namespace OpenGL {
|
||||
enum class Vendor { Unknown = 0, AMD = 1, Nvidia = 2, Intel = 3, Generic = 4 };
|
||||
|
||||
enum class DriverBug {
|
||||
// AMD drivers sometimes freezes when one shader stage is changed but not the others.
|
||||
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
|
||||
ShaderStageChangeFreeze = 1 << 0,
|
||||
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
|
||||
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
|
||||
|
@ -20,26 +20,39 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||
constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));
|
||||
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
|
||||
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
||||
|
||||
static bool IsVendorAmd() {
|
||||
const std::string_view gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
|
||||
return gpu_vendor == "ATI Technologies Inc." || gpu_vendor == "Advanced Micro Devices, Inc.";
|
||||
}
|
||||
#ifdef __APPLE__
|
||||
static bool IsVendorIntel() {
|
||||
std::string gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
|
||||
return gpu_vendor == "Intel Inc.";
|
||||
}
|
||||
#endif
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driver)
|
||||
: driver{driver}, runtime{driver}, res_cache{*this, runtime},
|
||||
shader_program_manager{emu_window, driver, !driver.IsOpenGLES()},
|
||||
vertex_buffer{GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE},
|
||||
uniform_buffer{GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE},
|
||||
index_buffer{GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE},
|
||||
texture_buffer{GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE},
|
||||
texture_lf_buffer{GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE} {
|
||||
: driver{driver}, runtime{driver}, res_cache{*this, runtime}, is_amd(IsVendorAmd()),
|
||||
vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd),
|
||||
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false),
|
||||
index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false),
|
||||
texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false),
|
||||
texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {
|
||||
|
||||
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
|
||||
state.clip_distance[0] = true;
|
||||
|
||||
// Create a 1x1 clear texture to use in the NULL case,
|
||||
// instead of OpenGL's default of solid black
|
||||
default_texture.Create();
|
||||
glGenTextures(1, &default_texture);
|
||||
glBindTexture(GL_TEXTURE_2D, default_texture);
|
||||
// For some reason alpha 0 wraps around to 1.0, so use 1/255 instead
|
||||
u8 framebuffer_data[4] = {0, 0, 0, 1};
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
|
||||
@ -58,6 +71,19 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
|
||||
sw_vao.Create();
|
||||
hw_vao.Create();
|
||||
|
||||
uniform_block_data.dirty = true;
|
||||
|
||||
uniform_block_data.lighting_lut_dirty.fill(true);
|
||||
uniform_block_data.lighting_lut_dirty_any = true;
|
||||
|
||||
uniform_block_data.fog_lut_dirty = true;
|
||||
|
||||
uniform_block_data.proctex_noise_lut_dirty = true;
|
||||
uniform_block_data.proctex_color_map_dirty = true;
|
||||
uniform_block_data.proctex_alpha_map_dirty = true;
|
||||
uniform_block_data.proctex_lut_dirty = true;
|
||||
uniform_block_data.proctex_diff_lut_dirty = true;
|
||||
|
||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
|
||||
uniform_size_aligned_vs =
|
||||
Common::AlignUp<std::size_t>(sizeof(VSUniformData), uniform_buffer_alignment);
|
||||
@ -122,6 +148,17 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
|
||||
state.Apply();
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle());
|
||||
|
||||
#ifdef __APPLE__
|
||||
if (IsVendorIntel()) {
|
||||
shader_program_manager = std::make_unique<ShaderProgramManager>(
|
||||
emu_window, VideoCore::g_separable_shader_enabled, is_amd);
|
||||
} else {
|
||||
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, true, is_amd);
|
||||
}
|
||||
#else
|
||||
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, !GLES, is_amd);
|
||||
#endif
|
||||
|
||||
glEnable(GL_BLEND);
|
||||
|
||||
// Explicitly call the derived version to avoid warnings about calling virtual
|
||||
@ -133,7 +170,7 @@ RasterizerOpenGL::~RasterizerOpenGL() = default;
|
||||
|
||||
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||
shader_program_manager.LoadDiskCache(stop_loading, callback);
|
||||
shader_program_manager->LoadDiskCache(stop_loading, callback);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncEntireState() {
|
||||
@ -178,6 +215,39 @@ void RasterizerOpenGL::SyncEntireState() {
|
||||
SyncShadowTextureBias();
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
|
||||
* for a detailed description of this issue (yuriks):
|
||||
*
|
||||
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
||||
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
||||
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
||||
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
|
||||
* making Dot(Q1, -Q2) positive.
|
||||
*
|
||||
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
|
||||
* correct for most cases but can still rotate around the long way sometimes. An implementation
|
||||
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
|
||||
* between each step would work for those cases at the cost of being more complex to implement.
|
||||
*
|
||||
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
|
||||
* these issues, making this basic implementation actually more accurate to the hardware.
|
||||
*/
|
||||
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
|
||||
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
|
||||
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
|
||||
|
||||
return (Common::Dot(a, b) < 0.f);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) {
|
||||
vertex_batch.emplace_back(v0, false);
|
||||
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
||||
}
|
||||
|
||||
static constexpr std::array<GLenum, 4> vs_attrib_types{
|
||||
GL_BYTE, // VertexAttributeFormat::BYTE
|
||||
GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
|
||||
@ -185,7 +255,50 @@ static constexpr std::array<GLenum, 4> vs_attrib_types{
|
||||
GL_FLOAT // VertexAttributeFormat::FLOAT
|
||||
};
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
|
||||
u32 vertex_min;
|
||||
u32 vertex_max;
|
||||
if (is_indexed) {
|
||||
const auto& index_info = regs.pipeline.index_array;
|
||||
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
||||
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
|
||||
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
||||
const bool index_u16 = index_info.format != 0;
|
||||
|
||||
vertex_min = 0xFFFF;
|
||||
vertex_max = 0;
|
||||
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||
res_cache.FlushRegion(address, size, nullptr);
|
||||
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
||||
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
||||
vertex_min = std::min(vertex_min, vertex);
|
||||
vertex_max = std::max(vertex_max, vertex);
|
||||
}
|
||||
} else {
|
||||
vertex_min = regs.pipeline.vertex_offset;
|
||||
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
||||
}
|
||||
|
||||
const u32 vertex_num = vertex_max - vertex_min + 1;
|
||||
u32 vs_input_size = 0;
|
||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||
if (loader.component_count != 0) {
|
||||
vs_input_size += loader.byte_count * vertex_num;
|
||||
}
|
||||
}
|
||||
|
||||
return {vertex_min, vertex_max, vs_input_size};
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
|
||||
GLuint vs_input_index_min, GLuint vs_input_index_max) {
|
||||
MICROPROFILE_SCOPE(OpenGL_VAO);
|
||||
@ -265,14 +378,12 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
|
||||
}
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||
bool RasterizerOpenGL::SetupVertexShader() {
|
||||
MICROPROFILE_SCOPE(OpenGL_VS);
|
||||
return shader_program_manager.UseProgrammableVertexShader(Pica::g_state.regs,
|
||||
return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs,
|
||||
Pica::g_state.vs);
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
|
||||
bool RasterizerOpenGL::SetupGeometryShader() {
|
||||
MICROPROFILE_SCOPE(OpenGL_GS);
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
@ -282,7 +393,7 @@ bool RasterizerOpenGL::SetupGeometryShader() {
|
||||
return false;
|
||||
}
|
||||
|
||||
shader_program_manager.UseFixedGeometryShader(regs);
|
||||
shader_program_manager->UseFixedGeometryShader(regs);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -343,7 +454,7 @@ bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) {
|
||||
SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max);
|
||||
vertex_buffer.Unmap(vs_input_size);
|
||||
|
||||
shader_program_manager.ApplyTo(state);
|
||||
shader_program_manager->ApplyTo(state);
|
||||
state.Apply();
|
||||
|
||||
if (is_indexed) {
|
||||
@ -378,7 +489,6 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||
Draw(false, false);
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||
bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
@ -606,7 +716,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
|
||||
// the geometry in question.
|
||||
// For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
|
||||
// on the male character's face, which in the OpenGL default appear black.
|
||||
state.texture_units[texture_index].texture_2d = default_texture.handle;
|
||||
state.texture_units[texture_index].texture_2d = default_texture;
|
||||
}
|
||||
} else {
|
||||
state.texture_units[texture_index].texture_2d = 0;
|
||||
@ -670,9 +780,9 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
|
||||
} else {
|
||||
state.draw.vertex_array = sw_vao.handle;
|
||||
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||
shader_program_manager.UseTrivialVertexShader();
|
||||
shader_program_manager.UseTrivialGeometryShader();
|
||||
shader_program_manager.ApplyTo(state);
|
||||
shader_program_manager->UseTrivialVertexShader();
|
||||
shader_program_manager->UseTrivialGeometryShader();
|
||||
shader_program_manager->ApplyTo(state);
|
||||
state.Apply();
|
||||
|
||||
std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
|
||||
@ -767,7 +877,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
|
||||
|
||||
// Blending
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
|
||||
if (driver.IsOpenGLES()) {
|
||||
if (GLES) {
|
||||
// With GLES, we need this in the fragment shader to emulate logic operations
|
||||
shader_dirty = true;
|
||||
}
|
||||
@ -891,7 +1001,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
|
||||
|
||||
// Logic op
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
|
||||
if (driver.IsOpenGLES()) {
|
||||
if (GLES) {
|
||||
// With GLES, we need this in the fragment shader to emulate logic operations
|
||||
shader_dirty = true;
|
||||
}
|
||||
@ -1229,7 +1339,6 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
|
||||
}
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
||||
void RasterizerOpenGL::FlushAll() {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
res_cache.FlushAll();
|
||||
@ -1251,7 +1360,6 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
|
||||
res_cache.InvalidateRegion(addr, size, nullptr);
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
|
||||
bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
|
||||
@ -1502,7 +1610,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetShader() {
|
||||
shader_program_manager.UseFragmentShader(Pica::g_state.regs);
|
||||
shader_program_manager->UseFragmentShader(Pica::g_state.regs);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncClipEnabled() {
|
||||
@ -1545,6 +1653,24 @@ void RasterizerOpenGL::SyncCullMode() {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncDepthScale() {
|
||||
float depth_scale =
|
||||
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
|
||||
if (depth_scale != uniform_block_data.data.depth_scale) {
|
||||
uniform_block_data.data.depth_scale = depth_scale;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncDepthOffset() {
|
||||
float depth_offset =
|
||||
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32();
|
||||
if (depth_offset != uniform_block_data.data.depth_offset) {
|
||||
uniform_block_data.data.depth_offset = depth_offset;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncBlendEnabled() {
|
||||
state.blend.enabled = (Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1);
|
||||
}
|
||||
@ -1574,11 +1700,56 @@ void RasterizerOpenGL::SyncBlendColor() {
|
||||
state.blend.color.alpha = blend_color[3];
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncFogColor() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
uniform_block_data.data.fog_color = {
|
||||
regs.texturing.fog_color.r.Value() / 255.0f,
|
||||
regs.texturing.fog_color.g.Value() / 255.0f,
|
||||
regs.texturing.fog_color.b.Value() / 255.0f,
|
||||
};
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncProcTexNoise() {
|
||||
const auto& regs = Pica::g_state.regs.texturing;
|
||||
uniform_block_data.data.proctex_noise_f = {
|
||||
Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(),
|
||||
Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(),
|
||||
};
|
||||
uniform_block_data.data.proctex_noise_a = {
|
||||
regs.proctex_noise_u.amplitude / 4095.0f,
|
||||
regs.proctex_noise_v.amplitude / 4095.0f,
|
||||
};
|
||||
uniform_block_data.data.proctex_noise_p = {
|
||||
Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(),
|
||||
Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(),
|
||||
};
|
||||
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncProcTexBias() {
|
||||
const auto& regs = Pica::g_state.regs.texturing;
|
||||
uniform_block_data.data.proctex_bias =
|
||||
Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8))
|
||||
.ToFloat32();
|
||||
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncAlphaTest() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
|
||||
uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLogicOp() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op);
|
||||
|
||||
if (driver.IsOpenGLES()) {
|
||||
if (GLES) {
|
||||
if (!regs.framebuffer.output_merger.alphablend_enable) {
|
||||
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
|
||||
// Color output is disabled by logic operation. We use color write mask to skip
|
||||
@ -1591,7 +1762,7 @@ void RasterizerOpenGL::SyncLogicOp() {
|
||||
|
||||
void RasterizerOpenGL::SyncColorWriteMask() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
if (driver.IsOpenGLES()) {
|
||||
if (GLES) {
|
||||
if (!regs.framebuffer.output_merger.alphablend_enable) {
|
||||
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
|
||||
// Color output is disabled by logic operation. We use color write mask to skip
|
||||
@ -1657,6 +1828,131 @@ void RasterizerOpenGL::SyncDepthTest() {
|
||||
: GL_ALWAYS;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncCombinerColor() {
|
||||
auto combiner_color =
|
||||
PicaToGL::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw);
|
||||
if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) {
|
||||
uniform_block_data.data.tev_combiner_buffer_color = combiner_color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncTevConstColor(std::size_t stage_index,
|
||||
const Pica::TexturingRegs::TevStageConfig& tev_stage) {
|
||||
const auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color);
|
||||
|
||||
if (const_color == uniform_block_data.data.const_color[stage_index]) {
|
||||
return;
|
||||
}
|
||||
|
||||
uniform_block_data.data.const_color[stage_index] = const_color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncGlobalAmbient() {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient);
|
||||
if (color != uniform_block_data.data.lighting_global_ambient) {
|
||||
uniform_block_data.data.lighting_global_ambient = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightSpecular0(int light_index) {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
|
||||
if (color != uniform_block_data.data.light_src[light_index].specular_0) {
|
||||
uniform_block_data.data.light_src[light_index].specular_0 = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightSpecular1(int light_index) {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
|
||||
if (color != uniform_block_data.data.light_src[light_index].specular_1) {
|
||||
uniform_block_data.data.light_src[light_index].specular_1 = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightDiffuse(int light_index) {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
|
||||
if (color != uniform_block_data.data.light_src[light_index].diffuse) {
|
||||
uniform_block_data.data.light_src[light_index].diffuse = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightAmbient(int light_index) {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
|
||||
if (color != uniform_block_data.data.light_src[light_index].ambient) {
|
||||
uniform_block_data.data.light_src[light_index].ambient = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightPosition(int light_index) {
|
||||
const auto& light = Pica::g_state.regs.lighting.light[light_index];
|
||||
const Common::Vec3f position = {Pica::float16::FromRaw(light.x).ToFloat32(),
|
||||
Pica::float16::FromRaw(light.y).ToFloat32(),
|
||||
Pica::float16::FromRaw(light.z).ToFloat32()};
|
||||
|
||||
if (position != uniform_block_data.data.light_src[light_index].position) {
|
||||
uniform_block_data.data.light_src[light_index].position = position;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightSpotDirection(int light_index) {
|
||||
const auto& light = Pica::g_state.regs.lighting.light[light_index];
|
||||
const auto spot_direction =
|
||||
Common::Vec3f{light.spot_x / 2047.0f, light.spot_y / 2047.0f, light.spot_z / 2047.0f};
|
||||
|
||||
if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) {
|
||||
uniform_block_data.data.light_src[light_index].spot_direction = spot_direction;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) {
|
||||
const auto& light = Pica::g_state.regs.lighting.light[light_index];
|
||||
float dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
|
||||
|
||||
if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
|
||||
uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
|
||||
const auto& light = Pica::g_state.regs.lighting.light[light_index];
|
||||
float dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
|
||||
|
||||
if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
|
||||
uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncShadowBias() {
|
||||
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
|
||||
GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
|
||||
GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
|
||||
|
||||
if (constant != uniform_block_data.data.shadow_bias_constant ||
|
||||
linear != uniform_block_data.data.shadow_bias_linear) {
|
||||
uniform_block_data.data.shadow_bias_constant = constant;
|
||||
uniform_block_data.data.shadow_bias_linear = linear;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncShadowTextureBias() {
|
||||
GLint bias = Pica::g_state.regs.texturing.shadow.bias << 1;
|
||||
if (bias != uniform_block_data.data.shadow_texture_bias) {
|
||||
uniform_block_data.data.shadow_texture_bias = bias;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncAndUploadLUTsLF() {
|
||||
constexpr std::size_t max_size =
|
||||
sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler +
|
||||
|
@ -3,13 +3,17 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/vector_math.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/pica_types.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/regs_lighting.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_runtime.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
@ -30,6 +34,8 @@ public:
|
||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) override;
|
||||
void DrawTriangles() override;
|
||||
void NotifyPicaRegisterChanged(u32 id) override;
|
||||
void FlushAll() override;
|
||||
@ -73,6 +79,48 @@ private:
|
||||
bool supress_mipmap_for_cube = false;
|
||||
};
|
||||
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex {
|
||||
HardwareVertex() = default;
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
|
||||
position[0] = v.pos.x.ToFloat32();
|
||||
position[1] = v.pos.y.ToFloat32();
|
||||
position[2] = v.pos.z.ToFloat32();
|
||||
position[3] = v.pos.w.ToFloat32();
|
||||
color[0] = v.color.x.ToFloat32();
|
||||
color[1] = v.color.y.ToFloat32();
|
||||
color[2] = v.color.z.ToFloat32();
|
||||
color[3] = v.color.w.ToFloat32();
|
||||
tex_coord0[0] = v.tc0.x.ToFloat32();
|
||||
tex_coord0[1] = v.tc0.y.ToFloat32();
|
||||
tex_coord1[0] = v.tc1.x.ToFloat32();
|
||||
tex_coord1[1] = v.tc1.y.ToFloat32();
|
||||
tex_coord2[0] = v.tc2.x.ToFloat32();
|
||||
tex_coord2[1] = v.tc2.y.ToFloat32();
|
||||
tex_coord0_w = v.tc0_w.ToFloat32();
|
||||
normquat[0] = v.quat.x.ToFloat32();
|
||||
normquat[1] = v.quat.y.ToFloat32();
|
||||
normquat[2] = v.quat.z.ToFloat32();
|
||||
normquat[3] = v.quat.w.ToFloat32();
|
||||
view[0] = v.view.x.ToFloat32();
|
||||
view[1] = v.view.y.ToFloat32();
|
||||
view[2] = v.view.z.ToFloat32();
|
||||
|
||||
if (flip_quaternion) {
|
||||
normquat = -normquat;
|
||||
}
|
||||
}
|
||||
|
||||
Common::Vec4f position;
|
||||
Common::Vec4f color;
|
||||
Common::Vec2f tex_coord0;
|
||||
Common::Vec2f tex_coord1;
|
||||
Common::Vec2f tex_coord2;
|
||||
float tex_coord0_w;
|
||||
Common::Vec4f normquat;
|
||||
Common::Vec3f view;
|
||||
};
|
||||
|
||||
/// Syncs the clip enabled status to match the PICA register
|
||||
void SyncClipEnabled();
|
||||
|
||||
@ -85,6 +133,12 @@ private:
|
||||
/// Syncs the cull mode to match the PICA register
|
||||
void SyncCullMode();
|
||||
|
||||
/// Syncs the depth scale to match the PICA register
|
||||
void SyncDepthScale();
|
||||
|
||||
/// Syncs the depth offset to match the PICA register
|
||||
void SyncDepthOffset();
|
||||
|
||||
/// Syncs the blend enabled status to match the PICA register
|
||||
void SyncBlendEnabled();
|
||||
|
||||
@ -94,6 +148,18 @@ private:
|
||||
/// Syncs the blend color to match the PICA register
|
||||
void SyncBlendColor();
|
||||
|
||||
/// Syncs the fog states to match the PICA register
|
||||
void SyncFogColor();
|
||||
|
||||
/// Sync the procedural texture noise configuration to match the PICA register
|
||||
void SyncProcTexNoise();
|
||||
|
||||
/// Sync the procedural texture bias configuration to match the PICA register
|
||||
void SyncProcTexBias();
|
||||
|
||||
/// Syncs the alpha test states to match the PICA register
|
||||
void SyncAlphaTest();
|
||||
|
||||
/// Syncs the logic op states to match the PICA register
|
||||
void SyncLogicOp();
|
||||
|
||||
@ -112,6 +178,46 @@ private:
|
||||
/// Syncs the depth test states to match the PICA register
|
||||
void SyncDepthTest();
|
||||
|
||||
/// Syncs the TEV combiner color buffer to match the PICA register
|
||||
void SyncCombinerColor();
|
||||
|
||||
/// Syncs the TEV constant color to match the PICA register
|
||||
void SyncTevConstColor(std::size_t tev_index,
|
||||
const Pica::TexturingRegs::TevStageConfig& tev_stage);
|
||||
|
||||
/// Syncs the lighting global ambient color to match the PICA register
|
||||
void SyncGlobalAmbient();
|
||||
|
||||
/// Syncs the specified light's specular 0 color to match the PICA register
|
||||
void SyncLightSpecular0(int light_index);
|
||||
|
||||
/// Syncs the specified light's specular 1 color to match the PICA register
|
||||
void SyncLightSpecular1(int light_index);
|
||||
|
||||
/// Syncs the specified light's diffuse color to match the PICA register
|
||||
void SyncLightDiffuse(int light_index);
|
||||
|
||||
/// Syncs the specified light's ambient color to match the PICA register
|
||||
void SyncLightAmbient(int light_index);
|
||||
|
||||
/// Syncs the specified light's position to match the PICA register
|
||||
void SyncLightPosition(int light_index);
|
||||
|
||||
/// Syncs the specified spot light direcition to match the PICA register
|
||||
void SyncLightSpotDirection(int light_index);
|
||||
|
||||
/// Syncs the specified light's distance attenuation bias to match the PICA register
|
||||
void SyncLightDistanceAttenuationBias(int light_index);
|
||||
|
||||
/// Syncs the specified light's distance attenuation scale to match the PICA register
|
||||
void SyncLightDistanceAttenuationScale(int light_index);
|
||||
|
||||
/// Syncs the shadow rendering bias to match the PICA register
|
||||
void SyncShadowBias();
|
||||
|
||||
/// Syncs the shadow texture bias to match the PICA register
|
||||
void SyncShadowTextureBias();
|
||||
|
||||
/// Syncs and uploads the lighting, fog and proctex LUTs
|
||||
void SyncAndUploadLUTs();
|
||||
void SyncAndUploadLUTsLF();
|
||||
@ -125,6 +231,15 @@ private:
|
||||
/// Internal implementation for AccelerateDrawBatch
|
||||
bool AccelerateDrawBatchInternal(bool is_indexed);
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
/// Retrieve the range and the size of the input vertex
|
||||
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
||||
|
||||
/// Setup vertex array for AccelerateDrawBatch
|
||||
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min,
|
||||
GLuint vs_input_index_max);
|
||||
@ -138,15 +253,41 @@ private:
|
||||
private:
|
||||
Driver& driver;
|
||||
OpenGLState state;
|
||||
GLuint default_texture;
|
||||
|
||||
TextureRuntime runtime;
|
||||
RasterizerCache res_cache;
|
||||
ShaderProgramManager shader_program_manager;
|
||||
|
||||
std::vector<HardwareVertex> vertex_batch;
|
||||
|
||||
bool is_amd;
|
||||
bool shader_dirty = true;
|
||||
|
||||
struct {
|
||||
UniformData data;
|
||||
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty;
|
||||
bool lighting_lut_dirty_any;
|
||||
bool fog_lut_dirty;
|
||||
bool proctex_noise_lut_dirty;
|
||||
bool proctex_color_map_dirty;
|
||||
bool proctex_alpha_map_dirty;
|
||||
bool proctex_lut_dirty;
|
||||
bool proctex_diff_lut_dirty;
|
||||
bool dirty;
|
||||
} uniform_block_data = {};
|
||||
|
||||
std::unique_ptr<ShaderProgramManager> shader_program_manager;
|
||||
|
||||
// They shall be big enough for about one frame.
|
||||
static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
|
||||
OGLVertexArray sw_vao; // VAO for software shader draw
|
||||
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
|
||||
std::array<bool, 16> hw_vao_enabled_attributes{};
|
||||
|
||||
OGLTexture default_texture;
|
||||
std::array<SamplerInfo, 3> texture_samplers;
|
||||
OGLStreamBuffer vertex_buffer;
|
||||
OGLStreamBuffer uniform_buffer;
|
||||
@ -163,6 +304,15 @@ private:
|
||||
OGLTexture texture_buffer_lut_lf;
|
||||
OGLTexture texture_buffer_lut_rg;
|
||||
OGLTexture texture_buffer_lut_rgba;
|
||||
|
||||
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
|
||||
lighting_lut_data{};
|
||||
std::array<Common::Vec2f, 128> fog_lut_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_color_map_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
|
||||
std::array<Common::Vec4f, 256> proctex_lut_data{};
|
||||
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_driver.h"
|
||||
#include "video_core/renderer_opengl/gl_vars.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace OpenGL {
|
||||
@ -327,13 +327,12 @@ using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FR
|
||||
|
||||
class ShaderProgramManager::Impl {
|
||||
public:
|
||||
explicit Impl(bool separable)
|
||||
: separable(separable), programmable_vertex_shaders(separable),
|
||||
explicit Impl(bool separable, bool is_amd)
|
||||
: is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable),
|
||||
trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
|
||||
fragment_shaders(separable), disk_cache(separable) {
|
||||
if (separable) {
|
||||
if (separable)
|
||||
pipeline.Create();
|
||||
}
|
||||
}
|
||||
|
||||
struct ShaderTuple {
|
||||
@ -362,19 +361,25 @@ public:
|
||||
static_assert(offsetof(ShaderTuple, fs_hash) == sizeof(std::size_t) * 2,
|
||||
"ShaderTuple layout changed!");
|
||||
|
||||
bool is_amd;
|
||||
bool separable;
|
||||
|
||||
ShaderTuple current;
|
||||
|
||||
ProgrammableVertexShaders programmable_vertex_shaders;
|
||||
TrivialVertexShader trivial_vertex_shader;
|
||||
|
||||
FixedGeometryShaders fixed_geometry_shaders;
|
||||
|
||||
FragmentShaders fragment_shaders;
|
||||
std::unordered_map<u64, OGLProgram> program_cache;
|
||||
OGLPipeline pipeline;
|
||||
ShaderDiskCache disk_cache;
|
||||
};
|
||||
|
||||
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable)
|
||||
: impl(std::make_unique<Impl>(separable)), emu_window{emu_window_}, driver{driver} {}
|
||||
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
|
||||
bool is_amd)
|
||||
: impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
|
||||
|
||||
ShaderProgramManager::~ShaderProgramManager() = default;
|
||||
|
||||
@ -436,7 +441,10 @@ void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
|
||||
|
||||
void ShaderProgramManager::ApplyTo(OpenGLState& state) {
|
||||
if (impl->separable) {
|
||||
if (driver.HasBug(DriverBug::ShaderStageChangeFreeze)) {
|
||||
if (impl->is_amd) {
|
||||
// Without this reseting, AMD sometimes freezes when one stage is changed but not
|
||||
// for the others. On the other hand, including this reset seems to introduce memory
|
||||
// leak in Intel Graphics.
|
||||
glUseProgramStages(
|
||||
impl->pipeline.handle,
|
||||
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);
|
||||
|
@ -107,13 +107,12 @@ static_assert(sizeof(VSUniformData) == 1856,
|
||||
static_assert(sizeof(VSUniformData) < 16384,
|
||||
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
class Driver;
|
||||
class OpenGLState;
|
||||
|
||||
/// A class that manage different shader stages and configures them with given config data.
|
||||
class ShaderProgramManager {
|
||||
public:
|
||||
ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable);
|
||||
ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd);
|
||||
~ShaderProgramManager();
|
||||
|
||||
void LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
@ -134,7 +133,7 @@ public:
|
||||
private:
|
||||
class Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
|
||||
Frontend::EmuWindow& emu_window;
|
||||
Driver& driver;
|
||||
};
|
||||
} // namespace OpenGL
|
||||
|
@ -12,21 +12,32 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback, bool prefer_coherent)
|
||||
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
|
||||
bool prefer_coherent)
|
||||
: gl_target(target), buffer_size(size) {
|
||||
gl_buffer.Create();
|
||||
glBindBuffer(gl_target, gl_buffer.handle);
|
||||
|
||||
GLsizeiptr allocate_size = size;
|
||||
if (array_buffer_for_amd) {
|
||||
// On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
|
||||
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
|
||||
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
|
||||
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
|
||||
// crash.
|
||||
allocate_size *= 2;
|
||||
}
|
||||
|
||||
if (GLAD_GL_ARB_buffer_storage) {
|
||||
persistent = true;
|
||||
coherent = prefer_coherent;
|
||||
GLbitfield flags =
|
||||
(readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
|
||||
glBufferStorage(gl_target, size, nullptr, flags);
|
||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
|
||||
glBufferStorage(gl_target, allocate_size, nullptr, flags);
|
||||
mapped_ptr = static_cast<u8*>(glMapBufferRange(
|
||||
gl_target, 0, buffer_size, flags | (!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0)));
|
||||
gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
|
||||
} else {
|
||||
glBufferData(gl_target, size, nullptr, GL_STREAM_DRAW);
|
||||
glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
|
||||
}
|
||||
}
|
||||
|
||||
@ -67,8 +78,8 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
|
||||
|
||||
if (invalidate || !persistent) {
|
||||
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
||||
GLbitfield flags = (readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
|
||||
(coherent ? GL_MAP_COHERENT_BIT : 0) | (!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0) |
|
||||
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
|
||||
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
|
||||
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
mapped_ptr = static_cast<u8*>(
|
||||
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
|
||||
@ -81,7 +92,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
|
||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||
ASSERT(size <= mapped_size);
|
||||
|
||||
if (!coherent && !readback && size > 0) {
|
||||
if (!coherent && size > 0) {
|
||||
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,8 @@ namespace OpenGL {
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback = false, bool prefer_coherent = false);
|
||||
explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
|
||||
bool prefer_coherent = false);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
@ -32,7 +33,6 @@ private:
|
||||
OGLBuffer gl_buffer;
|
||||
GLenum gl_target;
|
||||
|
||||
bool readback = false;
|
||||
bool coherent = false;
|
||||
bool persistent = false;
|
||||
|
||||
|
@ -36,7 +36,7 @@ static constexpr std::array COLOR_TUPLES_OES = {
|
||||
FormatTuple{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
|
||||
};
|
||||
|
||||
[[nodiscard]] GLbitfield MakeBufferMask(VideoCore::SurfaceType type) {
|
||||
GLbitfield MakeBufferMask(VideoCore::SurfaceType type) {
|
||||
switch (type) {
|
||||
case VideoCore::SurfaceType::Color:
|
||||
case VideoCore::SurfaceType::Texture:
|
||||
@ -53,13 +53,9 @@ static constexpr std::array COLOR_TUPLES_OES = {
|
||||
return GL_COLOR_BUFFER_BIT;
|
||||
}
|
||||
|
||||
constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||
constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||
|
||||
TextureRuntime::TextureRuntime(Driver& driver)
|
||||
: driver{driver}, filterer{Settings::values.texture_filter_name, VideoCore::GetResolutionScaleFactor()},
|
||||
downloader_es{false}, upload_buffer{GL_PIXEL_UNPACK_BUFFER, UPLOAD_BUFFER_SIZE},
|
||||
download_buffer{GL_PIXEL_PACK_BUFFER, DOWNLOAD_BUFFER_SIZE, true} {
|
||||
: driver{driver}, downloader_es{false}, filterer{Settings::values.texture_filter_name,
|
||||
VideoCore::GetResolutionScaleFactor()} {
|
||||
|
||||
read_fbo.Create();
|
||||
draw_fbo.Create();
|
||||
@ -74,14 +70,51 @@ TextureRuntime::TextureRuntime(Driver& driver)
|
||||
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
|
||||
}
|
||||
|
||||
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
|
||||
auto& buffer = upload ? upload_buffer : download_buffer;
|
||||
auto [data, offset, invalidate] = buffer.Map(size, 4);
|
||||
const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
|
||||
const GLenum target = upload ? GL_PIXEL_UNPACK_BUFFER : GL_PIXEL_PACK_BUFFER;
|
||||
const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT;
|
||||
auto& search = upload ? upload_buffers : download_buffers;
|
||||
|
||||
return StagingData{.buffer = buffer.GetHandle(),
|
||||
.size = size,
|
||||
.mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
|
||||
.buffer_offset = offset};
|
||||
// Attempt to find a free buffer that fits the requested data
|
||||
for (auto it = search.lower_bound({.size = size}); it != search.end(); it++) {
|
||||
if (!upload || it->IsFree()) {
|
||||
it->mapped = std::span{it->mapped.data(), size};
|
||||
return *it;
|
||||
}
|
||||
}
|
||||
|
||||
OGLBuffer buffer{};
|
||||
buffer.Create();
|
||||
|
||||
glBindBuffer(target, buffer.handle);
|
||||
|
||||
// Allocate a new buffer and map the data to the host
|
||||
std::byte* data = nullptr;
|
||||
if (driver.IsOpenGLES() && driver.HasExtBufferStorage()) {
|
||||
const GLbitfield storage =
|
||||
upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT;
|
||||
glBufferStorageEXT(target, size, nullptr,
|
||||
storage | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT);
|
||||
data = reinterpret_cast<std::byte*>(glMapBufferRange(
|
||||
target, 0, size, access | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT));
|
||||
} else if (driver.HasArbBufferStorage()) {
|
||||
const GLbitfield storage =
|
||||
upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT;
|
||||
glBufferStorage(target, size, nullptr,
|
||||
storage | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
|
||||
data = reinterpret_cast<std::byte*>(glMapBufferRange(
|
||||
target, 0, size, access | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT));
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
glBindBuffer(target, 0);
|
||||
|
||||
StagingBuffer staging = {
|
||||
.buffer = std::move(buffer), .mapped = std::span{data, size}, .size = size};
|
||||
|
||||
const auto& it = search.emplace(std::move(staging));
|
||||
return *it;
|
||||
}
|
||||
|
||||
const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_format) {
|
||||
@ -121,14 +154,12 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::spa
|
||||
OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||
VideoCore::TextureType type) {
|
||||
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
|
||||
const u32 levels = std::log2(std::max(width, height)) + 1;
|
||||
const GLenum target =
|
||||
type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D;
|
||||
|
||||
// Attempt to recycle an unused texture
|
||||
const VideoCore::HostTextureTag key = {
|
||||
.format = format, .width = width, .height = height, .layers = layers};
|
||||
|
||||
// Attempt to recycle an unused texture
|
||||
if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
|
||||
OGLTexture texture = std::move(it->second);
|
||||
texture_recycler.erase(it);
|
||||
@ -146,7 +177,8 @@ OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(target, texture.handle);
|
||||
|
||||
glTexStorage2D(target, levels, tuple.internal_format, width, height);
|
||||
glTexStorage2D(target, std::bit_width(std::max(width, height)), tuple.internal_format, width,
|
||||
height);
|
||||
|
||||
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
@ -338,41 +370,42 @@ Surface::~Surface() {
|
||||
}
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
|
||||
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
|
||||
MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64));
|
||||
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Upload);
|
||||
|
||||
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
|
||||
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
|
||||
|
||||
OpenGLState prev_state = OpenGLState::GetCurState();
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle);
|
||||
|
||||
const bool is_scaled = res_scale != 1;
|
||||
if (is_scaled) {
|
||||
ScaledUpload(upload, staging);
|
||||
} else {
|
||||
OpenGLState prev_state = OpenGLState::GetCurState();
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer);
|
||||
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, texture.handle);
|
||||
|
||||
const auto& tuple = runtime.GetFormatTuple(pixel_format);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, upload.texture_rect.left,
|
||||
upload.texture_rect.bottom, upload.texture_rect.GetWidth(),
|
||||
upload.texture_rect.GetHeight(), tuple.format, tuple.type,
|
||||
reinterpret_cast<void*>(staging.buffer_offset));
|
||||
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
runtime.upload_buffer.Unmap(staging.size);
|
||||
upload.texture_rect.GetHeight(), tuple.format, tuple.type, 0);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
|
||||
// Lock the staging buffer until glTexSubImage completes
|
||||
staging.Lock();
|
||||
InvalidateAllWatcher();
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
|
||||
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
|
||||
MICROPROFILE_DEFINE(OpenGL_Download, "OpenGLSurface", "Texture Download", MP_RGB(128, 192, 64));
|
||||
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Download);
|
||||
|
||||
// Ensure no bad interactions with GL_PACK_ALIGNMENT
|
||||
@ -382,11 +415,11 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer.handle);
|
||||
|
||||
const bool is_scaled = res_scale != 1;
|
||||
if (is_scaled) {
|
||||
ScaledDownload(download, staging);
|
||||
ScaledDownload(download);
|
||||
} else {
|
||||
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type,
|
||||
texture);
|
||||
@ -394,17 +427,15 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
|
||||
const auto& tuple = runtime.GetFormatTuple(pixel_format);
|
||||
glReadPixels(download.texture_rect.left, download.texture_rect.bottom,
|
||||
download.texture_rect.GetWidth(), download.texture_rect.GetHeight(),
|
||||
tuple.format, tuple.type,
|
||||
reinterpret_cast<void*>(staging.buffer_offset));
|
||||
|
||||
runtime.download_buffer.Unmap(staging.size);
|
||||
tuple.format, tuple.type, 0);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
}
|
||||
|
||||
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload,
|
||||
const StagingData& staging) {
|
||||
const StagingBuffer& staging) {
|
||||
const u32 rect_width = upload.texture_rect.GetWidth();
|
||||
const u32 rect_height = upload.texture_rect.GetHeight();
|
||||
const auto scaled_rect = upload.texture_rect * res_scale;
|
||||
@ -437,7 +468,7 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload,
|
||||
}
|
||||
}
|
||||
|
||||
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
|
||||
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
|
||||
const u32 rect_width = download.texture_rect.GetWidth();
|
||||
const u32 rect_height = download.texture_rect.GetHeight();
|
||||
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
|
||||
@ -467,14 +498,11 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download, const
|
||||
const auto& tuple = runtime.GetFormatTuple(pixel_format);
|
||||
if (driver.IsOpenGLES()) {
|
||||
const auto& downloader_es = runtime.GetDownloaderES();
|
||||
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect_height, rect_width,
|
||||
reinterpret_cast<void*>(staging.buffer_offset));
|
||||
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect_height,
|
||||
rect_width, 0);
|
||||
} else {
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
|
||||
reinterpret_cast<void*>(staging.buffer_offset));
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, 0);
|
||||
}
|
||||
|
||||
runtime.download_buffer.Unmap(staging.size);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "video_core/rasterizer_cache/rasterizer_cache.h"
|
||||
#include "video_core/rasterizer_cache/surface_base.h"
|
||||
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
#include "video_core/renderer_opengl/texture_downloader_es.h"
|
||||
#include "video_core/renderer_opengl/texture_filters/texture_filterer.h"
|
||||
|
||||
@ -20,11 +19,35 @@ struct FormatTuple {
|
||||
GLenum type;
|
||||
};
|
||||
|
||||
struct StagingData {
|
||||
GLuint buffer;
|
||||
u32 size = 0;
|
||||
std::span<std::byte> mapped{};
|
||||
GLintptr buffer_offset = 0;
|
||||
struct StagingBuffer {
|
||||
OGLBuffer buffer{};
|
||||
mutable OGLSync buffer_lock{};
|
||||
mutable std::span<std::byte> mapped{};
|
||||
u32 size{};
|
||||
|
||||
bool operator<(const StagingBuffer& other) const {
|
||||
return size < other.size;
|
||||
}
|
||||
|
||||
/// Returns true if the buffer does not take part in pending transfer operations
|
||||
bool IsFree() const {
|
||||
if (buffer_lock) {
|
||||
GLint status;
|
||||
glGetSynciv(buffer_lock.handle, GL_SYNC_STATUS, 1, nullptr, &status);
|
||||
return status == GL_SIGNALED;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Prevents the runtime from reusing the buffer until the transfer operation is complete
|
||||
void Lock() const {
|
||||
if (buffer_lock) {
|
||||
buffer_lock.Release();
|
||||
}
|
||||
|
||||
buffer_lock.Create();
|
||||
}
|
||||
};
|
||||
|
||||
class Driver;
|
||||
@ -42,7 +65,7 @@ public:
|
||||
~TextureRuntime() = default;
|
||||
|
||||
/// Maps an internal staging buffer of the provided size of pixel uploads/downloads
|
||||
StagingData FindStaging(u32 size, bool upload);
|
||||
const StagingBuffer& FindStaging(u32 size, bool upload);
|
||||
|
||||
/// Returns the OpenGL format tuple associated with the provided pixel format
|
||||
const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format);
|
||||
@ -99,12 +122,17 @@ private:
|
||||
|
||||
private:
|
||||
Driver& driver;
|
||||
TextureFilterer filterer;
|
||||
TextureDownloaderES downloader_es;
|
||||
TextureFilterer filterer;
|
||||
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
|
||||
std::unordered_multimap<VideoCore::HostTextureTag, OGLTexture> texture_recycler;
|
||||
OGLStreamBuffer upload_buffer, download_buffer;
|
||||
|
||||
// Staging buffers stored in increasing size
|
||||
std::multiset<StagingBuffer> upload_buffers;
|
||||
std::multiset<StagingBuffer> download_buffers;
|
||||
OGLFramebuffer read_fbo, draw_fbo;
|
||||
|
||||
// Recycled textures to reduce driver allocation overhead
|
||||
std::unordered_multimap<VideoCore::HostTextureTag, OGLTexture> texture_recycler;
|
||||
};
|
||||
|
||||
class Surface : public VideoCore::SurfaceBase<Surface> {
|
||||
@ -113,10 +141,10 @@ public:
|
||||
~Surface() override;
|
||||
|
||||
/// Uploads pixel data in staging to a rectangle region of the surface texture
|
||||
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
|
||||
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging);
|
||||
|
||||
/// Downloads pixel data to staging from a rectangle region of the surface texture
|
||||
void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging);
|
||||
void Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging);
|
||||
|
||||
/// Returns the bpp of the internal surface format
|
||||
u32 GetInternalBytesPerPixel() const {
|
||||
@ -125,10 +153,10 @@ public:
|
||||
|
||||
private:
|
||||
/// Uploads pixel data to scaled texture
|
||||
void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
|
||||
void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging);
|
||||
|
||||
/// Downloads scaled image by downscaling the requested rectangle
|
||||
void ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& staging);
|
||||
void ScaledDownload(const VideoCore::BufferTextureCopy& download);
|
||||
|
||||
private:
|
||||
TextureRuntime& runtime;
|
||||
|
@ -141,6 +141,7 @@ void main(){
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, MAX_SIZE, MAX_SIZE);
|
||||
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
|
||||
r32ui_renderbuffer.handle);
|
||||
glUniform1i(glGetUniformLocation(d24s8_r32ui_conversion_shader.program.handle, "depth"), 1);
|
||||
|
||||
state.draw.draw_framebuffer = depth16_fbo.handle;
|
||||
state.renderbuffer = r16_renderbuffer.handle;
|
||||
|
@ -149,12 +149,14 @@ struct ScreenRectVertex {
|
||||
Common::Vec2f tex_coord;
|
||||
};
|
||||
|
||||
constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 8192;
|
||||
|
||||
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
|
||||
: RendererBase{window}, instance{window, Settings::values.physical_device},
|
||||
scheduler{instance, renderpass_cache, *this},
|
||||
: RendererBase{window}, instance{window, Settings::values.physical_device}, scheduler{instance, *this},
|
||||
renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
|
||||
runtime{instance, scheduler, renderpass_cache, desc_manager},
|
||||
swapchain{instance, scheduler, renderpass_cache},
|
||||
vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
|
||||
rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} {
|
||||
|
||||
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
|
||||
@ -887,19 +889,12 @@ void RendererVulkan::SwapBuffers() {
|
||||
const auto& layout = render_window.GetFramebufferLayout();
|
||||
PrepareRendertarget();
|
||||
|
||||
const auto RecreateSwapchain = [&] {
|
||||
scheduler.Finish();
|
||||
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
|
||||
// Create swapchain if needed
|
||||
if (swapchain.NeedsRecreation()) {
|
||||
swapchain.Create(layout.width, layout.height);
|
||||
};
|
||||
}
|
||||
|
||||
do {
|
||||
if (swapchain.NeedsRecreation()) {
|
||||
RecreateSwapchain();
|
||||
}
|
||||
scheduler.WaitWorker();
|
||||
swapchain.AcquireNextImage();
|
||||
} while (swapchain.NeedsRecreation());
|
||||
swapchain.AcquireNextImage();
|
||||
|
||||
scheduler.Record([layout](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
const vk::Viewport viewport = {.x = 0.0f,
|
||||
@ -926,8 +921,9 @@ void RendererVulkan::SwapBuffers() {
|
||||
DrawScreens(layout, false);
|
||||
|
||||
const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
|
||||
const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore();
|
||||
const VkSemaphore present_ready = swapchain.GetPresentReadySemaphore();
|
||||
scheduler.Flush(present_ready, image_acquired);
|
||||
scheduler.WaitWorker();
|
||||
swapchain.Present();
|
||||
|
||||
m_current_frame++;
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "common/math_util.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
@ -23,6 +23,42 @@ struct FramebufferLayout;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Structure used for storing information about the textures for each 3DS screen
|
||||
struct TextureInfo {
|
||||
ImageAlloc alloc;
|
||||
u32 width;
|
||||
u32 height;
|
||||
GPU::Regs::PixelFormat format;
|
||||
};
|
||||
|
||||
/// Structure used for storing information about the display target for each 3DS screen
|
||||
struct ScreenInfo {
|
||||
ImageAlloc* display_texture = nullptr;
|
||||
Common::Rectangle<float> display_texcoords;
|
||||
TextureInfo texture;
|
||||
vk::Sampler sampler;
|
||||
};
|
||||
|
||||
// Uniform data used for presenting the 3DS screens
|
||||
struct PresentUniformData {
|
||||
glm::mat4 modelview;
|
||||
Common::Vec4f i_resolution;
|
||||
Common::Vec4f o_resolution;
|
||||
int screen_id_l = 0;
|
||||
int screen_id_r = 0;
|
||||
int layer = 0;
|
||||
int reverse_interlaced = 0;
|
||||
|
||||
// Returns an immutable byte view of the uniform data
|
||||
auto AsBytes() const {
|
||||
return std::as_bytes(std::span{this, 1});
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(sizeof(PresentUniformData) < 256, "PresentUniformData must be below 256 bytes!");
|
||||
|
||||
constexpr u32 PRESENT_PIPELINES = 3;
|
||||
|
||||
class RasterizerVulkan;
|
||||
|
||||
class RendererVulkan : public RendererBase {
|
||||
@ -73,10 +109,25 @@ private:
|
||||
DescriptorManager desc_manager;
|
||||
TextureRuntime runtime;
|
||||
Swapchain swapchain;
|
||||
StreamBuffer vertex_buffer;
|
||||
RasterizerVulkan rasterizer;
|
||||
|
||||
// Display information for top and bottom screens respectively
|
||||
// Present pipelines (Normal, Anaglyph, Interlaced)
|
||||
vk::PipelineLayout present_pipeline_layout;
|
||||
vk::DescriptorSetLayout present_descriptor_layout;
|
||||
vk::DescriptorUpdateTemplate present_update_template;
|
||||
std::array<vk::Pipeline, PRESENT_PIPELINES> present_pipelines;
|
||||
std::array<vk::DescriptorSet, PRESENT_PIPELINES> present_descriptor_sets;
|
||||
std::array<vk::ShaderModule, PRESENT_PIPELINES> present_shaders;
|
||||
std::array<vk::Sampler, 2> present_samplers;
|
||||
vk::ShaderModule present_vertex_shader;
|
||||
u32 current_pipeline = 0;
|
||||
u32 current_sampler = 0;
|
||||
|
||||
/// Display information for top and bottom screens respectively
|
||||
std::array<ScreenInfo, 3> screen_infos{};
|
||||
PresentUniformData draw_info{};
|
||||
vk::ClearColorValue clear_color{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,152 +0,0 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <glm/glm.hpp>
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Memory {
|
||||
class MemorySystem;
|
||||
}
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ScreenInfo;
|
||||
|
||||
class Instance;
|
||||
class RasterizerVulkan;
|
||||
class Scheduler;
|
||||
class Swapchain;
|
||||
class RenderpassCache;
|
||||
class DescriptorManager;
|
||||
|
||||
struct ScreenInfo {
|
||||
vk::ImageView image_view{};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
Common::Rectangle<f32> texcoords;
|
||||
};
|
||||
|
||||
using Images = std::array<vk::Image, 3>;
|
||||
|
||||
struct PresentUniformData {
|
||||
glm::mat4 modelview;
|
||||
Common::Vec4f i_resolution;
|
||||
Common::Vec4f o_resolution;
|
||||
int screen_id_l = 0;
|
||||
int screen_id_r = 0;
|
||||
int layer = 0;
|
||||
int reverse_interlaced = 0;
|
||||
|
||||
// Returns an immutable byte view of the uniform data
|
||||
auto AsBytes() const {
|
||||
return std::as_bytes(std::span{this, 1});
|
||||
}
|
||||
};
|
||||
|
||||
constexpr u32 PRESENT_PIPELINES = 3;
|
||||
|
||||
class BlitScreen {
|
||||
public:
|
||||
explicit BlitScreen(Frontend::EmuWindow& render_window, const Instance& instance,
|
||||
Scheduler& scheduler, Swapchain& swapchain, RenderpassCache& renderpass_cache,
|
||||
DescriptorManager& desc_manager, std::array<ScreenInfo, 3>& screen_infos);
|
||||
~BlitScreen();
|
||||
|
||||
void Recreate();
|
||||
|
||||
[[nodiscard]] vk::Semaphore Draw(const GPU::Regs::FramebufferConfig& framebuffer,
|
||||
const vk::Framebuffer& host_framebuffer,
|
||||
const Layout::FramebufferLayout layout, vk::Extent2D render_area,
|
||||
bool use_accelerated, u32 screen);
|
||||
|
||||
[[nodiscard]] vk::Semaphore DrawToSwapchain(const GPU::Regs::FramebufferConfig& framebuffer,
|
||||
bool use_accelerated);
|
||||
|
||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const vk::ImageView& image_view,
|
||||
vk::Extent2D extent);
|
||||
|
||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const vk::ImageView& image_view,
|
||||
vk::Extent2D extent, vk::RenderPass& rd);
|
||||
|
||||
private:
|
||||
void CreateStaticResources();
|
||||
void CreateShaders();
|
||||
void CreateSemaphores();
|
||||
void CreateDescriptorPool();
|
||||
void CreateRenderPass();
|
||||
vk::RenderPass CreateRenderPassImpl(vk::Format format, bool is_present = true);
|
||||
void CreateDescriptorSetLayout();
|
||||
void CreateDescriptorSets();
|
||||
void CreatePipelineLayout();
|
||||
void CreateGraphicsPipeline();
|
||||
void CreateSampler();
|
||||
|
||||
void CreateDynamicResources();
|
||||
void CreateFramebuffers();
|
||||
|
||||
void RefreshResources(const GPU::Regs::FramebufferConfig& framebuffer);
|
||||
void ReleaseRawImages();
|
||||
void CreateStagingBuffer(const GPU::Regs::FramebufferConfig& framebuffer);
|
||||
void CreateRawImages(const GPU::Regs::FramebufferConfig& framebuffer);
|
||||
|
||||
struct BufferData;
|
||||
|
||||
void UpdateDescriptorSet(std::size_t image_index, bool use_accelerated) const;
|
||||
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
||||
void SetVertexData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
||||
|
||||
private:
|
||||
Frontend::EmuWindow& render_window;
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
Swapchain& swapchain;
|
||||
RenderpassCache& renderpass_cache;
|
||||
DescriptorManager& desc_manager;
|
||||
Memory::MemorySystem& memory;
|
||||
std::array<ScreenInfo, 3>& screen_infos;
|
||||
std::size_t image_count;
|
||||
PresentUniformData draw_info{};
|
||||
StreamBuffer vertex_buffer;
|
||||
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
vk::DescriptorUpdateTemplate update_template;
|
||||
std::array<vk::Pipeline, PRESENT_PIPELINES> pipelines;
|
||||
std::array<vk::DescriptorSet, PRESENT_PIPELINES> descriptor_sets;
|
||||
std::array<vk::ShaderModule, PRESENT_PIPELINES> shaders;
|
||||
std::array<vk::Sampler, 2> samplers;
|
||||
vk::ShaderModule vertex_shader;
|
||||
u32 current_pipeline = 0;
|
||||
u32 current_sampler = 0;
|
||||
|
||||
vk::RenderPass renderpass;
|
||||
std::vector<vk::Framebuffer> framebuffers;
|
||||
std::vector<u64> resource_ticks;
|
||||
std::vector<vk::Semaphore> semaphores;
|
||||
std::vector<Images> raw_images;
|
||||
GPU::Regs::PixelFormat pixel_format;
|
||||
u32 raw_width;
|
||||
u32 raw_height;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include "common/common_types.h"
|
||||
|
||||
// Include vulkan-hpp header
|
||||
#define VK_NO_PROTOTYPES 1
|
||||
|
@ -13,8 +13,6 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
vk::DynamicLoader Instance::dl;
|
||||
|
||||
vk::Format ToVkFormat(VideoCore::PixelFormat format) {
|
||||
switch (format) {
|
||||
case VideoCore::PixelFormat::RGBA8:
|
||||
@ -42,7 +40,7 @@ vk::Format ToVkFormat(VideoCore::PixelFormat format) {
|
||||
}
|
||||
}
|
||||
|
||||
Instance::Instance(bool validation, bool dump_command_buffers) {
|
||||
Instance::Instance() {
|
||||
// Fetch instance independant function pointers
|
||||
auto vkGetInstanceProcAddr =
|
||||
dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
|
||||
@ -54,19 +52,7 @@ Instance::Instance(bool validation, bool dump_command_buffers) {
|
||||
.engineVersion = VK_MAKE_VERSION(1, 0, 0),
|
||||
.apiVersion = VK_API_VERSION_1_0};
|
||||
|
||||
u32 layer_count = 0;
|
||||
std::array<const char*, 2> layers;
|
||||
|
||||
if (validation) {
|
||||
layers[layer_count++] = "VK_LAYER_KHRONOS_validation";
|
||||
}
|
||||
if (dump_command_buffers) {
|
||||
layers[layer_count++] = "VK_LAYER_LUNARG_api_dump";
|
||||
}
|
||||
|
||||
const vk::InstanceCreateInfo instance_info = {.pApplicationInfo = &application_info,
|
||||
.enabledLayerCount = layer_count,
|
||||
.ppEnabledLayerNames = layers.data()};
|
||||
const vk::InstanceCreateInfo instance_info = {.pApplicationInfo = &application_info};
|
||||
|
||||
instance = vk::createInstance(instance_info);
|
||||
|
||||
@ -255,7 +241,6 @@ bool Instance::CreateDevice() {
|
||||
|
||||
// Not having geometry shaders will cause issues with accelerated rendering.
|
||||
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
|
||||
device_features = available;
|
||||
if (!available.geometryShader) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Geometry shaders not availabe! Accelerated rendering not possible!");
|
||||
@ -287,6 +272,7 @@ bool Instance::CreateDevice() {
|
||||
};
|
||||
|
||||
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
AddExtension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
|
||||
AddExtension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
|
||||
timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
|
||||
extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
|
@ -31,7 +31,7 @@ struct FormatTraits {
|
||||
/// The global Vulkan instance
|
||||
class Instance {
|
||||
public:
|
||||
Instance(bool validation = false, bool dump_command_buffers = false);
|
||||
Instance(); ///< Portable constructor used to query physical devices
|
||||
Instance(Frontend::EmuWindow& window, u32 physical_device_index);
|
||||
~Instance();
|
||||
|
||||
@ -85,11 +85,6 @@ public:
|
||||
return present_queue;
|
||||
}
|
||||
|
||||
/// Returns true if logic operations need shader emulation
|
||||
bool NeedsLogicOpEmulation() const {
|
||||
return !device_features.logicOp;
|
||||
}
|
||||
|
||||
/// Returns true when VK_KHR_timeline_semaphore is supported
|
||||
bool IsTimelineSemaphoreSupported() const {
|
||||
return timeline_semaphores;
|
||||
@ -144,13 +139,12 @@ private:
|
||||
void CreateAllocator();
|
||||
|
||||
private:
|
||||
static vk::DynamicLoader dl;
|
||||
vk::DynamicLoader dl;
|
||||
vk::Device device;
|
||||
vk::PhysicalDevice physical_device;
|
||||
vk::Instance instance;
|
||||
vk::SurfaceKHR surface;
|
||||
vk::PhysicalDeviceProperties device_properties;
|
||||
vk::PhysicalDeviceFeatures device_features;
|
||||
VmaAllocator allocator;
|
||||
vk::Queue present_queue;
|
||||
vk::Queue graphics_queue;
|
||||
|
@ -5,8 +5,8 @@
|
||||
#include <filesystem>
|
||||
#include "common/common_paths.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
@ -17,33 +17,37 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
|
||||
switch (format) {
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
|
||||
return sizeof(float) * size;
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
|
||||
return sizeof(u16) * size;
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
|
||||
return sizeof(u8) * size;
|
||||
u32 AttribBytes(VertexAttribute attrib) {
|
||||
switch (attrib.type) {
|
||||
case AttribType::Float:
|
||||
return sizeof(float) * attrib.size;
|
||||
case AttribType::Int:
|
||||
return sizeof(u32) * attrib.size;
|
||||
case AttribType::Short:
|
||||
return sizeof(u16) * attrib.size;
|
||||
case AttribType::Byte:
|
||||
case AttribType::Ubyte:
|
||||
return sizeof(u8) * attrib.size;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
vk::Format ToVkAttributeFormat(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
|
||||
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
|
||||
constexpr std::array attribute_formats = {
|
||||
std::array{vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32Sfloat,
|
||||
vk::Format::eR32G32B32A32Sfloat},
|
||||
std::array{vk::Format::eR32Sint, vk::Format::eR32G32Sint, vk::Format::eR32G32B32Sint,
|
||||
vk::Format::eR32G32B32A32Sint},
|
||||
std::array{vk::Format::eR16Sint, vk::Format::eR16G16Sint, vk::Format::eR16G16B16Sint,
|
||||
vk::Format::eR16G16B16A16Sint},
|
||||
std::array{vk::Format::eR8Sint, vk::Format::eR8G8Sint, vk::Format::eR8G8B8Sint,
|
||||
vk::Format::eR8G8B8A8Sint},
|
||||
std::array{vk::Format::eR8Uint, vk::Format::eR8G8Uint, vk::Format::eR8G8B8Uint,
|
||||
vk::Format::eR8G8B8A8Uint},
|
||||
std::array{vk::Format::eR16Sint, vk::Format::eR16G16Sint, vk::Format::eR16G16B16Sint,
|
||||
vk::Format::eR16G16B16A16Sint},
|
||||
std::array{vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32Sfloat,
|
||||
vk::Format::eR32G32B32A32Sfloat}};
|
||||
vk::Format::eR8G8B8A8Uint}};
|
||||
|
||||
ASSERT(size <= 4);
|
||||
return attribute_formats[static_cast<u32>(format)][size - 1];
|
||||
ASSERT(attrib.size <= 4);
|
||||
return attribute_formats[static_cast<u32>(attrib.type.Value())][attrib.size.Value() - 1];
|
||||
}
|
||||
|
||||
vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
|
||||
@ -62,21 +66,6 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
|
||||
return vk::ShaderStageFlagBits::eVertex;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsAttribFormatSupported(const VertexAttribute& attrib, const Instance& instance) {
|
||||
static std::unordered_map<vk::Format, bool> format_support_cache;
|
||||
|
||||
vk::PhysicalDevice physical_device = instance.GetPhysicalDevice();
|
||||
const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);
|
||||
auto [it, new_format] = format_support_cache.try_emplace(format, false);
|
||||
if (new_format) {
|
||||
LOG_INFO(Render_Vulkan, "Quering support for format {}", vk::to_string(format));
|
||||
const vk::FormatFeatureFlags features = physical_device.getFormatProperties(format).bufferFeatures;
|
||||
it->second = (features & vk::FormatFeatureFlagBits::eVertexBuffer) == vk::FormatFeatureFlagBits::eVertexBuffer;
|
||||
}
|
||||
|
||||
return it->second;
|
||||
};
|
||||
|
||||
PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler,
|
||||
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
|
||||
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager} {
|
||||
@ -100,11 +89,7 @@ PipelineCache::~PipelineCache() {
|
||||
device.destroyShaderModule(module);
|
||||
}
|
||||
|
||||
for (auto& [key, module] : fragment_shaders_glsl.shaders) {
|
||||
device.destroyShaderModule(module);
|
||||
}
|
||||
|
||||
for (auto& [key, module] : fragment_shaders_spv.shaders) {
|
||||
for (auto& [key, module] : fragment_shaders.shaders) {
|
||||
device.destroyShaderModule(module);
|
||||
}
|
||||
|
||||
@ -181,7 +166,8 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) {
|
||||
|
||||
const u64 info_hash_size = instance.IsExtendedDynamicStateSupported()
|
||||
? offsetof(PipelineInfo, rasterization)
|
||||
: offsetof(PipelineInfo, dynamic);
|
||||
: offsetof(PipelineInfo, depth_stencil) +
|
||||
offsetof(DepthStencilState, stencil_reference);
|
||||
|
||||
u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
|
||||
u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
|
||||
@ -202,30 +188,22 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
|
||||
Pica::Shader::ShaderSetup& setup,
|
||||
const VertexLayout& layout) {
|
||||
PicaVSConfig config{regs.vs, setup};
|
||||
|
||||
u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
|
||||
for (u32 i = 0; i < layout.attribute_count; i++) {
|
||||
const auto& attrib = layout.attributes[i];
|
||||
const u32 location = attrib.location.Value();
|
||||
const bool is_supported = IsAttribFormatSupported(attrib, instance);
|
||||
ASSERT(is_supported || attrib.size == 3);
|
||||
|
||||
config.state.attrib_types[location] = attrib.type.Value();
|
||||
config.state.emulated_attrib_locations[location] =
|
||||
is_supported ? 0 : emulated_attrib_loc++;
|
||||
config.state.attrib_types[attrib.location.Value()] = attrib.type.Value();
|
||||
}
|
||||
|
||||
auto [handle, result] =
|
||||
programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
|
||||
instance.GetDevice(), ShaderOptimization::High);
|
||||
if (!handle) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
|
||||
return false;
|
||||
}
|
||||
scheduler.Record([this, config, setup = std::move(setup)](vk::CommandBuffer, vk::CommandBuffer) {
|
||||
auto [handle, result] =
|
||||
programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
|
||||
instance.GetDevice(), ShaderOptimization::Debug);
|
||||
if (!handle) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
|
||||
return;
|
||||
}
|
||||
|
||||
scheduler.Record([this, handle = handle, hash = config.Hash()](vk::CommandBuffer, vk::CommandBuffer) {
|
||||
current_shaders[ProgramType::VS] = handle;
|
||||
shader_hashes[ProgramType::VS] = hash;
|
||||
shader_hashes[ProgramType::VS] = config.Hash();
|
||||
});
|
||||
|
||||
return true;
|
||||
@ -242,8 +220,8 @@ void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
|
||||
const PicaFixedGSConfig gs_config{regs};
|
||||
|
||||
scheduler.Record([this, gs_config](vk::CommandBuffer, vk::CommandBuffer) {
|
||||
vk::ShaderModule handle = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
|
||||
instance.GetDevice(), ShaderOptimization::High);
|
||||
auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
|
||||
instance.GetDevice(), ShaderOptimization::Debug);
|
||||
current_shaders[ProgramType::GS] = handle;
|
||||
shader_hashes[ProgramType::GS] = gs_config.Hash();
|
||||
});
|
||||
@ -256,21 +234,12 @@ void PipelineCache::UseTrivialGeometryShader() {
|
||||
});
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_FragmentGeneration, "Vulkan", "Fragment Shader Compilation", MP_RGB(255, 100, 100));
|
||||
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
|
||||
const PicaFSConfig config{regs, instance};
|
||||
const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
|
||||
|
||||
scheduler.Record([this, config](vk::CommandBuffer, vk::CommandBuffer) {
|
||||
MICROPROFILE_SCOPE(Vulkan_FragmentGeneration);
|
||||
|
||||
vk::ShaderModule handle{};
|
||||
if (Settings::values.spirv_shader_gen) {
|
||||
handle = fragment_shaders_spv.Get(config, instance.GetDevice());
|
||||
} else {
|
||||
handle = fragment_shaders_glsl.Get(config, vk::ShaderStageFlagBits::eFragment,
|
||||
instance.GetDevice(), ShaderOptimization::High);
|
||||
}
|
||||
|
||||
auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment,
|
||||
instance.GetDevice(), ShaderOptimization::Debug);
|
||||
current_shaders[ProgramType::FS] = handle;
|
||||
shader_hashes[ProgramType::FS] = config.Hash();
|
||||
});
|
||||
@ -305,17 +274,27 @@ void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) {
|
||||
}
|
||||
|
||||
void PipelineCache::SetViewport(float x, float y, float width, float height) {
|
||||
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
|
||||
const vk::Viewport viewport{x, y, width, height, 0.f, 1.f};
|
||||
scheduler.Record([viewport](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
render_cmdbuf.setViewport(0, viewport);
|
||||
});
|
||||
|
||||
if (viewport != current_viewport || is_dirty) {
|
||||
scheduler.Record([viewport](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
render_cmdbuf.setViewport(0, viewport);
|
||||
});
|
||||
current_viewport = viewport;
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
|
||||
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
|
||||
const vk::Rect2D scissor{{x, y}, {width, height}};
|
||||
scheduler.Record([scissor](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
render_cmdbuf.setScissor(0, scissor);
|
||||
});
|
||||
|
||||
if (scissor != current_scissor || is_dirty) {
|
||||
scheduler.Record([scissor](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
render_cmdbuf.setScissor(0, scissor);
|
||||
});
|
||||
current_scissor = scissor;
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
|
||||
@ -323,28 +302,23 @@ void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
|
||||
|
||||
PipelineInfo current = current_info;
|
||||
scheduler.Record([this, info, is_dirty, current](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
if (info.dynamic.stencil_compare_mask !=
|
||||
current.dynamic.stencil_compare_mask ||
|
||||
if (info.depth_stencil.stencil_compare_mask !=
|
||||
current.depth_stencil.stencil_compare_mask ||
|
||||
is_dirty) {
|
||||
render_cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
|
||||
info.dynamic.stencil_compare_mask);
|
||||
info.depth_stencil.stencil_compare_mask);
|
||||
}
|
||||
|
||||
if (info.dynamic.stencil_write_mask != current.dynamic.stencil_write_mask ||
|
||||
if (info.depth_stencil.stencil_write_mask != current.depth_stencil.stencil_write_mask ||
|
||||
is_dirty) {
|
||||
render_cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
|
||||
info.dynamic.stencil_write_mask);
|
||||
info.depth_stencil.stencil_write_mask);
|
||||
}
|
||||
|
||||
if (info.dynamic.stencil_reference != current.dynamic.stencil_reference ||
|
||||
if (info.depth_stencil.stencil_reference != current.depth_stencil.stencil_reference ||
|
||||
is_dirty) {
|
||||
render_cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
|
||||
info.dynamic.stencil_reference);
|
||||
}
|
||||
|
||||
if (info.dynamic.blend_color != current.dynamic.blend_color || is_dirty) {
|
||||
const Common::Vec4f color = PicaToVK::ColorRGBA8(info.dynamic.blend_color);
|
||||
render_cmdbuf.setBlendConstants(color.AsArray());
|
||||
info.depth_stencil.stencil_reference);
|
||||
}
|
||||
|
||||
if (instance.IsExtendedDynamicStateSupported()) {
|
||||
@ -419,12 +393,10 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
|
||||
.stage = ToVkShaderStage(i), .module = shader, .pName = "main"};
|
||||
}
|
||||
|
||||
/**
|
||||
* Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and
|
||||
* increasing data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE
|
||||
* as the input rate. Since one instance is all we render, the shader will always read the
|
||||
* single attribute.
|
||||
**/
|
||||
// Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and
|
||||
// increasing data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE
|
||||
// as the input rate. Since one instance is all we render, the shader will always read the
|
||||
// single attribute.
|
||||
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
|
||||
for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
|
||||
const auto& binding = info.vertex_layout.bindings[i];
|
||||
@ -435,37 +407,20 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
|
||||
: vk::VertexInputRate::eVertex};
|
||||
}
|
||||
|
||||
u32 emulated_attrib_count = 0;
|
||||
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES * 2> attributes;
|
||||
// Populate vertex attribute structures
|
||||
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
|
||||
for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
|
||||
const VertexAttribute& attrib = info.vertex_layout.attributes[i];
|
||||
const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);
|
||||
const bool is_supported = IsAttribFormatSupported(attrib, instance);
|
||||
ASSERT_MSG(is_supported || attrib.size == 3);
|
||||
|
||||
attributes[i] = vk::VertexInputAttributeDescription{.location = attrib.location,
|
||||
.binding = attrib.binding,
|
||||
.format = is_supported ? format
|
||||
: ToVkAttributeFormat(attrib.type, 2),
|
||||
.offset = attrib.offset};
|
||||
|
||||
// When the requested 3-component vertex format is unsupported by the hardware
|
||||
// is it emulated by breaking it into a vec2 + vec1. These are combined to a vec3
|
||||
// by the vertex shader.
|
||||
if (!is_supported) {
|
||||
const u32 location = MAX_VERTEX_ATTRIBUTES + emulated_attrib_count++;
|
||||
LOG_WARNING(Render_Vulkan, "\nEmulating attrib {} at location {}\n", attrib.location, location);
|
||||
attributes[location] = vk::VertexInputAttributeDescription{.location = location,
|
||||
.binding = attrib.binding,
|
||||
.format = ToVkAttributeFormat(attrib.type, 1),
|
||||
.offset = attrib.offset + AttribBytes(attrib.type, 2)};
|
||||
}
|
||||
const auto& attr = info.vertex_layout.attributes[i];
|
||||
attributes[i] = vk::VertexInputAttributeDescription{.location = attr.location,
|
||||
.binding = attr.binding,
|
||||
.format = ToVkAttributeFormat(attr),
|
||||
.offset = attr.offset};
|
||||
}
|
||||
|
||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
||||
.vertexBindingDescriptionCount = info.vertex_layout.binding_count,
|
||||
.pVertexBindingDescriptions = bindings.data(),
|
||||
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count + emulated_attrib_count,
|
||||
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
|
||||
.pVertexAttributeDescriptions = attributes.data()};
|
||||
|
||||
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
|
||||
@ -494,7 +449,7 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
|
||||
.colorWriteMask = static_cast<vk::ColorComponentFlags>(info.blending.color_write_mask)};
|
||||
|
||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||
.logicOpEnable = !info.blending.blend_enable.Value() && !instance.NeedsLogicOpEmulation(),
|
||||
.logicOpEnable = !info.blending.blend_enable.Value(),
|
||||
.logicOp = PicaToVK::LogicOp(info.blending.logic_op.Value()),
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = &colorblend_attachment,
|
||||
@ -505,7 +460,11 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
|
||||
|
||||
const vk::Rect2D scissor = {.offset = {0, 0}, .extent = {1, 1}};
|
||||
|
||||
vk::PipelineViewportDepthClipControlCreateInfoEXT depth_clip_control = {.negativeOneToOne =
|
||||
true};
|
||||
|
||||
const vk::PipelineViewportStateCreateInfo viewport_info = {
|
||||
.pNext = &depth_clip_control,
|
||||
.viewportCount = 1,
|
||||
.pViewports = &viewport,
|
||||
.scissorCount = 1,
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
#include "video_core/shader/shader_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
@ -29,16 +29,23 @@ union RasterizationState {
|
||||
BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode;
|
||||
};
|
||||
|
||||
union DepthStencilState {
|
||||
u32 value = 0;
|
||||
BitField<0, 1, u32> depth_test_enable;
|
||||
BitField<1, 1, u32> depth_write_enable;
|
||||
BitField<2, 1, u32> stencil_test_enable;
|
||||
BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
|
||||
BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
|
||||
BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
|
||||
BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
|
||||
BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
|
||||
struct DepthStencilState {
|
||||
union {
|
||||
u32 value = 0;
|
||||
BitField<0, 1, u32> depth_test_enable;
|
||||
BitField<1, 1, u32> depth_write_enable;
|
||||
BitField<2, 1, u32> stencil_test_enable;
|
||||
BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
|
||||
BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
|
||||
BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
|
||||
BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
|
||||
BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
|
||||
};
|
||||
|
||||
// These are dynamic state so keep them separate
|
||||
u8 stencil_reference;
|
||||
u8 stencil_compare_mask;
|
||||
u8 stencil_write_mask;
|
||||
};
|
||||
|
||||
union BlendingState {
|
||||
@ -54,13 +61,6 @@ union BlendingState {
|
||||
BitField<27, 4, Pica::FramebufferRegs::LogicOp> logic_op;
|
||||
};
|
||||
|
||||
struct DynamicState {
|
||||
u32 blend_color = 0;
|
||||
u8 stencil_reference;
|
||||
u8 stencil_compare_mask;
|
||||
u8 stencil_write_mask;
|
||||
};
|
||||
|
||||
union VertexBinding {
|
||||
u16 value = 0;
|
||||
BitField<0, 4, u16> binding;
|
||||
@ -72,7 +72,7 @@ union VertexAttribute {
|
||||
u32 value = 0;
|
||||
BitField<0, 4, u32> binding;
|
||||
BitField<4, 4, u32> location;
|
||||
BitField<8, 3, Pica::PipelineRegs::VertexAttributeFormat> type;
|
||||
BitField<8, 3, AttribType> type;
|
||||
BitField<11, 3, u32> size;
|
||||
BitField<14, 11, u32> offset;
|
||||
};
|
||||
@ -94,14 +94,13 @@ struct PipelineInfo {
|
||||
VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8;
|
||||
RasterizationState rasterization{};
|
||||
DepthStencilState depth_stencil{};
|
||||
DynamicState dynamic;
|
||||
|
||||
[[nodiscard]] bool IsDepthWriteEnabled() const noexcept {
|
||||
bool IsDepthWriteEnabled() const {
|
||||
const bool has_stencil = depth_attachment == VideoCore::PixelFormat::D24S8;
|
||||
const bool depth_write =
|
||||
depth_stencil.depth_test_enable && depth_stencil.depth_write_enable;
|
||||
const bool stencil_write = has_stencil && depth_stencil.stencil_test_enable &&
|
||||
dynamic.stencil_write_mask != 0;
|
||||
depth_stencil.stencil_write_mask != 0;
|
||||
|
||||
return depth_write || stencil_write;
|
||||
}
|
||||
@ -116,12 +115,9 @@ using ProgrammableVertexShaders = Pica::Shader::ShaderDoubleCache<PicaVSConfig,
|
||||
using FixedGeometryShaders = Pica::Shader::ShaderCache<PicaFixedGSConfig, vk::ShaderModule,
|
||||
&Compile, &GenerateFixedGeometryShader>;
|
||||
|
||||
using FragmentShadersGLSL =
|
||||
using FragmentShaders =
|
||||
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;
|
||||
|
||||
using FragmentShadersSPV =
|
||||
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &CompileSPV, &GenerateFragmentShaderSPV>;
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
class RenderpassCache;
|
||||
@ -129,6 +125,7 @@ class DescriptorManager;
|
||||
|
||||
/**
|
||||
* Stores a collection of rasterizer pipelines used during rendering.
|
||||
* In addition handles descriptor set management.
|
||||
*/
|
||||
class PipelineCache {
|
||||
public:
|
||||
@ -212,6 +209,8 @@ private:
|
||||
std::unordered_map<u64, vk::Pipeline, Common::IdentityHash<u64>> graphics_pipelines;
|
||||
vk::Pipeline current_pipeline{};
|
||||
PipelineInfo current_info{};
|
||||
vk::Viewport current_viewport{};
|
||||
vk::Rect2D current_scissor{};
|
||||
|
||||
// Bound shader modules
|
||||
enum ProgramType : u32 { VS = 0, GS = 2, FS = 1 };
|
||||
@ -220,8 +219,7 @@ private:
|
||||
std::array<u64, MAX_SHADER_STAGES> shader_hashes;
|
||||
ProgrammableVertexShaders programmable_vertex_shaders;
|
||||
FixedGeometryShaders fixed_geometry_shaders;
|
||||
FragmentShadersGLSL fragment_shaders_glsl;
|
||||
FragmentShadersSPV fragment_shaders_spv;
|
||||
FragmentShaders fragment_shaders;
|
||||
vk::ShaderModule trivial_vertex_shader;
|
||||
};
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#elif defined(_WIN32)
|
||||
#define VK_USE_PLATFORM_WIN32_KHR
|
||||
#elif defined(__APPLE__)
|
||||
#define VK_USE_PLATFORM_MACOS_MVK
|
||||
#define VK_USE_PLATFORM_METAL_EXT
|
||||
#else
|
||||
#define VK_USE_PLATFORM_WAYLAND_KHR
|
||||
@ -49,7 +50,9 @@ vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& e
|
||||
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else if (window_info.type == Frontend::WindowSystemType::Wayland) {
|
||||
}
|
||||
|
||||
if (window_info.type == Frontend::WindowSystemType::Wayland) {
|
||||
const vk::WaylandSurfaceCreateInfoKHR wayland_ci = {
|
||||
.display = static_cast<wl_display*>(window_info.display_connection),
|
||||
.surface = static_cast<wl_surface*>(window_info.render_surface)};
|
||||
@ -60,33 +63,10 @@ vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& e
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#elif defined(VK_USE_PLATFORM_METAL_EXT)
|
||||
if (window_info.type == Frontend::WindowSystemType::MacOS) {
|
||||
const vk::MetalSurfaceCreateInfoEXT macos_ci = {
|
||||
.pLayer = static_cast<const CAMetalLayer*>(window_info.render_surface)
|
||||
};
|
||||
|
||||
if (instance.createMetalSurfaceEXT(&macos_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize MacOS surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#elif defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
if (window_info.type == Frontend::WindowSystemType::Android) {
|
||||
vk::AndroidSurfaceCreateInfoKHR android_ci = {
|
||||
.window = reinterpret_cast<ANativeWindow*>(window_info.render_surface)
|
||||
};
|
||||
|
||||
if (instance.createAndroidSurfaceKHR(&android_ci, nullptr, &surface) != vk::Result::eSuccess) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize Android surface");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!surface) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return surface;
|
||||
@ -118,14 +98,6 @@ std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window
|
||||
case Frontend::WindowSystemType::Wayland:
|
||||
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#elif defined(VK_USE_PLATFORM_METAL_EXT)
|
||||
case Frontend::WindowSystemType::MacOS:
|
||||
extensions.push_back(VK_EXT_METAL_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#elif defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
case Frontend::WindowSystemType::Android:
|
||||
extensions.push_back(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/pica_state.h"
|
||||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/regs_pipeline.h"
|
||||
#include "video_core/regs_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
@ -21,6 +20,74 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
RasterizerVulkan::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v,
|
||||
bool flip_quaternion) {
|
||||
position[0] = v.pos.x.ToFloat32();
|
||||
position[1] = v.pos.y.ToFloat32();
|
||||
position[2] = v.pos.z.ToFloat32();
|
||||
position[3] = v.pos.w.ToFloat32();
|
||||
color[0] = v.color.x.ToFloat32();
|
||||
color[1] = v.color.y.ToFloat32();
|
||||
color[2] = v.color.z.ToFloat32();
|
||||
color[3] = v.color.w.ToFloat32();
|
||||
tex_coord0[0] = v.tc0.x.ToFloat32();
|
||||
tex_coord0[1] = v.tc0.y.ToFloat32();
|
||||
tex_coord1[0] = v.tc1.x.ToFloat32();
|
||||
tex_coord1[1] = v.tc1.y.ToFloat32();
|
||||
tex_coord2[0] = v.tc2.x.ToFloat32();
|
||||
tex_coord2[1] = v.tc2.y.ToFloat32();
|
||||
tex_coord0_w = v.tc0_w.ToFloat32();
|
||||
normquat[0] = v.quat.x.ToFloat32();
|
||||
normquat[1] = v.quat.y.ToFloat32();
|
||||
normquat[2] = v.quat.z.ToFloat32();
|
||||
normquat[3] = v.quat.w.ToFloat32();
|
||||
view[0] = v.view.x.ToFloat32();
|
||||
view[1] = v.view.y.ToFloat32();
|
||||
view[2] = v.view.z.ToFloat32();
|
||||
|
||||
if (flip_quaternion) {
|
||||
normquat = -normquat;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This maps to the following layout in GLSL code:
|
||||
* layout(location = 0) in vec4 vert_position;
|
||||
* layout(location = 1) in vec4 vert_color;
|
||||
* layout(location = 2) in vec2 vert_texcoord0;
|
||||
* layout(location = 3) in vec2 vert_texcoord1;
|
||||
* layout(location = 4) in vec2 vert_texcoord2;
|
||||
* layout(location = 5) in float vert_texcoord0_w;
|
||||
* layout(location = 6) in vec4 vert_normquat;
|
||||
* layout(location = 7) in vec3 vert_view;
|
||||
*/
|
||||
constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() {
|
||||
VertexLayout layout{};
|
||||
layout.attribute_count = 8;
|
||||
layout.binding_count = 1;
|
||||
|
||||
// Define binding
|
||||
layout.bindings[0].binding.Assign(0);
|
||||
layout.bindings[0].fixed.Assign(0);
|
||||
layout.bindings[0].stride.Assign(sizeof(HardwareVertex));
|
||||
|
||||
// Define attributes
|
||||
constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
|
||||
u32 offset = 0;
|
||||
|
||||
for (u32 loc = 0; loc < 8; loc++) {
|
||||
VertexAttribute& attribute = layout.attributes[loc];
|
||||
attribute.binding.Assign(0);
|
||||
attribute.location.Assign(loc);
|
||||
attribute.offset.Assign(offset);
|
||||
attribute.type.Assign(AttribType::Float);
|
||||
attribute.size.Assign(sizes[loc]);
|
||||
offset += sizes[loc] * sizeof(float);
|
||||
}
|
||||
|
||||
return layout;
|
||||
}
|
||||
|
||||
constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024;
|
||||
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
@ -65,6 +132,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
||||
null_surface.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1);
|
||||
null_storage_surface.Transition(vk::ImageLayout::eGeneral, 0, 1);
|
||||
|
||||
uniform_block_data.lighting_lut_dirty.fill(true);
|
||||
|
||||
uniform_buffer_alignment = instance.UniformMinAlignment();
|
||||
uniform_size_aligned_vs =
|
||||
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
|
||||
@ -72,8 +141,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
||||
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
|
||||
|
||||
// Define vertex layout for software shaders
|
||||
MakeSoftwareVertexLayout();
|
||||
pipeline_info.vertex_layout = software_layout;
|
||||
pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout();
|
||||
|
||||
const SamplerInfo default_sampler_info = {
|
||||
.mag_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear,
|
||||
@ -109,6 +177,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
||||
}
|
||||
|
||||
RasterizerVulkan::~RasterizerVulkan() {
|
||||
renderpass_cache.ExitRenderpass();
|
||||
scheduler.Finish();
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
@ -175,28 +244,108 @@ void RasterizerVulkan::SyncFixedState() {
|
||||
SyncDepthWriteMask();
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
|
||||
* for a detailed description of this issue (yuriks):
|
||||
*
|
||||
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
||||
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
||||
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
||||
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
|
||||
* making Dot(Q1, -Q2) positive.
|
||||
*
|
||||
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
|
||||
* correct for most cases but can still rotate around the long way sometimes. An implementation
|
||||
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
|
||||
* between each step would work for those cases at the cost of being more complex to implement.
|
||||
*
|
||||
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
|
||||
* these issues, making this basic implementation actually more accurate to the hardware.
|
||||
*/
|
||||
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
|
||||
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
|
||||
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
|
||||
|
||||
return (Common::Dot(a, b) < 0.f);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) {
|
||||
vertex_batch.emplace_back(v0, false);
|
||||
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
||||
}
|
||||
|
||||
static constexpr std::array vs_attrib_types = {
|
||||
AttribType::Byte, // VertexAttributeFormat::BYTE
|
||||
AttribType::Ubyte, // VertexAttributeFormat::UBYTE
|
||||
AttribType::Short, // VertexAttributeFormat::SHORT
|
||||
AttribType::Float // VertexAttributeFormat::FLOAT
|
||||
};
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
RasterizerVulkan::VertexArrayInfo RasterizerVulkan::AnalyzeVertexArray(bool is_indexed) {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
|
||||
u32 vertex_min;
|
||||
u32 vertex_max;
|
||||
if (is_indexed) {
|
||||
const auto& index_info = regs.pipeline.index_array;
|
||||
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
||||
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
|
||||
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
||||
const bool index_u16 = index_info.format != 0;
|
||||
|
||||
vertex_min = 0xFFFF;
|
||||
vertex_max = 0;
|
||||
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||
res_cache.FlushRegion(address, size, nullptr);
|
||||
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
||||
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
||||
vertex_min = std::min(vertex_min, vertex);
|
||||
vertex_max = std::max(vertex_max, vertex);
|
||||
}
|
||||
} else {
|
||||
vertex_min = regs.pipeline.vertex_offset;
|
||||
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
||||
}
|
||||
|
||||
const u32 vertex_num = vertex_max - vertex_min + 1;
|
||||
u32 vs_input_size = 0;
|
||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||
if (loader.component_count != 0) {
|
||||
vs_input_size += loader.byte_count * vertex_num;
|
||||
}
|
||||
}
|
||||
|
||||
return {vertex_min, vertex_max, vs_input_size};
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
|
||||
u32 vs_input_index_max) {
|
||||
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);
|
||||
|
||||
/**
|
||||
* The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
|
||||
* how to interpret vertex data. The program firsts sets GPUREG_ATTR_BUF_BASE to the base
|
||||
* address containing the vertex array data. The data for each attribute loader (i) can be found
|
||||
* by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought
|
||||
* as something analogous to Vulkan bindings. The user can store attributes in separate loaders
|
||||
* or interleave them in the same loader.
|
||||
**/
|
||||
// The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
|
||||
// how to interpret vertex data. The program firsts sets GPUREG_ATTR_BUF_BASE to the base
|
||||
// address containing the vertex array data. The data for each attribute loader (i) can be found
|
||||
// by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought
|
||||
// as something analogous to Vulkan bindings. The user can store attributes in separate loaders
|
||||
// or interleave them in the same loader.
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE
|
||||
|
||||
VertexLayout& layout = pipeline_info.vertex_layout;
|
||||
layout.attribute_count = 0;
|
||||
layout.binding_count = 0;
|
||||
enable_attributes.fill(false);
|
||||
std::array<bool, 16> enable_attributes{};
|
||||
VertexLayout layout{};
|
||||
|
||||
u32 buffer_offset = 0;
|
||||
u32 buffer_offset = array_offset;
|
||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||
if (loader.component_count == 0 || loader.byte_count == 0) {
|
||||
continue;
|
||||
@ -212,14 +361,16 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
offset, vertex_attributes.GetElementSizeInBytes(attribute_index));
|
||||
|
||||
const u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index);
|
||||
const Pica::PipelineRegs::VertexAttributeFormat format =
|
||||
vertex_attributes.GetFormat(attribute_index);
|
||||
const u32 attrib_format =
|
||||
static_cast<u32>(vertex_attributes.GetFormat(attribute_index));
|
||||
const AttribType type = vs_attrib_types[attrib_format];
|
||||
|
||||
// Define the attribute
|
||||
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
|
||||
attribute.binding.Assign(layout.binding_count);
|
||||
attribute.location.Assign(input_reg);
|
||||
attribute.offset.Assign(offset);
|
||||
attribute.type.Assign(format);
|
||||
attribute.type.Assign(type);
|
||||
attribute.size.Assign(size);
|
||||
|
||||
enable_attributes[input_reg] = true;
|
||||
@ -236,10 +387,10 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
const PAddr data_addr =
|
||||
base_address + loader.data_offset + (vs_input_index_min * loader.byte_count);
|
||||
const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
|
||||
const u32 data_size = loader.byte_count * vertex_num;
|
||||
u32 data_size = loader.byte_count * vertex_num;
|
||||
|
||||
res_cache.FlushRegion(data_addr, data_size);
|
||||
std::memcpy(array_ptr + buffer_offset, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
|
||||
res_cache.FlushRegion(data_addr, data_size, nullptr);
|
||||
std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
|
||||
|
||||
// Create the binding associated with this loader
|
||||
VertexBinding& binding = layout.bindings[layout.binding_count];
|
||||
@ -248,40 +399,20 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
binding.stride.Assign(loader.byte_count);
|
||||
|
||||
// Keep track of the binding offsets so we can bind the vertex buffer later
|
||||
binding_offsets[layout.binding_count++] = array_offset + buffer_offset;
|
||||
buffer_offset += Common::AlignUp(data_size, 16);
|
||||
binding_offsets[layout.binding_count++] = buffer_offset;
|
||||
data_size = Common::AlignUp(data_size, 16);
|
||||
array_ptr += data_size;
|
||||
buffer_offset += data_size;
|
||||
}
|
||||
|
||||
binding_offsets[layout.binding_count] = array_offset + buffer_offset;
|
||||
vertex_buffer.Commit(buffer_offset);
|
||||
|
||||
// Assign the rest of the attributes to the last binding
|
||||
SetupFixedAttribs();
|
||||
|
||||
// Bind the generated bindings
|
||||
scheduler.Record([this, layout = pipeline_info.vertex_layout,
|
||||
offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
std::array<vk::Buffer, 16> buffers;
|
||||
buffers.fill(vertex_buffer.GetHandle());
|
||||
render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
|
||||
offsets.data());
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupFixedAttribs() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
VertexLayout& layout = pipeline_info.vertex_layout;
|
||||
|
||||
auto [fixed_ptr, fixed_offset, _] = vertex_buffer.Map(16 * sizeof(Common::Vec4f));
|
||||
|
||||
// Reserve the last binding for fixed and default attributes
|
||||
// Place the default attrib at offset zero for easy access
|
||||
static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f};
|
||||
std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
|
||||
constexpr Common::Vec4f default_attrib = Common::MakeVec(0.f, 0.f, 0.f, 1.f);
|
||||
u32 offset = sizeof(Common::Vec4f);
|
||||
std::memcpy(array_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
|
||||
array_ptr += sizeof(Common::Vec4f);
|
||||
|
||||
// Find all fixed attributes and assign them to the last binding
|
||||
u32 offset = sizeof(Common::Vec4f);
|
||||
for (std::size_t i = 0; i < 16; i++) {
|
||||
if (vertex_attributes.IsDefaultAttribute(i)) {
|
||||
const u32 reg = regs.vs.GetRegisterForAttribute(i);
|
||||
@ -291,42 +422,56 @@ void RasterizerVulkan::SetupFixedAttribs() {
|
||||
attr.w.ToFloat32()};
|
||||
|
||||
const u32 data_size = sizeof(float) * static_cast<u32>(data.size());
|
||||
std::memcpy(fixed_ptr + offset, data.data(), data_size);
|
||||
std::memcpy(array_ptr, data.data(), data_size);
|
||||
|
||||
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
|
||||
attribute.binding.Assign(layout.binding_count);
|
||||
attribute.location.Assign(reg);
|
||||
attribute.offset.Assign(offset);
|
||||
attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
|
||||
attribute.type.Assign(AttribType::Float);
|
||||
attribute.size.Assign(4);
|
||||
|
||||
offset += data_size;
|
||||
array_ptr += data_size;
|
||||
enable_attributes[reg] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Loop one more time to find unused attributes and assign them to the default one
|
||||
// If the attribute is just disabled, shove the default attribute to avoid
|
||||
// errors if the shader ever decides to use it.
|
||||
// This needs to happen because i = 2 might be assigned to location = 3 so the loop
|
||||
// above would skip setting it
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
// If the attribute is just disabled, shove the default attribute to avoid
|
||||
// errors if the shader ever decides to use it. The pipeline cache can discard
|
||||
// this if needed since it has access to the usage mask from the code generator
|
||||
if (!enable_attributes[i]) {
|
||||
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
|
||||
attribute.binding.Assign(layout.binding_count);
|
||||
attribute.location.Assign(i);
|
||||
attribute.offset.Assign(0);
|
||||
attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
|
||||
attribute.type.Assign(AttribType::Float);
|
||||
attribute.size.Assign(4);
|
||||
}
|
||||
}
|
||||
|
||||
// Define the fixed+default binding
|
||||
VertexBinding& binding = layout.bindings[layout.binding_count];
|
||||
binding.binding.Assign(layout.binding_count++);
|
||||
binding.binding.Assign(layout.binding_count);
|
||||
binding.fixed.Assign(1);
|
||||
binding.stride.Assign(offset);
|
||||
binding_offsets[layout.binding_count++] = buffer_offset;
|
||||
buffer_offset += offset;
|
||||
|
||||
vertex_buffer.Commit(offset);
|
||||
pipeline_info.vertex_layout = layout;
|
||||
vertex_buffer.Commit(buffer_offset - array_offset);
|
||||
|
||||
scheduler.Record([this, layout, offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
std::array<vk::Buffer, 16> buffers;
|
||||
buffers.fill(vertex_buffer.GetHandle());
|
||||
render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
|
||||
offsets.data());
|
||||
});
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||
@ -342,7 +487,7 @@ bool RasterizerVulkan::SetupGeometryShader() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
|
||||
if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
|
||||
LOG_ERROR(Render_Vulkan, "Accelerate draw doesn't support geometry shader");
|
||||
LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -367,7 +512,7 @@ bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
|
||||
bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
|
||||
const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
|
||||
auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
|
||||
|
||||
if (vs_input_size > VERTEX_BUFFER_SIZE) {
|
||||
LOG_WARNING(Render_Vulkan, "Too large vertex input size {}", vs_input_size);
|
||||
@ -461,10 +606,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
auto [color_surface, depth_surface, surfaces_rect] =
|
||||
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled);
|
||||
|
||||
if (!color_surface && shadow_rendering) {
|
||||
return true;
|
||||
}
|
||||
|
||||
pipeline_info.color_attachment =
|
||||
color_surface ? color_surface->pixel_format : VideoCore::PixelFormat::Invalid;
|
||||
pipeline_info.depth_attachment =
|
||||
@ -671,7 +812,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
|
||||
// Sync and bind the shader
|
||||
if (shader_dirty) {
|
||||
pipeline_cache.UseFragmentShader(regs);
|
||||
SetShader();
|
||||
shader_dirty = false;
|
||||
}
|
||||
|
||||
@ -740,7 +881,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
succeeded = AccelerateDrawBatchInternal(is_indexed);
|
||||
} else {
|
||||
pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List);
|
||||
pipeline_info.vertex_layout = software_layout;
|
||||
pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout();
|
||||
pipeline_cache.UseTrivialVertexShader();
|
||||
pipeline_cache.UseTrivialGeometryShader();
|
||||
pipeline_cache.BindPipeline(pipeline_info);
|
||||
@ -822,9 +963,6 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) {
|
||||
|
||||
// Blending
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
|
||||
if (instance.NeedsLogicOpEmulation()) {
|
||||
shader_dirty = true;
|
||||
}
|
||||
SyncBlendEnabled();
|
||||
break;
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
|
||||
@ -945,9 +1083,6 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) {
|
||||
|
||||
// Logic op
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
|
||||
if (instance.NeedsLogicOpEmulation()) {
|
||||
shader_dirty = true;
|
||||
}
|
||||
SyncLogicOp();
|
||||
break;
|
||||
|
||||
@ -1473,33 +1608,6 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
|
||||
return true;
|
||||
}
|
||||
|
||||
void RasterizerVulkan::MakeSoftwareVertexLayout() {
|
||||
constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
|
||||
|
||||
software_layout = VertexLayout{
|
||||
.binding_count = 1,
|
||||
.attribute_count = 8
|
||||
};
|
||||
|
||||
for (u32 i = 0; i < software_layout.binding_count; i++) {
|
||||
VertexBinding& binding = software_layout.bindings[i];
|
||||
binding.binding.Assign(i);
|
||||
binding.fixed.Assign(0);
|
||||
binding.stride.Assign(sizeof(HardwareVertex));
|
||||
}
|
||||
|
||||
u32 offset = 0;
|
||||
for (u32 i = 0; i < 8; i++) {
|
||||
VertexAttribute& attribute = software_layout.attributes[i];
|
||||
attribute.binding.Assign(0);
|
||||
attribute.location.Assign(i);
|
||||
attribute.offset.Assign(offset);
|
||||
attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
|
||||
attribute.size.Assign(sizes[i]);
|
||||
offset += sizes[i] * sizeof(float);
|
||||
}
|
||||
}
|
||||
|
||||
vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) {
|
||||
const bool use_border_color = instance.IsCustomBorderColorSupported() &&
|
||||
(info.wrap_s == SamplerInfo::TextureConfig::ClampToBorder ||
|
||||
@ -1565,6 +1673,10 @@ void RasterizerVulkan::FlushBuffers() {
|
||||
texture_lf_buffer.Flush();
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetShader() {
|
||||
pipeline_cache.UseFragmentShader(Pica::g_state.regs);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncClipEnabled() {
|
||||
uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0;
|
||||
}
|
||||
@ -1584,6 +1696,26 @@ void RasterizerVulkan::SyncCullMode() {
|
||||
pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncDepthScale() {
|
||||
float depth_scale =
|
||||
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
|
||||
|
||||
if (depth_scale != uniform_block_data.data.depth_scale) {
|
||||
uniform_block_data.data.depth_scale = depth_scale;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncDepthOffset() {
|
||||
float depth_offset =
|
||||
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32();
|
||||
|
||||
if (depth_offset != uniform_block_data.data.depth_offset) {
|
||||
uniform_block_data.data.depth_offset = depth_offset;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncBlendEnabled() {
|
||||
pipeline_info.blending.blend_enable.Assign(
|
||||
Pica::g_state.regs.framebuffer.output_merger.alphablend_enable);
|
||||
@ -1607,46 +1739,73 @@ void RasterizerVulkan::SyncBlendFuncs() {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncBlendColor() {
|
||||
const Common::Vec4f blend_color =
|
||||
PicaToVK::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw);
|
||||
|
||||
scheduler.Record([blend_color](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||
render_cmdbuf.setBlendConstants(blend_color.AsArray());
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncFogColor() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
pipeline_info.dynamic.blend_color = regs.framebuffer.output_merger.blend_const.raw;
|
||||
uniform_block_data.data.fog_color = {
|
||||
regs.texturing.fog_color.r.Value() / 255.0f,
|
||||
regs.texturing.fog_color.g.Value() / 255.0f,
|
||||
regs.texturing.fog_color.b.Value() / 255.0f,
|
||||
};
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncProcTexNoise() {
|
||||
const auto& regs = Pica::g_state.regs.texturing;
|
||||
uniform_block_data.data.proctex_noise_f = {
|
||||
Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(),
|
||||
Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(),
|
||||
};
|
||||
uniform_block_data.data.proctex_noise_a = {
|
||||
regs.proctex_noise_u.amplitude / 4095.0f,
|
||||
regs.proctex_noise_v.amplitude / 4095.0f,
|
||||
};
|
||||
uniform_block_data.data.proctex_noise_p = {
|
||||
Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(),
|
||||
Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(),
|
||||
};
|
||||
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncProcTexBias() {
|
||||
const auto& regs = Pica::g_state.regs.texturing;
|
||||
uniform_block_data.data.proctex_bias =
|
||||
Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8))
|
||||
.ToFloat32();
|
||||
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncAlphaTest() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
|
||||
uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLogicOp() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
|
||||
const bool is_logic_op_emulated =
|
||||
instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
|
||||
const bool is_logic_op_noop =
|
||||
regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
|
||||
if (is_logic_op_emulated && is_logic_op_noop) {
|
||||
// Color output is disabled by logic operation. We use color write mask to skip
|
||||
// color but allow depth write.
|
||||
pipeline_info.blending.color_write_mask.Assign(0);
|
||||
} else {
|
||||
pipeline_info.blending.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
|
||||
}
|
||||
pipeline_info.blending.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncColorWriteMask() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const u32 color_mask = (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF;
|
||||
|
||||
const bool is_logic_op_emulated =
|
||||
instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
|
||||
const bool is_logic_op_noop =
|
||||
regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
|
||||
if (is_logic_op_emulated && is_logic_op_noop) {
|
||||
// Color output is disabled by logic operation. We use color write mask to skip
|
||||
// color but allow depth write. Return early to avoid overwriting this.
|
||||
return;
|
||||
}
|
||||
|
||||
pipeline_info.blending.color_write_mask.Assign(color_mask);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncStencilWriteMask() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
pipeline_info.dynamic.stencil_write_mask =
|
||||
pipeline_info.depth_stencil.stencil_write_mask =
|
||||
(regs.framebuffer.framebuffer.allow_depth_stencil_write != 0)
|
||||
? static_cast<u32>(regs.framebuffer.output_merger.stencil_test.write_mask)
|
||||
: 0;
|
||||
@ -1672,8 +1831,8 @@ void RasterizerVulkan::SyncStencilTest() {
|
||||
pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass);
|
||||
pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail);
|
||||
pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func);
|
||||
pipeline_info.dynamic.stencil_reference = stencil_test.reference_value;
|
||||
pipeline_info.dynamic.stencil_compare_mask = stencil_test.input_mask;
|
||||
pipeline_info.depth_stencil.stencil_reference = stencil_test.reference_value;
|
||||
pipeline_info.depth_stencil.stencil_compare_mask = stencil_test.input_mask;
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncDepthTest() {
|
||||
@ -1689,6 +1848,132 @@ void RasterizerVulkan::SyncDepthTest() {
|
||||
pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncCombinerColor() {
|
||||
auto combiner_color =
|
||||
PicaToVK::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw);
|
||||
if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) {
|
||||
uniform_block_data.data.tev_combiner_buffer_color = combiner_color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncTevConstColor(std::size_t stage_index,
|
||||
const Pica::TexturingRegs::TevStageConfig& tev_stage) {
|
||||
const auto const_color = PicaToVK::ColorRGBA8(tev_stage.const_color);
|
||||
|
||||
if (const_color == uniform_block_data.data.const_color[stage_index]) {
|
||||
return;
|
||||
}
|
||||
|
||||
uniform_block_data.data.const_color[stage_index] = const_color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncGlobalAmbient() {
|
||||
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.global_ambient);
|
||||
if (color != uniform_block_data.data.lighting_global_ambient) {
|
||||
uniform_block_data.data.lighting_global_ambient = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightSpecular0(int light_index) {
|
||||
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
|
||||
if (color != uniform_block_data.data.light_src[light_index].specular_0) {
|
||||
uniform_block_data.data.light_src[light_index].specular_0 = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightSpecular1(int light_index) {
|
||||
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
|
||||
if (color != uniform_block_data.data.light_src[light_index].specular_1) {
|
||||
uniform_block_data.data.light_src[light_index].specular_1 = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightDiffuse(int light_index) {
|
||||
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
|
||||
if (color != uniform_block_data.data.light_src[light_index].diffuse) {
|
||||
uniform_block_data.data.light_src[light_index].diffuse = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightAmbient(int light_index) {
|
||||
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
|
||||
if (color != uniform_block_data.data.light_src[light_index].ambient) {
|
||||
uniform_block_data.data.light_src[light_index].ambient = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightPosition(int light_index) {
|
||||
const Common::Vec3f position = {
|
||||
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
|
||||
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
|
||||
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()};
|
||||
|
||||
if (position != uniform_block_data.data.light_src[light_index].position) {
|
||||
uniform_block_data.data.light_src[light_index].position = position;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightSpotDirection(int light_index) {
|
||||
const auto& light = Pica::g_state.regs.lighting.light[light_index];
|
||||
const auto spot_direction = Common::Vec3i{light.spot_x, light.spot_y, light.spot_z} / 2047.0f;
|
||||
|
||||
if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) {
|
||||
uniform_block_data.data.light_src[light_index].spot_direction = spot_direction;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) {
|
||||
float dist_atten_bias =
|
||||
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
|
||||
.ToFloat32();
|
||||
|
||||
if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
|
||||
uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) {
|
||||
float dist_atten_scale =
|
||||
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
|
||||
.ToFloat32();
|
||||
|
||||
if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
|
||||
uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncShadowBias() {
|
||||
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
|
||||
float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
|
||||
float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
|
||||
|
||||
if (constant != uniform_block_data.data.shadow_bias_constant ||
|
||||
linear != uniform_block_data.data.shadow_bias_linear) {
|
||||
uniform_block_data.data.shadow_bias_constant = constant;
|
||||
uniform_block_data.data.shadow_bias_linear = linear;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncShadowTextureBias() {
|
||||
int bias = Pica::g_state.regs.texturing.shadow.bias << 1;
|
||||
if (bias != uniform_block_data.data.shadow_texture_bias) {
|
||||
uniform_block_data.data.shadow_texture_bias = bias;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncAndUploadLUTsLF() {
|
||||
constexpr std::size_t max_size =
|
||||
sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler +
|
||||
|
@ -4,11 +4,16 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/vector_math.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/regs_lighting.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/shader/shader_uniforms.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
@ -83,6 +88,8 @@ public:
|
||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) override;
|
||||
void DrawTriangles() override;
|
||||
void NotifyPicaRegisterChanged(u32 id) override;
|
||||
void FlushAll() override;
|
||||
@ -112,9 +119,18 @@ private:
|
||||
/// Syncs the clip coefficients to match the PICA register
|
||||
void SyncClipCoef();
|
||||
|
||||
/// Sets the OpenGL shader in accordance with the current PICA register state
|
||||
void SetShader();
|
||||
|
||||
/// Syncs the cull mode to match the PICA register
|
||||
void SyncCullMode();
|
||||
|
||||
/// Syncs the depth scale to match the PICA register
|
||||
void SyncDepthScale();
|
||||
|
||||
/// Syncs the depth offset to match the PICA register
|
||||
void SyncDepthOffset();
|
||||
|
||||
/// Syncs the blend enabled status to match the PICA register
|
||||
void SyncBlendEnabled();
|
||||
|
||||
@ -124,6 +140,18 @@ private:
|
||||
/// Syncs the blend color to match the PICA register
|
||||
void SyncBlendColor();
|
||||
|
||||
/// Syncs the fog states to match the PICA register
|
||||
void SyncFogColor();
|
||||
|
||||
/// Sync the procedural texture noise configuration to match the PICA register
|
||||
void SyncProcTexNoise();
|
||||
|
||||
/// Sync the procedural texture bias configuration to match the PICA register
|
||||
void SyncProcTexBias();
|
||||
|
||||
/// Syncs the alpha test states to match the PICA register
|
||||
void SyncAlphaTest();
|
||||
|
||||
/// Syncs the logic op states to match the PICA register
|
||||
void SyncLogicOp();
|
||||
|
||||
@ -142,6 +170,46 @@ private:
|
||||
/// Syncs the depth test states to match the PICA register
|
||||
void SyncDepthTest();
|
||||
|
||||
/// Syncs the TEV combiner color buffer to match the PICA register
|
||||
void SyncCombinerColor();
|
||||
|
||||
/// Syncs the TEV constant color to match the PICA register
|
||||
void SyncTevConstColor(std::size_t tev_index,
|
||||
const Pica::TexturingRegs::TevStageConfig& tev_stage);
|
||||
|
||||
/// Syncs the lighting global ambient color to match the PICA register
|
||||
void SyncGlobalAmbient();
|
||||
|
||||
/// Syncs the specified light's specular 0 color to match the PICA register
|
||||
void SyncLightSpecular0(int light_index);
|
||||
|
||||
/// Syncs the specified light's specular 1 color to match the PICA register
|
||||
void SyncLightSpecular1(int light_index);
|
||||
|
||||
/// Syncs the specified light's diffuse color to match the PICA register
|
||||
void SyncLightDiffuse(int light_index);
|
||||
|
||||
/// Syncs the specified light's ambient color to match the PICA register
|
||||
void SyncLightAmbient(int light_index);
|
||||
|
||||
/// Syncs the specified light's position to match the PICA register
|
||||
void SyncLightPosition(int light_index);
|
||||
|
||||
/// Syncs the specified spot light direcition to match the PICA register
|
||||
void SyncLightSpotDirection(int light_index);
|
||||
|
||||
/// Syncs the specified light's distance attenuation bias to match the PICA register
|
||||
void SyncLightDistanceAttenuationBias(int light_index);
|
||||
|
||||
/// Syncs the specified light's distance attenuation scale to match the PICA register
|
||||
void SyncLightDistanceAttenuationScale(int light_index);
|
||||
|
||||
/// Syncs the shadow rendering bias to match the PICA register
|
||||
void SyncShadowBias();
|
||||
|
||||
/// Syncs the shadow texture bias to match the PICA register
|
||||
void SyncShadowTextureBias();
|
||||
|
||||
/// Syncs and uploads the lighting, fog and proctex LUTs
|
||||
void SyncAndUploadLUTs();
|
||||
void SyncAndUploadLUTsLF();
|
||||
@ -155,21 +223,27 @@ private:
|
||||
/// Internal implementation for AccelerateDrawBatch
|
||||
bool AccelerateDrawBatchInternal(bool is_indexed);
|
||||
|
||||
/// Copies vertex data performing needed convertions and casts
|
||||
void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data);
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
/// Retrieve the range and the size of the input vertex
|
||||
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
||||
|
||||
/// Setup vertex array for AccelerateDrawBatch
|
||||
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
|
||||
|
||||
/// Setup the fixed attribute emulation in vulkan
|
||||
void SetupFixedAttribs();
|
||||
|
||||
/// Setup vertex shader for AccelerateDrawBatch
|
||||
bool SetupVertexShader();
|
||||
|
||||
/// Setup geometry shader for AccelerateDrawBatch
|
||||
bool SetupGeometryShader();
|
||||
|
||||
/// Creates the vertex layout struct used for software shader pipelines
|
||||
void MakeSoftwareVertexLayout();
|
||||
|
||||
/// Creates a new sampler object
|
||||
vk::Sampler CreateSampler(const SamplerInfo& info);
|
||||
|
||||
@ -184,14 +258,44 @@ private:
|
||||
DescriptorManager& desc_manager;
|
||||
RasterizerCache res_cache;
|
||||
PipelineCache pipeline_cache;
|
||||
bool shader_dirty = true;
|
||||
|
||||
VertexLayout software_layout;
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex {
|
||||
HardwareVertex() = default;
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
|
||||
|
||||
constexpr static VertexLayout GetVertexLayout();
|
||||
|
||||
Common::Vec4f position;
|
||||
Common::Vec4f color;
|
||||
Common::Vec2f tex_coord0;
|
||||
Common::Vec2f tex_coord1;
|
||||
Common::Vec2f tex_coord2;
|
||||
float tex_coord0_w;
|
||||
Common::Vec4f normquat;
|
||||
Common::Vec3f view;
|
||||
};
|
||||
|
||||
std::vector<HardwareVertex> vertex_batch;
|
||||
std::array<u64, 16> binding_offsets{};
|
||||
std::array<bool, 16> enable_attributes{};
|
||||
vk::Sampler default_sampler;
|
||||
Surface null_surface;
|
||||
Surface null_storage_surface;
|
||||
|
||||
struct {
|
||||
Pica::Shader::UniformData data{};
|
||||
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
|
||||
bool lighting_lut_dirty_any = true;
|
||||
bool fog_lut_dirty = true;
|
||||
bool proctex_noise_lut_dirty = true;
|
||||
bool proctex_color_map_dirty = true;
|
||||
bool proctex_alpha_map_dirty = true;
|
||||
bool proctex_lut_dirty = true;
|
||||
bool proctex_diff_lut_dirty = true;
|
||||
bool dirty = true;
|
||||
} uniform_block_data = {};
|
||||
|
||||
std::array<SamplerInfo, 3> texture_samplers;
|
||||
SamplerInfo texture_cube_sampler;
|
||||
std::unordered_map<SamplerInfo, vk::Sampler> samplers;
|
||||
@ -206,6 +310,15 @@ private:
|
||||
std::size_t uniform_buffer_alignment;
|
||||
std::size_t uniform_size_aligned_vs;
|
||||
std::size_t uniform_size_aligned_fs;
|
||||
|
||||
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
|
||||
lighting_lut_data{};
|
||||
std::array<Common::Vec2f, 128> fog_lut_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_color_map_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
|
||||
std::array<Common::Vec4f, 256> proctex_lut_data{};
|
||||
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -121,12 +121,11 @@ void DescriptorPool::RefreshTick() {
|
||||
}
|
||||
|
||||
void DescriptorPool::Allocate(std::size_t begin, std::size_t end) {
|
||||
LOG_INFO(Render_Vulkan, "Allocating new descriptor pool");
|
||||
vk::DescriptorPool& pool = pools.emplace_back();
|
||||
|
||||
// Choose a sane pool size good for most games
|
||||
static constexpr std::array<vk::DescriptorPoolSize, 5> pool_sizes = {{
|
||||
{vk::DescriptorType::eUniformBuffer, 4096},
|
||||
{vk::DescriptorType::eUniformBuffer, 2048},
|
||||
{vk::DescriptorType::eSampledImage, 4096},
|
||||
{vk::DescriptorType::eSampler, 4096},
|
||||
{vk::DescriptorType::eUniformTexelBuffer, 2048},
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
#include "common/microprofile.h"
|
||||
#include "core/settings.h"
|
||||
#include "common/thread.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
@ -25,29 +25,14 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::Co
|
||||
last = nullptr;
|
||||
}
|
||||
|
||||
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache, RendererVulkan& renderer)
|
||||
: instance{instance}, renderpass_cache{renderpass_cache}, renderer{renderer}, master_semaphore{instance},
|
||||
command_pool{instance, master_semaphore}, stop_requested{false},
|
||||
use_worker_thread{Settings::values.async_command_recording} {
|
||||
Scheduler::Scheduler(const Instance& instance, RendererVulkan& renderer)
|
||||
: instance{instance}, renderer{renderer}, master_semaphore{instance}, command_pool{instance, master_semaphore} {
|
||||
AcquireNewChunk();
|
||||
AllocateWorkerCommandBuffers();
|
||||
if (use_worker_thread) {
|
||||
AcquireNewChunk();
|
||||
worker_thread = std::thread([this]() { WorkerThread(); });
|
||||
}
|
||||
worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
|
||||
}
|
||||
|
||||
Scheduler::~Scheduler() {
|
||||
stop_requested = true;
|
||||
|
||||
// Push a dummy chunk to unblock the thread
|
||||
{
|
||||
std::scoped_lock lock{work_mutex};
|
||||
work_queue.push(std::move(chunk));
|
||||
}
|
||||
|
||||
work_cv.notify_one();
|
||||
worker_thread.join();
|
||||
}
|
||||
Scheduler::~Scheduler() = default;
|
||||
|
||||
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
|
||||
SubmitExecution(signal, wait);
|
||||
@ -62,10 +47,6 @@ void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
|
||||
void Scheduler::WaitWorker() {
|
||||
if (!use_worker_thread) {
|
||||
return;
|
||||
}
|
||||
|
||||
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
|
||||
DispatchWork();
|
||||
|
||||
@ -87,7 +68,9 @@ void Scheduler::DispatchWork() {
|
||||
AcquireNewChunk();
|
||||
}
|
||||
|
||||
void Scheduler::WorkerThread() {
|
||||
void Scheduler::WorkerThread(std::stop_token stop_token) {
|
||||
Common::SetCurrentThreadName("Vulkan Worker Thread");
|
||||
|
||||
do {
|
||||
std::unique_ptr<CommandChunk> work;
|
||||
bool has_submit{false};
|
||||
@ -96,8 +79,8 @@ void Scheduler::WorkerThread() {
|
||||
if (work_queue.empty()) {
|
||||
wait_cv.notify_all();
|
||||
}
|
||||
work_cv.wait(lock, [this] { return !work_queue.empty() || stop_requested; });
|
||||
if (stop_requested) {
|
||||
work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
|
||||
if (stop_token.stop_requested()) {
|
||||
continue;
|
||||
}
|
||||
work = std::move(work_queue.front());
|
||||
@ -111,7 +94,7 @@ void Scheduler::WorkerThread() {
|
||||
}
|
||||
std::scoped_lock reserve_lock{reserve_mutex};
|
||||
chunk_reserve.push_back(std::move(work));
|
||||
} while (!stop_requested);
|
||||
} while (!stop_token.stop_requested());
|
||||
}
|
||||
|
||||
void Scheduler::AllocateWorkerCommandBuffers() {
|
||||
@ -126,16 +109,13 @@ void Scheduler::AllocateWorkerCommandBuffers() {
|
||||
render_cmdbuf.begin(begin_info);
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Exectution", MP_RGB(255, 192, 255));
|
||||
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
|
||||
renderer.FlushBuffers();
|
||||
const u64 signal_value = master_semaphore.NextTick();
|
||||
state = StateFlags::AllDirty;
|
||||
|
||||
renderpass_cache.ExitRenderpass();
|
||||
Record([signal_semaphore, wait_semaphore, signal_value, this]
|
||||
(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Submit);
|
||||
upload_cmdbuf.end();
|
||||
render_cmdbuf.end();
|
||||
|
||||
@ -182,12 +162,8 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
|
||||
}
|
||||
});
|
||||
|
||||
if (!use_worker_thread) {
|
||||
AllocateWorkerCommandBuffers();
|
||||
} else {
|
||||
chunk->MarkSubmit();
|
||||
DispatchWork();
|
||||
}
|
||||
chunk->MarkSubmit();
|
||||
DispatchWork();
|
||||
}
|
||||
|
||||
void Scheduler::AcquireNewChunk() {
|
||||
|
@ -27,15 +27,13 @@ enum class StateFlags {
|
||||
DECLARE_ENUM_FLAG_OPERATORS(StateFlags)
|
||||
|
||||
class Instance;
|
||||
class RenderpassCache;
|
||||
class RendererVulkan;
|
||||
|
||||
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
||||
/// OpenGL-like operations on Vulkan command buffers.
|
||||
class Scheduler {
|
||||
public:
|
||||
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
|
||||
RendererVulkan& renderer);
|
||||
explicit Scheduler(const Instance& instance, RendererVulkan& renderer);
|
||||
~Scheduler();
|
||||
|
||||
/// Sends the current execution context to the GPU.
|
||||
@ -54,11 +52,6 @@ public:
|
||||
/// Records the command to the current chunk.
|
||||
template <typename T>
|
||||
void Record(T&& command) {
|
||||
if (!use_worker_thread) {
|
||||
command(render_cmdbuf, upload_cmdbuf);
|
||||
return;
|
||||
}
|
||||
|
||||
if (chunk->Record(command)) {
|
||||
return;
|
||||
}
|
||||
@ -185,7 +178,7 @@ private:
|
||||
};
|
||||
|
||||
private:
|
||||
void WorkerThread();
|
||||
void WorkerThread(std::stop_token stop_token);
|
||||
|
||||
void AllocateWorkerCommandBuffers();
|
||||
|
||||
@ -195,7 +188,6 @@ private:
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
RenderpassCache& renderpass_cache;
|
||||
RendererVulkan& renderer;
|
||||
MasterSemaphore master_semaphore;
|
||||
CommandPool command_pool;
|
||||
@ -209,9 +201,7 @@ private:
|
||||
std::mutex work_mutex;
|
||||
std::condition_variable_any work_cv;
|
||||
std::condition_variable wait_cv;
|
||||
std::thread worker_thread;
|
||||
std::atomic_bool stop_requested;
|
||||
bool use_worker_thread;
|
||||
std::jthread worker_thread;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -8,10 +8,8 @@
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/pica_state.h"
|
||||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
using Pica::FramebufferRegs;
|
||||
@ -101,29 +99,25 @@ out gl_PerVertex {
|
||||
return out;
|
||||
}
|
||||
|
||||
PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
|
||||
state.scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode);
|
||||
PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
|
||||
PicaFSConfig res{};
|
||||
|
||||
state.depthmap_enable.Assign(regs.rasterizer.depthmap_enable);
|
||||
auto& state = res.state;
|
||||
|
||||
state.alpha_test_func.Assign(regs.framebuffer.output_merger.alpha_test.enable
|
||||
state.scissor_test_mode = regs.rasterizer.scissor_test.mode;
|
||||
|
||||
state.depthmap_enable = regs.rasterizer.depthmap_enable;
|
||||
|
||||
state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
|
||||
? regs.framebuffer.output_merger.alpha_test.func.Value()
|
||||
: FramebufferRegs::CompareFunc::Always);
|
||||
: FramebufferRegs::CompareFunc::Always;
|
||||
|
||||
state.texture0_type.Assign(regs.texturing.texture0.type);
|
||||
state.texture0_type = regs.texturing.texture0.type;
|
||||
|
||||
state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0);
|
||||
state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
|
||||
|
||||
// Emulate logic op in the shader if not supported. This is mostly for mobile GPUs
|
||||
const bool emulate_logic_op = instance.NeedsLogicOpEmulation() &&
|
||||
!Pica::g_state.regs.framebuffer.output_merger.alphablend_enable;
|
||||
|
||||
state.emulate_logic_op.Assign(emulate_logic_op);
|
||||
if (emulate_logic_op) {
|
||||
state.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
|
||||
} else {
|
||||
state.logic_op.Assign(Pica::FramebufferRegs::LogicOp::NoOp);
|
||||
}
|
||||
state.alphablend_enable = {};
|
||||
state.logic_op = {};
|
||||
|
||||
// Copy relevant tev stages fields.
|
||||
// We don't sync const_color here because of the high variance, it is a
|
||||
@ -138,95 +132,95 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
|
||||
state.tev_stages[i].scales_raw = tev_stage.scales_raw;
|
||||
}
|
||||
|
||||
state.fog_mode.Assign(regs.texturing.fog_mode);
|
||||
state.fog_flip.Assign(regs.texturing.fog_flip != 0);
|
||||
state.fog_mode = regs.texturing.fog_mode;
|
||||
state.fog_flip = regs.texturing.fog_flip != 0;
|
||||
|
||||
state.combiner_buffer_input.Assign(regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
|
||||
state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
|
||||
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value()
|
||||
<< 4);
|
||||
<< 4;
|
||||
|
||||
// Fragment lighting
|
||||
|
||||
state.lighting.enable.Assign(!regs.lighting.disable);
|
||||
state.lighting.src_num.Assign(regs.lighting.max_light_index + 1);
|
||||
state.lighting.enable = !regs.lighting.disable;
|
||||
state.lighting.src_num = regs.lighting.max_light_index + 1;
|
||||
|
||||
for (u32 light_index = 0; light_index < state.lighting.src_num; ++light_index) {
|
||||
u32 num = regs.lighting.light_enable.GetNum(light_index);
|
||||
for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
|
||||
unsigned num = regs.lighting.light_enable.GetNum(light_index);
|
||||
const auto& light = regs.lighting.light[num];
|
||||
state.lighting.light[light_index].num.Assign(num);
|
||||
state.lighting.light[light_index].directional.Assign(light.config.directional != 0);
|
||||
state.lighting.light[light_index].two_sided_diffuse.Assign(light.config.two_sided_diffuse != 0);
|
||||
state.lighting.light[light_index].geometric_factor_0.Assign(light.config.geometric_factor_0 != 0);
|
||||
state.lighting.light[light_index].geometric_factor_1.Assign(light.config.geometric_factor_1 != 0);
|
||||
state.lighting.light[light_index].dist_atten_enable.Assign(
|
||||
!regs.lighting.IsDistAttenDisabled(num));
|
||||
state.lighting.light[light_index].spot_atten_enable.Assign(
|
||||
!regs.lighting.IsSpotAttenDisabled(num));
|
||||
state.lighting.light[light_index].shadow_enable.Assign(!regs.lighting.IsShadowDisabled(num));
|
||||
state.lighting.light[light_index].num = num;
|
||||
state.lighting.light[light_index].directional = light.config.directional != 0;
|
||||
state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
|
||||
state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0;
|
||||
state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0;
|
||||
state.lighting.light[light_index].dist_atten_enable =
|
||||
!regs.lighting.IsDistAttenDisabled(num);
|
||||
state.lighting.light[light_index].spot_atten_enable =
|
||||
!regs.lighting.IsSpotAttenDisabled(num);
|
||||
state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num);
|
||||
}
|
||||
|
||||
state.lighting.lut_d0.enable.Assign(regs.lighting.config1.disable_lut_d0 == 0);
|
||||
state.lighting.lut_d0.abs_input.Assign(regs.lighting.abs_lut_input.disable_d0 == 0);
|
||||
state.lighting.lut_d0.type.Assign(regs.lighting.lut_input.d0.Value());
|
||||
state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
|
||||
state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
|
||||
state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
|
||||
state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
|
||||
|
||||
state.lighting.lut_d1.enable.Assign(regs.lighting.config1.disable_lut_d1 == 0);
|
||||
state.lighting.lut_d1.abs_input.Assign(regs.lighting.abs_lut_input.disable_d1 == 0);
|
||||
state.lighting.lut_d1.type.Assign(regs.lighting.lut_input.d1.Value());
|
||||
state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
|
||||
state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
|
||||
state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
|
||||
state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
|
||||
|
||||
// this is a dummy field due to lack of the corresponding register
|
||||
state.lighting.lut_sp.enable.Assign(1);
|
||||
state.lighting.lut_sp.abs_input.Assign(regs.lighting.abs_lut_input.disable_sp == 0);
|
||||
state.lighting.lut_sp.type.Assign(regs.lighting.lut_input.sp.Value());
|
||||
state.lighting.lut_sp.enable = true;
|
||||
state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
|
||||
state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
|
||||
state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
|
||||
|
||||
state.lighting.lut_fr.enable.Assign(regs.lighting.config1.disable_lut_fr == 0);
|
||||
state.lighting.lut_fr.abs_input.Assign(regs.lighting.abs_lut_input.disable_fr == 0);
|
||||
state.lighting.lut_fr.type.Assign(regs.lighting.lut_input.fr.Value());
|
||||
state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
|
||||
state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
|
||||
state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
|
||||
state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
|
||||
|
||||
state.lighting.lut_rr.enable.Assign(regs.lighting.config1.disable_lut_rr == 0);
|
||||
state.lighting.lut_rr.abs_input.Assign(regs.lighting.abs_lut_input.disable_rr == 0);
|
||||
state.lighting.lut_rr.type.Assign(regs.lighting.lut_input.rr.Value());
|
||||
state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
|
||||
state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
|
||||
state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
|
||||
state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
|
||||
|
||||
state.lighting.lut_rg.enable.Assign(regs.lighting.config1.disable_lut_rg == 0);
|
||||
state.lighting.lut_rg.abs_input.Assign(regs.lighting.abs_lut_input.disable_rg == 0);
|
||||
state.lighting.lut_rg.type.Assign(regs.lighting.lut_input.rg.Value());
|
||||
state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
|
||||
state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
|
||||
state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
|
||||
state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
|
||||
|
||||
state.lighting.lut_rb.enable.Assign(regs.lighting.config1.disable_lut_rb == 0);
|
||||
state.lighting.lut_rb.abs_input.Assign(regs.lighting.abs_lut_input.disable_rb == 0);
|
||||
state.lighting.lut_rb.type.Assign(regs.lighting.lut_input.rb.Value());
|
||||
state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
|
||||
state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
|
||||
state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
|
||||
state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
|
||||
|
||||
state.lighting.config.Assign(regs.lighting.config0.config);
|
||||
state.lighting.enable_primary_alpha.Assign(regs.lighting.config0.enable_primary_alpha);
|
||||
state.lighting.enable_secondary_alpha.Assign(regs.lighting.config0.enable_secondary_alpha);
|
||||
state.lighting.bump_mode.Assign(regs.lighting.config0.bump_mode);
|
||||
state.lighting.bump_selector.Assign(regs.lighting.config0.bump_selector);
|
||||
state.lighting.bump_renorm.Assign(regs.lighting.config0.disable_bump_renorm == 0);
|
||||
state.lighting.clamp_highlights.Assign(regs.lighting.config0.clamp_highlights != 0);
|
||||
state.lighting.config = regs.lighting.config0.config;
|
||||
state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
|
||||
state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
|
||||
state.lighting.bump_mode = regs.lighting.config0.bump_mode;
|
||||
state.lighting.bump_selector = regs.lighting.config0.bump_selector;
|
||||
state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
|
||||
state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
|
||||
|
||||
state.lighting.enable_shadow.Assign(regs.lighting.config0.enable_shadow != 0);
|
||||
state.lighting.shadow_primary.Assign(regs.lighting.config0.shadow_primary != 0);
|
||||
state.lighting.shadow_secondary.Assign(regs.lighting.config0.shadow_secondary != 0);
|
||||
state.lighting.shadow_invert.Assign(regs.lighting.config0.shadow_invert != 0);
|
||||
state.lighting.shadow_alpha.Assign(regs.lighting.config0.shadow_alpha != 0);
|
||||
state.lighting.shadow_selector.Assign(regs.lighting.config0.shadow_selector);
|
||||
state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
|
||||
state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
|
||||
state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
|
||||
state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
|
||||
state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
|
||||
state.lighting.shadow_selector = regs.lighting.config0.shadow_selector;
|
||||
|
||||
state.proctex.enable.Assign(regs.texturing.main_config.texture3_enable);
|
||||
state.proctex.enable = regs.texturing.main_config.texture3_enable;
|
||||
if (state.proctex.enable) {
|
||||
state.proctex.coord.Assign(regs.texturing.main_config.texture3_coordinates);
|
||||
state.proctex.u_clamp.Assign(regs.texturing.proctex.u_clamp);
|
||||
state.proctex.v_clamp.Assign(regs.texturing.proctex.v_clamp);
|
||||
state.proctex.color_combiner.Assign(regs.texturing.proctex.color_combiner);
|
||||
state.proctex.alpha_combiner.Assign(regs.texturing.proctex.alpha_combiner);
|
||||
state.proctex.separate_alpha.Assign(regs.texturing.proctex.separate_alpha);
|
||||
state.proctex.noise_enable.Assign(regs.texturing.proctex.noise_enable);
|
||||
state.proctex.u_shift.Assign(regs.texturing.proctex.u_shift);
|
||||
state.proctex.v_shift.Assign(regs.texturing.proctex.v_shift);
|
||||
state.proctex.coord = regs.texturing.main_config.texture3_coordinates;
|
||||
state.proctex.u_clamp = regs.texturing.proctex.u_clamp;
|
||||
state.proctex.v_clamp = regs.texturing.proctex.v_clamp;
|
||||
state.proctex.color_combiner = regs.texturing.proctex.color_combiner;
|
||||
state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
|
||||
state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
|
||||
state.proctex.noise_enable = regs.texturing.proctex.noise_enable;
|
||||
state.proctex.u_shift = regs.texturing.proctex.u_shift;
|
||||
state.proctex.v_shift = regs.texturing.proctex.v_shift;
|
||||
state.proctex.lut_width = regs.texturing.proctex_lut.width;
|
||||
state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
|
||||
state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
|
||||
@ -234,16 +228,17 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
|
||||
state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
|
||||
state.proctex.lod_min = regs.texturing.proctex_lut.lod_min;
|
||||
state.proctex.lod_max = regs.texturing.proctex_lut.lod_max;
|
||||
state.proctex.lut_filter.Assign(regs.texturing.proctex_lut.filter);
|
||||
state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
|
||||
}
|
||||
|
||||
state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode ==
|
||||
FramebufferRegs::FragmentOperationMode::Shadow);
|
||||
state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
|
||||
FramebufferRegs::FragmentOperationMode::Shadow;
|
||||
|
||||
state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0);
|
||||
state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
|
||||
program_hash = setup.GetProgramCodeHash();
|
||||
swizzle_hash = setup.GetSwizzleDataHash();
|
||||
@ -498,33 +493,33 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
|
||||
using Operation = TevStageConfig::Operation;
|
||||
switch (operation) {
|
||||
case Operation::Replace:
|
||||
out += "color_results_1";
|
||||
out += fmt::format("{}[0]", variable_name);
|
||||
break;
|
||||
case Operation::Modulate:
|
||||
out += "color_results_1 * color_results_2";
|
||||
out += fmt::format("{0}[0] * {0}[1]", variable_name);
|
||||
break;
|
||||
case Operation::Add:
|
||||
out += "color_results_1 + color_results_2";
|
||||
out += fmt::format("{0}[0] + {0}[1]", variable_name);
|
||||
break;
|
||||
case Operation::AddSigned:
|
||||
out += "color_results_1 + color_results_2 - vec3(0.5)";
|
||||
out += fmt::format("{0}[0] + {0}[1] - vec3(0.5)", variable_name);
|
||||
break;
|
||||
case Operation::Lerp:
|
||||
out += "color_results_1 * color_results_3 + color_results_2 * (vec3(1.0) - color_results_3)";
|
||||
out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (vec3(1.0) - {0}[2])", variable_name);
|
||||
break;
|
||||
case Operation::Subtract:
|
||||
out += "color_results_1 - color_results_2";
|
||||
out += fmt::format("{0}[0] - {0}[1]", variable_name);
|
||||
break;
|
||||
case Operation::MultiplyThenAdd:
|
||||
out += "color_results_1 * color_results_2 + color_results_3";
|
||||
out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
|
||||
break;
|
||||
case Operation::AddThenMultiply:
|
||||
out += "min(color_results_1 + color_results_2, vec3(1.0)) * color_results_3";
|
||||
out += fmt::format("min({0}[0] + {0}[1], vec3(1.0)) * {0}[2]", variable_name);
|
||||
break;
|
||||
case Operation::Dot3_RGB:
|
||||
case Operation::Dot3_RGBA:
|
||||
out +=
|
||||
"vec3(dot(color_results_1 - vec3(0.5), color_results_2 - vec3(0.5)) * 4.0)";
|
||||
fmt::format("vec3(dot({0}[0] - vec3(0.5), {0}[1] - vec3(0.5)) * 4.0)", variable_name);
|
||||
break;
|
||||
default:
|
||||
out += "vec3(0.0)";
|
||||
@ -541,28 +536,28 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
|
||||
using Operation = TevStageConfig::Operation;
|
||||
switch (operation) {
|
||||
case Operation::Replace:
|
||||
out += "alpha_results_1";
|
||||
out += fmt::format("{}[0]", variable_name);
|
||||
break;
|
||||
case Operation::Modulate:
|
||||
out += "alpha_results_1 * alpha_results_2";
|
||||
out += fmt::format("{0}[0] * {0}[1]", variable_name);
|
||||
break;
|
||||
case Operation::Add:
|
||||
out += "alpha_results_1 + alpha_results_2";
|
||||
out += fmt::format("{0}[0] + {0}[1]", variable_name);
|
||||
break;
|
||||
case Operation::AddSigned:
|
||||
out += "alpha_results_1 + alpha_results_2 - 0.5";
|
||||
out += fmt::format("{0}[0] + {0}[1] - 0.5", variable_name);
|
||||
break;
|
||||
case Operation::Lerp:
|
||||
out += "alpha_results_1 * alpha_results_3 + alpha_results_2 * (1.0 - alpha_results_3)";
|
||||
out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (1.0 - {0}[2])", variable_name);
|
||||
break;
|
||||
case Operation::Subtract:
|
||||
out += "alpha_results_1 - alpha_results_2";
|
||||
out += fmt::format("{0}[0] - {0}[1]", variable_name);
|
||||
break;
|
||||
case Operation::MultiplyThenAdd:
|
||||
out += "alpha_results_1 * alpha_results_2 + alpha_results_3";
|
||||
out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
|
||||
break;
|
||||
case Operation::AddThenMultiply:
|
||||
out += "min(alpha_results_1 + alpha_results_2, 1.0) * alpha_results_3";
|
||||
out += fmt::format("min({0}[0] + {0}[1], 1.0) * {0}[2]", variable_name);
|
||||
break;
|
||||
default:
|
||||
out += "0.0";
|
||||
@ -608,34 +603,38 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned
|
||||
if (!IsPassThroughTevStage(stage)) {
|
||||
const std::string index_name = std::to_string(index);
|
||||
|
||||
out += fmt::format("color_results_1 = ", index_name);
|
||||
out += fmt::format("vec3 color_results_{}_1 = ", index_name);
|
||||
AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
|
||||
out += fmt::format(";\ncolor_results_2 = ", index_name);
|
||||
out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name);
|
||||
AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
|
||||
out += fmt::format(";\ncolor_results_3 = ", index_name);
|
||||
out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name);
|
||||
AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
|
||||
out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, "
|
||||
"color_results_{}_2, color_results_{}_3);\n",
|
||||
index_name, index_name, index_name, index_name);
|
||||
|
||||
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
|
||||
out += fmt::format(";\nvec3 color_output_{} = byteround(", index_name);
|
||||
AppendColorCombiner(out, stage.color_op, "color_results");
|
||||
out += fmt::format("vec3 color_output_{} = byteround(", index_name);
|
||||
AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);
|
||||
out += ");\n";
|
||||
|
||||
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
|
||||
// result of Dot3_RGBA operation is also placed to the alpha component
|
||||
out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name);
|
||||
} else {
|
||||
out += fmt::format("alpha_results_1 = ", index_name);
|
||||
out += fmt::format("float alpha_results_{}[3] = float[3](", index_name);
|
||||
AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1,
|
||||
index_name);
|
||||
out += fmt::format(";\nalpha_results_2 = ", index_name);
|
||||
out += ", ";
|
||||
AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2,
|
||||
index_name);
|
||||
out += fmt::format(";\nalpha_results_3 = ", index_name);
|
||||
out += ", ";
|
||||
AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3,
|
||||
index_name);
|
||||
out += ");\n";
|
||||
|
||||
out += fmt::format(";\nfloat alpha_output_{} = byteround(", index_name);
|
||||
AppendAlphaCombiner(out, stage.alpha_op, "alpha_results");
|
||||
out += fmt::format("float alpha_output_{} = byteround(", index_name);
|
||||
AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
|
||||
out += ");\n";
|
||||
}
|
||||
|
||||
@ -1471,14 +1470,6 @@ vec4 secondary_fragment_color = vec4(0.0);
|
||||
"vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"
|
||||
"vec4 last_tex_env_out = vec4(0.0);\n";
|
||||
|
||||
out += "vec3 color_results_1 = vec3(0.0);\n"
|
||||
"vec3 color_results_2 = vec3(0.0);\n"
|
||||
"vec3 color_results_3 = vec3(0.0);\n";
|
||||
|
||||
out += "float alpha_results_1 = 0.0;\n"
|
||||
"float alpha_results_2 = 0.0;\n"
|
||||
"float alpha_results_3 = 0.0;\n";
|
||||
|
||||
for (std::size_t index = 0; index < state.tev_stages.size(); ++index) {
|
||||
WriteTevStage(out, config, static_cast<u32>(index));
|
||||
}
|
||||
@ -1547,30 +1538,6 @@ do {
|
||||
out += "color = byteround(last_tex_env_out);\n";
|
||||
}
|
||||
|
||||
if (state.emulate_logic_op) {
|
||||
switch (state.logic_op) {
|
||||
case FramebufferRegs::LogicOp::Clear:
|
||||
out += "color = vec4(0);\n";
|
||||
break;
|
||||
case FramebufferRegs::LogicOp::Set:
|
||||
out += "color = vec4(1);\n";
|
||||
break;
|
||||
case FramebufferRegs::LogicOp::Copy:
|
||||
// Take the color output as-is
|
||||
break;
|
||||
case FramebufferRegs::LogicOp::CopyInverted:
|
||||
out += "color = ~color;\n";
|
||||
break;
|
||||
case FramebufferRegs::LogicOp::NoOp:
|
||||
// We need to discard the color, but not necessarily the depth. This is not possible
|
||||
// with fragment shader alone, so we emulate this behavior with the color mask.
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<u32>(state.logic_op.Value()));
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
|
||||
out += '}';
|
||||
return out;
|
||||
}
|
||||
@ -1605,7 +1572,6 @@ void main() {
|
||||
normquat = vert_normquat;
|
||||
view = vert_view;
|
||||
gl_Position = vert_position;
|
||||
gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
|
||||
|
||||
gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
|
||||
if (enable_clip1) {
|
||||
@ -1661,18 +1627,18 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
|
||||
if (used_regs[i]) {
|
||||
std::string_view prefix;
|
||||
switch (config.state.attrib_types[i]) {
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
|
||||
case AttribType::Float:
|
||||
prefix = "";
|
||||
break;
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
|
||||
case AttribType::Byte:
|
||||
case AttribType::Short:
|
||||
prefix = "i";
|
||||
break;
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
|
||||
case AttribType::Ubyte:
|
||||
prefix = "u";
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown attrib type {}", config.state.attrib_types[i]);
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
@ -1680,42 +1646,12 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
|
||||
fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, prefix);
|
||||
}
|
||||
}
|
||||
|
||||
// Some 3-component attributes might be emulated by breaking them to vec2 + scalar.
|
||||
// Define them here and combine them below
|
||||
for (std::size_t i = 0; i < used_regs.size(); ++i) {
|
||||
if (const u32 location = config.state.emulated_attrib_locations[i]; location != 0 && used_regs[i]) {
|
||||
std::string_view type;
|
||||
switch (config.state.attrib_types[i]) {
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
|
||||
type = "float";
|
||||
break;
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
|
||||
type = "int";
|
||||
break;
|
||||
case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
|
||||
type = "uint";
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
out += fmt::format("layout(location = {}) in {} vs_in_typed_reg{}_part2;\n", location, type, i);
|
||||
}
|
||||
}
|
||||
|
||||
out += '\n';
|
||||
|
||||
// cast input registers to float to avoid computational errors
|
||||
for (std::size_t i = 0; i < used_regs.size(); ++i) {
|
||||
if (used_regs[i]) {
|
||||
if (config.state.emulated_attrib_locations[i] != 0) {
|
||||
out += fmt::format("vec4 vs_in_reg{0} = vec4(vec2(vs_in_typed_reg{0}), float(vs_in_typed_reg{0}_part2), 0.f);\n", i);
|
||||
} else {
|
||||
out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
|
||||
}
|
||||
out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
|
||||
}
|
||||
}
|
||||
out += '\n';
|
||||
@ -1775,7 +1711,6 @@ struct Vertex {
|
||||
semantic(VSOutputAttributes::POSITION_Z) + ", " +
|
||||
semantic(VSOutputAttributes::POSITION_W) + ");\n";
|
||||
out += " gl_Position = vtx_pos;\n";
|
||||
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
|
||||
out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n";
|
||||
out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
|
||||
out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n";
|
||||
|
@ -8,12 +8,11 @@
|
||||
#include <optional>
|
||||
#include "common/hash.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/regs_pipeline.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
enum class AttribType : u32 { Float = 0, Int = 1, Short = 2, Byte = 3, Ubyte = 4 };
|
||||
|
||||
enum Attributes {
|
||||
ATTRIBUTE_POSITION,
|
||||
@ -44,85 +43,77 @@ struct TevStageConfigRaw {
|
||||
};
|
||||
|
||||
struct PicaFSConfigState {
|
||||
union {
|
||||
BitField<0, 3, Pica::FramebufferRegs::CompareFunc> alpha_test_func;
|
||||
BitField<3, 2, Pica::RasterizerRegs::ScissorMode> scissor_test_mode;
|
||||
BitField<5, 3, Pica::TexturingRegs::TextureConfig::TextureType> texture0_type;
|
||||
BitField<8, 1, u32> texture2_use_coord1;
|
||||
BitField<9, 8, u32> combiner_buffer_input;
|
||||
BitField<17, 1, Pica::RasterizerRegs::DepthBuffering> depthmap_enable;
|
||||
BitField<18, 3, Pica::TexturingRegs::FogMode> fog_mode;
|
||||
BitField<21, 1, u32> fog_flip;
|
||||
BitField<22, 1, u32> emulate_logic_op;
|
||||
BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op;
|
||||
BitField<27, 1, u32> shadow_rendering;
|
||||
BitField<28, 1, u32> shadow_texture_orthographic;
|
||||
};
|
||||
|
||||
Pica::FramebufferRegs::CompareFunc alpha_test_func;
|
||||
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
|
||||
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
|
||||
bool texture2_use_coord1;
|
||||
std::array<TevStageConfigRaw, 6> tev_stages;
|
||||
u8 combiner_buffer_input;
|
||||
|
||||
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
|
||||
Pica::TexturingRegs::FogMode fog_mode;
|
||||
bool fog_flip;
|
||||
bool alphablend_enable;
|
||||
Pica::FramebufferRegs::LogicOp logic_op;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 3, u16> num;
|
||||
BitField<3, 1, u16> directional;
|
||||
BitField<4, 1, u16> two_sided_diffuse;
|
||||
BitField<5, 1, u16> dist_atten_enable;
|
||||
BitField<6, 1, u16> spot_atten_enable;
|
||||
BitField<7, 1, u16> geometric_factor_0;
|
||||
BitField<8, 1, u16> geometric_factor_1;
|
||||
BitField<9, 1, u16> shadow_enable;
|
||||
struct {
|
||||
unsigned num;
|
||||
bool directional;
|
||||
bool two_sided_diffuse;
|
||||
bool dist_atten_enable;
|
||||
bool spot_atten_enable;
|
||||
bool geometric_factor_0;
|
||||
bool geometric_factor_1;
|
||||
bool shadow_enable;
|
||||
} light[8];
|
||||
|
||||
union {
|
||||
BitField<0, 1, u32> enable;
|
||||
BitField<1, 4, u32> src_num;
|
||||
BitField<5, 2, Pica::LightingRegs::LightingBumpMode> bump_mode;
|
||||
BitField<7, 2, u32> bump_selector;
|
||||
BitField<9, 1, u32> bump_renorm;
|
||||
BitField<10, 1, u32> clamp_highlights;
|
||||
BitField<11, 4, Pica::LightingRegs::LightingConfig> config;
|
||||
BitField<15, 1, u32> enable_primary_alpha;
|
||||
BitField<16, 1, u32> enable_secondary_alpha;
|
||||
BitField<17, 1, u32> enable_shadow;
|
||||
BitField<18, 1, u32> shadow_primary;
|
||||
BitField<19, 1, u32> shadow_secondary;
|
||||
BitField<20, 1, u32> shadow_invert;
|
||||
BitField<21, 1, u32> shadow_alpha;
|
||||
BitField<22, 2, u32> shadow_selector;
|
||||
};
|
||||
bool enable;
|
||||
unsigned src_num;
|
||||
Pica::LightingRegs::LightingBumpMode bump_mode;
|
||||
unsigned bump_selector;
|
||||
bool bump_renorm;
|
||||
bool clamp_highlights;
|
||||
|
||||
Pica::LightingRegs::LightingConfig config;
|
||||
bool enable_primary_alpha;
|
||||
bool enable_secondary_alpha;
|
||||
|
||||
bool enable_shadow;
|
||||
bool shadow_primary;
|
||||
bool shadow_secondary;
|
||||
bool shadow_invert;
|
||||
bool shadow_alpha;
|
||||
unsigned shadow_selector;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> enable;
|
||||
BitField<1, 1, u32> abs_input;
|
||||
BitField<2, 3, Pica::LightingRegs::LightingLutInput> type;
|
||||
};
|
||||
bool enable;
|
||||
bool abs_input;
|
||||
Pica::LightingRegs::LightingLutInput type;
|
||||
float scale;
|
||||
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
|
||||
} lighting;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> enable;
|
||||
BitField<1, 2, u32> coord;
|
||||
BitField<3, 3, Pica::TexturingRegs::ProcTexClamp> u_clamp;
|
||||
BitField<6, 3, Pica::TexturingRegs::ProcTexClamp> v_clamp;
|
||||
BitField<9, 4, Pica::TexturingRegs::ProcTexCombiner> color_combiner;
|
||||
BitField<13, 4, Pica::TexturingRegs::ProcTexCombiner> alpha_combiner;
|
||||
BitField<17, 3, Pica::TexturingRegs::ProcTexFilter> lut_filter;
|
||||
BitField<20, 1, u32> separate_alpha;
|
||||
BitField<21, 1, u32> noise_enable;
|
||||
BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift;
|
||||
BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift;
|
||||
};
|
||||
u8 lut_width;
|
||||
u8 lut_offset0;
|
||||
u8 lut_offset1;
|
||||
u8 lut_offset2;
|
||||
u8 lut_offset3;
|
||||
u8 lod_min;
|
||||
u8 lod_max;
|
||||
bool enable;
|
||||
u32 coord;
|
||||
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
|
||||
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
|
||||
bool separate_alpha;
|
||||
bool noise_enable;
|
||||
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
|
||||
u32 lut_width;
|
||||
u32 lut_offset0;
|
||||
u32 lut_offset1;
|
||||
u32 lut_offset2;
|
||||
u32 lut_offset3;
|
||||
u32 lod_min;
|
||||
u32 lod_max;
|
||||
Pica::TexturingRegs::ProcTexFilter lut_filter;
|
||||
} proctex;
|
||||
|
||||
bool shadow_rendering;
|
||||
bool shadow_texture_orthographic;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -134,7 +125,9 @@ struct PicaFSConfigState {
|
||||
* two separate shaders sharing the same key.
|
||||
*/
|
||||
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
|
||||
PicaFSConfig(const Pica::Regs& regs, const Instance& instance);
|
||||
|
||||
/// Construct a PicaFSConfig with the given Pica register configuration.
|
||||
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);
|
||||
|
||||
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
|
||||
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
|
||||
@ -156,8 +149,7 @@ struct PicaShaderConfigCommon {
|
||||
u64 swizzle_hash;
|
||||
u32 main_offset;
|
||||
bool sanitize_mul;
|
||||
std::array<Pica::PipelineRegs::VertexAttributeFormat, 16> attrib_types;
|
||||
std::array<u8, 16> emulated_attrib_locations;
|
||||
std::array<AttribType, 16> attrib_types;
|
||||
|
||||
u32 num_outputs;
|
||||
|
||||
|
@ -1,958 +0,0 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
|
||||
#include "video_core/shader/shader_uniforms.h"
|
||||
|
||||
using Pica::FramebufferRegs;
|
||||
using Pica::LightingRegs;
|
||||
using Pica::RasterizerRegs;
|
||||
using Pica::TexturingRegs;
|
||||
using TevStageConfig = TexturingRegs::TevStageConfig;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
FragmentModule::FragmentModule(const PicaFSConfig& config) : Sirit::Module{0x00010300}, config{config} {
|
||||
DefineArithmeticTypes();
|
||||
DefineUniformStructs();
|
||||
DefineInterface();
|
||||
DefineEntryPoint();
|
||||
}
|
||||
|
||||
FragmentModule::~FragmentModule() = default;
|
||||
|
||||
void FragmentModule::Generate() {
|
||||
AddLabel(OpLabel());
|
||||
|
||||
rounded_primary_color = Byteround(OpLoad(vec_ids.Get(4), primary_color_id), 4);
|
||||
primary_fragment_color = ConstF32(0.f, 0.f, 0.f, 0.f);
|
||||
secondary_fragment_color = ConstF32(0.f, 0.f, 0.f, 0.f);
|
||||
|
||||
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
|
||||
if (config.state.alpha_test_func == Pica::FramebufferRegs::CompareFunc::Never) {
|
||||
OpKill();
|
||||
OpFunctionEnd();
|
||||
return;
|
||||
}
|
||||
|
||||
// Write shader bytecode to emulate all enabled PICA lights
|
||||
if (config.state.lighting.enable) {
|
||||
WriteLighting();
|
||||
}
|
||||
|
||||
combiner_buffer = ConstF32(0.f, 0.f, 0.f, 0.f);
|
||||
next_combiner_buffer = GetShaderDataMember(vec_ids.Get(4), ConstS32(27));
|
||||
last_tex_env_out = ConstF32(0.f, 0.f, 0.f, 0.f);
|
||||
|
||||
// Write shader bytecode to emulate PICA TEV stages
|
||||
for (std::size_t index = 0; index < config.state.tev_stages.size(); ++index) {
|
||||
WriteTevStage(static_cast<s32>(index));
|
||||
}
|
||||
|
||||
if (WriteAlphaTestCondition(config.state.alpha_test_func)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use
|
||||
// default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then
|
||||
// do our own transformation according to PICA specification.
|
||||
WriteDepth();
|
||||
|
||||
// Write output color
|
||||
OpStore(color_id, Byteround(last_tex_env_out, 4));
|
||||
OpReturn();
|
||||
OpFunctionEnd();
|
||||
}
|
||||
|
||||
void FragmentModule::WriteDepth() {
|
||||
const Id input_pointer_id{TypePointer(spv::StorageClass::Input, f32_id)};
|
||||
const Id gl_frag_coord_z{OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(2u)))};
|
||||
const Id z_over_w{OpFma(f32_id, ConstF32(2.f), gl_frag_coord_z, ConstF32(-1.f))};
|
||||
const Id depth_scale{GetShaderDataMember(f32_id, ConstS32(2))};
|
||||
const Id depth_offset{GetShaderDataMember(f32_id, ConstS32(3))};
|
||||
const Id depth{OpFma(f32_id, z_over_w, depth_scale, depth_offset)};
|
||||
if (config.state.depthmap_enable == Pica::RasterizerRegs::DepthBuffering::WBuffering) {
|
||||
const Id gl_frag_coord_w{OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(3u)))};
|
||||
const Id depth_over_w{OpFDiv(f32_id, depth, gl_frag_coord_w)};
|
||||
OpStore(gl_frag_depth_id, depth_over_w);
|
||||
} else {
|
||||
OpStore(gl_frag_depth_id, depth);
|
||||
}
|
||||
}
|
||||
|
||||
void FragmentModule::WriteLighting() {
|
||||
const auto& lighting = config.state.lighting;
|
||||
|
||||
// Define lighting globals
|
||||
Id diffuse_sum{ConstF32(0.f, 0.f, 0.f, 1.f)};
|
||||
Id specular_sum{ConstF32(0.f, 0.f, 0.f, 1.f)};
|
||||
Id light_vector{ConstF32(0.f, 0.f, 0.f)};
|
||||
Id spot_dir{ConstF32(0.f, 0.f, 0.f)};
|
||||
Id half_vector{ConstF32(0.f, 0.f, 0.f)};
|
||||
Id dot_product{ConstF32(0.f)};
|
||||
Id clamp_highlights{ConstF32(1.f)};
|
||||
Id geo_factor{ConstF32(1.f)};
|
||||
Id surface_normal{};
|
||||
Id surface_tangent{};
|
||||
|
||||
// Compute fragment normals and tangents
|
||||
const auto Perturbation = [&]() -> Id {
|
||||
const Id texel{SampleTexture(lighting.bump_selector)};
|
||||
const Id texel_rgb{OpVectorShuffle(vec_ids.Get(3), texel, texel, 0, 1, 2)};
|
||||
const Id rgb_mul_two{OpVectorTimesScalar(vec_ids.Get(3), texel_rgb, ConstF32(2.f))};
|
||||
return OpFSub(vec_ids.Get(3), rgb_mul_two, ConstF32(1.f, 1.f, 1.f));
|
||||
};
|
||||
|
||||
if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
|
||||
// Bump mapping is enabled using a normal map
|
||||
surface_normal = Perturbation();
|
||||
|
||||
// Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
|
||||
// precision result
|
||||
if (lighting.bump_renorm) {
|
||||
const Id normal_x{OpCompositeExtract(f32_id, surface_normal, 0)};
|
||||
const Id normal_y{OpCompositeExtract(f32_id, surface_normal, 1)};
|
||||
const Id y_mul_y{OpFMul(f32_id, normal_y, normal_y)};
|
||||
const Id val{OpFSub(f32_id, ConstF32(1.f), OpFma(f32_id, normal_x, normal_x, y_mul_y))};
|
||||
const Id normal_z{OpSqrt(f32_id, OpFMax(f32_id, val, ConstF32(0.f)))};
|
||||
surface_normal = OpCompositeConstruct(vec_ids.Get(3), normal_x, normal_y, normal_z);
|
||||
}
|
||||
|
||||
// The tangent vector is not perturbed by the normal map and is just a unit vector.
|
||||
surface_tangent = ConstF32(1.f, 0.f, 0.f);
|
||||
} else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
|
||||
// Bump mapping is enabled using a tangent map
|
||||
surface_tangent = Perturbation();
|
||||
|
||||
// Mathematically, recomputing Z-component of the tangent vector won't affect the relevant
|
||||
// computation below, which is also confirmed on 3DS. So we don't bother recomputing here
|
||||
// even if 'renorm' is enabled.
|
||||
|
||||
// The normal vector is not perturbed by the tangent map and is just a unit vector.
|
||||
surface_normal = ConstF32(0.f, 0.f, 1.f);
|
||||
} else {
|
||||
// No bump mapping - surface local normal and tangent are just unit vectors
|
||||
surface_normal = ConstF32(0.f, 0.f, 1.f);
|
||||
surface_tangent = ConstF32(1.f, 0.f, 0.f);
|
||||
}
|
||||
|
||||
// Rotate the vector v by the quaternion q
|
||||
const auto QuaternionRotate = [this](Id q, Id v) -> Id {
|
||||
const Id q_xyz{OpVectorShuffle(vec_ids.Get(3), q, q, 0, 1, 2)};
|
||||
const Id q_xyz_cross_v{OpCross(vec_ids.Get(3), q_xyz, v)};
|
||||
const Id q_w{OpCompositeExtract(f32_id, q, 3)};
|
||||
const Id val1{OpFAdd(vec_ids.Get(3), q_xyz_cross_v, OpVectorTimesScalar(vec_ids.Get(3), v, q_w))};
|
||||
const Id val2{OpVectorTimesScalar(vec_ids.Get(3), OpCross(vec_ids.Get(3), q_xyz, val1), ConstF32(2.f))};
|
||||
return OpFAdd(vec_ids.Get(3), v, val2);
|
||||
};
|
||||
|
||||
// Rotate the surface-local normal by the interpolated normal quaternion to convert it to
|
||||
// eyespace.
|
||||
const Id normalized_normquat{OpNormalize(vec_ids.Get(4), OpLoad(vec_ids.Get(4), normquat_id))};
|
||||
const Id normal{QuaternionRotate(normalized_normquat, surface_normal)};
|
||||
const Id tangent{QuaternionRotate(normalized_normquat, surface_tangent)};
|
||||
|
||||
Id shadow{ConstF32(1.f, 1.f, 1.f, 1.f)};
|
||||
if (lighting.enable_shadow) {
|
||||
shadow = SampleTexture(lighting.shadow_selector);
|
||||
if (lighting.shadow_invert) {
|
||||
shadow = OpFSub(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f, 1.f), shadow);
|
||||
}
|
||||
}
|
||||
|
||||
const auto LookupLightingLUTUnsigned = [this](Id lut_index, Id pos) -> Id {
|
||||
const Id pos_int{OpConvertFToS(i32_id, OpFMul(f32_id, pos, ConstF32(256.f)))};
|
||||
const Id index{OpSClamp(i32_id, pos_int, ConstS32(0), ConstS32(255))};
|
||||
const Id neg_index{OpFNegate(f32_id, OpConvertSToF(f32_id, index))};
|
||||
const Id delta{OpFma(f32_id, pos, ConstF32(256.f), neg_index)};
|
||||
return LookupLightingLUT(lut_index, index, delta);
|
||||
};
|
||||
|
||||
const auto LookupLightingLUTSigned = [this](Id lut_index, Id pos) -> Id {
|
||||
const Id pos_int{OpConvertFToS(i32_id, OpFMul(f32_id, pos, ConstF32(128.f)))};
|
||||
const Id index{OpSClamp(i32_id, pos_int, ConstS32(-128), ConstS32(127))};
|
||||
const Id neg_index{OpFNegate(f32_id, OpConvertSToF(f32_id, index))};
|
||||
const Id delta{OpFma(f32_id, pos, ConstF32(128.f), neg_index)};
|
||||
const Id increment{OpSelect(i32_id, OpSLessThan(bool_id, index, ConstS32(0)), ConstS32(256), ConstS32(0))};
|
||||
return LookupLightingLUT(lut_index, OpIAdd(i32_id, index, increment), delta);
|
||||
};
|
||||
|
||||
// Samples the specified lookup table for specular lighting
|
||||
const Id view{OpLoad(vec_ids.Get(3), view_id)};
|
||||
const auto GetLutValue = [&](LightingRegs::LightingSampler sampler, u32 light_num,
|
||||
LightingRegs::LightingLutInput input, bool abs) -> Id {
|
||||
Id index{};
|
||||
switch (input) {
|
||||
case LightingRegs::LightingLutInput::NH:
|
||||
index = OpDot(f32_id, normal, OpNormalize(vec_ids.Get(3), half_vector));
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::VH:
|
||||
index = OpDot(f32_id, OpNormalize(vec_ids.Get(3), view), OpNormalize(vec_ids.Get(3), half_vector));
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::NV:
|
||||
index = OpDot(f32_id, normal, OpNormalize(vec_ids.Get(3), view));
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::LN:
|
||||
index = OpDot(f32_id, light_vector, normal);
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::SP:
|
||||
index = OpDot(f32_id, light_vector, spot_dir);
|
||||
break;
|
||||
case LightingRegs::LightingLutInput::CP:
|
||||
// CP input is only available with configuration 7
|
||||
if (lighting.config == LightingRegs::LightingConfig::Config7) {
|
||||
// Note: even if the normal vector is modified by normal map, which is not the
|
||||
// normal of the tangent plane anymore, the half angle vector is still projected
|
||||
// using the modified normal vector.
|
||||
const Id normalized_half_vector{OpNormalize(vec_ids.Get(3), half_vector)};
|
||||
const Id normal_dot_half_vector{OpDot(f32_id, normal, normalized_half_vector)};
|
||||
const Id normal_mul_dot{OpVectorTimesScalar(vec_ids.Get(3), normal, normal_dot_half_vector)};
|
||||
const Id half_angle_proj{OpFSub(vec_ids.Get(3), normalized_half_vector, normal_mul_dot)};
|
||||
|
||||
// Note: the half angle vector projection is confirmed not normalized before the dot
|
||||
// product. The result is in fact not cos(phi) as the name suggested.
|
||||
index = OpDot(f32_id, half_angle_proj, tangent);
|
||||
} else {
|
||||
index = ConstF32(0.f);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", (int)input);
|
||||
UNIMPLEMENTED();
|
||||
index = ConstF32(0.f);
|
||||
break;
|
||||
}
|
||||
|
||||
const Id sampler_index{ConstU32(static_cast<u32>(sampler))};
|
||||
if (abs) {
|
||||
// LUT index is in the range of (0.0, 1.0)
|
||||
index = lighting.light[light_num].two_sided_diffuse
|
||||
? OpFAbs(f32_id, index)
|
||||
: OpFMax(f32_id, index, ConstF32(0.f));
|
||||
return LookupLightingLUTUnsigned(sampler_index, index);
|
||||
} else {
|
||||
// LUT index is in the range of (-1.0, 1.0)
|
||||
return LookupLightingLUTSigned(sampler_index, index);
|
||||
}
|
||||
};
|
||||
|
||||
// Write the code to emulate each enabled light
|
||||
for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) {
|
||||
const auto& light_config = lighting.light[light_index];
|
||||
|
||||
const auto GetLightMember = [&](s32 member) -> Id {
|
||||
const Id member_type = member < 6 ? vec_ids.Get(3) : f32_id;
|
||||
const Id light_num{ConstS32(static_cast<s32>(lighting.light[light_index].num.Value()))};
|
||||
return GetShaderDataMember(member_type, ConstS32(25), light_num, ConstS32(member));
|
||||
};
|
||||
|
||||
// Compute light vector (directional or positional)
|
||||
const Id light_position{GetLightMember(4)};
|
||||
if (light_config.directional) {
|
||||
light_vector = OpNormalize(vec_ids.Get(3), light_position);
|
||||
} else {
|
||||
light_vector = OpNormalize(vec_ids.Get(3), OpFAdd(vec_ids.Get(3), light_position, view));
|
||||
}
|
||||
|
||||
spot_dir = GetLightMember(5);
|
||||
half_vector = OpFAdd(vec_ids.Get(3), OpNormalize(vec_ids.Get(3), view), light_vector);
|
||||
|
||||
// Compute dot product of light_vector and normal, adjust if lighting is one-sided or
|
||||
// two-sided
|
||||
if (light_config.two_sided_diffuse) {
|
||||
dot_product = OpFAbs(f32_id, OpDot(f32_id, light_vector, normal));
|
||||
} else {
|
||||
dot_product = OpFMax(f32_id, OpDot(f32_id, light_vector, normal), ConstF32(0.f));
|
||||
}
|
||||
|
||||
// If enabled, clamp specular component if lighting result is zero
|
||||
if (lighting.clamp_highlights) {
|
||||
clamp_highlights = OpFSign(f32_id, dot_product);
|
||||
}
|
||||
|
||||
// If enabled, compute spot light attenuation value
|
||||
Id spot_atten{ConstF32(1.f)};
|
||||
if (light_config.spot_atten_enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(
|
||||
lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) {
|
||||
const Id value{GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num),
|
||||
light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input)};
|
||||
spot_atten = OpFMul(f32_id, ConstF32(lighting.lut_sp.scale), value);
|
||||
}
|
||||
|
||||
// If enabled, compute distance attenuation value
|
||||
Id dist_atten{ConstF32(1.f)};
|
||||
if (light_config.dist_atten_enable) {
|
||||
const Id dist_atten_scale{GetLightMember(7)};
|
||||
const Id dist_atten_bias{GetLightMember(6)};
|
||||
const Id min_view_min_pos{OpFSub(vec_ids.Get(3), OpFNegate(vec_ids.Get(3), view), light_position)};
|
||||
const Id index{OpFma(f32_id, dist_atten_scale, OpLength(f32_id, min_view_min_pos), dist_atten_bias)};
|
||||
const Id clamped_index{OpFClamp(f32_id, index, ConstF32(0.f), ConstF32(1.f))};
|
||||
const Id sampler{ConstS32(static_cast<s32>(LightingRegs::DistanceAttenuationSampler(light_config.num)))};
|
||||
dist_atten = LookupLightingLUTUnsigned(sampler, clamped_index);
|
||||
}
|
||||
|
||||
if (light_config.geometric_factor_0 || light_config.geometric_factor_1) {
|
||||
geo_factor = OpDot(f32_id, half_vector, half_vector);
|
||||
const Id dot_div_geo{OpFMin(f32_id, OpFDiv(f32_id, dot_product, geo_factor), ConstF32(1.f))};
|
||||
const Id is_geo_factor_zero{OpFOrdEqual(bool_id, geo_factor, ConstF32(0.f))};
|
||||
geo_factor = OpSelect(f32_id, is_geo_factor_zero, ConstF32(0.f), dot_div_geo);
|
||||
}
|
||||
|
||||
// Specular 0 component
|
||||
Id d0_lut_value{ConstF32(1.f)};
|
||||
if (lighting.lut_d0.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(
|
||||
lighting.config, LightingRegs::LightingSampler::Distribution0)) {
|
||||
// Lookup specular "distribution 0" LUT value
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num,
|
||||
lighting.lut_d0.type, lighting.lut_d0.abs_input)};
|
||||
d0_lut_value = OpFMul(f32_id, ConstF32(lighting.lut_d0.scale), value);
|
||||
}
|
||||
|
||||
Id specular_0{OpVectorTimesScalar(vec_ids.Get(3), GetLightMember(0), d0_lut_value)};
|
||||
if (light_config.geometric_factor_0) {
|
||||
specular_0 = OpVectorTimesScalar(vec_ids.Get(3), specular_0, geo_factor);
|
||||
}
|
||||
|
||||
// If enabled, lookup ReflectRed value, otherwise, 1.0 is used
|
||||
Id refl_value_r{ConstF32(1.f)};
|
||||
if (lighting.lut_rr.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(lighting.config,
|
||||
LightingRegs::LightingSampler::ReflectRed)) {
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num,
|
||||
lighting.lut_rr.type, lighting.lut_rr.abs_input)};
|
||||
|
||||
refl_value_r = OpFMul(f32_id, ConstF32(lighting.lut_rr.scale), value);
|
||||
}
|
||||
|
||||
// If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
|
||||
Id refl_value_g{refl_value_r};
|
||||
if (lighting.lut_rg.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(lighting.config,
|
||||
LightingRegs::LightingSampler::ReflectGreen)) {
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num,
|
||||
lighting.lut_rg.type, lighting.lut_rg.abs_input)};
|
||||
|
||||
refl_value_g = OpFMul(f32_id, ConstF32(lighting.lut_rg.scale), value);
|
||||
}
|
||||
|
||||
// If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
|
||||
Id refl_value_b{refl_value_r};
|
||||
if (lighting.lut_rb.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(lighting.config,
|
||||
LightingRegs::LightingSampler::ReflectBlue)) {
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num,
|
||||
lighting.lut_rb.type, lighting.lut_rb.abs_input)};
|
||||
refl_value_b = OpFMul(f32_id, ConstF32(lighting.lut_rb.scale), value);
|
||||
}
|
||||
|
||||
// Specular 1 component
|
||||
Id d1_lut_value{ConstF32(1.f)};
|
||||
if (lighting.lut_d1.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(
|
||||
lighting.config, LightingRegs::LightingSampler::Distribution1)) {
|
||||
// Lookup specular "distribution 1" LUT value
|
||||
const Id value{GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num,
|
||||
lighting.lut_d1.type, lighting.lut_d1.abs_input)};
|
||||
d1_lut_value = OpFMul(f32_id, ConstF32(lighting.lut_d1.scale), value);
|
||||
}
|
||||
|
||||
const Id refl_value{OpCompositeConstruct(vec_ids.Get(3), refl_value_r, refl_value_g, refl_value_b)};
|
||||
const Id light_specular_1{GetLightMember(1)};
|
||||
Id specular_1{OpFMul(vec_ids.Get(3), OpVectorTimesScalar(vec_ids.Get(3), refl_value, d1_lut_value), light_specular_1)};
|
||||
if (light_config.geometric_factor_1) {
|
||||
specular_1 = OpVectorTimesScalar(vec_ids.Get(3), specular_1, geo_factor);
|
||||
}
|
||||
|
||||
// Fresnel
|
||||
// Note: only the last entry in the light slots applies the Fresnel factor
|
||||
if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable &&
|
||||
LightingRegs::IsLightingSamplerSupported(lighting.config,
|
||||
LightingRegs::LightingSampler::Fresnel)) {
|
||||
// Lookup fresnel LUT value
|
||||
Id value{GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num,
|
||||
lighting.lut_fr.type, lighting.lut_fr.abs_input)};
|
||||
value = OpFMul(f32_id, ConstF32(lighting.lut_fr.scale), value);
|
||||
|
||||
// Enabled for diffuse lighting alpha component
|
||||
if (lighting.enable_primary_alpha) {
|
||||
diffuse_sum = OpCompositeInsert(vec_ids.Get(4), value, diffuse_sum, 3);
|
||||
}
|
||||
|
||||
// Enabled for the specular lighting alpha component
|
||||
if (lighting.enable_secondary_alpha) {
|
||||
specular_sum = OpCompositeInsert(vec_ids.Get(4), value, specular_sum, 3);
|
||||
}
|
||||
}
|
||||
|
||||
const bool shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable;
|
||||
const bool shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable;
|
||||
const Id shadow_rgb{OpVectorShuffle(vec_ids.Get(3), shadow, shadow, 0, 1, 2)};
|
||||
|
||||
const Id light_diffuse{GetLightMember(2)};
|
||||
const Id light_ambient{GetLightMember(3)};
|
||||
const Id diffuse_mul_dot{OpVectorTimesScalar(vec_ids.Get(3),light_diffuse, dot_product)};
|
||||
|
||||
// Compute primary fragment color (diffuse lighting) function
|
||||
Id diffuse_sum_rgb{OpFAdd(vec_ids.Get(3), diffuse_mul_dot, light_ambient)};
|
||||
diffuse_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), diffuse_sum_rgb, dist_atten);
|
||||
diffuse_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), diffuse_sum_rgb, spot_atten);
|
||||
if (shadow_primary_enable) {
|
||||
diffuse_sum_rgb = OpFMul(vec_ids.Get(3), diffuse_sum_rgb, shadow_rgb);
|
||||
}
|
||||
|
||||
// Compute secondary fragment color (specular lighting) function
|
||||
const Id specular_01{OpFAdd(vec_ids.Get(3), specular_0, specular_1)};
|
||||
Id specular_sum_rgb{OpVectorTimesScalar(vec_ids.Get(3), specular_01, clamp_highlights)};
|
||||
specular_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), specular_sum_rgb, dist_atten);
|
||||
specular_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), specular_sum_rgb, spot_atten);
|
||||
if (shadow_secondary_enable) {
|
||||
specular_sum_rgb = OpFMul(vec_ids.Get(3), specular_sum_rgb, shadow_rgb);
|
||||
}
|
||||
|
||||
// Accumulate the fragment colors
|
||||
const Id diffuse_sum_rgba{PadVectorF32(diffuse_sum_rgb, vec_ids.Get(4), 0.f)};
|
||||
const Id specular_sum_rgba{PadVectorF32(specular_sum_rgb, vec_ids.Get(4), 0.f)};
|
||||
diffuse_sum = OpFAdd(vec_ids.Get(4), diffuse_sum, diffuse_sum_rgba);
|
||||
specular_sum = OpFAdd(vec_ids.Get(4), specular_sum, specular_sum_rgba);
|
||||
}
|
||||
|
||||
// Apply shadow attenuation to alpha components if enabled
|
||||
if (lighting.shadow_alpha) {
|
||||
const Id shadow_a{OpCompositeExtract(f32_id, shadow, 3)};
|
||||
const Id shadow_a_vec{OpCompositeConstruct(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f), shadow_a)};
|
||||
if (lighting.enable_primary_alpha) {
|
||||
diffuse_sum = OpFMul(vec_ids.Get(4), diffuse_sum, shadow_a_vec);
|
||||
}
|
||||
if (lighting.enable_secondary_alpha) {
|
||||
specular_sum = OpFMul(vec_ids.Get(4), specular_sum, shadow_a_vec);
|
||||
}
|
||||
}
|
||||
|
||||
// Sum final lighting result
|
||||
const Id lighting_global_ambient{GetShaderDataMember(vec_ids.Get(3), ConstS32(24))};
|
||||
const Id lighting_global_ambient_rgba{PadVectorF32(lighting_global_ambient, vec_ids.Get(4), 0.f)};
|
||||
const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
|
||||
const Id one_vec{ConstF32(1.f, 1.f, 1.f, 1.f)};
|
||||
diffuse_sum = OpFAdd(vec_ids.Get(4), diffuse_sum, lighting_global_ambient_rgba);
|
||||
primary_fragment_color = OpFClamp(vec_ids.Get(4), diffuse_sum, zero_vec, one_vec);
|
||||
secondary_fragment_color = OpFClamp(vec_ids.Get(4), specular_sum, zero_vec, one_vec);
|
||||
}
|
||||
|
||||
// Emits SPIR-V for one TEV (texture environment) stage: evaluates the
// configured color/alpha operands, applies the combiner operations with the
// stage multipliers, and updates the combiner buffer for subsequent stages.
void FragmentModule::WriteTevStage(s32 index) {
    const TexturingRegs::TevStageConfig stage =
        static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);

    // Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
    const auto IsPassThroughTevStage = [](const TevStageConfig& stage) {
        return (stage.color_op == TevStageConfig::Operation::Replace &&
                stage.alpha_op == TevStageConfig::Operation::Replace &&
                stage.color_source1 == TevStageConfig::Source::Previous &&
                stage.alpha_source1 == TevStageConfig::Source::Previous &&
                stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
                stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
                stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
    };

    if (!IsPassThroughTevStage(stage)) {
        // Evaluate the three color operands for the color combiner.
        color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
        color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
        color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index);

        // Round the output of each TEV stage to maintain the PICA's 8 bits of precision
        Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)};
        Id alpha_output{};

        if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
            // result of Dot3_RGBA operation is also placed to the alpha component
            alpha_output = OpCompositeExtract(f32_id, color_output, 0);
        } else {
            // Evaluate the three alpha operands for the alpha combiner.
            alpha_results_1 = AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
            alpha_results_2 = AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
            alpha_results_3 = AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);

            alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op));
        }

        // Apply the per-stage multipliers and clamp both results to [0, 1].
        color_output = OpVectorTimesScalar(vec_ids.Get(3), color_output, ConstF32(static_cast<float>(stage.GetColorMultiplier())));
        color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f), ConstF32(1.f, 1.f, 1.f));
        alpha_output = OpFMul(f32_id, alpha_output, ConstF32(static_cast<float>(stage.GetAlphaMultiplier())));
        alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f));
        last_tex_env_out = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output);
    }

    // Advance the combiner buffer; later stages read this stage's snapshot.
    combiner_buffer = next_combiner_buffer;
    if (config.TevStageUpdatesCombinerBufferColor(index)) {
        // Take rgb from the stage output, keep the buffer's alpha (shuffle index 7).
        next_combiner_buffer = OpVectorShuffle(vec_ids.Get(4), last_tex_env_out, next_combiner_buffer, 0, 1, 2, 7);
    }

    if (config.TevStageUpdatesCombinerBufferAlpha(index)) {
        // Keep the buffer's rgb, take alpha from the stage output (shuffle index 7).
        next_combiner_buffer = OpVectorShuffle(vec_ids.Get(4), next_combiner_buffer, last_tex_env_out, 0, 1, 2, 7);
    }
}
|
||||
|
||||
// Emits the alpha-test discard logic for the given compare function.
// Returns true when the fragment is unconditionally killed (CompareFunc::Never),
// in which case the caller must stop emitting further code for this function.
bool FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) {
    using CompareFunc = FramebufferRegs::CompareFunc;

    // Builds the *discard* condition: each comparison is deliberately inverted
    // so the fragment is killed exactly when the configured alpha test fails.
    const auto Compare = [this, func](Id alpha, Id alphatest_ref) {
        switch (func) {
        case CompareFunc::Equal:
            return OpINotEqual(bool_id, alpha, alphatest_ref);
        case CompareFunc::NotEqual:
            return OpIEqual(bool_id, alpha, alphatest_ref);
        case CompareFunc::LessThan:
            return OpSGreaterThanEqual(bool_id, alpha, alphatest_ref);
        case CompareFunc::LessThanOrEqual:
            return OpSGreaterThan(bool_id, alpha, alphatest_ref);
        case CompareFunc::GreaterThan:
            return OpSLessThanEqual(bool_id, alpha, alphatest_ref);
        case CompareFunc::GreaterThanOrEqual:
            return OpSLessThan(bool_id, alpha, alphatest_ref);
        default:
            return Id{};
        }
    };

    switch (func) {
    case CompareFunc::Never: // Kill the fragment unconditionally
        OpKill();
        OpFunctionEnd();
        return true;
    case CompareFunc::Always: // Test always passes; emit nothing
        return false;
    case CompareFunc::Equal:
    case CompareFunc::NotEqual:
    case CompareFunc::LessThan:
    case CompareFunc::LessThanOrEqual:
    case CompareFunc::GreaterThan:
    case CompareFunc::GreaterThanOrEqual: {
        // The PICA compares the 8-bit alpha as an integer against the reference.
        const Id alpha_scaled{OpFMul(f32_id, OpCompositeExtract(f32_id, last_tex_env_out, 3), ConstF32(255.f))};
        const Id alpha_int{OpConvertFToS(i32_id, alpha_scaled)};
        const Id alphatest_ref{GetShaderDataMember(i32_id, ConstS32(1))};
        const Id alpha_comp_ref{Compare(alpha_int, alphatest_ref)};
        // Structured control flow: branch to the kill block when the test fails.
        const Id kill_label{OpLabel()};
        const Id keep_label{OpLabel()};
        OpSelectionMerge(keep_label, spv::SelectionControlMask::MaskNone);
        OpBranchConditional(alpha_comp_ref, kill_label, keep_label);
        AddLabel(kill_label);
        OpKill();
        AddLabel(keep_label);
        return false;
    }
    default:
        // Fix: the log statement previously appeared after `return`, making it
        // unreachable; report the unknown condition before bailing out.
        LOG_CRITICAL(Render_Vulkan, "Unknown alpha test condition {}", func);
        return false;
    }
}
|
||||
|
||||
// Samples the given texture unit and returns a vec4 color id.
// Only unit 0 honors the configured texturing type (2D / projection / cube /
// disabled); units 1-2 always sample as 2D, and unit 3 is the (currently
// unimplemented) procedural texture.
Id FragmentModule::SampleTexture(u32 texture_unit) {
    const PicaFSConfigState& state = config.state;
    const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};

    // PICA's LOD formula for 2D textures.
    // This LOD formula is the same as the LOD lower limit defined in OpenGL.
    // f(x, y) >= max{m_u, m_v, m_w}
    // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
    const auto SampleLod = [this](Id tex_id, Id tex_sampler_id, Id texcoord_id) {
        const Id tex{OpLoad(image2d_id, tex_id)};
        const Id tex_sampler{OpLoad(sampler_id, tex_sampler_id)};
        const Id sampled_image{OpSampledImage(TypeSampledImage(image2d_id), tex, tex_sampler)};
        const Id tex_image{OpImage(image2d_id, sampled_image)};
        const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))};
        const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
        // Convert to texel-space coordinates so the screen-space derivatives
        // directly give the texel footprint used for LOD selection.
        const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))};
        const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))};
        const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))};
        const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)};
        const Id dx_dy_max{OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))};
        const Id lod{OpLog2(f32_id, dx_dy_max)};
        return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord, spv::ImageOperandsMask::Lod, lod);
    };

    // Implicit-LOD sampling with texcoord0; when `projection` is set, the
    // third coordinate (texcoord0.w) is used as the projective divisor.
    const auto Sample = [this](Id tex_id, Id tex_sampler_id, bool projection) {
        const Id tex{OpLoad(image2d_id, tex_id)};
        const Id tex_sampler{OpLoad(sampler_id, tex_sampler_id)};
        const Id sampled_image{OpSampledImage(TypeSampledImage(image2d_id), tex, tex_sampler)};
        const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
        const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
        const Id coord{OpCompositeConstruct(vec_ids.Get(3), OpCompositeExtract(f32_id, texcoord0, 0),
                                            OpCompositeExtract(f32_id, texcoord0, 1),
                                            texcoord0_w)};
        if (projection) {
            return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord);
        } else {
            return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord);
        }
    };

    switch (texture_unit) {
    case 0:
        // Only unit 0 respects the texturing type
        switch (state.texture0_type) {
        case Pica::TexturingRegs::TextureConfig::Texture2D:
            return SampleLod(tex0_id, tex0_sampler_id, texcoord0_id);
        case Pica::TexturingRegs::TextureConfig::Projection2D:
            return Sample(tex0_id, tex0_sampler_id, true);
        case Pica::TexturingRegs::TextureConfig::TextureCube:
            return Sample(tex_cube_id, tex_cube_sampler_id, false);
        // Shadow sampling is not implemented yet (GLSL equivalents kept for reference):
        //case Pica::TexturingRegs::TextureConfig::Shadow2D:
        //return "shadowTexture(texcoord0, texcoord0_w)";
        //case Pica::TexturingRegs::TextureConfig::ShadowCube:
        //return "shadowTextureCube(texcoord0, texcoord0_w)";
        case Pica::TexturingRegs::TextureConfig::Disabled:
            return zero_vec;
        default:
            LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
            UNIMPLEMENTED();
            return zero_vec;
        }
    case 1:
        return SampleLod(tex1_id, tex1_sampler_id, texcoord1_id);
    case 2:
        // Unit 2 can optionally borrow unit 1's texture coordinates.
        if (state.texture2_use_coord1)
            return SampleLod(tex2_id, tex2_sampler_id, texcoord1_id);
        else
            return SampleLod(tex2_id, tex2_sampler_id, texcoord2_id);
    case 3:
        // ProcTex is intentionally disabled (the `false &&` guard); unit 3
        // currently always returns zero below.
        if (false && state.proctex.enable) {
            //return "ProcTex()";
        } else {
            LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
            return zero_vec;
        }
    default:
        // NOTE(review): void_id is returned only as a dummy Id on this
        // unreachable path; callers never receive it in practice.
        UNREACHABLE();
        return void_id;
    }
}
|
||||
|
||||
// Quantizes a float (or float vector of the given width) to the PICA's 8-bit
// precision: scale to [0, 255], round to the nearest integer, scale back.
Id FragmentModule::Byteround(Id variable_id, u32 size) {
    const Id scale{ConstF32(255.f)};
    const Id inv_scale{ConstF32(1.f / 255.f)};
    if (size <= 1) {
        // Scalar path.
        const Id quantized{OpRound(f32_id, OpFMul(f32_id, variable_id, scale))};
        return OpFMul(f32_id, quantized, inv_scale);
    }
    // Vector path.
    const Id vec_type{vec_ids.Get(size)};
    const Id quantized{OpRound(vec_type, OpVectorTimesScalar(vec_type, variable_id, scale))};
    return OpVectorTimesScalar(vec_type, quantized, inv_scale);
}
|
||||
|
||||
// Fetches an interpolated value from the lighting LUT texel buffer:
// result = lut[offset + index].r + lut[offset + index].g * delta.
Id FragmentModule::LookupLightingLUT(Id lut_index, Id index, Id delta) {
    // Load the LF texel buffer lazily on first use and cache the id.
    if (!Sirit::ValidId(texture_buffer_lut_lf)) {
        texture_buffer_lut_lf = OpLoad(TypeSampledImage(image_buffer_id), texture_buffer_lut_lf_id);
    }

    // Per-sampler offsets are packed four to a vector in shader-data member 19.
    const Id vector_slot{OpShiftRightArithmetic(i32_id, lut_index, ConstS32(2))};
    const Id lane{OpBitwiseAnd(i32_id, lut_index, ConstS32(3))};
    const Id lut_base{GetShaderDataMember(i32_id, ConstS32(19), vector_slot, lane)};
    const Id texel_pos{OpIAdd(i32_id, lut_base, index)};
    const Id texel{OpImageFetch(vec_ids.Get(4), OpImage(image_buffer_id, texture_buffer_lut_lf), texel_pos)};
    // .r holds the LUT sample, .g the per-entry delta used for interpolation.
    const Id sample{OpCompositeExtract(f32_id, texel, 0)};
    const Id sample_delta{OpCompositeExtract(f32_id, texel, 1)};
    return OpFma(f32_id, sample_delta, delta, sample);
}
|
||||
|
||||
// Resolves a TEV combiner input source to the SPIR-V id of its vec4 value.
Id FragmentModule::AppendSource(TevStageConfig::Source source, s32 index) {
    using Source = TevStageConfig::Source;
    switch (source) {
    // Texture units 0-3.
    case Source::Texture0:
        return SampleTexture(0);
    case Source::Texture1:
        return SampleTexture(1);
    case Source::Texture2:
        return SampleTexture(2);
    case Source::Texture3:
        return SampleTexture(3);
    // Vertex/lighting colors.
    case Source::PrimaryColor:
        return rounded_primary_color;
    case Source::PrimaryFragmentColor:
        return primary_fragment_color;
    case Source::SecondaryFragmentColor:
        return secondary_fragment_color;
    // TEV pipeline state.
    case Source::Previous:
        return last_tex_env_out;
    case Source::PreviousBuffer:
        return combiner_buffer;
    case Source::Constant:
        // Per-stage constant color array (shader-data member 26).
        return GetShaderDataMember(vec_ids.Get(4), ConstS32(26), ConstS32(index));
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown source op {}", source);
        return ConstF32(0.f, 0.f, 0.f, 0.f);
    }
}
|
||||
|
||||
// Applies a color modifier to a TEV source: selects either the rgb triple or
// a single broadcast channel, optionally complemented (1 - x).
Id FragmentModule::AppendColorModifier(TevStageConfig::ColorModifier modifier,
                                       TevStageConfig::Source source, s32 index) {
    using ColorModifier = TevStageConfig::ColorModifier;
    const Id source_color{AppendSource(source, index)};
    const Id one_vec{ConstF32(1.f, 1.f, 1.f)};

    // Decode the modifier into a broadcast component (-1 keeps rgb as-is)
    // and a complement flag; each "OneMinus" case falls through to its base.
    s32 component = -1;
    bool complement = false;
    switch (modifier) {
    case ColorModifier::OneMinusSourceColor:
        complement = true;
        [[fallthrough]];
    case ColorModifier::SourceColor:
        component = -1;
        break;
    case ColorModifier::OneMinusSourceRed:
        complement = true;
        [[fallthrough]];
    case ColorModifier::SourceRed:
        component = 0;
        break;
    case ColorModifier::OneMinusSourceGreen:
        complement = true;
        [[fallthrough]];
    case ColorModifier::SourceGreen:
        component = 1;
        break;
    case ColorModifier::OneMinusSourceBlue:
        complement = true;
        [[fallthrough]];
    case ColorModifier::SourceBlue:
        component = 2;
        break;
    case ColorModifier::OneMinusSourceAlpha:
        complement = true;
        [[fallthrough]];
    case ColorModifier::SourceAlpha:
        component = 3;
        break;
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown color modifier op {}", modifier);
        return one_vec;
    }

    const Id rgb{component < 0
                     ? OpVectorShuffle(vec_ids.Get(3), source_color, source_color, 0, 1, 2)
                     : OpVectorShuffle(vec_ids.Get(3), source_color, source_color, component,
                                       component, component)};
    return complement ? OpFSub(vec_ids.Get(3), one_vec, rgb) : rgb;
}
|
||||
|
||||
// Applies an alpha modifier to a TEV source: extracts the selected channel,
// optionally complemented (1 - x).
Id FragmentModule::AppendAlphaModifier(TevStageConfig::AlphaModifier modifier,
                                       TevStageConfig::Source source, s32 index) {
    using AlphaModifier = TevStageConfig::AlphaModifier;
    const Id source_color{AppendSource(source, index)};
    const Id one_f32{ConstF32(1.f)};

    // Decode the modifier into a channel index and a complement flag;
    // each "OneMinus" case falls through to its base channel.
    s32 component = 3;
    bool complement = false;
    switch (modifier) {
    case AlphaModifier::OneMinusSourceAlpha:
        complement = true;
        [[fallthrough]];
    case AlphaModifier::SourceAlpha:
        component = 3;
        break;
    case AlphaModifier::OneMinusSourceRed:
        complement = true;
        [[fallthrough]];
    case AlphaModifier::SourceRed:
        component = 0;
        break;
    case AlphaModifier::OneMinusSourceGreen:
        complement = true;
        [[fallthrough]];
    case AlphaModifier::SourceGreen:
        component = 1;
        break;
    case AlphaModifier::OneMinusSourceBlue:
        complement = true;
        [[fallthrough]];
    case AlphaModifier::SourceBlue:
        component = 2;
        break;
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown alpha modifier op {}", modifier);
        return one_f32;
    }

    const Id channel{OpCompositeExtract(f32_id, source_color, component)};
    return complement ? OpFSub(f32_id, one_f32, channel) : channel;
}
|
||||
|
||||
// Combines the three color operands (color_results_1..3) according to the TEV
// operation and returns the result clamped to [0, 1].
Id FragmentModule::AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation) {
    using Operation = TevStageConfig::Operation;
    const Id zero_vec{ConstF32(0.f, 0.f, 0.f)};
    const Id one_vec{ConstF32(1.f, 1.f, 1.f)};
    const Id half_vec{ConstF32(0.5f, 0.5f, 0.5f)};
    Id result{};

    switch (operation) {
    case Operation::Replace:
        result = color_results_1;
        break;
    case Operation::Modulate:
        result = OpFMul(vec_ids.Get(3), color_results_1, color_results_2);
        break;
    case Operation::Add:
        result = OpFAdd(vec_ids.Get(3), color_results_1, color_results_2);
        break;
    case Operation::AddSigned:
        // (a + b) - 0.5
        result = OpFSub(vec_ids.Get(3), OpFAdd(vec_ids.Get(3), color_results_1, color_results_2), half_vec);
        break;
    case Operation::Lerp:
        // mix(b, a, t): operand 3 interpolates between the first two operands.
        result = OpFMix(vec_ids.Get(3), color_results_2, color_results_1, color_results_3);
        break;
    case Operation::Subtract:
        result = OpFSub(vec_ids.Get(3), color_results_1, color_results_2);
        break;
    case Operation::MultiplyThenAdd:
        result = OpFma(vec_ids.Get(3), color_results_1, color_results_2, color_results_3);
        break;
    case Operation::AddThenMultiply:
        // min(a + b, 1) * c
        result = OpFMin(vec_ids.Get(3), OpFAdd(vec_ids.Get(3), color_results_1, color_results_2), one_vec);
        result = OpFMul(vec_ids.Get(3), result, color_results_3);
        break;
    case Operation::Dot3_RGB:
    case Operation::Dot3_RGBA:
        // 4 * dot(a - 0.5, b - 0.5), broadcast to all three channels.
        result = OpDot(f32_id, OpFSub(vec_ids.Get(3), color_results_1, half_vec),
                       OpFSub(vec_ids.Get(3), color_results_2, half_vec));
        result = OpFMul(f32_id, result, ConstF32(4.f));
        result = OpCompositeConstruct(vec_ids.Get(3), result, result, result);
        break;
    default:
        result = zero_vec;
        LOG_CRITICAL(Render_Vulkan, "Unknown color combiner operation: {}", operation);
        break;
    }

    // Clamp result to 0.0, 1.0
    return OpFClamp(vec_ids.Get(3), result, zero_vec, one_vec);
}
|
||||
|
||||
// Combines the three alpha operands (alpha_results_1..3) according to the TEV
// operation and returns the result clamped to [0, 1].
Id FragmentModule::AppendAlphaCombiner(TevStageConfig::Operation operation) {
    using Operation = TevStageConfig::Operation;
    Id result{};

    switch (operation) {
    case Operation::Replace:
        result = alpha_results_1;
        break;
    case Operation::Modulate:
        result = OpFMul(f32_id, alpha_results_1, alpha_results_2);
        break;
    case Operation::Add:
        result = OpFAdd(f32_id, alpha_results_1, alpha_results_2);
        break;
    case Operation::AddSigned:
        // (a + b) - 0.5
        result = OpFSub(f32_id, OpFAdd(f32_id, alpha_results_1, alpha_results_2), ConstF32(0.5f));
        break;
    case Operation::Lerp:
        // mix(b, a, t): operand 3 interpolates between the first two operands.
        result = OpFMix(f32_id, alpha_results_2, alpha_results_1, alpha_results_3);
        break;
    case Operation::Subtract:
        result = OpFSub(f32_id, alpha_results_1, alpha_results_2);
        break;
    case Operation::MultiplyThenAdd:
        result = OpFma(f32_id, alpha_results_1, alpha_results_2, alpha_results_3);
        break;
    case Operation::AddThenMultiply:
        // min(a + b, 1) * c
        result = OpFMin(f32_id, OpFAdd(f32_id, alpha_results_1, alpha_results_2), ConstF32(1.f));
        result = OpFMul(f32_id, result, alpha_results_3);
        break;
    default:
        result = ConstF32(0.f);
        LOG_CRITICAL(Render_Vulkan, "Unknown alpha combiner operation: {}", operation);
        break;
    }

    return OpFClamp(f32_id, result, ConstF32(0.f), ConstF32(1.f));
}
|
||||
|
||||
void FragmentModule::DefineArithmeticTypes() {
|
||||
void_id = Name(TypeVoid(), "void_id");
|
||||
bool_id = Name(TypeBool(), "bool_id");
|
||||
f32_id = Name(TypeFloat(32), "f32_id");
|
||||
i32_id = Name(TypeSInt(32), "i32_id");
|
||||
u32_id = Name(TypeUInt(32), "u32_id");
|
||||
|
||||
for (u32 size = 2; size <= 4; size++) {
|
||||
const u32 i = size - 2;
|
||||
vec_ids.ids[i] = Name(TypeVector(f32_id, size), fmt::format("vec{}_id", size));
|
||||
ivec_ids.ids[i] = Name(TypeVector(i32_id, size), fmt::format("ivec{}_id", size));
|
||||
uvec_ids.ids[i] = Name(TypeVector(u32_id, size), fmt::format("uvec{}_id", size));
|
||||
}
|
||||
}
|
||||
|
||||
void FragmentModule::DefineEntryPoint() {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
AddCapability(spv::Capability::SampledBuffer);
|
||||
AddCapability(spv::Capability::ImageQuery);
|
||||
SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
|
||||
|
||||
const Id main_type{TypeFunction(TypeVoid())};
|
||||
const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)};
|
||||
AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, texcoord0_id,
|
||||
texcoord1_id, texcoord2_id, texcoord0_w_id, normquat_id, view_id, color_id,
|
||||
gl_frag_coord_id, gl_frag_depth_id);
|
||||
AddExecutionMode(main_func, spv::ExecutionMode::OriginUpperLeft);
|
||||
AddExecutionMode(main_func, spv::ExecutionMode::DepthReplacing);
|
||||
}
|
||||
|
||||
// Declares the shader_data uniform block (descriptor set 0, binding 1),
// including the nested per-light array, packed LUT-offset array, and per-stage
// constant-color array, with explicit member offsets and array strides.
void FragmentModule::DefineUniformStructs() {
    // Per-light struct. Based on GetLightMember usage elsewhere in this file:
    // members 0-5 are vec3s (specular_0, specular_1, diffuse, ambient,
    // position, spot_direction), 6-7 are floats (dist_atten_bias/scale).
    // NOTE(review): confirm ordering against the host-side uniform struct.
    const Id light_src_struct_id{TypeStruct(vec_ids.Get(3), vec_ids.Get(3), vec_ids.Get(3), vec_ids.Get(3),
                                            vec_ids.Get(3), vec_ids.Get(3), f32_id, f32_id)};

    const Id light_src_array_id{TypeArray(light_src_struct_id, ConstU32(NUM_LIGHTS))};
    // LUT offsets are packed four per ivec4 element (see LookupLightingLUT).
    const Id lighting_lut_array_id{TypeArray(ivec_ids.Get(4), ConstU32(NUM_LIGHTING_SAMPLERS / 4))};
    const Id const_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_TEV_STAGES))};

    const Id shader_data_struct_id{TypeStruct(i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id,
                                              i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, i32_id,
                                              i32_id, f32_id, i32_id, u32_id, lighting_lut_array_id, vec_ids.Get(3),
                                              vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(3),
                                              light_src_array_id, const_color_array_id, vec_ids.Get(4), vec_ids.Get(4))};

    // Hand-computed byte offsets; these must match the host-side layout exactly,
    // so they are kept verbatim.
    constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u};
    constexpr std::array shader_data_offsets{0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u, 32u, 36u, 40u, 44u, 48u,
                                             52u, 56u, 60u, 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u,
                                             224u, 240u, 1136u, 1232u, 1248u};

    Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u);
    Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u);
    Decorate(const_color_array_id, spv::Decoration::ArrayStride, 16u);
    for (u32 i = 0; i < static_cast<u32>(light_src_offsets.size()); i++) {
        MemberDecorate(light_src_struct_id, i, spv::Decoration::Offset, light_src_offsets[i]);
    }
    for (u32 i = 0; i < static_cast<u32>(shader_data_offsets.size()); i++) {
        MemberDecorate(shader_data_struct_id, i, spv::Decoration::Offset, shader_data_offsets[i]);
    }
    Decorate(shader_data_struct_id, spv::Decoration::Block);

    // Uniform buffer variable: descriptor set 0, binding 1.
    shader_data_id = AddGlobalVariable(TypePointer(spv::StorageClass::Uniform, shader_data_struct_id),
                                       spv::StorageClass::Uniform);
    Decorate(shader_data_id, spv::Decoration::DescriptorSet, 0);
    Decorate(shader_data_id, spv::Decoration::Binding, 1);
}
|
||||
|
||||
// Declares all shader interface variables: stage inputs/outputs, image and
// sampler types, descriptor bindings, and the FragCoord/FragDepth built-ins.
void FragmentModule::DefineInterface() {
    // Define interface block (locations must match the vertex stage outputs).
    primary_color_id = DefineInput(vec_ids.Get(4), 1);
    texcoord0_id = DefineInput(vec_ids.Get(2), 2);
    texcoord1_id = DefineInput(vec_ids.Get(2), 3);
    texcoord2_id = DefineInput(vec_ids.Get(2), 4);
    texcoord0_w_id = DefineInput(f32_id, 5);
    normquat_id = DefineInput(vec_ids.Get(4), 6);
    view_id = DefineInput(vec_ids.Get(3), 7);
    color_id = DefineOutput(vec_ids.Get(4), 0);

    // Define the texture unit samplers/uniforms (separate image + sampler types).
    image_buffer_id = TypeImage(f32_id, spv::Dim::Buffer, 0, 0, 0, 1, spv::ImageFormat::Unknown);
    image2d_id = TypeImage(f32_id, spv::Dim::Dim2D, 0, 0, 0, 1, spv::ImageFormat::Unknown);
    image_cube_id = TypeImage(f32_id, spv::Dim::Cube, 0, 0, 0, 1, spv::ImageFormat::Unknown);
    sampler_id = TypeSampler();

    // Descriptor set 0: combined texel-buffer LUTs.
    texture_buffer_lut_lf_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 2);
    texture_buffer_lut_rg_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 3);
    texture_buffer_lut_rgba_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 4);
    // Descriptor set 1: texture unit images.
    tex0_id = DefineUniformConst(image2d_id, 1, 0);
    tex1_id = DefineUniformConst(image2d_id, 1, 1);
    tex2_id = DefineUniformConst(image2d_id, 1, 2);
    tex_cube_id = DefineUniformConst(image_cube_id, 1, 3);
    // Descriptor set 2: the matching samplers.
    tex0_sampler_id = DefineUniformConst(sampler_id, 2, 0);
    tex1_sampler_id = DefineUniformConst(sampler_id, 2, 1);
    tex2_sampler_id = DefineUniformConst(sampler_id, 2, 2);
    tex_cube_sampler_id = DefineUniformConst(sampler_id, 2, 3);

    // Define built-ins
    gl_frag_coord_id = DefineVar(vec_ids.Get(4), spv::StorageClass::Input);
    gl_frag_depth_id = DefineVar(f32_id, spv::StorageClass::Output);
    Decorate(gl_frag_coord_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragCoord);
    Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
}
|
||||
|
||||
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config) {
|
||||
FragmentModule module{config};
|
||||
module.Generate();
|
||||
return module.Assemble();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
@ -1,227 +0,0 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <sirit/sirit.h>
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Sirit::Id;
|
||||
|
||||
struct VectorIds {
|
||||
/// Returns the type id of the vector with the provided size
|
||||
[[nodiscard]] constexpr Id Get(u32 size) const {
|
||||
return ids[size - 2];
|
||||
}
|
||||
|
||||
std::array<Id, 3> ids;
|
||||
};
|
||||
|
||||
class FragmentModule : public Sirit::Module {
|
||||
static constexpr u32 NUM_TEV_STAGES = 6;
|
||||
static constexpr u32 NUM_LIGHTS = 8;
|
||||
static constexpr u32 NUM_LIGHTING_SAMPLERS = 24;
|
||||
public:
|
||||
FragmentModule(const PicaFSConfig& config);
|
||||
~FragmentModule();
|
||||
|
||||
/// Emits SPIR-V bytecode corresponding to the provided pica fragment configuration
|
||||
void Generate();
|
||||
|
||||
/// Undos the vulkan perspective transformation and applies the pica one
|
||||
void WriteDepth();
|
||||
|
||||
/// Writes the code to emulate fragment lighting
|
||||
void WriteLighting();
|
||||
|
||||
/// Writes the code to emulate the specified TEV stage
|
||||
void WriteTevStage(s32 index);
|
||||
|
||||
/// Writes the if-statement condition used to evaluate alpha testing.
|
||||
/// Returns true if the fragment was discarded
|
||||
[[nodiscard]] bool WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
|
||||
|
||||
/// Samples the current fragment texel from the provided texture unit
|
||||
[[nodiscard]] Id SampleTexture(u32 texture_unit);
|
||||
|
||||
/// Rounds the provided variable to the nearest 1/255th
|
||||
[[nodiscard]] Id Byteround(Id variable_id, u32 size = 1);
|
||||
|
||||
/// Lookups the lighting LUT at the provided lut_index
|
||||
[[nodiscard]] Id LookupLightingLUT(Id lut_index, Id index, Id delta);
|
||||
|
||||
/// Writes the specified TEV stage source component(s)
|
||||
[[nodiscard]] Id AppendSource(Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
|
||||
|
||||
/// Writes the color components to use for the specified TEV stage color modifier
|
||||
[[nodiscard]] Id AppendColorModifier(Pica::TexturingRegs::TevStageConfig::ColorModifier modifier,
|
||||
Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
|
||||
|
||||
/// Writes the alpha component to use for the specified TEV stage alpha modifier
|
||||
[[nodiscard]] Id AppendAlphaModifier(Pica::TexturingRegs::TevStageConfig::AlphaModifier modifier,
|
||||
Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
|
||||
|
||||
/// Writes the combiner function for the color components for the specified TEV stage operation
|
||||
[[nodiscard]] Id AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
|
||||
|
||||
/// Writes the combiner function for the alpha component for the specified TEV stage operation
|
||||
[[nodiscard]] Id AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
|
||||
|
||||
/// Loads the member specified from the shader_data uniform struct
|
||||
template <typename... Ids>
|
||||
[[nodiscard]] Id GetShaderDataMember(Id type, Ids... ids) {
|
||||
const Id uniform_ptr{TypePointer(spv::StorageClass::Uniform, type)};
|
||||
return OpLoad(type, OpAccessChain(uniform_ptr, shader_data_id, ids...));
|
||||
}
|
||||
|
||||
/// Pads the provided vector by inserting args at the end
|
||||
template <typename... Args>
|
||||
[[nodiscard]] Id PadVectorF32(Id vector, Id pad_type_id, Args&&... args) {
|
||||
return OpCompositeConstruct(pad_type_id, vector, ConstF32(args...));
|
||||
}
|
||||
|
||||
/// Defines a input variable
|
||||
[[nodiscard]] Id DefineInput(Id type, u32 location) {
|
||||
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
|
||||
Decorate(input_id, spv::Decoration::Location, location);
|
||||
return input_id;
|
||||
}
|
||||
|
||||
/// Defines a input variable
|
||||
[[nodiscard]] Id DefineOutput(Id type, u32 location) {
|
||||
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
|
||||
Decorate(output_id, spv::Decoration::Location, location);
|
||||
return output_id;
|
||||
}
|
||||
|
||||
/// Defines a uniform constant variable
|
||||
[[nodiscard]] Id DefineUniformConst(Id type, u32 set, u32 binding) {
|
||||
const Id uniform_id{DefineVar(type, spv::StorageClass::UniformConstant)};
|
||||
Decorate(uniform_id, spv::Decoration::DescriptorSet, set);
|
||||
Decorate(uniform_id, spv::Decoration::Binding, binding);
|
||||
return uniform_id;
|
||||
}
|
||||
|
||||
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
|
||||
const Id pointer_type_id{TypePointer(storage_class, type)};
|
||||
return AddGlobalVariable(pointer_type_id, storage_class);
|
||||
}
|
||||
|
||||
/// Returns the id of a signed integer constant of value
|
||||
[[nodiscard]] Id ConstU32(u32 value) {
|
||||
return Constant(u32_id, value);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
[[nodiscard]] Id ConstU32(Args&&... values) {
|
||||
constexpr auto size = sizeof...(values);
|
||||
static_assert(size >= 2 && size <= 4);
|
||||
const std::array constituents{Constant(u32_id, values)...};
|
||||
return ConstantComposite(uvec_ids.Get(size), constituents);
|
||||
}
|
||||
|
||||
/// Returns the id of a signed integer constant of value
|
||||
[[nodiscard]] Id ConstS32(s32 value) {
|
||||
return Constant(i32_id, value);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
[[nodiscard]] Id ConstS32(Args&&... values) {
|
||||
constexpr auto size = sizeof...(values);
|
||||
static_assert(size >= 2 && size <= 4);
|
||||
const std::array constituents{Constant(i32_id, values)...};
|
||||
return ConstantComposite(ivec_ids.Get(size), constituents);
|
||||
}
|
||||
|
||||
/// Returns the id of a float constant of value
|
||||
[[nodiscard]] Id ConstF32(float value) {
|
||||
return Constant(f32_id, value);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
[[nodiscard]] Id ConstF32(Args... values) {
|
||||
constexpr auto size = sizeof...(values);
|
||||
static_assert(size >= 2 && size <= 4);
|
||||
const std::array constituents{Constant(f32_id, values)...};
|
||||
return ConstantComposite(vec_ids.Get(size), constituents);
|
||||
}
|
||||
|
||||
private:
|
||||
void DefineArithmeticTypes();
|
||||
void DefineEntryPoint();
|
||||
void DefineUniformStructs();
|
||||
void DefineInterface();
|
||||
|
||||
private:
|
||||
PicaFSConfig config;
|
||||
Id void_id{};
|
||||
Id bool_id{};
|
||||
Id f32_id{};
|
||||
Id i32_id{};
|
||||
Id u32_id{};
|
||||
|
||||
VectorIds vec_ids{};
|
||||
VectorIds ivec_ids{};
|
||||
VectorIds uvec_ids{};
|
||||
|
||||
Id image2d_id{};
|
||||
Id image_cube_id{};
|
||||
Id image_buffer_id{};
|
||||
Id sampler_id{};
|
||||
Id shader_data_id{};
|
||||
|
||||
Id primary_color_id{};
|
||||
Id texcoord0_id{};
|
||||
Id texcoord1_id{};
|
||||
Id texcoord2_id{};
|
||||
Id texcoord0_w_id{};
|
||||
Id normquat_id{};
|
||||
Id view_id{};
|
||||
Id color_id{};
|
||||
|
||||
Id gl_frag_coord_id{};
|
||||
Id gl_frag_depth_id{};
|
||||
|
||||
Id tex0_id{};
|
||||
Id tex1_id{};
|
||||
Id tex2_id{};
|
||||
Id tex_cube_id{};
|
||||
Id tex0_sampler_id{};
|
||||
Id tex1_sampler_id{};
|
||||
Id tex2_sampler_id{};
|
||||
Id tex_cube_sampler_id{};
|
||||
Id texture_buffer_lut_lf_id{};
|
||||
Id texture_buffer_lut_rg_id{};
|
||||
Id texture_buffer_lut_rgba_id{};
|
||||
|
||||
Id texture_buffer_lut_lf{};
|
||||
|
||||
Id rounded_primary_color{};
|
||||
Id primary_fragment_color{};
|
||||
Id secondary_fragment_color{};
|
||||
Id combiner_buffer{};
|
||||
Id next_combiner_buffer{};
|
||||
Id last_tex_env_out{};
|
||||
|
||||
Id color_results_1{};
|
||||
Id color_results_2{};
|
||||
Id color_results_3{};
|
||||
Id alpha_results_1{};
|
||||
Id alpha_results_2{};
|
||||
Id alpha_results_3{};
|
||||
};
|
||||
|
||||
/**
|
||||
* Generates the SPIR-V fragment shader program source code for the current Pica state
|
||||
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
|
||||
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
|
||||
* @param separable_shader generates shader that can be used for separate shader object
|
||||
* @returns String of the shader source code
|
||||
*/
|
||||
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config);
|
||||
|
||||
} // namespace Vulkan
|
@ -6,7 +6,6 @@
|
||||
#include <glslang/Include/ResourceLimits.h>
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
|
||||
@ -179,8 +178,6 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
|
||||
includer)) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
|
||||
shader->getInfoDebugLog());
|
||||
LOG_CRITICAL(Render_Vulkan, "{}", code);
|
||||
ASSERT(false);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
@ -218,22 +215,10 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
|
||||
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
|
||||
}
|
||||
|
||||
return CompileSPV(out_code, device);
|
||||
}
|
||||
const vk::ShaderModuleCreateInfo shader_info = {.codeSize = out_code.size() * sizeof(u32),
|
||||
.pCode = out_code.data()};
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_SPVCompilation, "Vulkan", "SPIR-V Shader Compilation", MP_RGB(100, 255, 52));
|
||||
vk::ShaderModule CompileSPV(std::vector<u32> code, vk::Device device) {
|
||||
MICROPROFILE_SCOPE(Vulkan_SPVCompilation);
|
||||
const vk::ShaderModuleCreateInfo shader_info = {.codeSize = code.size() * sizeof(u32),
|
||||
.pCode = code.data()};
|
||||
try {
|
||||
return device.createShaderModule(shader_info);
|
||||
} catch (vk::SystemError& err) {
|
||||
LOG_CRITICAL(Render_Vulkan, "{}", err.what());
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return VK_NULL_HANDLE;
|
||||
return device.createShaderModule(shader_info);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -13,6 +13,4 @@ enum class ShaderOptimization { High = 0, Debug = 1 };
|
||||
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device,
|
||||
ShaderOptimization level);
|
||||
|
||||
vk::ShaderModule CompileSPV(std::vector<u32> code, vk::Device device);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include <algorithm>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
@ -15,40 +14,30 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
[[nodiscard]] vk::AccessFlags MakeAccessFlags(vk::BufferUsageFlagBits usage) {
|
||||
inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) {
|
||||
std::pair<vk::AccessFlags, vk::PipelineStageFlags> result{};
|
||||
switch (usage) {
|
||||
case vk::BufferUsageFlagBits::eVertexBuffer:
|
||||
return vk::AccessFlagBits::eVertexAttributeRead;
|
||||
result = std::make_pair(vk::AccessFlagBits::eVertexAttributeRead,
|
||||
vk::PipelineStageFlagBits::eVertexInput);
|
||||
break;
|
||||
case vk::BufferUsageFlagBits::eIndexBuffer:
|
||||
return vk::AccessFlagBits::eIndexRead;
|
||||
result =
|
||||
std::make_pair(vk::AccessFlagBits::eIndexRead, vk::PipelineStageFlagBits::eVertexInput);
|
||||
case vk::BufferUsageFlagBits::eUniformBuffer:
|
||||
return vk::AccessFlagBits::eUniformRead;
|
||||
result = std::make_pair(vk::AccessFlagBits::eUniformRead,
|
||||
vk::PipelineStageFlagBits::eVertexShader |
|
||||
vk::PipelineStageFlagBits::eGeometryShader |
|
||||
vk::PipelineStageFlagBits::eFragmentShader);
|
||||
case vk::BufferUsageFlagBits::eUniformTexelBuffer:
|
||||
return vk::AccessFlagBits::eShaderRead;
|
||||
result = std::make_pair(vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eFragmentShader);
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
|
||||
UNREACHABLE();
|
||||
}
|
||||
return vk::AccessFlagBits::eNone;
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::PipelineStageFlags MakePipelineStage(vk::BufferUsageFlagBits usage) {
|
||||
switch (usage) {
|
||||
case vk::BufferUsageFlagBits::eVertexBuffer:
|
||||
return vk::PipelineStageFlagBits::eVertexInput;
|
||||
case vk::BufferUsageFlagBits::eIndexBuffer:
|
||||
return vk::PipelineStageFlagBits::eVertexInput;
|
||||
case vk::BufferUsageFlagBits::eUniformBuffer:
|
||||
return vk::PipelineStageFlagBits::eVertexShader |
|
||||
vk::PipelineStageFlagBits::eGeometryShader |
|
||||
vk::PipelineStageFlagBits::eFragmentShader;
|
||||
case vk::BufferUsageFlagBits::eUniformTexelBuffer:
|
||||
return vk::PipelineStageFlagBits::eFragmentShader;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
|
||||
UNREACHABLE();
|
||||
}
|
||||
return vk::PipelineStageFlagBits::eNone;
|
||||
return result;
|
||||
}
|
||||
|
||||
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, bool readback)
|
||||
@ -131,90 +120,98 @@ StreamBuffer::~StreamBuffer() {
|
||||
|
||||
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
|
||||
ASSERT(size <= total_size && alignment <= total_size);
|
||||
Bucket& bucket = buckets[bucket_index];
|
||||
|
||||
if (alignment > 0) {
|
||||
buffer_offset = Common::AlignUp(buffer_offset, alignment);
|
||||
bucket.cursor = Common::AlignUp(bucket.cursor, alignment);
|
||||
}
|
||||
|
||||
bool invalidate = false;
|
||||
const u32 new_offset = buffer_offset + size;
|
||||
if (u32 new_index = new_offset / bucket_size; new_index != bucket_index) {
|
||||
if (new_index >= BUCKET_COUNT) {
|
||||
if (readback) {
|
||||
Invalidate();
|
||||
} else {
|
||||
Flush();
|
||||
}
|
||||
buffer_offset = 0;
|
||||
flush_offset = 0;
|
||||
new_index = 0;
|
||||
invalidate = true;
|
||||
}
|
||||
ticks[bucket_index] = scheduler.CurrentTick();
|
||||
scheduler.Wait(ticks[new_index]);
|
||||
bucket_index = new_index;
|
||||
// If we reach bucket boundaries move over to the next one
|
||||
if (bucket.cursor + size > bucket_size) {
|
||||
bucket.gpu_tick = scheduler.CurrentTick();
|
||||
Flush();
|
||||
MoveNextBucket();
|
||||
return Map(size, alignment);
|
||||
}
|
||||
|
||||
const bool invalidate = std::exchange(bucket.invalid, false);
|
||||
const u32 buffer_offset = bucket_index * bucket_size + bucket.cursor;
|
||||
u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
|
||||
|
||||
return std::make_tuple(mapped, buffer_offset, invalidate);
|
||||
}
|
||||
|
||||
void StreamBuffer::Commit(u32 size) {
|
||||
buffer_offset += size;
|
||||
buckets[bucket_index].cursor += size;
|
||||
}
|
||||
|
||||
void StreamBuffer::Flush() {
|
||||
if (readback) {
|
||||
LOG_WARNING(Render_Vulkan, "Cannot flush read only buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 flush_size = buffer_offset - flush_offset;
|
||||
ASSERT(flush_size <= total_size);
|
||||
ASSERT(flush_offset + flush_size <= total_size);
|
||||
Bucket& bucket = buckets[bucket_index];
|
||||
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
|
||||
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
|
||||
ASSERT(flush_size <= bucket_size);
|
||||
|
||||
if (flush_size > 0) [[likely]] {
|
||||
// Ensure all staging writes are visible to the host memory domain
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
vmaFlushAllocation(allocator, staging.allocation, flush_offset, flush_size);
|
||||
vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
|
||||
if (gpu_buffer) {
|
||||
scheduler.Record([this, flush_offset = flush_offset, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
|
||||
scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
|
||||
const vk::BufferCopy copy_region = {
|
||||
.srcOffset = flush_offset, .dstOffset = flush_offset, .size = flush_size};
|
||||
.srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
|
||||
|
||||
upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region);
|
||||
|
||||
auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
|
||||
const vk::BufferMemoryBarrier buffer_barrier = {
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.dstAccessMask = MakeAccessFlags(usage),
|
||||
.dstAccessMask = access_mask,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = gpu_buffer,
|
||||
.offset = flush_offset,
|
||||
.offset = flush_start,
|
||||
.size = flush_size};
|
||||
|
||||
upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
|
||||
MakePipelineStage(usage),
|
||||
upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
|
||||
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier,
|
||||
{});
|
||||
});
|
||||
}
|
||||
flush_offset = buffer_offset;
|
||||
bucket.flush_cursor += flush_size;
|
||||
}
|
||||
}
|
||||
|
||||
void StreamBuffer::Invalidate() {
|
||||
if (!readback) {
|
||||
LOG_WARNING(Render_Vulkan, "Cannot invalidate write only buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 flush_size = buffer_offset - flush_offset;
|
||||
ASSERT(flush_size <= total_size);
|
||||
ASSERT(flush_offset + flush_size <= total_size);
|
||||
Bucket& bucket = buckets[bucket_index];
|
||||
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
|
||||
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
|
||||
ASSERT(flush_size <= bucket_size);
|
||||
|
||||
if (flush_size > 0) [[likely]] {
|
||||
// Ensure the staging memory can be read by the host
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
vmaInvalidateAllocation(allocator, staging.allocation, flush_offset, flush_size);
|
||||
flush_offset = buffer_offset;
|
||||
vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
|
||||
bucket.flush_cursor += flush_size;
|
||||
}
|
||||
}
|
||||
|
||||
void StreamBuffer::MoveNextBucket() {
|
||||
bucket_index = (bucket_index + 1) % BUCKET_COUNT;
|
||||
Bucket& next_bucket = buckets[bucket_index];
|
||||
scheduler.Wait(next_bucket.gpu_tick);
|
||||
next_bucket.cursor = 0;
|
||||
next_bucket.flush_cursor = 0;
|
||||
next_bucket.invalid = true;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -71,6 +71,17 @@ public:
|
||||
return views[index];
|
||||
}
|
||||
|
||||
private:
|
||||
/// Moves to the next bucket
|
||||
void MoveNextBucket();
|
||||
|
||||
struct Bucket {
|
||||
bool invalid = false;
|
||||
u32 gpu_tick = 0;
|
||||
u32 cursor = 0;
|
||||
u32 flush_cursor = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
@ -79,14 +90,12 @@ private:
|
||||
VmaAllocation allocation{};
|
||||
vk::BufferUsageFlagBits usage;
|
||||
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
|
||||
std::size_t view_count = 0;
|
||||
std::array<Bucket, BUCKET_COUNT> buckets;
|
||||
u32 view_count = 0;
|
||||
u32 total_size = 0;
|
||||
u32 bucket_size = 0;
|
||||
u32 buffer_offset = 0;
|
||||
u32 flush_offset = 0;
|
||||
u32 bucket_index = 0;
|
||||
bool readback = false;
|
||||
std::array<u64, BUCKET_COUNT> ticks{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user