Compare commits

...

15 Commits

Author SHA1 Message Date
e62da0afe1 Android #139 2023-11-23 00:57:23 +00:00
4047e9071f Merge PR 12074 2023-11-23 00:57:23 +00:00
d8f125ab10 Merge PR 11535 2023-11-23 00:57:23 +00:00
91c12db070 Merge pull request #12123 from merryhime/explicit-this
Explicit this
2023-11-21 22:17:42 -05:00
b088a448cd game_list_worker: Explicit caputure of 'this' 2023-11-21 22:57:47 +00:00
c4f6c3b00b shared_widget: Explicit capture of 'this' 2023-11-21 22:57:09 +00:00
cddb28cf26 Merge pull request #12107 from daisymlleung/patch-1
Stub CheckBlockedUserListAvailability for Super Bomberman R 2
2023-11-21 09:19:41 -05:00
538e137bca Merge pull request #12100 from t895/android-translations
translations: Add android translations to transifex config
2023-11-21 09:19:34 -05:00
e69118042f Merge pull request #12045 from liamwhite/codec-refactor
video_core: refactor video frame and packet parsing
2023-11-21 09:19:26 -05:00
a6b8d85b34 Merge pull request #11984 from lat9nq/lupdate
shared_translation: Call tr more directly
2023-11-21 09:19:13 -05:00
0216aa55b4 Stub CheckBlockedUserListAvailability 2023-11-21 01:57:58 +08:00
0298c2cc5d translations: Add android translations to transifex config 2023-11-20 10:37:09 -05:00
4055a476aa video_core: refactor video frame and packet parsing 2023-11-16 17:01:38 -05:00
cb3559539a CMakeLists: Add option to call lupdate directly
qt_create_translation silently fails to run at all on my system. Since
there is no error, I was unable to determine a fix. This sidesteps the
convenience function by setting up the rules ourselves.

This is left as an option since this path likely does not work on
Windows.
2023-11-08 11:54:05 -05:00
71cdfa6ad5 shared_translation: Call tr for each string
Qt can't parse tr called within a macro, so we must call it on each
string.

shared_translation: Remove redundant include
2023-11-08 11:54:01 -05:00
89 changed files with 3673 additions and 947 deletions

View File

@ -3,4 +3,4 @@
[codespell] [codespell]
skip = ./.git,./build,./dist,./Doxyfile,./externals,./LICENSES,./src/android/app/src/main/res skip = ./.git,./build,./dist,./Doxyfile,./externals,./LICENSES,./src/android/app/src/main/res
ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nce,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink

3
.gitmodules vendored
View File

@ -61,3 +61,6 @@
[submodule "breakpad"] [submodule "breakpad"]
path = externals/breakpad path = externals/breakpad
url = https://github.com/yuzu-emu/breakpad.git url = https://github.com/yuzu-emu/breakpad.git
[submodule "oaknut"]
path = externals/oaknut
url = https://github.com/merryhime/oaknut

View File

@ -1,3 +1,13 @@
| Pull Request | Commit | Title | Author | Merged? |
|----|----|----|----|----|
| [11535](https://github.com/yuzu-emu/yuzu//pull/11535) | [`50bcfa5fb`](https://github.com/yuzu-emu/yuzu//pull/11535/files) | renderer_vulkan: Introduce separate cmd buffer for uploads | [GPUCode](https://github.com/GPUCode/) | Yes |
| [12074](https://github.com/yuzu-emu/yuzu//pull/12074) | [`20b671baa`](https://github.com/yuzu-emu/yuzu//pull/12074/files) | Implement Native Code Execution (NCE) | [GPUCode](https://github.com/GPUCode/) | Yes |
End of merge log. You can find the original README.md below the break.
-----
<!-- <!--
SPDX-FileCopyrightText: 2018 yuzu Emulator Project SPDX-FileCopyrightText: 2018 yuzu Emulator Project
SPDX-License-Identifier: GPL-2.0-or-later SPDX-License-Identifier: GPL-2.0-or-later

View File

@ -6,3 +6,8 @@ file_filter = <lang>.ts
source_file = en.ts source_file = en.ts
source_lang = en source_lang = en
type = QT type = QT
[o:yuzu-emulator:p:yuzu:r:yuzu-android]
file_filter = ../../src/android/app/src/main/res/values-<lang>/strings.xml
source_file = ../../src/android/app/src/main/res/values/strings.xml
type = ANDROID

View File

@ -20,6 +20,10 @@ if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak)
endif() endif()
# Dynarmic # Dynarmic
if (ARCHITECTURE_arm64 AND NOT TARGET merry::oaknut)
add_subdirectory(oaknut)
endif()
if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic) if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic)
set(DYNARMIC_IGNORE_ASSERTS ON) set(DYNARMIC_IGNORE_ASSERTS ON)
add_subdirectory(dynarmic) add_subdirectory(dynarmic)

1
externals/oaknut vendored Submodule

Submodule externals/oaknut added at 316d8869e8

View File

@ -301,6 +301,11 @@ object NativeLibrary {
*/ */
external fun getPerfStats(): DoubleArray external fun getPerfStats(): DoubleArray
/**
* Returns the current CPU backend.
*/
external fun getCpuBackend(): String
/** /**
* Notifies the core emulation that the orientation has changed. * Notifies the core emulation that the orientation has changed.
*/ */

View File

@ -10,6 +10,7 @@ enum class IntSetting(
override val category: Settings.Category, override val category: Settings.Category,
override val androidDefault: Int? = null override val androidDefault: Int? = null
) : AbstractIntSetting { ) : AbstractIntSetting {
CPU_BACKEND("cpu_backend", Settings.Category.Cpu),
CPU_ACCURACY("cpu_accuracy", Settings.Category.Cpu), CPU_ACCURACY("cpu_accuracy", Settings.Category.Cpu),
REGION_INDEX("region_index", Settings.Category.System), REGION_INDEX("region_index", Settings.Category.System),
LANGUAGE_INDEX("language_index", Settings.Category.System), LANGUAGE_INDEX("language_index", Settings.Category.System),

View File

@ -77,6 +77,15 @@ abstract class SettingsItem(
"%" "%"
) )
) )
put(
SingleChoiceSetting(
IntSetting.CPU_BACKEND,
R.string.cpu_backend,
0,
R.array.cpuBackendNames,
R.array.cpuBackendValues
)
)
put( put(
SingleChoiceSetting( SingleChoiceSetting(
IntSetting.CPU_ACCURACY, IntSetting.CPU_ACCURACY,

View File

@ -269,6 +269,7 @@ class SettingsFragmentPresenter(
add(BooleanSetting.RENDERER_DEBUG.key) add(BooleanSetting.RENDERER_DEBUG.key)
add(HeaderSetting(R.string.cpu)) add(HeaderSetting(R.string.cpu))
add(IntSetting.CPU_BACKEND.key)
add(IntSetting.CPU_ACCURACY.key) add(IntSetting.CPU_ACCURACY.key)
add(BooleanSetting.CPU_DEBUG_MODE.key) add(BooleanSetting.CPU_DEBUG_MODE.key)
add(SettingsItem.FASTMEM_COMBINED) add(SettingsItem.FASTMEM_COMBINED)

View File

@ -414,8 +414,10 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback {
perfStatsUpdater = { perfStatsUpdater = {
if (emulationViewModel.emulationStarted.value) { if (emulationViewModel.emulationStarted.value) {
val perfStats = NativeLibrary.getPerfStats() val perfStats = NativeLibrary.getPerfStats()
val cpuBackend = NativeLibrary.getCpuBackend()
if (_binding != null) { if (_binding != null) {
binding.showFpsText.text = String.format("FPS: %.1f", perfStats[FPS]) binding.showFpsText.text =
String.format("FPS: %.1f\n%s", perfStats[FPS], cpuBackend)
} }
perfStatsUpdateHandler.postDelayed(perfStatsUpdater!!, 800) perfStatsUpdateHandler.postDelayed(perfStatsUpdater!!, 800)
} }

View File

@ -177,6 +177,7 @@ void Config::ReadValues() {
ReadSetting("Core", Settings::values.memory_layout_mode); ReadSetting("Core", Settings::values.memory_layout_mode);
// Cpu // Cpu
ReadSetting("Cpu", Settings::values.cpu_backend);
ReadSetting("Cpu", Settings::values.cpu_accuracy); ReadSetting("Cpu", Settings::values.cpu_accuracy);
ReadSetting("Cpu", Settings::values.cpu_debug_mode); ReadSetting("Cpu", Settings::values.cpu_debug_mode);
ReadSetting("Cpu", Settings::values.cpuopt_page_tables); ReadSetting("Cpu", Settings::values.cpuopt_page_tables);

View File

@ -153,6 +153,10 @@ use_multi_core =
use_unsafe_extended_memory_layout = use_unsafe_extended_memory_layout =
[Cpu] [Cpu]
Selects the preferred CPU backend for executing ARM instructions
# 0 (default): Dynarmic, 1: NCE
cpu_backend =
# Adjusts various optimizations. # Adjusts various optimizations.
# Auto-select mode enables choice unsafe optimizations. # Auto-select mode enables choice unsafe optimizations.
# Accurate enables only safe optimizations. # Accurate enables only safe optimizations.

View File

@ -707,6 +707,14 @@ jdoubleArray Java_org_yuzu_yuzu_1emu_NativeLibrary_getPerfStats(JNIEnv* env, jcl
return j_stats; return j_stats;
} }
jstring Java_org_yuzu_yuzu_1emu_NativeLibrary_getCpuBackend(JNIEnv* env, jclass clazz) {
if (Settings::IsNceEnabled()) {
return ToJString(env, "NCE");
}
return ToJString(env, "JIT");
}
void Java_org_yuzu_yuzu_1emu_utils_DirectoryInitialization_setSysDirectory(JNIEnv* env, void Java_org_yuzu_yuzu_1emu_utils_DirectoryInitialization_setSysDirectory(JNIEnv* env,
jclass clazz, jclass clazz,
jstring j_path) {} jstring j_path) {}

View File

@ -175,6 +175,16 @@
<item>2</item> <item>2</item>
</integer-array> </integer-array>
<string-array name="cpuBackendNames">
<item>@string/cpu_backend_dynarmic</item>
<item>@string/cpu_backend_nce</item>
</string-array>
<integer-array name="cpuBackendValues">
<item>0</item>
<item>1</item>
</integer-array>
<string-array name="cpuAccuracyNames"> <string-array name="cpuAccuracyNames">
<item>@string/auto</item> <item>@string/auto</item>
<item>@string/cpu_accuracy_accurate</item> <item>@string/cpu_accuracy_accurate</item>

View File

@ -185,6 +185,7 @@
<string name="frame_limit_enable_description">Limits emulation speed to a specified percentage of normal speed.</string> <string name="frame_limit_enable_description">Limits emulation speed to a specified percentage of normal speed.</string>
<string name="frame_limit_slider">Limit speed percent</string> <string name="frame_limit_slider">Limit speed percent</string>
<string name="frame_limit_slider_description">Specifies the percentage to limit emulation speed. 100% is the normal speed. Values higher or lower will increase or decrease the speed limit.</string> <string name="frame_limit_slider_description">Specifies the percentage to limit emulation speed. 100% is the normal speed. Values higher or lower will increase or decrease the speed limit.</string>
<string name="cpu_backend">CPU Backend</string>
<string name="cpu_accuracy">CPU accuracy</string> <string name="cpu_accuracy">CPU accuracy</string>
<string name="value_with_units">%1$s%2$s</string> <string name="value_with_units">%1$s%2$s</string>
@ -416,6 +417,10 @@
<string name="ratio_force_sixteen_ten">Force 16:10</string> <string name="ratio_force_sixteen_ten">Force 16:10</string>
<string name="ratio_stretch">Stretch to window</string> <string name="ratio_stretch">Stretch to window</string>
<!-- CPU Backend -->
<string name="cpu_backend_dynarmic">Dynarmic (Slow)</string>
<string name="cpu_backend_nce">Native code execution (NCE)</string>
<!-- CPU Accuracy --> <!-- CPU Accuracy -->
<string name="cpu_accuracy_accurate">Accurate</string> <string name="cpu_accuracy_accurate">Accurate</string>
<string name="cpu_accuracy_unsafe">Unsafe</string> <string name="cpu_accuracy_unsafe">Unsafe</string>

View File

@ -52,6 +52,7 @@ add_library(common STATIC
fiber.cpp fiber.cpp
fiber.h fiber.h
fixed_point.h fixed_point.h
free_region_manager.h
fs/file.cpp fs/file.cpp
fs/file.h fs/file.h
fs/fs.cpp fs/fs.cpp
@ -166,6 +167,13 @@ if (WIN32)
target_link_libraries(common PRIVATE ntdll) target_link_libraries(common PRIVATE ntdll)
endif() endif()
if (NOT WIN32)
target_sources(common PRIVATE
signal_chain.cpp
signal_chain.h
)
endif()
if(ANDROID) if(ANDROID)
target_sources(common target_sources(common
PRIVATE PRIVATE

View File

@ -0,0 +1,55 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <boost/icl/interval_set.hpp>
namespace Common {
class FreeRegionManager {
public:
explicit FreeRegionManager() = default;
~FreeRegionManager() = default;
void SetAddressSpace(void* start, size_t size) {
this->FreeBlock(start, size);
}
std::pair<void*, size_t> FreeBlock(void* block_ptr, size_t size) {
std::scoped_lock lk(m_mutex);
// Check to see if we are adjacent to any regions.
auto start_address = reinterpret_cast<uintptr_t>(block_ptr);
auto end_address = start_address + size;
auto it = m_free_regions.find({start_address - 1, end_address + 1});
// If we are, join with them, ensuring we stay in bounds.
if (it != m_free_regions.end()) {
start_address = std::min(start_address, it->lower());
end_address = std::max(end_address, it->upper());
}
// Free the relevant region.
m_free_regions.insert({start_address, end_address});
// Return the adjusted pointers.
block_ptr = reinterpret_cast<void*>(start_address);
size = end_address - start_address;
return {block_ptr, size};
}
void AllocateBlock(void* block_ptr, size_t size) {
std::scoped_lock lk(m_mutex);
auto address = reinterpret_cast<uintptr_t>(block_ptr);
m_free_regions.subtract({address, address + size});
}
private:
std::mutex m_mutex;
boost::icl::interval_set<uintptr_t> m_free_regions;
};
} // namespace Common

View File

@ -21,15 +21,18 @@
#include <boost/icl/interval_set.hpp> #include <boost/icl/interval_set.hpp>
#include <fcntl.h> #include <fcntl.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <sys/random.h>
#include <unistd.h> #include <unistd.h>
#include "common/scope_exit.h" #include "common/scope_exit.h"
#endif // ^^^ Linux ^^^ #endif // ^^^ Linux ^^^
#include <mutex> #include <mutex>
#include <random>
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/free_region_manager.h"
#include "common/host_memory.h" #include "common/host_memory.h"
#include "common/logging/log.h" #include "common/logging/log.h"
@ -141,7 +144,7 @@ public:
Release(); Release();
} }
void Map(size_t virtual_offset, size_t host_offset, size_t length) { void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) {
std::unique_lock lock{placeholder_mutex}; std::unique_lock lock{placeholder_mutex};
if (!IsNiechePlaceholder(virtual_offset, length)) { if (!IsNiechePlaceholder(virtual_offset, length)) {
Split(virtual_offset, length); Split(virtual_offset, length);
@ -160,7 +163,7 @@ public:
} }
} }
void Protect(size_t virtual_offset, size_t length, bool read, bool write) { void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {
DWORD new_flags{}; DWORD new_flags{};
if (read && write) { if (read && write) {
new_flags = PAGE_READWRITE; new_flags = PAGE_READWRITE;
@ -186,6 +189,11 @@ public:
} }
} }
void EnableDirectMappedAddress() {
// TODO
UNREACHABLE();
}
const size_t backing_size; ///< Size of the backing memory in bytes const size_t backing_size; ///< Size of the backing memory in bytes
const size_t virtual_size; ///< Size of the virtual address placeholder in bytes const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
@ -353,6 +361,64 @@ private:
#elif defined(__linux__) || defined(__FreeBSD__) // ^^^ Windows ^^^ vvv Linux vvv #elif defined(__linux__) || defined(__FreeBSD__) // ^^^ Windows ^^^ vvv Linux vvv
#ifdef ARCHITECTURE_arm64
static uint64_t GetRandomU64() {
uint64_t ret;
ASSERT(getrandom(&ret, sizeof(ret), 0) == 0);
return ret;
}
static void* ChooseVirtualBase(size_t virtual_size) {
constexpr uintptr_t Map39BitSize = (1ULL << 39);
constexpr uintptr_t Map36BitSize = (1ULL << 36);
// Seed the MT with some initial strong randomness.
//
// This is not a cryptographic application, we just want something more
// random than the current time.
std::mt19937_64 rng(GetRandomU64());
// We want to ensure we are allocating at an address aligned to the L2 block size.
// For Qualcomm devices, we must also allocate memory above 36 bits.
const size_t lower = Map36BitSize / HugePageSize;
const size_t upper = (Map39BitSize - virtual_size) / HugePageSize;
const size_t range = upper - lower;
// Try up to 64 times to allocate memory at random addresses in the range.
for (int i = 0; i < 64; i++) {
// Calculate a possible location.
uintptr_t hint_address = ((rng() % range) + lower) * HugePageSize;
// Try to map.
// Note: we may be able to take advantage of MAP_FIXED_NOREPLACE here.
void* map_pointer =
mmap(reinterpret_cast<void*>(hint_address), virtual_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
// If we successfully mapped, we're done.
if (reinterpret_cast<uintptr_t>(map_pointer) == hint_address) {
return map_pointer;
}
// Unmap if necessary, and try again.
if (map_pointer != MAP_FAILED) {
munmap(map_pointer, virtual_size);
}
}
return MAP_FAILED;
}
#else
static void* ChooseVirtualBase(size_t virtual_size) {
return mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
}
#endif
class HostMemory::Impl { class HostMemory::Impl {
public: public:
explicit Impl(size_t backing_size_, size_t virtual_size_) explicit Impl(size_t backing_size_, size_t virtual_size_)
@ -415,8 +481,7 @@ public:
} }
} }
#else #else
virtual_base = static_cast<u8*>(mmap(nullptr, virtual_size, PROT_NONE, virtual_base = virtual_map_base = static_cast<u8*>(ChooseVirtualBase(virtual_size));
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0));
if (virtual_base == MAP_FAILED) { if (virtual_base == MAP_FAILED) {
LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno)); LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
throw std::bad_alloc{}; throw std::bad_alloc{};
@ -424,7 +489,7 @@ public:
madvise(virtual_base, virtual_size, MADV_HUGEPAGE); madvise(virtual_base, virtual_size, MADV_HUGEPAGE);
#endif #endif
placeholders.add({0, virtual_size}); free_manager.SetAddressSpace(virtual_base, virtual_size);
good = true; good = true;
} }
@ -432,14 +497,29 @@ public:
Release(); Release();
} }
void Map(size_t virtual_offset, size_t host_offset, size_t length) { void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) {
{ // Intersect the range with our address space.
std::scoped_lock lock{placeholder_mutex}; AdjustMap(&virtual_offset, &length);
placeholders.subtract({virtual_offset, virtual_offset + length});
}
void* ret = mmap(virtual_base + virtual_offset, length, PROT_READ | PROT_WRITE, // We are removing a placeholder.
MAP_SHARED | MAP_FIXED, fd, host_offset); free_manager.AllocateBlock(virtual_base + virtual_offset, length);
// Deduce mapping protection flags.
int flags = PROT_NONE;
if (True(perms & MemoryPermission::Read)) {
flags |= PROT_READ;
}
if (True(perms & MemoryPermission::Write)) {
flags |= PROT_WRITE;
}
#ifdef ARCHITECTURE_arm64
if (True(perms & MemoryPermission::Execute)) {
flags |= PROT_EXEC;
}
#endif
void* ret = mmap(virtual_base + virtual_offset, length, flags, MAP_SHARED | MAP_FIXED, fd,
host_offset);
ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
} }
@ -447,47 +527,54 @@ public:
// The method name is wrong. We're still talking about the virtual range. // The method name is wrong. We're still talking about the virtual range.
// We don't want to unmap, we want to reserve this memory. // We don't want to unmap, we want to reserve this memory.
{ // Intersect the range with our address space.
std::scoped_lock lock{placeholder_mutex}; AdjustMap(&virtual_offset, &length);
auto it = placeholders.find({virtual_offset - 1, virtual_offset + length + 1});
if (it != placeholders.end()) { // Merge with any adjacent placeholder mappings.
size_t prev_upper = virtual_offset + length; auto [merged_pointer, merged_size] =
virtual_offset = std::min(virtual_offset, it->lower()); free_manager.FreeBlock(virtual_base + virtual_offset, length);
length = std::max(it->upper(), prev_upper) - virtual_offset;
}
placeholders.add({virtual_offset, virtual_offset + length}); void* ret = mmap(merged_pointer, merged_size, PROT_NONE,
}
void* ret = mmap(virtual_base + virtual_offset, length, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
} }
void Protect(size_t virtual_offset, size_t length, bool read, bool write) { void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {
int flags = 0; // Intersect the range with our address space.
AdjustMap(&virtual_offset, &length);
int flags = PROT_NONE;
if (read) { if (read) {
flags |= PROT_READ; flags |= PROT_READ;
} }
if (write) { if (write) {
flags |= PROT_WRITE; flags |= PROT_WRITE;
} }
#ifdef ARCHITECTURE_arm64
if (execute) {
flags |= PROT_EXEC;
}
#endif
int ret = mprotect(virtual_base + virtual_offset, length, flags); int ret = mprotect(virtual_base + virtual_offset, length, flags);
ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno)); ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
} }
void EnableDirectMappedAddress() {
virtual_base = nullptr;
}
const size_t backing_size; ///< Size of the backing memory in bytes const size_t backing_size; ///< Size of the backing memory in bytes
const size_t virtual_size; ///< Size of the virtual address placeholder in bytes const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
u8* backing_base{reinterpret_cast<u8*>(MAP_FAILED)}; u8* backing_base{reinterpret_cast<u8*>(MAP_FAILED)};
u8* virtual_base{reinterpret_cast<u8*>(MAP_FAILED)}; u8* virtual_base{reinterpret_cast<u8*>(MAP_FAILED)};
u8* virtual_map_base{reinterpret_cast<u8*>(MAP_FAILED)};
private: private:
/// Release all resources in the object /// Release all resources in the object
void Release() { void Release() {
if (virtual_base != MAP_FAILED) { if (virtual_map_base != MAP_FAILED) {
int ret = munmap(virtual_base, virtual_size); int ret = munmap(virtual_map_base, virtual_size);
ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno)); ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno));
} }
@ -502,10 +589,29 @@ private:
} }
} }
int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create void AdjustMap(size_t* virtual_offset, size_t* length) {
if (virtual_base != nullptr) {
return;
}
boost::icl::interval_set<size_t> placeholders; ///< Mapped placeholders // If we are direct mapped, we want to make sure we are operating on a region
std::mutex placeholder_mutex; ///< Mutex for placeholders // that is in range of our virtual mapping.
size_t intended_start = *virtual_offset;
size_t intended_end = intended_start + *length;
size_t address_space_start = reinterpret_cast<size_t>(virtual_map_base);
size_t address_space_end = address_space_start + virtual_size;
if (address_space_start > intended_end || intended_start > address_space_end) {
*virtual_offset = 0;
*length = 0;
} else {
*virtual_offset = std::max(intended_start, address_space_start);
*length = std::min(intended_end, address_space_end) - *virtual_offset;
}
}
int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create
FreeRegionManager free_manager{};
}; };
#else // ^^^ Linux ^^^ vvv Generic vvv #else // ^^^ Linux ^^^ vvv Generic vvv
@ -518,11 +624,11 @@ public:
throw std::bad_alloc{}; throw std::bad_alloc{};
} }
void Map(size_t virtual_offset, size_t host_offset, size_t length) {} void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm) {}
void Unmap(size_t virtual_offset, size_t length) {} void Unmap(size_t virtual_offset, size_t length) {}
void Protect(size_t virtual_offset, size_t length, bool read, bool write) {} void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {}
u8* backing_base{nullptr}; u8* backing_base{nullptr};
u8* virtual_base{nullptr}; u8* virtual_base{nullptr};
@ -535,15 +641,16 @@ HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_)
try { try {
// Try to allocate a fastmem arena. // Try to allocate a fastmem arena.
// The implementation will fail with std::bad_alloc on errors. // The implementation will fail with std::bad_alloc on errors.
impl = std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment), impl =
AlignUp(virtual_size, PageAlignment) + std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment),
3 * HugePageSize); AlignUp(virtual_size, PageAlignment) + HugePageSize);
backing_base = impl->backing_base; backing_base = impl->backing_base;
virtual_base = impl->virtual_base; virtual_base = impl->virtual_base;
if (virtual_base) { if (virtual_base) {
virtual_base += 2 * HugePageSize - 1; // Ensure the virtual base is aligned to the L2 block size.
virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1); virtual_base = reinterpret_cast<u8*>(
Common::AlignUp(reinterpret_cast<uintptr_t>(virtual_base), HugePageSize));
virtual_base_offset = virtual_base - impl->virtual_base; virtual_base_offset = virtual_base - impl->virtual_base;
} }
@ -562,7 +669,8 @@ HostMemory::HostMemory(HostMemory&&) noexcept = default;
HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default; HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default;
void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) { void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length,
MemoryPermission perms) {
ASSERT(virtual_offset % PageAlignment == 0); ASSERT(virtual_offset % PageAlignment == 0);
ASSERT(host_offset % PageAlignment == 0); ASSERT(host_offset % PageAlignment == 0);
ASSERT(length % PageAlignment == 0); ASSERT(length % PageAlignment == 0);
@ -571,7 +679,7 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) {
if (length == 0 || !virtual_base || !impl) { if (length == 0 || !virtual_base || !impl) {
return; return;
} }
impl->Map(virtual_offset + virtual_base_offset, host_offset, length); impl->Map(virtual_offset + virtual_base_offset, host_offset, length, perms);
} }
void HostMemory::Unmap(size_t virtual_offset, size_t length) { void HostMemory::Unmap(size_t virtual_offset, size_t length) {
@ -584,14 +692,22 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length) {
impl->Unmap(virtual_offset + virtual_base_offset, length); impl->Unmap(virtual_offset + virtual_base_offset, length);
} }
void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write) { void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write,
bool execute) {
ASSERT(virtual_offset % PageAlignment == 0); ASSERT(virtual_offset % PageAlignment == 0);
ASSERT(length % PageAlignment == 0); ASSERT(length % PageAlignment == 0);
ASSERT(virtual_offset + length <= virtual_size); ASSERT(virtual_offset + length <= virtual_size);
if (length == 0 || !virtual_base || !impl) { if (length == 0 || !virtual_base || !impl) {
return; return;
} }
impl->Protect(virtual_offset + virtual_base_offset, length, read, write); impl->Protect(virtual_offset + virtual_base_offset, length, read, write, execute);
}
void HostMemory::EnableDirectMappedAddress() {
if (impl) {
impl->EnableDirectMappedAddress();
virtual_size += reinterpret_cast<uintptr_t>(virtual_base);
}
} }
} // namespace Common } // namespace Common

View File

@ -4,11 +4,20 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/virtual_buffer.h" #include "common/virtual_buffer.h"
namespace Common { namespace Common {
enum class MemoryPermission : u32 {
Read = 1 << 0,
Write = 1 << 1,
ReadWrite = Read | Write,
Execute = 1 << 2,
};
DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission)
/** /**
* A low level linear memory buffer, which supports multiple mappings * A low level linear memory buffer, which supports multiple mappings
* Its purpose is to rebuild a given sparse memory layout, including mirrors. * Its purpose is to rebuild a given sparse memory layout, including mirrors.
@ -31,11 +40,13 @@ public:
HostMemory(HostMemory&& other) noexcept; HostMemory(HostMemory&& other) noexcept;
HostMemory& operator=(HostMemory&& other) noexcept; HostMemory& operator=(HostMemory&& other) noexcept;
void Map(size_t virtual_offset, size_t host_offset, size_t length); void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms);
void Unmap(size_t virtual_offset, size_t length); void Unmap(size_t virtual_offset, size_t length);
void Protect(size_t virtual_offset, size_t length, bool read, bool write); void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute = false);
void EnableDirectMappedAddress();
[[nodiscard]] u8* BackingBasePointer() noexcept { [[nodiscard]] u8* BackingBasePointer() noexcept {
return backing_base; return backing_base;

View File

@ -41,6 +41,7 @@ SWITCHABLE(AspectRatio, true);
SWITCHABLE(AstcDecodeMode, true); SWITCHABLE(AstcDecodeMode, true);
SWITCHABLE(AstcRecompression, true); SWITCHABLE(AstcRecompression, true);
SWITCHABLE(AudioMode, true); SWITCHABLE(AudioMode, true);
SWITCHABLE(CpuBackend, true);
SWITCHABLE(CpuAccuracy, true); SWITCHABLE(CpuAccuracy, true);
SWITCHABLE(FullscreenMode, true); SWITCHABLE(FullscreenMode, true);
SWITCHABLE(GpuAccuracy, true); SWITCHABLE(GpuAccuracy, true);
@ -155,6 +156,22 @@ bool IsFastmemEnabled() {
return true; return true;
} }
static bool is_nce_enabled = false;
void SetNceEnabled(bool is_39bit) {
const bool is_nce_selected = values.cpu_backend.GetValue() == CpuBackend::Nce;
is_nce_enabled = IsFastmemEnabled() && is_nce_selected && is_39bit;
if (is_nce_selected && !is_nce_enabled) {
LOG_WARNING(
Common,
"Program does not utilize 39-bit address space, unable to natively execute code");
}
}
bool IsNceEnabled() {
return is_nce_enabled;
}
bool IsDockedMode() { bool IsDockedMode() {
return values.use_docked_mode.GetValue() == Settings::ConsoleMode::Docked; return values.use_docked_mode.GetValue() == Settings::ConsoleMode::Docked;
} }

View File

@ -63,6 +63,7 @@ SWITCHABLE(AspectRatio, true);
SWITCHABLE(AstcDecodeMode, true); SWITCHABLE(AstcDecodeMode, true);
SWITCHABLE(AstcRecompression, true); SWITCHABLE(AstcRecompression, true);
SWITCHABLE(AudioMode, true); SWITCHABLE(AudioMode, true);
SWITCHABLE(CpuBackend, true);
SWITCHABLE(CpuAccuracy, true); SWITCHABLE(CpuAccuracy, true);
SWITCHABLE(FullscreenMode, true); SWITCHABLE(FullscreenMode, true);
SWITCHABLE(GpuAccuracy, true); SWITCHABLE(GpuAccuracy, true);
@ -179,6 +180,14 @@ struct Values {
&use_speed_limit}; &use_speed_limit};
// Cpu // Cpu
SwitchableSetting<CpuBackend, true> cpu_backend{
linkage, CpuBackend::Dynarmic, CpuBackend::Dynarmic,
#ifdef ARCHITECTURE_arm64
CpuBackend::Nce,
#else
CpuBackend::Dynarmic,
#endif
"cpu_backend", Category::Cpu};
SwitchableSetting<CpuAccuracy, true> cpu_accuracy{linkage, CpuAccuracy::Auto, SwitchableSetting<CpuAccuracy, true> cpu_accuracy{linkage, CpuAccuracy::Auto,
CpuAccuracy::Auto, CpuAccuracy::Paranoid, CpuAccuracy::Auto, CpuAccuracy::Paranoid,
"cpu_accuracy", Category::Cpu}; "cpu_accuracy", Category::Cpu};
@ -358,6 +367,8 @@ struct Values {
Category::RendererDebug}; Category::RendererDebug};
// TODO: remove this once AMDVLK supports VK_EXT_depth_bias_control // TODO: remove this once AMDVLK supports VK_EXT_depth_bias_control
bool renderer_amdvlk_depth_bias_workaround{}; bool renderer_amdvlk_depth_bias_workaround{};
Setting<bool> disable_buffer_reorder{linkage, false, "disable_buffer_reorder",
Category::RendererDebug};
// System // System
SwitchableSetting<Language, true> language_index{linkage, SwitchableSetting<Language, true> language_index{linkage,
@ -534,6 +545,8 @@ bool IsGPULevelExtreme();
bool IsGPULevelHigh(); bool IsGPULevelHigh();
bool IsFastmemEnabled(); bool IsFastmemEnabled();
void SetNceEnabled(bool is_64bit);
bool IsNceEnabled();
bool IsDockedMode(); bool IsDockedMode();

View File

@ -129,6 +129,8 @@ ENUM(ShaderBackend, Glsl, Glasm, SpirV);
ENUM(GpuAccuracy, Normal, High, Extreme); ENUM(GpuAccuracy, Normal, High, Extreme);
ENUM(CpuBackend, Dynarmic, Nce);
ENUM(CpuAccuracy, Auto, Accurate, Unsafe, Paranoid); ENUM(CpuAccuracy, Auto, Accurate, Unsafe, Paranoid);
ENUM(MemoryLayout, Memory_4Gb, Memory_6Gb, Memory_8Gb); ENUM(MemoryLayout, Memory_4Gb, Memory_6Gb, Memory_8Gb);

View File

@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <dlfcn.h>
#include "common/assert.h"
#include "common/dynamic_library.h"
#include "common/scope_exit.h"
#include "common/signal_chain.h"
namespace Common {
template <typename T>
T* LookupLibcSymbol(const char* name) {
#if defined(__BIONIC__)
Common::DynamicLibrary provider("libc.so");
if (!provider.IsOpen()) {
UNREACHABLE_MSG("Failed to open libc!");
}
#else
// For other operating environments, we assume the symbol is not overridden.
const char* base = nullptr;
Common::DynamicLibrary provider(base);
#endif
void* sym = provider.GetSymbolAddress(name);
if (sym == nullptr) {
sym = dlsym(RTLD_DEFAULT, name);
}
if (sym == nullptr) {
UNREACHABLE_MSG("Unable to find symbol {}!", name);
}
return reinterpret_cast<T*>(sym);
}
int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact) {
static auto libc_sigaction = LookupLibcSymbol<decltype(sigaction)>("sigaction");
return libc_sigaction(signum, act, oldact);
}
} // namespace Common

19
src/common/signal_chain.h Normal file
View File

@ -0,0 +1,19 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#ifndef _WIN32
#include <signal.h>
namespace Common {
// Android's ART overrides sigaction with its own wrapper. This is problematic for SIGSEGV
// in particular, because ART's handler accesses tpidr_el0, which conflicts with NCE.
// This extracts the libc symbol and calls it directly.
int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact);
} // namespace Common
#endif

View File

@ -921,6 +921,23 @@ if (ENABLE_WEB_SERVICE)
target_link_libraries(core PRIVATE web_service) target_link_libraries(core PRIVATE web_service)
endif() endif()
if (ARCHITECTURE_arm64 AND (ANDROID OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux"))
target_compile_definitions(core PRIVATE -DHAS_NCE)
enable_language(C ASM)
set(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp")
target_sources(core PRIVATE
arm/nce/arm_nce.cpp
arm/nce/arm_nce.h
arm/nce/arm_nce.s
arm/nce/guest_context.h
arm/nce/patch.cpp
arm/nce/patch.h
arm/nce/instructions.h
)
target_link_libraries(core PRIVATE merry::oaknut)
endif()
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
target_sources(core PRIVATE target_sources(core PRIVATE
arm/dynarmic/arm_dynarmic.h arm/dynarmic/arm_dynarmic.h

View File

@ -201,6 +201,8 @@ void ARM_Interface::Run() {
if (True(hr & HaltReason::DataAbort)) { if (True(hr & HaltReason::DataAbort)) {
if (system.DebuggerEnabled()) { if (system.DebuggerEnabled()) {
system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint()); system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint());
} else {
LogBacktrace();
} }
current_thread->RequestSuspend(SuspendType::Debug); current_thread->RequestSuspend(SuspendType::Debug);
break; break;

View File

@ -81,6 +81,9 @@ public:
// thread context to be 800 bytes in size. // thread context to be 800 bytes in size.
static_assert(sizeof(ThreadContext64) == 0x320); static_assert(sizeof(ThreadContext64) == 0x320);
/// Perform any backend-specific initialization.
virtual void Initialize() {}
/// Runs the CPU until an event happens /// Runs the CPU until an event happens
void Run(); void Run();

View File

@ -0,0 +1,400 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cinttypes>
#include <memory>
#include "common/signal_chain.h"
#include "core/arm/nce/arm_nce.h"
#include "core/arm/nce/patch.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/hle/kernel/k_process.h"
#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>
namespace Core {
namespace {
struct sigaction g_orig_action;
// Verify assembly offsets.
using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0NativeContext);
static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
_aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
while (header->magic != FPSIMD_MAGIC) {
header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
}
return reinterpret_cast<fpsimd_context*>(header);
}
} // namespace
void* ARM_NCE::RestoreGuestContext(void* raw_context) {
// Retrieve the host context.
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
// Thread-local parameters will be located in x9.
auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_ctx.regs[9]);
auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);
// Retrieve the host floating point state.
auto* fpctx = GetFloatingPointState(host_ctx);
// Save host callee-saved registers.
std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->vregs[8],
sizeof(guest_ctx->host_ctx.host_saved_vregs));
std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_ctx.regs[19],
sizeof(guest_ctx->host_ctx.host_saved_regs));
// Save stack pointer.
guest_ctx->host_ctx.host_sp = host_ctx.sp;
// Restore all guest state except tpidr_el0.
host_ctx.sp = guest_ctx->sp;
host_ctx.pc = guest_ctx->pc;
host_ctx.pstate = guest_ctx->pstate;
fpctx->fpcr = guest_ctx->fpcr;
fpctx->fpsr = guest_ctx->fpsr;
std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs));
std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs));
// Return the new thread-local storage pointer.
return tpidr;
}
void ARM_NCE::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
// Retrieve the host context.
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
// Retrieve the host floating point state.
auto* fpctx = GetFloatingPointState(host_ctx);
// Save all guest registers except tpidr_el0.
std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs));
std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs));
guest_ctx->fpsr = fpctx->fpsr;
guest_ctx->fpcr = fpctx->fpcr;
guest_ctx->pstate = static_cast<u32>(host_ctx.pstate);
guest_ctx->pc = host_ctx.pc;
guest_ctx->sp = host_ctx.sp;
// Restore stack pointer.
host_ctx.sp = guest_ctx->host_ctx.host_sp;
// Restore host callee-saved registers.
std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(),
sizeof(guest_ctx->host_ctx.host_saved_regs));
std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(),
sizeof(guest_ctx->host_ctx.host_saved_vregs));
// Return from the call on exit by setting pc to x30.
host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11];
// Clear esr_el1 and return it.
host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
}
bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
auto* info = static_cast<siginfo_t*>(raw_info);
// Try to handle an invalid access.
// TODO: handle accesses which split a page?
const Common::ProcessAddress addr =
(reinterpret_cast<u64>(info->si_addr) & ~Memory::YUZU_PAGEMASK);
if (guest_ctx->system->ApplicationMemory().InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) {
// We handled the access successfully and are returning to guest code.
return true;
}
// We can't handle the access, so determine why we crashed.
const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast<u64>(info->si_addr);
// For data aborts, skip the instruction and return to guest code.
// This will allow games to continue in many scenarios where they would otherwise crash.
if (!is_prefetch_abort) {
host_ctx.pc += 4;
return true;
}
// This is a prefetch abort.
guest_ctx->esr_el1.fetch_or(static_cast<u64>(HaltReason::PrefetchAbort));
// Forcibly mark the context as locked. We are still running.
// We may race with SignalInterrupt here:
// - If we lose the race, then SignalInterrupt will send us a signal we are masking,
// and it will do nothing when it is unmasked, as we have already left guest code.
// - If we win the race, then SignalInterrupt will wait for us to unlock first.
auto& thread_params = guest_ctx->parent->running_thread->GetNativeExecutionParameters();
thread_params.lock.store(SpinLockLocked);
// Return to host.
SaveGuestContext(guest_ctx, raw_context);
return false;
}
void ARM_NCE::HandleHostFault(int sig, void* raw_info, void* raw_context) {
return g_orig_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context);
}
HaltReason ARM_NCE::RunJit() {
// Get the thread parameters.
// TODO: pass the current thread down from ::Run
auto* thread = Kernel::GetCurrentThreadPointer(system.Kernel());
auto* thread_params = &thread->GetNativeExecutionParameters();
{
// Lock our core context.
std::scoped_lock lk{lock};
// We should not be running.
ASSERT(running_thread == nullptr);
// Check if we need to run. If we have already been halted, we are done.
u64 halt = guest_ctx.esr_el1.exchange(0);
if (halt != 0) {
return static_cast<HaltReason>(halt);
}
// Mark that we are running.
running_thread = thread;
// Acquire the lock on the thread parameters.
// This allows us to force synchronization with SignalInterrupt.
LockThreadParameters(thread_params);
}
// Assign current members.
guest_ctx.parent = this;
thread_params->native_context = &guest_ctx;
thread_params->tpidr_el0 = guest_ctx.tpidr_el0;
thread_params->tpidrro_el0 = guest_ctx.tpidrro_el0;
thread_params->is_running = true;
HaltReason halt{};
// TODO: finding and creating the post handler needs to be locked
// to deal with dynamic loading of NROs.
const auto& post_handlers = system.ApplicationProcess()->GetPostHandlers();
if (auto it = post_handlers.find(guest_ctx.pc); it != post_handlers.end()) {
halt = ReturnToRunCodeByTrampoline(thread_params, &guest_ctx, it->second);
} else {
halt = ReturnToRunCodeByExceptionLevelChange(thread_id, thread_params);
}
// Unload members.
// The thread does not change, so we can persist the old reference.
guest_ctx.tpidr_el0 = thread_params->tpidr_el0;
thread_params->native_context = nullptr;
thread_params->is_running = false;
// Unlock the thread parameters.
UnlockThreadParameters(thread_params);
{
// Lock the core context.
std::scoped_lock lk{lock};
// On exit, we no longer have an active thread.
running_thread = nullptr;
}
// Return the halt reason.
return halt;
}
HaltReason ARM_NCE::StepJit() {
return HaltReason::StepThread;
}
u32 ARM_NCE::GetSvcNumber() const {
return guest_ctx.svc_swi;
}
ARM_NCE::ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_)
: ARM_Interface{system_, uses_wall_clock_}, core_index{core_index_} {
guest_ctx.system = &system_;
}
ARM_NCE::~ARM_NCE() = default;
void ARM_NCE::Initialize() {
thread_id = gettid();
// Setup our signals
static std::once_flag flag;
std::call_once(flag, [] {
using HandlerType = decltype(sigaction::sa_sigaction);
sigset_t signal_mask;
sigemptyset(&signal_mask);
sigaddset(&signal_mask, ReturnToRunCodeByExceptionLevelChangeSignal);
sigaddset(&signal_mask, BreakFromRunCodeSignal);
sigaddset(&signal_mask, GuestFaultSignal);
struct sigaction return_to_run_code_action {};
return_to_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
return_to_run_code_action.sa_sigaction = reinterpret_cast<HandlerType>(
&ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
return_to_run_code_action.sa_mask = signal_mask;
Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
nullptr);
struct sigaction break_from_run_code_action {};
break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
break_from_run_code_action.sa_sigaction =
reinterpret_cast<HandlerType>(&ARM_NCE::BreakFromRunCodeSignalHandler);
break_from_run_code_action.sa_mask = signal_mask;
Common::SigAction(BreakFromRunCodeSignal, &break_from_run_code_action, nullptr);
struct sigaction fault_action {};
fault_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
fault_action.sa_sigaction =
reinterpret_cast<HandlerType>(&ARM_NCE::GuestFaultSignalHandler);
fault_action.sa_mask = signal_mask;
Common::SigAction(GuestFaultSignal, &fault_action, &g_orig_action);
// Simplify call for g_orig_action.
// These fields occupy the same space in memory, so this should be a no-op in practice.
if (!(g_orig_action.sa_flags & SA_SIGINFO)) {
g_orig_action.sa_sigaction =
reinterpret_cast<decltype(g_orig_action.sa_sigaction)>(g_orig_action.sa_handler);
}
});
}
void ARM_NCE::SetPC(u64 pc) {
guest_ctx.pc = pc;
}
u64 ARM_NCE::GetPC() const {
return guest_ctx.pc;
}
u64 ARM_NCE::GetSP() const {
return guest_ctx.sp;
}
u64 ARM_NCE::GetReg(int index) const {
return guest_ctx.cpu_registers[index];
}
void ARM_NCE::SetReg(int index, u64 value) {
guest_ctx.cpu_registers[index] = value;
}
u128 ARM_NCE::GetVectorReg(int index) const {
return guest_ctx.vector_registers[index];
}
void ARM_NCE::SetVectorReg(int index, u128 value) {
guest_ctx.vector_registers[index] = value;
}
u32 ARM_NCE::GetPSTATE() const {
return guest_ctx.pstate;
}
void ARM_NCE::SetPSTATE(u32 pstate) {
guest_ctx.pstate = pstate;
}
u64 ARM_NCE::GetTlsAddress() const {
return guest_ctx.tpidrro_el0;
}
void ARM_NCE::SetTlsAddress(u64 address) {
guest_ctx.tpidrro_el0 = address;
}
u64 ARM_NCE::GetTPIDR_EL0() const {
return guest_ctx.tpidr_el0;
}
void ARM_NCE::SetTPIDR_EL0(u64 value) {
guest_ctx.tpidr_el0 = value;
}
void ARM_NCE::SaveContext(ThreadContext64& ctx) const {
ctx.cpu_registers = guest_ctx.cpu_registers;
ctx.sp = guest_ctx.sp;
ctx.pc = guest_ctx.pc;
ctx.pstate = guest_ctx.pstate;
ctx.vector_registers = guest_ctx.vector_registers;
ctx.fpcr = guest_ctx.fpcr;
ctx.fpsr = guest_ctx.fpsr;
ctx.tpidr = guest_ctx.tpidr_el0;
}
void ARM_NCE::LoadContext(const ThreadContext64& ctx) {
guest_ctx.cpu_registers = ctx.cpu_registers;
guest_ctx.sp = ctx.sp;
guest_ctx.pc = ctx.pc;
guest_ctx.pstate = ctx.pstate;
guest_ctx.vector_registers = ctx.vector_registers;
guest_ctx.fpcr = ctx.fpcr;
guest_ctx.fpsr = ctx.fpsr;
guest_ctx.tpidr_el0 = ctx.tpidr;
}
void ARM_NCE::SignalInterrupt() {
// Lock core context.
std::scoped_lock lk{lock};
// Add break loop condition.
guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));
// If there is no thread running, we are done.
if (running_thread == nullptr) {
return;
}
// Lock the thread context.
auto* params = &running_thread->GetNativeExecutionParameters();
LockThreadParameters(params);
if (params->is_running) {
// We should signal to the running thread.
// The running thread will unlock the thread context.
syscall(SYS_tkill, thread_id, BreakFromRunCodeSignal);
} else {
// If the thread is no longer running, we have nothing to do.
UnlockThreadParameters(params);
}
}
void ARM_NCE::ClearInterrupt() {
guest_ctx.esr_el1 = {};
}
void ARM_NCE::ClearInstructionCache() {
// TODO: This is not possible to implement correctly on Linux because
// we do not have any access to ic iallu.
// Require accesses to complete.
std::atomic_thread_fence(std::memory_order_seq_cst);
}
void ARM_NCE::InvalidateCacheRange(u64 addr, std::size_t size) {
this->ClearInstructionCache();
}
void ARM_NCE::ClearExclusiveState() {
// No-op.
}
void ARM_NCE::PageTableChanged(Common::PageTable& page_table,
std::size_t new_address_space_size_in_bits) {
// No-op. Page table is never used.
}
} // namespace Core

108
src/core/arm/nce/arm_nce.h Normal file
View File

@ -0,0 +1,108 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <memory>
#include <span>
#include <unordered_map>
#include <vector>
#include "core/arm/arm_interface.h"
#include "core/arm/nce/guest_context.h"
namespace Core::Memory {
class Memory;
}
namespace Core {
class System;
class ARM_NCE final : public ARM_Interface {
public:
ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_);
~ARM_NCE() override;
void Initialize() override;
void SetPC(u64 pc) override;
u64 GetPC() const override;
u64 GetSP() const override;
u64 GetReg(int index) const override;
void SetReg(int index, u64 value) override;
u128 GetVectorReg(int index) const override;
void SetVectorReg(int index, u128 value) override;
u32 GetPSTATE() const override;
void SetPSTATE(u32 pstate) override;
u64 GetTlsAddress() const override;
void SetTlsAddress(u64 address) override;
void SetTPIDR_EL0(u64 value) override;
u64 GetTPIDR_EL0() const override;
Architecture GetArchitecture() const override {
return Architecture::Aarch64;
}
void SaveContext(ThreadContext32& ctx) const override {}
void SaveContext(ThreadContext64& ctx) const override;
void LoadContext(const ThreadContext32& ctx) override {}
void LoadContext(const ThreadContext64& ctx) override;
void SignalInterrupt() override;
void ClearInterrupt() override;
void ClearExclusiveState() override;
void ClearInstructionCache() override;
void InvalidateCacheRange(u64 addr, std::size_t size) override;
void PageTableChanged(Common::PageTable& new_page_table,
std::size_t new_address_space_size_in_bits) override;
protected:
HaltReason RunJit() override;
HaltReason StepJit() override;
u32 GetSvcNumber() const override;
const Kernel::DebugWatchpoint* HaltedWatchpoint() const override {
return nullptr;
}
void RewindBreakpointInstruction() override {}
private:
// Assembly definitions.
static HaltReason ReturnToRunCodeByTrampoline(void* tpidr, GuestContext* ctx,
u64 trampoline_addr);
static HaltReason ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr);
static void ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info,
void* raw_context);
static void BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context);
static void GuestFaultSignalHandler(int sig, void* info, void* raw_context);
static void LockThreadParameters(void* tpidr);
static void UnlockThreadParameters(void* tpidr);
private:
// C++ implementation functions for assembly definitions.
static void* RestoreGuestContext(void* raw_context);
static void SaveGuestContext(GuestContext* ctx, void* raw_context);
static bool HandleGuestFault(GuestContext* ctx, void* info, void* raw_context);
static void HandleHostFault(int sig, void* info, void* raw_context);
public:
// Members set on initialization.
std::size_t core_index{};
pid_t thread_id{-1};
// Core context.
GuestContext guest_ctx;
// Thread and invalidation info.
std::mutex lock;
Kernel::KThread* running_thread{};
};
} // namespace Core

222
src/core/arm/nce/arm_nce.s Normal file
View File

@ -0,0 +1,222 @@
/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "core/arm/nce/arm_nce_asm_definitions.h"
#define LOAD_IMMEDIATE_32(reg, val) \
mov reg, #(((val) >> 0x00) & 0xFFFF); \
movk reg, #(((val) >> 0x10) & 0xFFFF), lsl #16
/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
.section .text._ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
.global _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
.type _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
_ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
/* Back up host sp to x3. */
/* Back up host tpidr_el0 to x4. */
mov x3, sp
mrs x4, tpidr_el0
/* Load guest sp. x5 is used as a scratch register. */
ldr x5, [x1, #(GuestContextSp)]
mov sp, x5
/* Offset GuestContext pointer to the host member. */
add x5, x1, #(GuestContextHostContext)
/* Save original host sp and tpidr_el0 (x3, x4) to host context. */
stp x3, x4, [x5, #(HostContextSpTpidrEl0)]
/* Save all callee-saved host GPRs. */
stp x19, x20, [x5, #(HostContextRegs+0x0)]
stp x21, x22, [x5, #(HostContextRegs+0x10)]
stp x23, x24, [x5, #(HostContextRegs+0x20)]
stp x25, x26, [x5, #(HostContextRegs+0x30)]
stp x27, x28, [x5, #(HostContextRegs+0x40)]
stp x29, x30, [x5, #(HostContextRegs+0x50)]
/* Save all callee-saved host FPRs. */
stp q8, q9, [x5, #(HostContextVregs+0x0)]
stp q10, q11, [x5, #(HostContextVregs+0x20)]
stp q12, q13, [x5, #(HostContextVregs+0x40)]
stp q14, q15, [x5, #(HostContextVregs+0x60)]
/* Load guest tpidr_el0 from argument. */
msr tpidr_el0, x0
/* Tail call the trampoline to restore guest state. */
br x2
/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
.section .text._ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
.global _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv
.type _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
_ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv:
/* This jumps to the signal handler, which will restore the entire context. */
/* On entry, x0 = thread id, which is already in the right place. */
/* Move tpidr to x9 so it is not trampled. */
mov x9, x1
/* Set up arguments. */
mov x8, #(__NR_tkill)
mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal)
/* Tail call the signal handler. */
svc #0
/* Block execution from flowing here. */
brk #1000
/* static void Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
.section .text._ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
stp x29, x30, [sp, #-0x10]!
mov x29, sp
/* Call the context restorer with the raw context. */
mov x0, x2
bl _ZN4Core7ARM_NCE19RestoreGuestContextEPv
/* Save the old value of tpidr_el0. */
mrs x8, tpidr_el0
ldr x9, [x0, #(TpidrEl0NativeContext)]
str x8, [x9, #(GuestContextHostContext + HostContextTpidrEl0)]
/* Set our new tpidr_el0. */
msr tpidr_el0, x0
/* Unlock the context. */
bl _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
/* Returning from here will enter the guest. */
ldp x29, x30, [sp], #0x10
ret
/* static void Core::ARM_NCE::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
.section .text._ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_:
/* Check to see if we have the correct TLS magic. */
mrs x8, tpidr_el0
ldr w9, [x8, #(TpidrEl0TlsMagic)]
LOAD_IMMEDIATE_32(w10, TlsMagic)
cmp w9, w10
b.ne 1f
/* Correct TLS magic, so this is a guest interrupt. */
/* Restore host tpidr_el0. */
ldr x0, [x8, #(TpidrEl0NativeContext)]
ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
msr tpidr_el0, x3
/* Tail call the restorer. */
mov x1, x2
b _ZN4Core7ARM_NCE16SaveGuestContextEPNS_12GuestContextEPv
/* Returning from here will enter host code. */
1:
/* Incorrect TLS magic, so this is a spurious signal. */
ret
/* static void Core::ARM_NCE::GuestFaultSignalHandler(int sig, void* info, void* raw_context) */
.section .text._ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_:
/* Check to see if we have the correct TLS magic. */
mrs x8, tpidr_el0
ldr w9, [x8, #(TpidrEl0TlsMagic)]
LOAD_IMMEDIATE_32(w10, TlsMagic)
cmp w9, w10
b.eq 1f
/* Incorrect TLS magic, so this is a host fault. */
/* Tail call the handler. */
b _ZN4Core7ARM_NCE15HandleHostFaultEiPvS1_
1:
/* Correct TLS magic, so this is a guest fault. */
stp x29, x30, [sp, #-0x20]!
str x19, [sp, #0x10]
mov x29, sp
/* Save the old tpidr_el0. */
mov x19, x8
/* Restore host tpidr_el0. */
ldr x0, [x8, #(TpidrEl0NativeContext)]
ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
msr tpidr_el0, x3
/* Call the handler. */
bl _ZN4Core7ARM_NCE16HandleGuestFaultEPNS_12GuestContextEPvS3_
/* If the handler returned false, we want to preserve the host tpidr_el0. */
cbz x0, 2f
/* Otherwise, restore guest tpidr_el0. */
msr tpidr_el0, x19
2:
ldr x19, [sp, #0x10]
ldp x29, x30, [sp], #0x20
ret
/* static void Core::ARM_NCE::LockThreadParameters(void* tpidr) */
.section .text._ZN4Core7ARM_NCE20LockThreadParametersEPv, "ax", %progbits
.global _ZN4Core7ARM_NCE20LockThreadParametersEPv
.type _ZN4Core7ARM_NCE20LockThreadParametersEPv, %function
_ZN4Core7ARM_NCE20LockThreadParametersEPv:
/* Offset to lock member. */
add x0, x0, #(TpidrEl0Lock)
1:
/* Clear the monitor. */
clrex
2:
/* Load-linked with acquire ordering. */
ldaxr w1, [x0]
/* If the value was SpinLockLocked, clear monitor and retry. */
cbz w1, 1b
/* Store-conditional SpinLockLocked with relaxed ordering. */
stxr w1, wzr, [x0]
/* If we failed to store, retry. */
cbnz w1, 2b
ret
/* static void Core::ARM_NCE::UnlockThreadParameters(void* tpidr) */
.section .text._ZN4Core7ARM_NCE22UnlockThreadParametersEPv, "ax", %progbits
.global _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
.type _ZN4Core7ARM_NCE22UnlockThreadParametersEPv, %function
_ZN4Core7ARM_NCE22UnlockThreadParametersEPv:
/* Offset to lock member. */
add x0, x0, #(TpidrEl0Lock)
/* Load SpinLockUnlocked. */
mov w1, #(SpinLockUnlocked)
/* Store value with release ordering. */
stlr w1, [x0]
ret

View File

@ -0,0 +1,29 @@
/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#define __ASSEMBLY__
#include <asm-generic/signal.h>
#include <asm-generic/unistd.h>
#define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
#define BreakFromRunCodeSignal SIGURG
#define GuestFaultSignal SIGSEGV
#define GuestContextSp 0xF8
#define GuestContextHostContext 0x320
#define HostContextSpTpidrEl0 0xE0
#define HostContextTpidrEl0 0xE8
#define HostContextRegs 0x0
#define HostContextVregs 0x60
#define TpidrEl0NativeContext 0x10
#define TpidrEl0Lock 0x18
#define TpidrEl0TlsMagic 0x20
#define TlsMagic 0x555a5559
#define SpinLockLocked 0
#define SpinLockUnlocked 1

View File

@ -0,0 +1,50 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "core/arm/arm_interface.h"
#include "core/arm/nce/arm_nce_asm_definitions.h"
namespace Core {
class ARM_NCE;
class System;
struct HostContext {
alignas(16) std::array<u64, 12> host_saved_regs{};
alignas(16) std::array<u128, 8> host_saved_vregs{};
u64 host_sp{};
void* host_tpidr_el0{};
};
struct GuestContext {
std::array<u64, 31> cpu_registers{};
u64 sp{};
u64 pc{};
u32 fpcr{};
u32 fpsr{};
std::array<u128, 32> vector_registers{};
u32 pstate{};
alignas(16) HostContext host_ctx{};
u64 tpidrro_el0{};
u64 tpidr_el0{};
std::atomic<u64> esr_el1{};
u32 nzcv{};
u32 svc_swi{};
System* system{};
ARM_NCE* parent{};
};
// Verify assembly offsets.
static_assert(offsetof(GuestContext, sp) == GuestContextSp);
static_assert(offsetof(GuestContext, host_ctx) == GuestContextHostContext);
static_assert(offsetof(HostContext, host_sp) == HostContextSpTpidrEl0);
static_assert(offsetof(HostContext, host_tpidr_el0) - 8 == HostContextSpTpidrEl0);
static_assert(offsetof(HostContext, host_tpidr_el0) == HostContextTpidrEl0);
static_assert(offsetof(HostContext, host_saved_regs) == HostContextRegs);
static_assert(offsetof(HostContext, host_saved_vregs) == HostContextVregs);
} // namespace Core

View File

@ -0,0 +1,147 @@
// SPDX-FileCopyrightText: Copyright © 2020 Skyline Team and Contributors
// SPDX-License-Identifier: MPL-2.0
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Core::NCE {
enum SystemRegister : u32 {
TpidrEl0 = 0x5E82,
TpidrroEl0 = 0x5E83,
CntfrqEl0 = 0x5F00,
CntpctEl0 = 0x5F01,
};
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SVC--Supervisor-Call-
union SVC {
constexpr explicit SVC(u32 raw_) : raw{raw_} {}
constexpr bool Verify() {
return (this->GetSig0() == 0x1 && this->GetSig1() == 0x6A0);
}
constexpr u32 GetSig0() {
return decltype(sig0)::ExtractValue(raw);
}
constexpr u32 GetValue() {
return decltype(value)::ExtractValue(raw);
}
constexpr u32 GetSig1() {
return decltype(sig1)::ExtractValue(raw);
}
u32 raw;
private:
BitField<0, 5, u32> sig0; // 0x1
BitField<5, 16, u32> value; // 16-bit immediate
BitField<21, 11, u32> sig1; // 0x6A0
};
static_assert(sizeof(SVC) == sizeof(u32));
static_assert(SVC(0xD40000C1).Verify());
static_assert(SVC(0xD40000C1).GetValue() == 0x6);
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MRS--Move-System-Register-
union MRS {
constexpr explicit MRS(u32 raw_) : raw{raw_} {}
constexpr bool Verify() {
return (this->GetSig() == 0xD53);
}
constexpr u32 GetRt() {
return decltype(rt)::ExtractValue(raw);
}
constexpr u32 GetSystemReg() {
return decltype(system_reg)::ExtractValue(raw);
}
constexpr u32 GetSig() {
return decltype(sig)::ExtractValue(raw);
}
u32 raw;
private:
BitField<0, 5, u32> rt; // destination register
BitField<5, 15, u32> system_reg; // source system register
BitField<20, 12, u32> sig; // 0xD53
};
static_assert(sizeof(MRS) == sizeof(u32));
static_assert(MRS(0xD53BE020).Verify());
static_assert(MRS(0xD53BE020).GetSystemReg() == CntpctEl0);
static_assert(MRS(0xD53BE020).GetRt() == 0x0);
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-
union MSR {
constexpr explicit MSR(u32 raw_) : raw{raw_} {}
constexpr bool Verify() {
return this->GetSig() == 0xD51;
}
constexpr u32 GetRt() {
return decltype(rt)::ExtractValue(raw);
}
constexpr u32 GetSystemReg() {
return decltype(system_reg)::ExtractValue(raw);
}
constexpr u32 GetSig() {
return decltype(sig)::ExtractValue(raw);
}
u32 raw;
private:
BitField<0, 5, u32> rt; // source register
BitField<5, 15, u32> system_reg; // destination system register
BitField<20, 12, u32> sig; // 0xD51
};
static_assert(sizeof(MSR) == sizeof(u32));
static_assert(MSR(0xD51BD040).Verify());
static_assert(MSR(0xD51BD040).GetSystemReg() == TpidrEl0);
static_assert(MSR(0xD51BD040).GetRt() == 0x0);
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXR--Load-Exclusive-Register-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXR--Store-Exclusive-Register-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers-
union Exclusive {
constexpr explicit Exclusive(u32 raw_) : raw{raw_} {}
constexpr bool Verify() {
return this->GetSig() == 0x10;
}
constexpr u32 GetSig() {
return decltype(sig)::ExtractValue(raw);
}
constexpr u32 AsOrdered() {
return raw | decltype(o0)::FormatValue(1);
}
u32 raw;
private:
BitField<0, 5, u32> rt; // memory operand
BitField<5, 5, u32> rn; // register operand 1
BitField<10, 5, u32> rt2; // register operand 2
BitField<15, 1, u32> o0; // ordered
BitField<16, 5, u32> rs; // status register
BitField<21, 2, u32> l; // operation type
BitField<23, 7, u32> sig; // 0x10
BitField<30, 2, u32> size; // size
};
static_assert(Exclusive(0xC85FFC00).Verify());
static_assert(Exclusive(0xC85FFC00).AsOrdered() == 0xC85FFC00);
static_assert(Exclusive(0xC85F7C00).AsOrdered() == 0xC85FFC00);
static_assert(Exclusive(0xC8200440).AsOrdered() == 0xC8208440);
} // namespace Core::NCE

472
src/core/arm/nce/patch.cpp Normal file
View File

@ -0,0 +1,472 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/arm64/native_clock.h"
#include "common/bit_cast.h"
#include "common/literals.h"
#include "core/arm/nce/arm_nce.h"
#include "core/arm/nce/guest_context.h"
#include "core/arm/nce/instructions.h"
#include "core/arm/nce/patch.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/svc.h"
namespace Core::NCE {
using namespace Common::Literals;
using namespace oaknut::util;
using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
constexpr size_t MaxRelativeBranch = 128_MiB;
constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
Patcher::Patcher() : c(m_patch_instructions) {}
Patcher::~Patcher() = default;
void Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
const Kernel::CodeSet::Segment& code) {
// Write save context helper function.
c.l(m_save_context);
WriteSaveContext();
// Write load context helper function.
c.l(m_load_context);
WriteLoadContext();
// Retrieve text segment data.
const auto text = std::span{program_image}.subspan(code.offset, code.size);
const auto text_words =
std::span<const u32>{reinterpret_cast<const u32*>(text.data()), text.size() / sizeof(u32)};
// Loop through instructions, patching as needed.
for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) {
const u32 inst = text_words[i];
const auto AddRelocations = [&] {
const uintptr_t this_offset = i * sizeof(u32);
const uintptr_t next_offset = this_offset + sizeof(u32);
// Relocate from here to patch.
this->BranchToPatch(this_offset);
// Relocate from patch to next instruction.
return next_offset;
};
// SVC
if (auto svc = SVC{inst}; svc.Verify()) {
WriteSvcTrampoline(AddRelocations(), svc.GetValue());
continue;
}
// MRS Xn, TPIDR_EL0
// MRS Xn, TPIDRRO_EL0
if (auto mrs = MRS{inst};
mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) {
const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0
: oaknut::SystemReg::TPIDR_EL0;
const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())};
WriteMrsHandler(AddRelocations(), dest_reg, src_reg);
continue;
}
// MRS Xn, CNTPCT_EL0
if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) {
WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast<int>(mrs.GetRt())});
continue;
}
// MRS Xn, CNTFRQ_EL0
if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) {
UNREACHABLE();
}
// MSR TPIDR_EL0, Xn
if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) {
WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast<int>(msr.GetRt())});
continue;
}
}
// Determine patching mode for the final relocation step
const size_t image_size = program_image.size();
this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData;
}
void Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
const Kernel::CodeSet::Segment& code,
Kernel::PhysicalMemory& program_image,
EntryTrampolines* out_trampolines) {
const size_t patch_size = GetSectionSize();
const size_t image_size = program_image.size();
// Retrieve text segment data.
const auto text = std::span{program_image}.subspan(code.offset, code.size);
const auto text_words =
std::span<u32>{reinterpret_cast<u32*>(text.data()), text.size() / sizeof(u32)};
const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) {
oaknut::CodeGenerator rc{target};
if (mode == PatchMode::PreText) {
rc.B(rel.patch_offset - patch_size - rel.module_offset);
} else {
rc.B(image_size - rel.module_offset + rel.patch_offset);
}
};
const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) {
oaknut::CodeGenerator rc{target};
if (mode == PatchMode::PreText) {
rc.B(patch_size - rel.patch_offset + rel.module_offset);
} else {
rc.B(rel.module_offset - image_size - rel.patch_offset);
}
};
const auto RebasePatch = [&](ptrdiff_t patch_offset) {
if (mode == PatchMode::PreText) {
return GetInteger(load_base) + patch_offset;
} else {
return GetInteger(load_base) + image_size + patch_offset;
}
};
const auto RebasePc = [&](uintptr_t module_offset) {
if (mode == PatchMode::PreText) {
return GetInteger(load_base) + patch_size + module_offset;
} else {
return GetInteger(load_base) + module_offset;
}
};
// We are now ready to relocate!
for (const Relocation& rel : m_branch_to_patch_relocations) {
ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel);
}
for (const Relocation& rel : m_branch_to_module_relocations) {
ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32),
rel);
}
// Rewrite PC constants and record post trampolines
for (const Relocation& rel : m_write_module_pc_relocations) {
oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
rc.dx(RebasePc(rel.module_offset));
}
for (const Trampoline& rel : m_trampolines) {
out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)});
}
// Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not.
// Convert to ordered to preserve this assumption.
for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) {
const u32 inst = text_words[i];
if (auto exclusive = Exclusive{inst}; exclusive.Verify()) {
text_words[i] = exclusive.AsOrdered();
}
}
// Copy to program image
if (this->mode == PatchMode::PreText) {
std::memcpy(program_image.data(), m_patch_instructions.data(),
m_patch_instructions.size() * sizeof(u32));
} else {
program_image.resize(image_size + patch_size);
std::memcpy(program_image.data() + image_size, m_patch_instructions.data(),
m_patch_instructions.size() * sizeof(u32));
}
}
size_t Patcher::GetSectionSize() const noexcept {
return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE);
}
void Patcher::WriteLoadContext() {
// This function was called, which modifies X30, so use that as a scratch register.
// SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes
// of stack.
c.STR(X30, SP, 8);
c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
// Load system registers.
c.LDR(W0, X30, offsetof(GuestContext, fpsr));
c.MSR(oaknut::SystemReg::FPSR, X0);
c.LDR(W0, X30, offsetof(GuestContext, fpcr));
c.MSR(oaknut::SystemReg::FPCR, X0);
c.LDR(W0, X30, offsetof(GuestContext, nzcv));
c.MSR(oaknut::SystemReg::NZCV, X0);
// Load all vector registers.
static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
for (int i = 0; i <= 30; i += 2) {
c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
}
// Load all general-purpose registers except X30.
for (int i = 0; i <= 28; i += 2) {
c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
}
// Reload our return X30 from the stack and return.
// The patch code will reload the guest X30 for us.
c.LDR(X30, SP, 8);
c.RET();
}
void Patcher::WriteSaveContext() {
// This function was called, which modifies X30, so use that as a scratch register.
// SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of
// stack.
c.STR(X30, SP, 8);
c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
// Store all general-purpose registers except X30.
for (int i = 0; i <= 28; i += 2) {
c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
}
// Store all vector registers.
static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
for (int i = 0; i <= 30; i += 2) {
c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
}
// Store guest system registers, X30 and SP, using X0 as a scratch register.
c.STR(X0, SP, PRE_INDEXED, -16);
c.LDR(X0, SP, 16);
c.STR(X0, X30, 8 * 30);
c.ADD(X0, SP, 32);
c.STR(X0, X30, offsetof(GuestContext, sp));
c.MRS(X0, oaknut::SystemReg::FPSR);
c.STR(W0, X30, offsetof(GuestContext, fpsr));
c.MRS(X0, oaknut::SystemReg::FPCR);
c.STR(W0, X30, offsetof(GuestContext, fpcr));
c.MRS(X0, oaknut::SystemReg::NZCV);
c.STR(W0, X30, offsetof(GuestContext, nzcv));
c.LDR(X0, SP, POST_INDEXED, 16);
// Reload our return X30 from the stack, and return.
c.LDR(X30, SP, 8);
c.RET();
}
void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
// We are about to start saving state, so we need to lock the context.
this->LockContext();
// Store guest X30 to the stack. Then, save the context and restore the stack.
// This will save all registers except PC, but we know PC at patch time.
c.STR(X30, SP, PRE_INDEXED, -16);
c.BL(m_save_context);
c.LDR(X30, SP, POST_INDEXED, 16);
// Now that we've saved all registers, we can use any registers as scratch.
// Store PC + 4 to arm interface, since we know the instruction offset from the entry point.
oaknut::Label pc_after_svc;
c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
c.LDR(X2, pc_after_svc);
c.STR(X2, X1, offsetof(GuestContext, pc));
// Store SVC number to execute when we return
c.MOV(X2, svc_id);
c.STR(W2, X1, offsetof(GuestContext, svc_swi));
// We are calling a SVC. Clear esr_el1 and return it.
static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
oaknut::Label retry;
c.ADD(X2, X1, offsetof(GuestContext, esr_el1));
c.l(retry);
c.LDAXR(X0, X2);
c.STLXR(W3, XZR, X2);
c.CBNZ(W3, retry);
// Add "calling SVC" flag. Since this is X0, this is now our return value.
c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall));
// Offset the GuestContext pointer to the HostContext member.
// STP has limited range of [-512, 504] which we can't reach otherwise
// NB: Due to this all offsets below are from the start of HostContext.
c.ADD(X1, X1, offsetof(GuestContext, host_ctx));
// Reload host TPIDR_EL0 and SP.
static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
c.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
c.MOV(SP, X2);
c.MSR(oaknut::SystemReg::TPIDR_EL0, X3);
// Load callee-saved host registers and return to host.
static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
c.LDP(X19, X20, X1, HOST_REGS_OFF);
c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
c.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
c.RET();
// Write the post-SVC trampoline address, which will jump back to the guest after restoring its
// state.
m_trampolines.push_back({c.offset(), module_dest});
// Host called this location. Save the return address so we can
// unwind the stack properly when jumping back.
c.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
c.ADD(X0, X2, offsetof(GuestContext, host_ctx));
c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));
// Reload all guest registers except X30 and PC.
// The function also expects 16 bytes of stack already allocated.
c.STR(X30, SP, PRE_INDEXED, -16);
c.BL(m_load_context);
c.LDR(X30, SP, POST_INDEXED, 16);
// Use X1 as a scratch register to restore X30.
c.STR(X1, SP, PRE_INDEXED, -16);
c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
c.LDR(X1, SP, POST_INDEXED, 16);
// Unlock the context.
this->UnlockContext();
// Jump back to the instruction after the emulated SVC.
this->BranchToModule(module_dest);
// Store PC after call.
c.l(pc_after_svc);
this->WriteModulePc(module_dest);
}
void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
oaknut::SystemReg src_reg) {
// Retrieve emulated TLS register from GuestContext.
c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0);
if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) {
c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0));
} else {
c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0));
}
// Jump back to the instruction after the emulated MRS.
this->BranchToModule(module_dest);
}
void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) {
const auto scratch_reg = src_reg.index() == 0 ? X1 : X0;
c.STR(scratch_reg, SP, PRE_INDEXED, -16);
// Save guest value to NativeExecutionParameters::tpidr_el0.
c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0);
c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0));
// Restore scratch register.
c.LDR(scratch_reg, SP, POST_INDEXED, 16);
// Jump back to the instruction after the emulated MSR.
this->BranchToModule(module_dest);
}
void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) {
static Common::Arm64::NativeClock clock{};
const auto factor = clock.GetGuestCNTFRQFactor();
const auto raw_factor = Common::BitCast<std::array<u64, 2>>(factor);
const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1;
oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0;
oaknut::XReg scratch1 = use_x2_x3 ? X3 : X1;
oaknut::Label factorlo;
oaknut::Label factorhi;
// Save scratches.
c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16);
// Load counter value.
c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0);
// Load scaling factor.
c.LDR(scratch0, factorlo);
c.LDR(scratch1, factorhi);
// Multiply low bits and get result.
c.UMULH(scratch0, dest_reg, scratch0);
// Multiply high bits and add low bit result.
c.MADD(dest_reg, dest_reg, scratch1, scratch0);
// Reload scratches.
c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16);
// Jump back to the instruction after the emulated MRS.
this->BranchToModule(module_dest);
// Scaling factor constant values.
c.l(factorlo);
c.dx(raw_factor[0]);
c.l(factorhi);
c.dx(raw_factor[1]);
}
void Patcher::LockContext() {
oaknut::Label retry;
// Save scratches.
c.STP(X0, X1, SP, PRE_INDEXED, -16);
// Reload lock pointer.
c.l(retry);
c.CLREX();
c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
static_assert(SpinLockLocked == 0);
// Load-linked with acquire ordering.
c.LDAXR(W1, X0);
// If the value was SpinLockLocked, clear monitor and retry.
c.CBZ(W1, retry);
// Store-conditional SpinLockLocked with relaxed ordering.
c.STXR(W1, WZR, X0);
// If we failed to store, retry.
c.CBNZ(W1, retry);
// We succeeded! Reload scratches.
c.LDP(X0, X1, SP, POST_INDEXED, 16);
}
void Patcher::UnlockContext() {
// Save scratches.
c.STP(X0, X1, SP, PRE_INDEXED, -16);
// Load lock pointer.
c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
// Load SpinLockUnlocked.
c.MOV(W1, SpinLockUnlocked);
// Store value with release ordering.
c.STLR(W1, X0);
// Load scratches.
c.LDP(X0, X1, SP, POST_INDEXED, 16);
}
} // namespace Core::NCE

101
src/core/arm/nce/patch.h Normal file
View File

@ -0,0 +1,101 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <unordered_map>
#include <vector>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
#include <oaknut/code_block.hpp>
#include <oaknut/oaknut.hpp>
#pragma GCC diagnostic pop
#include "common/common_types.h"
#include "core/hle/kernel/code_set.h"
#include "core/hle/kernel/k_typed_address.h"
#include "core/hle/kernel/physical_memory.h"
namespace Core::NCE {
enum class PatchMode : u32 {
None,
PreText, ///< Patch section is inserted before .text
PostData, ///< Patch section is inserted after .data
};
using ModuleTextAddress = u64;
using PatchTextAddress = u64;
using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>;
class Patcher {
public:
explicit Patcher();
~Patcher();
void PatchText(const Kernel::PhysicalMemory& program_image,
const Kernel::CodeSet::Segment& code);
void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,
Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines);
size_t GetSectionSize() const noexcept;
[[nodiscard]] PatchMode GetPatchMode() const noexcept {
return mode;
}
private:
using ModuleDestLabel = uintptr_t;
struct Trampoline {
ptrdiff_t patch_offset;
uintptr_t module_offset;
};
void WriteLoadContext();
void WriteSaveContext();
void LockContext();
void UnlockContext();
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id);
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
oaknut::SystemReg src_reg);
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg);
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg);
private:
void BranchToPatch(uintptr_t module_dest) {
m_branch_to_patch_relocations.push_back({c.offset(), module_dest});
}
void BranchToModule(uintptr_t module_dest) {
m_branch_to_module_relocations.push_back({c.offset(), module_dest});
c.dw(0);
}
void WriteModulePc(uintptr_t module_dest) {
m_write_module_pc_relocations.push_back({c.offset(), module_dest});
c.dx(0);
}
private:
// List of patch instructions we have generated.
std::vector<u32> m_patch_instructions{};
// Relocation type for relative branch from module to patch.
struct Relocation {
ptrdiff_t patch_offset; ///< Offset in bytes from the start of the patch section.
uintptr_t module_offset; ///< Offset in bytes from the start of the text section.
};
oaknut::VectorCodeGenerator c;
std::vector<Trampoline> m_trampolines;
std::vector<Relocation> m_branch_to_patch_relocations{};
std::vector<Relocation> m_branch_to_module_relocations{};
std::vector<Relocation> m_write_module_pc_relocations{};
oaknut::Label m_save_context{};
oaknut::Label m_load_context{};
PatchMode mode{PatchMode::None};
};
} // namespace Core::NCE

View File

@ -211,6 +211,8 @@ void CpuManager::RunThread(std::stop_token token, std::size_t core) {
system.GPU().ObtainContext(); system.GPU().ObtainContext();
} }
system.ArmInterface(core).Initialize();
auto& kernel = system.Kernel(); auto& kernel = system.Kernel();
auto& scheduler = *kernel.CurrentScheduler(); auto& scheduler = *kernel.CurrentScheduler();
auto* thread = scheduler.GetSchedulerCurrentThread(); auto* thread = scheduler.GetSchedulerCurrentThread();

View File

@ -6,7 +6,7 @@
namespace Core { namespace Core {
#ifdef ANDROID #ifdef HAS_NCE
constexpr size_t VirtualReserveSize = 1ULL << 38; constexpr size_t VirtualReserveSize = 1ULL << 38;
#else #else
constexpr size_t VirtualReserveSize = 1ULL << 39; constexpr size_t VirtualReserveSize = 1ULL << 39;
@ -15,6 +15,7 @@ constexpr size_t VirtualReserveSize = 1ULL << 39;
DeviceMemory::DeviceMemory() DeviceMemory::DeviceMemory()
: buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(), : buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(),
VirtualReserveSize} {} VirtualReserveSize} {}
DeviceMemory::~DeviceMemory() = default; DeviceMemory::~DeviceMemory() = default;
} // namespace Core } // namespace Core

View File

@ -75,12 +75,26 @@ struct CodeSet final {
return segments[2]; return segments[2];
} }
#ifdef HAS_NCE
Segment& PatchSegment() {
return patch_segment;
}
const Segment& PatchSegment() const {
return patch_segment;
}
#endif
/// The overall data that backs this code set. /// The overall data that backs this code set.
Kernel::PhysicalMemory memory; Kernel::PhysicalMemory memory;
/// The segments that comprise this code set. /// The segments that comprise this code set.
std::array<Segment, 3> segments; std::array<Segment, 3> segments;
#ifdef HAS_NCE
Segment patch_segment;
#endif
/// The entry point address for this code set. /// The entry point address for this code set.
KProcessAddress entrypoint = 0; KProcessAddress entrypoint = 0;
}; };

View File

@ -25,8 +25,8 @@ constexpr std::array<KAddressSpaceInfo, 13> AddressSpaceInfos{{
{ .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, }, { .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, },
{ .bit_width = 36, .address = Size_Invalid, .size = 8_GiB , .type = KAddressSpaceInfo::Type::Heap, }, { .bit_width = 36, .address = Size_Invalid, .size = 8_GiB , .type = KAddressSpaceInfo::Type::Heap, },
{ .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, }, { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, },
#ifdef ANDROID #ifdef HAS_NCE
// With Android, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region. // With NCE, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region.
{ .bit_width = 39, .address = 128_MiB , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, { .bit_width = 39, .address = 128_MiB , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, },
#else #else
{ .bit_width = 39, .address = 128_MiB , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, { .bit_width = 39, .address = 128_MiB , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, },

View File

@ -88,6 +88,22 @@ Result FlushDataCache(AddressType addr, u64 size) {
R_SUCCEED(); R_SUCCEED();
} }
constexpr Common::MemoryPermission ConvertToMemoryPermission(KMemoryPermission perm) {
Common::MemoryPermission perms{};
if (True(perm & KMemoryPermission::UserRead)) {
perms |= Common::MemoryPermission::Read;
}
if (True(perm & KMemoryPermission::UserWrite)) {
perms |= Common::MemoryPermission::Write;
}
#ifdef HAS_NCE
if (True(perm & KMemoryPermission::UserExecute)) {
perms |= Common::MemoryPermission::Execute;
}
#endif
return perms;
}
} // namespace } // namespace
void KPageTableBase::MemoryRange::Open() { void KPageTableBase::MemoryRange::Open() {
@ -170,7 +186,8 @@ Result KPageTableBase::InitializeForProcess(Svc::CreateProcessFlag as_type, bool
KMemoryManager::Pool pool, KProcessAddress code_address, KMemoryManager::Pool pool, KProcessAddress code_address,
size_t code_size, KSystemResource* system_resource, size_t code_size, KSystemResource* system_resource,
KResourceLimit* resource_limit, KResourceLimit* resource_limit,
Core::Memory::Memory& memory) { Core::Memory::Memory& memory,
KProcessAddress aslr_space_start) {
// Calculate region extents. // Calculate region extents.
const size_t as_width = GetAddressSpaceWidth(as_type); const size_t as_width = GetAddressSpaceWidth(as_type);
const KProcessAddress start = 0; const KProcessAddress start = 0;
@ -211,7 +228,8 @@ Result KPageTableBase::InitializeForProcess(Svc::CreateProcessFlag as_type, bool
heap_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Heap); heap_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Heap);
stack_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Stack); stack_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Stack);
kernel_map_region_size = GetSpaceSize(KAddressSpaceInfo::Type::MapSmall); kernel_map_region_size = GetSpaceSize(KAddressSpaceInfo::Type::MapSmall);
m_code_region_start = GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit); m_code_region_start = m_address_space_start + aslr_space_start +
GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit);
m_code_region_end = m_code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::Map39Bit); m_code_region_end = m_code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::Map39Bit);
m_alias_code_region_start = m_code_region_start; m_alias_code_region_start = m_code_region_start;
m_alias_code_region_end = m_code_region_end; m_alias_code_region_end = m_code_region_end;
@ -5643,7 +5661,8 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a
case OperationType::Map: { case OperationType::Map: {
ASSERT(virt_addr != 0); ASSERT(virt_addr != 0);
ASSERT(Common::IsAligned(GetInteger(virt_addr), PageSize)); ASSERT(Common::IsAligned(GetInteger(virt_addr), PageSize));
m_memory->MapMemoryRegion(*m_impl, virt_addr, num_pages * PageSize, phys_addr); m_memory->MapMemoryRegion(*m_impl, virt_addr, num_pages * PageSize, phys_addr,
ConvertToMemoryPermission(properties.perm));
// Open references to pages, if we should. // Open references to pages, if we should.
if (this->IsHeapPhysicalAddress(phys_addr)) { if (this->IsHeapPhysicalAddress(phys_addr)) {
@ -5658,8 +5677,18 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a
} }
case OperationType::ChangePermissions: case OperationType::ChangePermissions:
case OperationType::ChangePermissionsAndRefresh: case OperationType::ChangePermissionsAndRefresh:
case OperationType::ChangePermissionsAndRefreshAndFlush: case OperationType::ChangePermissionsAndRefreshAndFlush: {
const bool read = True(properties.perm & Kernel::KMemoryPermission::UserRead);
const bool write = True(properties.perm & Kernel::KMemoryPermission::UserWrite);
// todo: this doesn't really belong here and should go into m_memory to handle rasterizer
// access todo: ignore exec on non-direct-mapped case
const bool exec = True(properties.perm & Kernel::KMemoryPermission::UserExecute);
if (Settings::IsFastmemEnabled()) {
m_system.DeviceMemory().buffer.Protect(GetInteger(virt_addr), num_pages * PageSize,
read, write, exec);
}
R_SUCCEED(); R_SUCCEED();
}
default: default:
UNREACHABLE(); UNREACHABLE();
} }
@ -5687,7 +5716,8 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a
const size_t size{node.GetNumPages() * PageSize}; const size_t size{node.GetNumPages() * PageSize};
// Map the pages. // Map the pages.
m_memory->MapMemoryRegion(*m_impl, virt_addr, size, node.GetAddress()); m_memory->MapMemoryRegion(*m_impl, virt_addr, size, node.GetAddress(),
ConvertToMemoryPermission(properties.perm));
virt_addr += size; virt_addr += size;
} }

View File

@ -235,7 +235,8 @@ public:
bool enable_device_address_space_merge, bool from_back, bool enable_device_address_space_merge, bool from_back,
KMemoryManager::Pool pool, KProcessAddress code_address, KMemoryManager::Pool pool, KProcessAddress code_address,
size_t code_size, KSystemResource* system_resource, size_t code_size, KSystemResource* system_resource,
KResourceLimit* resource_limit, Core::Memory::Memory& memory); KResourceLimit* resource_limit, Core::Memory::Memory& memory,
KProcessAddress aslr_space_start);
void Finalize(); void Finalize();

View File

@ -300,7 +300,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool, R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
params.code_address, params.code_num_pages * PageSize, params.code_address, params.code_num_pages * PageSize,
m_system_resource, res_limit, this->GetMemory())); m_system_resource, res_limit, this->GetMemory(), 0));
} }
ON_RESULT_FAILURE_2 { ON_RESULT_FAILURE_2 {
m_page_table.Finalize(); m_page_table.Finalize();
@ -332,7 +332,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
Result KProcess::Initialize(const Svc::CreateProcessParameter& params, Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
std::span<const u32> user_caps, KResourceLimit* res_limit, std::span<const u32> user_caps, KResourceLimit* res_limit,
KMemoryManager::Pool pool) { KMemoryManager::Pool pool, KProcessAddress aslr_space_start) {
ASSERT(res_limit != nullptr); ASSERT(res_limit != nullptr);
// Set members. // Set members.
@ -393,7 +393,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool, R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
params.code_address, code_size, m_system_resource, res_limit, params.code_address, code_size, m_system_resource, res_limit,
this->GetMemory())); this->GetMemory(), aslr_space_start));
} }
ON_RESULT_FAILURE_2 { ON_RESULT_FAILURE_2 {
m_page_table.Finalize(); m_page_table.Finalize();
@ -1128,7 +1128,7 @@ KProcess::KProcess(KernelCore& kernel)
KProcess::~KProcess() = default; KProcess::~KProcess() = default;
Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
bool is_hbl) { KProcessAddress aslr_space_start, bool is_hbl) {
// Create a resource limit for the process. // Create a resource limit for the process.
const auto physical_memory_size = const auto physical_memory_size =
m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application); m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application);
@ -1179,7 +1179,7 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
.name = {}, .name = {},
.version = {}, .version = {},
.program_id = metadata.GetTitleID(), .program_id = metadata.GetTitleID(),
.code_address = code_address, .code_address = code_address + GetInteger(aslr_space_start),
.code_num_pages = static_cast<s32>(code_size / PageSize), .code_num_pages = static_cast<s32>(code_size / PageSize),
.flags = flag, .flags = flag,
.reslimit = Svc::InvalidHandle, .reslimit = Svc::InvalidHandle,
@ -1193,7 +1193,7 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
// Initialize for application process. // Initialize for application process.
R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit, R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit,
KMemoryManager::Pool::Application)); KMemoryManager::Pool::Application, aslr_space_start));
// Assign remaining properties. // Assign remaining properties.
m_is_hbl = is_hbl; m_is_hbl = is_hbl;
@ -1214,6 +1214,17 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute); ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute);
ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read); ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read);
ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite);
#ifdef ARCHITECTURE_arm64
if (Settings::IsNceEnabled()) {
auto& buffer = m_kernel.System().DeviceMemory().buffer;
const auto& code = code_set.CodeSegment();
const auto& patch = code_set.PatchSegment();
buffer.Protect(GetInteger(base_addr + code.addr), code.size, true, true, true);
buffer.Protect(GetInteger(base_addr + patch.addr), patch.size, true, true, true);
ReprotectSegment(code_set.PatchSegment(), Svc::MemoryPermission::None);
}
#endif
} }
bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) {

View File

@ -120,6 +120,9 @@ private:
std::atomic<s64> m_num_ipc_messages{}; std::atomic<s64> m_num_ipc_messages{};
std::atomic<s64> m_num_ipc_replies{}; std::atomic<s64> m_num_ipc_replies{};
std::atomic<s64> m_num_ipc_receives{}; std::atomic<s64> m_num_ipc_receives{};
#ifdef HAS_NCE
std::unordered_map<u64, u64> m_post_handlers{};
#endif
private: private:
Result StartTermination(); Result StartTermination();
@ -150,7 +153,8 @@ public:
std::span<const u32> caps, KResourceLimit* res_limit, std::span<const u32> caps, KResourceLimit* res_limit,
KMemoryManager::Pool pool, bool immortal); KMemoryManager::Pool pool, bool immortal);
Result Initialize(const Svc::CreateProcessParameter& params, std::span<const u32> user_caps, Result Initialize(const Svc::CreateProcessParameter& params, std::span<const u32> user_caps,
KResourceLimit* res_limit, KMemoryManager::Pool pool); KResourceLimit* res_limit, KMemoryManager::Pool pool,
KProcessAddress aslr_space_start);
void Exit(); void Exit();
const char* GetName() const { const char* GetName() const {
@ -466,6 +470,12 @@ public:
static void Switch(KProcess* cur_process, KProcess* next_process); static void Switch(KProcess* cur_process, KProcess* next_process);
#ifdef HAS_NCE
std::unordered_map<u64, u64>& GetPostHandlers() noexcept {
return m_post_handlers;
}
#endif
public: public:
// Attempts to insert a watchpoint into a free slot. Returns false if none are available. // Attempts to insert a watchpoint into a free slot. Returns false if none are available.
bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type);
@ -479,7 +489,7 @@ public:
public: public:
Result LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, Result LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
bool is_hbl); KProcessAddress aslr_space_start, bool is_hbl);
void LoadModule(CodeSet code_set, KProcessAddress base_addr); void LoadModule(CodeSet code_set, KProcessAddress base_addr);

View File

@ -23,10 +23,11 @@ public:
Result Initialize(Svc::CreateProcessFlag as_type, bool enable_aslr, bool enable_das_merge, Result Initialize(Svc::CreateProcessFlag as_type, bool enable_aslr, bool enable_das_merge,
bool from_back, KMemoryManager::Pool pool, KProcessAddress code_address, bool from_back, KMemoryManager::Pool pool, KProcessAddress code_address,
size_t code_size, KSystemResource* system_resource, size_t code_size, KSystemResource* system_resource,
KResourceLimit* resource_limit, Core::Memory::Memory& memory) { KResourceLimit* resource_limit, Core::Memory::Memory& memory,
R_RETURN(m_page_table.InitializeForProcess(as_type, enable_aslr, enable_das_merge, KProcessAddress aslr_space_start) {
from_back, pool, code_address, code_size, R_RETURN(m_page_table.InitializeForProcess(
system_resource, resource_limit, memory)); as_type, enable_aslr, enable_das_merge, from_back, pool, code_address, code_size,
system_resource, resource_limit, memory, aslr_space_start));
} }
void Finalize() { void Finalize() {

View File

@ -655,6 +655,21 @@ public:
return m_stack_top; return m_stack_top;
} }
public:
// TODO: This shouldn't be defined in kernel namespace
struct NativeExecutionParameters {
u64 tpidr_el0{};
u64 tpidrro_el0{};
void* native_context{};
std::atomic<u32> lock{1};
bool is_running{};
u32 magic{Common::MakeMagic('Y', 'U', 'Z', 'U')};
};
NativeExecutionParameters& GetNativeExecutionParameters() {
return m_native_execution_parameters;
}
private: private:
KThread* RemoveWaiterByKey(bool* out_has_waiters, KProcessAddress key, KThread* RemoveWaiterByKey(bool* out_has_waiters, KProcessAddress key,
bool is_kernel_address_key); bool is_kernel_address_key);
@ -914,6 +929,7 @@ private:
ThreadWaitReasonForDebugging m_wait_reason_for_debugging{}; ThreadWaitReasonForDebugging m_wait_reason_for_debugging{};
uintptr_t m_argument{}; uintptr_t m_argument{};
KProcessAddress m_stack_top{}; KProcessAddress m_stack_top{};
NativeExecutionParameters m_native_execution_parameters{};
public: public:
using ConditionVariableThreadTreeType = ConditionVariableThreadTree; using ConditionVariableThreadTreeType = ConditionVariableThreadTree;

View File

@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "core/arm/dynarmic/arm_dynarmic_32.h" #include "core/arm/dynarmic/arm_dynarmic_32.h"
#include "core/arm/dynarmic/arm_dynarmic_64.h" #include "core/arm/dynarmic/arm_dynarmic_64.h"
#ifdef HAS_NCE
#include "core/arm/nce/arm_nce.h"
#endif
#include "core/core.h" #include "core/core.h"
#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/kernel.h"
@ -14,7 +18,8 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu
: m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} { : m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} {
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
// TODO(bunnei): Initialization relies on a core being available. We may later replace this with // TODO(bunnei): Initialization relies on a core being available. We may later replace this with
// a 32-bit instance of Dynarmic. This should be abstracted out to a CPU manager. // an NCE interface or a 32-bit instance of Dynarmic. This should be abstracted out to a CPU
// manager.
auto& kernel = system.Kernel(); auto& kernel = system.Kernel();
m_arm_interface = std::make_unique<Core::ARM_Dynarmic_64>( m_arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
system, kernel.IsMulticore(), system, kernel.IsMulticore(),
@ -28,6 +33,13 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu
PhysicalCore::~PhysicalCore() = default; PhysicalCore::~PhysicalCore() = default;
void PhysicalCore::Initialize(bool is_64_bit) { void PhysicalCore::Initialize(bool is_64_bit) {
#if defined(HAS_NCE)
if (Settings::IsNceEnabled()) {
m_arm_interface = std::make_unique<Core::ARM_NCE>(m_system, m_system.Kernel().IsMulticore(),
m_core_index);
return;
}
#endif
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
auto& kernel = m_system.Kernel(); auto& kernel = m_system.Kernel();
if (!is_64_bit) { if (!is_64_bit) {

View File

@ -32,7 +32,7 @@ public:
{10200, nullptr, "SendFriendRequestForApplication"}, {10200, nullptr, "SendFriendRequestForApplication"},
{10211, nullptr, "AddFacedFriendRequestForApplication"}, {10211, nullptr, "AddFacedFriendRequestForApplication"},
{10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"}, {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
{10420, nullptr, "IsBlockedUserListCacheAvailable"}, {10420, &IFriendService::CheckBlockedUserListAvailability, "CheckBlockedUserListAvailability"},
{10421, nullptr, "EnsureBlockedUserListAvailable"}, {10421, nullptr, "EnsureBlockedUserListAvailable"},
{10500, nullptr, "GetProfileList"}, {10500, nullptr, "GetProfileList"},
{10600, nullptr, "DeclareOpenOnlinePlaySession"}, {10600, nullptr, "DeclareOpenOnlinePlaySession"},
@ -206,6 +206,17 @@ private:
rb.Push(true); rb.Push(true);
} }
void CheckBlockedUserListAvailability(HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto uuid{rp.PopRaw<Common::UUID>()};
LOG_WARNING(Service_Friend, "(STUBBED) called, uuid=0x{}", uuid.RawString());
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
rb.Push(true);
}
KernelHelpers::ServiceContext service_context; KernelHelpers::ServiceContext service_context;
Kernel::KEvent* completion_event; Kernel::KEvent* completion_event;

View File

@ -3,6 +3,7 @@
#include <cstring> #include <cstring>
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h" #include "core/core.h"
#include "core/file_sys/content_archive.h" #include "core/file_sys/content_archive.h"
#include "core/file_sys/control_metadata.h" #include "core/file_sys/control_metadata.h"
@ -14,6 +15,10 @@
#include "core/loader/deconstructed_rom_directory.h" #include "core/loader/deconstructed_rom_directory.h"
#include "core/loader/nso.h" #include "core/loader/nso.h"
#ifdef HAS_NCE
#include "core/arm/nce/patch.h"
#endif
namespace Loader { namespace Loader {
AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_, AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_,
@ -124,21 +129,43 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
} }
metadata.Print(); metadata.Print();
const auto static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", // Enable NCE only for programs with 39-bit address space.
"subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", const bool is_39bit =
"subsdk8", "subsdk9", "sdk"}; metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is39Bit;
Settings::SetNceEnabled(is_39bit);
const std::array static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2",
"subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7",
"subsdk8", "subsdk9", "sdk"};
std::size_t code_size{};
// Define an nce patch context for each potential module.
#ifdef HAS_NCE
std::array<Core::NCE::Patcher, 13> module_patchers;
#endif
const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* {
#ifdef HAS_NCE
if (Settings::IsNceEnabled()) {
return &module_patchers[i];
}
#endif
return nullptr;
};
// Use the NSO module loader to figure out the code layout // Use the NSO module loader to figure out the code layout
std::size_t code_size{}; for (size_t i = 0; i < static_modules.size(); i++) {
for (const auto& module : static_modules) { const auto& module = static_modules[i];
const FileSys::VirtualFile module_file{dir->GetFile(module)}; const FileSys::VirtualFile module_file{dir->GetFile(module)};
if (!module_file) { if (!module_file) {
continue; continue;
} }
const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; const bool should_pass_arguments = std::strcmp(module, "rtld") == 0;
const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( const auto tentative_next_load_addr =
process, system, *module_file, code_size, should_pass_arguments, false); AppLoader_NSO::LoadModule(process, system, *module_file, code_size,
should_pass_arguments, false, {}, GetPatcher(i));
if (!tentative_next_load_addr) { if (!tentative_next_load_addr) {
return {ResultStatus::ErrorLoadingNSO, {}}; return {ResultStatus::ErrorLoadingNSO, {}};
} }
@ -146,8 +173,18 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
code_size = *tentative_next_load_addr; code_size = *tentative_next_load_addr;
} }
// Enable direct memory mapping in case of NCE.
const u64 fastmem_base = [&]() -> size_t {
if (Settings::IsNceEnabled()) {
auto& buffer = system.DeviceMemory().buffer;
buffer.EnableDirectMappedAddress();
return reinterpret_cast<u64>(buffer.VirtualBasePointer());
}
return 0;
}();
// Setup the process code layout // Setup the process code layout
if (process.LoadFromMetadata(metadata, code_size, is_hbl).IsError()) { if (process.LoadFromMetadata(metadata, code_size, fastmem_base, is_hbl).IsError()) {
return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; return {ResultStatus::ErrorUnableToParseKernelMetadata, {}};
} }
@ -157,7 +194,8 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
VAddr next_load_addr{base_address}; VAddr next_load_addr{base_address};
const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(), const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(),
system.GetContentProvider()}; system.GetContentProvider()};
for (const auto& module : static_modules) { for (size_t i = 0; i < static_modules.size(); i++) {
const auto& module = static_modules[i];
const FileSys::VirtualFile module_file{dir->GetFile(module)}; const FileSys::VirtualFile module_file{dir->GetFile(module)};
if (!module_file) { if (!module_file) {
continue; continue;
@ -165,15 +203,16 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
const VAddr load_addr{next_load_addr}; const VAddr load_addr{next_load_addr};
const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; const bool should_pass_arguments = std::strcmp(module, "rtld") == 0;
const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( const auto tentative_next_load_addr =
process, system, *module_file, load_addr, should_pass_arguments, true, pm); AppLoader_NSO::LoadModule(process, system, *module_file, load_addr,
should_pass_arguments, true, pm, GetPatcher(i));
if (!tentative_next_load_addr) { if (!tentative_next_load_addr) {
return {ResultStatus::ErrorLoadingNSO, {}}; return {ResultStatus::ErrorLoadingNSO, {}};
} }
next_load_addr = *tentative_next_load_addr; next_load_addr = *tentative_next_load_addr;
modules.insert_or_assign(load_addr, module); modules.insert_or_assign(load_addr, module);
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr); LOG_DEBUG(Loader, "loaded module {} @ {:#X}", module, load_addr);
} }
// Find the RomFS by searching for a ".romfs" file in this directory // Find the RomFS by searching for a ".romfs" file in this directory

View File

@ -91,7 +91,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::KProcess& process,
// Setup the process code layout // Setup the process code layout
if (process if (process
.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), false) .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), 0,
false)
.IsError()) { .IsError()) {
return {ResultStatus::ErrorNotInitialized, {}}; return {ResultStatus::ErrorNotInitialized, {}};
} }

View File

@ -22,6 +22,10 @@
#include "core/loader/nso.h" #include "core/loader/nso.h"
#include "core/memory.h" #include "core/memory.h"
#ifdef HAS_NCE
#include "core/arm/nce/patch.h"
#endif
namespace Loader { namespace Loader {
struct NroSegmentHeader { struct NroSegmentHeader {
@ -139,7 +143,8 @@ static constexpr u32 PageAlignSize(u32 size) {
return static_cast<u32>((size + Core::Memory::YUZU_PAGEMASK) & ~Core::Memory::YUZU_PAGEMASK); return static_cast<u32>((size + Core::Memory::YUZU_PAGEMASK) & ~Core::Memory::YUZU_PAGEMASK);
} }
static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data) { static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process,
const std::vector<u8>& data) {
if (data.size() < sizeof(NroHeader)) { if (data.size() < sizeof(NroHeader)) {
return {}; return {};
} }
@ -194,14 +199,61 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
codeset.DataSegment().size += bss_size; codeset.DataSegment().size += bss_size;
program_image.resize(static_cast<u32>(program_image.size()) + bss_size); program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
size_t image_size = program_image.size();
#ifdef HAS_NCE
const auto& code = codeset.CodeSegment();
// NROs always have a 39-bit address space.
Settings::SetNceEnabled(true);
// Create NCE patcher
Core::NCE::Patcher patch{};
if (Settings::IsNceEnabled()) {
// Patch SVCs and MRS calls in the guest code
patch.PatchText(program_image, code);
// We only support PostData patching for NROs.
ASSERT(patch.GetPatchMode() == Core::NCE::PatchMode::PostData);
// Update patch section.
auto& patch_segment = codeset.PatchSegment();
patch_segment.addr = image_size;
patch_segment.size = static_cast<u32>(patch.GetSectionSize());
// Add patch section size to the module size.
image_size += patch_segment.size;
}
#endif
// Enable direct memory mapping in case of NCE.
const u64 fastmem_base = [&]() -> size_t {
if (Settings::IsNceEnabled()) {
auto& buffer = system.DeviceMemory().buffer;
buffer.EnableDirectMappedAddress();
return reinterpret_cast<u64>(buffer.VirtualBasePointer());
}
return 0;
}();
// Setup the process code layout // Setup the process code layout
if (process if (process
.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), false) .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), image_size, fastmem_base,
false)
.IsError()) { .IsError()) {
return false; return false;
} }
// Relocate code patch and copy to the program_image if running under NCE.
// This needs to be after LoadFromMetadata so we can use the process entry point.
#ifdef HAS_NCE
if (Settings::IsNceEnabled()) {
patch.RelocateAndCopy(process.GetEntryPoint(), code, program_image,
&process.GetPostHandlers());
}
#endif
// Load codeset for current process // Load codeset for current process
codeset.memory = std::move(program_image); codeset.memory = std::move(program_image);
process.LoadModule(std::move(codeset), process.GetEntryPoint()); process.LoadModule(std::move(codeset), process.GetEntryPoint());
@ -209,8 +261,9 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
return true; return true;
} }
bool AppLoader_NRO::LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file) { bool AppLoader_NRO::LoadNro(Core::System& system, Kernel::KProcess& process,
return LoadNroImpl(process, nro_file.ReadAllBytes()); const FileSys::VfsFile& nro_file) {
return LoadNroImpl(system, process, nro_file.ReadAllBytes());
} }
AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::System& system) { AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::System& system) {
@ -218,7 +271,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::S
return {ResultStatus::ErrorAlreadyLoaded, {}}; return {ResultStatus::ErrorAlreadyLoaded, {}};
} }
if (!LoadNro(process, *file)) { if (!LoadNro(system, process, *file)) {
return {ResultStatus::ErrorLoadingNRO, {}}; return {ResultStatus::ErrorLoadingNRO, {}};
} }

View File

@ -54,7 +54,7 @@ public:
bool IsRomFSUpdatable() const override; bool IsRomFSUpdatable() const override;
private: private:
bool LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file); bool LoadNro(Core::System& system, Kernel::KProcess& process, const FileSys::VfsFile& nro_file);
std::vector<u8> icon_data; std::vector<u8> icon_data;
std::unique_ptr<FileSys::NACP> nacp; std::unique_ptr<FileSys::NACP> nacp;

View File

@ -20,6 +20,10 @@
#include "core/loader/nso.h" #include "core/loader/nso.h"
#include "core/memory.h" #include "core/memory.h"
#ifdef HAS_NCE
#include "core/arm/nce/patch.h"
#endif
namespace Loader { namespace Loader {
namespace { namespace {
struct MODHeader { struct MODHeader {
@ -72,7 +76,8 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& in_file) {
std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::System& system, std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::System& system,
const FileSys::VfsFile& nso_file, VAddr load_base, const FileSys::VfsFile& nso_file, VAddr load_base,
bool should_pass_arguments, bool load_into_process, bool should_pass_arguments, bool load_into_process,
std::optional<FileSys::PatchManager> pm) { std::optional<FileSys::PatchManager> pm,
Core::NCE::Patcher* patch) {
if (nso_file.GetSize() < sizeof(NSOHeader)) { if (nso_file.GetSize() < sizeof(NSOHeader)) {
return std::nullopt; return std::nullopt;
} }
@ -86,6 +91,16 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
return std::nullopt; return std::nullopt;
} }
// Allocate some space at the beginning if we are patching in PreText mode.
const size_t module_start = [&]() -> size_t {
#ifdef HAS_NCE
if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PreText) {
return patch->GetSectionSize();
}
#endif
return 0;
}();
// Build program image // Build program image
Kernel::CodeSet codeset; Kernel::CodeSet codeset;
Kernel::PhysicalMemory program_image; Kernel::PhysicalMemory program_image;
@ -95,11 +110,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
if (nso_header.IsSegmentCompressed(i)) { if (nso_header.IsSegmentCompressed(i)) {
data = DecompressSegment(data, nso_header.segments[i]); data = DecompressSegment(data, nso_header.segments[i]);
} }
program_image.resize(nso_header.segments[i].location + static_cast<u32>(data.size())); program_image.resize(module_start + nso_header.segments[i].location +
std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(), static_cast<u32>(data.size()));
data.size()); std::memcpy(program_image.data() + module_start + nso_header.segments[i].location,
codeset.segments[i].addr = nso_header.segments[i].location; data.data(), data.size());
codeset.segments[i].offset = nso_header.segments[i].location; codeset.segments[i].addr = module_start + nso_header.segments[i].location;
codeset.segments[i].offset = module_start + nso_header.segments[i].location;
codeset.segments[i].size = nso_header.segments[i].size; codeset.segments[i].size = nso_header.segments[i].size;
} }
@ -118,7 +134,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
} }
codeset.DataSegment().size += nso_header.segments[2].bss_size; codeset.DataSegment().size += nso_header.segments[2].bss_size;
const u32 image_size{ u32 image_size{
PageAlignSize(static_cast<u32>(program_image.size()) + nso_header.segments[2].bss_size)}; PageAlignSize(static_cast<u32>(program_image.size()) + nso_header.segments[2].bss_size)};
program_image.resize(image_size); program_image.resize(image_size);
@ -129,16 +145,45 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
// Apply patches if necessary // Apply patches if necessary
const auto name = nso_file.GetName(); const auto name = nso_file.GetName();
if (pm && (pm->HasNSOPatch(nso_header.build_id, name) || Settings::values.dump_nso)) { if (pm && (pm->HasNSOPatch(nso_header.build_id, name) || Settings::values.dump_nso)) {
std::vector<u8> pi_header(sizeof(NSOHeader) + program_image.size()); std::span<u8> patchable_section(program_image.data() + module_start,
program_image.size() - module_start);
std::vector<u8> pi_header(sizeof(NSOHeader) + patchable_section.size());
std::memcpy(pi_header.data(), &nso_header, sizeof(NSOHeader)); std::memcpy(pi_header.data(), &nso_header, sizeof(NSOHeader));
std::memcpy(pi_header.data() + sizeof(NSOHeader), program_image.data(), std::memcpy(pi_header.data() + sizeof(NSOHeader), patchable_section.data(),
program_image.size()); patchable_section.size());
pi_header = pm->PatchNSO(pi_header, name); pi_header = pm->PatchNSO(pi_header, name);
std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), patchable_section.data());
} }
#ifdef HAS_NCE
// If we are computing the process code layout and using nce backend, patch.
const auto& code = codeset.CodeSegment();
if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::None) {
// Patch SVCs and MRS calls in the guest code
patch->PatchText(program_image, code);
// Add patch section size to the module size.
image_size += patch->GetSectionSize();
} else if (patch) {
// Relocate code patch and copy to the program_image.
patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers());
// Update patch section.
auto& patch_segment = codeset.PatchSegment();
patch_segment.addr =
patch->GetPatchMode() == Core::NCE::PatchMode::PreText ? 0 : image_size;
patch_segment.size = static_cast<u32>(patch->GetSectionSize());
// Add patch section size to the module size. In PreText mode image_size
// already contains the patch segment as part of module_start.
if (patch->GetPatchMode() == Core::NCE::PatchMode::PostData) {
image_size += patch_segment.size;
}
}
#endif
// If we aren't actually loading (i.e. just computing the process code layout), we are done // If we aren't actually loading (i.e. just computing the process code layout), we are done
if (!load_into_process) { if (!load_into_process) {
return load_base + image_size; return load_base + image_size;

View File

@ -15,6 +15,10 @@ namespace Core {
class System; class System;
} }
namespace Core::NCE {
class Patcher;
}
namespace Kernel { namespace Kernel {
class KProcess; class KProcess;
} }
@ -88,7 +92,8 @@ public:
static std::optional<VAddr> LoadModule(Kernel::KProcess& process, Core::System& system, static std::optional<VAddr> LoadModule(Kernel::KProcess& process, Core::System& system,
const FileSys::VfsFile& nso_file, VAddr load_base, const FileSys::VfsFile& nso_file, VAddr load_base,
bool should_pass_arguments, bool load_into_process, bool should_pass_arguments, bool load_into_process,
std::optional<FileSys::PatchManager> pm = {}); std::optional<FileSys::PatchManager> pm = {},
Core::NCE::Patcher* patch = nullptr);
LoadResult Load(Kernel::KProcess& process, Core::System& system) override; LoadResult Load(Kernel::KProcess& process, Core::System& system) override;

View File

@ -53,7 +53,7 @@ struct Memory::Impl {
} }
void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size, void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
Common::PhysicalAddress target) { Common::PhysicalAddress target, Common::MemoryPermission perms) {
ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", GetInteger(base)); ASSERT_MSG((base & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", GetInteger(base));
ASSERT_MSG(target >= DramMemoryMap::Base, "Out of bounds target: {:016X}", ASSERT_MSG(target >= DramMemoryMap::Base, "Out of bounds target: {:016X}",
@ -63,7 +63,7 @@ struct Memory::Impl {
if (Settings::IsFastmemEnabled()) { if (Settings::IsFastmemEnabled()) {
system.DeviceMemory().buffer.Map(GetInteger(base), system.DeviceMemory().buffer.Map(GetInteger(base),
GetInteger(target) - DramMemoryMap::Base, size); GetInteger(target) - DramMemoryMap::Base, size, perms);
} }
} }
@ -831,8 +831,8 @@ void Memory::SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) {
} }
void Memory::MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size, void Memory::MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
Common::PhysicalAddress target) { Common::PhysicalAddress target, Common::MemoryPermission perms) {
impl->MapMemoryRegion(page_table, base, size, target); impl->MapMemoryRegion(page_table, base, size, target, perms);
} }
void Memory::UnmapRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size) { void Memory::UnmapRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size) {
@ -1001,4 +1001,17 @@ void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
impl->FlushRegion(dest_addr, size); impl->FlushRegion(dest_addr, size);
} }
bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
bool mapped = true;
u8* const ptr = impl->GetPointerImpl(
GetInteger(vaddr),
[&] {
LOG_ERROR(HW_Memory, "Unmapped InvalidateNCE for {} bytes @ {:#x}", size,
GetInteger(vaddr));
mapped = false;
},
[&] { impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); });
return mapped && ptr != nullptr;
}
} // namespace Core::Memory } // namespace Core::Memory

View File

@ -15,8 +15,9 @@
#include "core/hle/result.h" #include "core/hle/result.h"
namespace Common { namespace Common {
enum class MemoryPermission : u32;
struct PageTable; struct PageTable;
} } // namespace Common
namespace Core { namespace Core {
class System; class System;
@ -82,9 +83,10 @@ public:
* @param size The amount of bytes to map. Must be page-aligned. * @param size The amount of bytes to map. Must be page-aligned.
* @param target Buffer with the memory backing the mapping. Must be of length at least * @param target Buffer with the memory backing the mapping. Must be of length at least
* `size`. * `size`.
* @param perms The permissions to map the memory with.
*/ */
void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size, void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
Common::PhysicalAddress target); Common::PhysicalAddress target, Common::MemoryPermission perms);
/** /**
* Unmaps a region of the emulated process address space. * Unmaps a region of the emulated process address space.
@ -472,6 +474,7 @@ public:
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
void FlushRegion(Common::ProcessAddress dest_addr, size_t size); void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
private: private:

View File

@ -11,6 +11,7 @@ using namespace Common::Literals;
static constexpr size_t VIRTUAL_SIZE = 1ULL << 39; static constexpr size_t VIRTUAL_SIZE = 1ULL << 39;
static constexpr size_t BACKING_SIZE = 4_GiB; static constexpr size_t BACKING_SIZE = 4_GiB;
static constexpr auto PERMS = Common::MemoryPermission::ReadWrite;
TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") { TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") {
{ HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); } { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); }
@ -19,7 +20,7 @@ TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") {
TEST_CASE("HostMemory: Simple map", "[common]") { TEST_CASE("HostMemory: Simple map", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x5000, 0x8000, 0x1000); mem.Map(0x5000, 0x8000, 0x1000, PERMS);
volatile u8* const data = mem.VirtualBasePointer() + 0x5000; volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
data[0] = 50; data[0] = 50;
@ -28,8 +29,8 @@ TEST_CASE("HostMemory: Simple map", "[common]") {
TEST_CASE("HostMemory: Simple mirror map", "[common]") { TEST_CASE("HostMemory: Simple mirror map", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x5000, 0x3000, 0x2000); mem.Map(0x5000, 0x3000, 0x2000, PERMS);
mem.Map(0x8000, 0x4000, 0x1000); mem.Map(0x8000, 0x4000, 0x1000, PERMS);
volatile u8* const mirror_a = mem.VirtualBasePointer() + 0x5000; volatile u8* const mirror_a = mem.VirtualBasePointer() + 0x5000;
volatile u8* const mirror_b = mem.VirtualBasePointer() + 0x8000; volatile u8* const mirror_b = mem.VirtualBasePointer() + 0x8000;
@ -39,7 +40,7 @@ TEST_CASE("HostMemory: Simple mirror map", "[common]") {
TEST_CASE("HostMemory: Simple unmap", "[common]") { TEST_CASE("HostMemory: Simple unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x5000, 0x3000, 0x2000); mem.Map(0x5000, 0x3000, 0x2000, PERMS);
volatile u8* const data = mem.VirtualBasePointer() + 0x5000; volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
data[75] = 50; data[75] = 50;
@ -50,7 +51,7 @@ TEST_CASE("HostMemory: Simple unmap", "[common]") {
TEST_CASE("HostMemory: Simple unmap and remap", "[common]") { TEST_CASE("HostMemory: Simple unmap and remap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x5000, 0x3000, 0x2000); mem.Map(0x5000, 0x3000, 0x2000, PERMS);
volatile u8* const data = mem.VirtualBasePointer() + 0x5000; volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
data[0] = 50; data[0] = 50;
@ -58,79 +59,79 @@ TEST_CASE("HostMemory: Simple unmap and remap", "[common]") {
mem.Unmap(0x5000, 0x2000); mem.Unmap(0x5000, 0x2000);
mem.Map(0x5000, 0x3000, 0x2000); mem.Map(0x5000, 0x3000, 0x2000, PERMS);
REQUIRE(data[0] == 50); REQUIRE(data[0] == 50);
mem.Map(0x7000, 0x2000, 0x5000); mem.Map(0x7000, 0x2000, 0x5000, PERMS);
REQUIRE(data[0x3000] == 50); REQUIRE(data[0x3000] == 50);
} }
TEST_CASE("HostMemory: Nieche allocation", "[common]") { TEST_CASE("HostMemory: Nieche allocation", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x0000, 0, 0x20000); mem.Map(0x0000, 0, 0x20000, PERMS);
mem.Unmap(0x0000, 0x4000); mem.Unmap(0x0000, 0x4000);
mem.Map(0x1000, 0, 0x2000); mem.Map(0x1000, 0, 0x2000, PERMS);
mem.Map(0x3000, 0, 0x1000); mem.Map(0x3000, 0, 0x1000, PERMS);
mem.Map(0, 0, 0x1000); mem.Map(0, 0, 0x1000, PERMS);
} }
TEST_CASE("HostMemory: Full unmap", "[common]") { TEST_CASE("HostMemory: Full unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x8000, 0, 0x4000); mem.Map(0x8000, 0, 0x4000, PERMS);
mem.Unmap(0x8000, 0x4000); mem.Unmap(0x8000, 0x4000);
mem.Map(0x6000, 0, 0x16000); mem.Map(0x6000, 0, 0x16000, PERMS);
} }
TEST_CASE("HostMemory: Right out of bounds unmap", "[common]") { TEST_CASE("HostMemory: Right out of bounds unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x0000, 0, 0x4000); mem.Map(0x0000, 0, 0x4000, PERMS);
mem.Unmap(0x2000, 0x4000); mem.Unmap(0x2000, 0x4000);
mem.Map(0x2000, 0x80000, 0x4000); mem.Map(0x2000, 0x80000, 0x4000, PERMS);
} }
TEST_CASE("HostMemory: Left out of bounds unmap", "[common]") { TEST_CASE("HostMemory: Left out of bounds unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x8000, 0, 0x4000); mem.Map(0x8000, 0, 0x4000, PERMS);
mem.Unmap(0x6000, 0x4000); mem.Unmap(0x6000, 0x4000);
mem.Map(0x8000, 0, 0x2000); mem.Map(0x8000, 0, 0x2000, PERMS);
} }
TEST_CASE("HostMemory: Multiple placeholder unmap", "[common]") { TEST_CASE("HostMemory: Multiple placeholder unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x0000, 0, 0x4000); mem.Map(0x0000, 0, 0x4000, PERMS);
mem.Map(0x4000, 0, 0x1b000); mem.Map(0x4000, 0, 0x1b000, PERMS);
mem.Unmap(0x3000, 0x1c000); mem.Unmap(0x3000, 0x1c000);
mem.Map(0x3000, 0, 0x20000); mem.Map(0x3000, 0, 0x20000, PERMS);
} }
TEST_CASE("HostMemory: Unmap between placeholders", "[common]") { TEST_CASE("HostMemory: Unmap between placeholders", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x0000, 0, 0x4000); mem.Map(0x0000, 0, 0x4000, PERMS);
mem.Map(0x4000, 0, 0x4000); mem.Map(0x4000, 0, 0x4000, PERMS);
mem.Unmap(0x2000, 0x4000); mem.Unmap(0x2000, 0x4000);
mem.Map(0x2000, 0, 0x4000); mem.Map(0x2000, 0, 0x4000, PERMS);
} }
TEST_CASE("HostMemory: Unmap to origin", "[common]") { TEST_CASE("HostMemory: Unmap to origin", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0, 0x4000); mem.Map(0x4000, 0, 0x4000, PERMS);
mem.Map(0x8000, 0, 0x4000); mem.Map(0x8000, 0, 0x4000, PERMS);
mem.Unmap(0x4000, 0x4000); mem.Unmap(0x4000, 0x4000);
mem.Map(0, 0, 0x4000); mem.Map(0, 0, 0x4000, PERMS);
mem.Map(0x4000, 0, 0x4000); mem.Map(0x4000, 0, 0x4000, PERMS);
} }
TEST_CASE("HostMemory: Unmap to right", "[common]") { TEST_CASE("HostMemory: Unmap to right", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0, 0x4000); mem.Map(0x4000, 0, 0x4000, PERMS);
mem.Map(0x8000, 0, 0x4000); mem.Map(0x8000, 0, 0x4000, PERMS);
mem.Unmap(0x8000, 0x4000); mem.Unmap(0x8000, 0x4000);
mem.Map(0x8000, 0, 0x4000); mem.Map(0x8000, 0, 0x4000, PERMS);
} }
TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") { TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0x10000, 0x4000); mem.Map(0x4000, 0x10000, 0x4000, PERMS);
volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
ptr[0x1000] = 17; ptr[0x1000] = 17;
@ -142,7 +143,7 @@ TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") {
TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") { TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0x10000, 0x4000); mem.Map(0x4000, 0x10000, 0x4000, PERMS);
volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
ptr[0x3000] = 19; ptr[0x3000] = 19;
@ -156,7 +157,7 @@ TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") {
TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") { TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0x10000, 0x4000); mem.Map(0x4000, 0x10000, 0x4000, PERMS);
volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
ptr[0x0000] = 19; ptr[0x0000] = 19;
@ -170,8 +171,8 @@ TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") {
TEST_CASE("HostMemory: Partial sparse middle unmap and check bindings", "[common]") { TEST_CASE("HostMemory: Partial sparse middle unmap and check bindings", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0x10000, 0x2000); mem.Map(0x4000, 0x10000, 0x2000, PERMS);
mem.Map(0x6000, 0x20000, 0x2000); mem.Map(0x6000, 0x20000, 0x2000, PERMS);
volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
ptr[0x0000] = 19; ptr[0x0000] = 19;

View File

@ -4,7 +4,7 @@
add_subdirectory(host_shaders) add_subdirectory(host_shaders)
if(LIBVA_FOUND) if(LIBVA_FOUND)
set_source_files_properties(host1x/codecs/codec.cpp set_source_files_properties(host1x/ffmpeg/ffmpeg.cpp
PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
endif() endif()
@ -15,6 +15,7 @@ add_library(video_core STATIC
buffer_cache/buffer_cache.cpp buffer_cache/buffer_cache.cpp
buffer_cache/buffer_cache.h buffer_cache/buffer_cache.h
buffer_cache/memory_tracker_base.h buffer_cache/memory_tracker_base.h
buffer_cache/usage_tracker.h
buffer_cache/word_manager.h buffer_cache/word_manager.h
cache_types.h cache_types.h
cdma_pusher.cpp cdma_pusher.cpp
@ -66,6 +67,8 @@ add_library(video_core STATIC
host1x/codecs/vp9.cpp host1x/codecs/vp9.cpp
host1x/codecs/vp9.h host1x/codecs/vp9.h
host1x/codecs/vp9_types.h host1x/codecs/vp9_types.h
host1x/ffmpeg/ffmpeg.cpp
host1x/ffmpeg/ffmpeg.h
host1x/control.cpp host1x/control.cpp
host1x/control.h host1x/control.h
host1x/host1x.cpp host1x/host1x.cpp

View File

@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() {
if (!channel_state) { if (!channel_state) {
return; return;
} }
runtime.TickFrame(slot_buffers);
// Calculate hits and shots and move hit bits to the right // Calculate hits and shots and move hit bits to the right
const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(), const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
for (const IntervalType& add_interval : tmp_intervals) { for (const IntervalType& add_interval : tmp_intervals) {
common_ranges.add(add_interval); common_ranges.add(add_interval);
} }
runtime.CopyBuffer(dest_buffer, src_buffer, copies); const auto& copy = copies[0];
src_buffer.MarkUsage(copy.src_offset, copy.size);
dest_buffer.MarkUsage(copy.dst_offset, copy.size);
runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
if (has_new_downloads) { if (has_new_downloads) {
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
} }
@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
common_ranges.subtract(subtract_interval); common_ranges.subtract(subtract_interval);
const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
auto& dest_buffer = slot_buffers[buffer]; Buffer& dest_buffer = slot_buffers[buffer];
const u32 offset = dest_buffer.Offset(*cpu_dst_address); const u32 offset = dest_buffer.Offset(*cpu_dst_address);
runtime.ClearBuffer(dest_buffer, offset, size, value); runtime.ClearBuffer(dest_buffer, offset, size, value);
dest_buffer.MarkUsage(offset, size);
return true; return true;
} }
@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
async_downloads += std::make_pair(base_interval, 1); async_downloads += std::make_pair(base_interval, 1);
buffer.MarkUsage(copy.src_offset, copy.size);
runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
normalized_copies.push_back(second_copy); normalized_copies.push_back(second_copy);
} }
@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
// Have in mind the staging buffer offset for the copy // Have in mind the staging buffer offset for the copy
copy.dst_offset += download_staging.offset; copy.dst_offset += download_staging.offset;
const std::array copies{copy}; const std::array copies{copy};
runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, Buffer& buffer = slot_buffers[buffer_id];
false); buffer.MarkUsage(copy.src_offset, copy.size);
runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
} }
runtime.PostCopyBarrier(); runtime.PostCopyBarrier();
runtime.Finish(); runtime.Finish();
@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
std::memcpy(upload_staging.mapped_span.data(), std::memcpy(upload_staging.mapped_span.data(),
draw_state.inline_index_draw_indexes.data(), size); draw_state.inline_index_draw_indexes.data(), size);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies); runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
} else { } else {
buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
} }
@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes(); offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
runtime.BindIndexBuffer(buffer, new_offset, size); runtime.BindIndexBuffer(buffer, new_offset, size);
} else { } else {
buffer.MarkUsage(offset, size);
runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format, runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
draw_state.index_buffer.first, draw_state.index_buffer.count, draw_state.index_buffer.first, draw_state.index_buffer.count,
buffer, offset, size); buffer, offset, size);
@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
const u32 stride = maxwell3d->regs.vertex_streams[index].stride; const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, binding.size);
host_bindings.buffers.push_back(&buffer); host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset); host_bindings.offsets.push_back(offset);
@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
} }
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) { if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
} else { } else {
@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
SynchronizeBuffer(buffer, binding.cpu_addr, size); SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
if (is_written) { if (is_written) {
@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format; const PixelFormat format = binding.format;
buffer.MarkUsage(offset, size);
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) { if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format); runtime.BindImageBuffer(buffer, offset, size, format);
@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
host_bindings.buffers.push_back(&buffer); host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset); host_bindings.offsets.push_back(offset);
host_bindings.sizes.push_back(binding.size); host_bindings.sizes.push_back(size);
} }
if (host_bindings.buffers.size() > 0) { if (host_bindings.buffers.size() > 0) {
runtime.BindTransformFeedbackBuffers(host_bindings); runtime.BindTransformFeedbackBuffers(host_bindings);
@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
SynchronizeBuffer(buffer, binding.cpu_addr, size); SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) { if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
++binding_index; ++binding_index;
@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
SynchronizeBuffer(buffer, binding.cpu_addr, size); SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
const bool is_written = const bool is_written =
((channel_state->written_compute_storage_buffers >> index) & 1) != 0; ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format; const PixelFormat format = binding.format;
buffer.MarkUsage(offset, size);
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) { if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format); runtime.BindImageBuffer(buffer, offset, size, format);
@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
} }
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
channel_state->vertex_buffers[index] = Binding{ channel_state->vertex_buffers[index] = Binding{
.cpu_addr = *cpu_addr, .cpu_addr = *cpu_addr,
.size = size, .size = size,
.buffer_id = FindBuffer(*cpu_addr, size), .buffer_id = buffer_id,
}; };
} }
@ -1401,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
.dst_offset = dst_base_offset, .dst_offset = dst_base_offset,
.size = overlap.SizeBytes(), .size = overlap.SizeBytes(),
}); });
runtime.CopyBuffer(new_buffer, overlap, copies); new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
runtime.CopyBuffer(new_buffer, overlap, copies, true);
DeleteBuffer(overlap_id, true); DeleteBuffer(overlap_id, true);
} }
@ -1414,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const u32 size = static_cast<u32>(overlap.end - overlap.begin); const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
auto& new_buffer = slot_buffers[new_buffer_id]; auto& new_buffer = slot_buffers[new_buffer_id];
runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0); const size_t size_bytes = new_buffer.SizeBytes();
runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
new_buffer.MarkUsage(0, size_bytes);
for (const BufferId overlap_id : overlap.ids) { for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
} }
@ -1467,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
template <class P> template <class P>
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
return SynchronizeBufferImpl(buffer, cpu_addr, size);
}
template <class P>
bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
boost::container::small_vector<BufferCopy, 4> copies; boost::container::small_vector<BufferCopy, 4> copies;
u64 total_size_bytes = 0; u64 total_size_bytes = 0;
u64 largest_copy = 0; u64 largest_copy = 0;
@ -1493,51 +1508,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
return false; return false;
} }
template <class P>
bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
boost::container::small_vector<BufferCopy, 4> copies;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
IntervalSet found_sets{};
auto make_copies = [&] {
for (auto& interval : found_sets) {
const std::size_t sub_size = interval.upper() - interval.lower();
const VAddr cpu_addr_ = interval.lower();
copies.push_back(BufferCopy{
.src_offset = total_size_bytes,
.dst_offset = cpu_addr_ - buffer.CpuAddr(),
.size = sub_size,
});
total_size_bytes += sub_size;
largest_copy = std::max<u64>(largest_copy, sub_size);
}
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
};
memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
const VAddr base_adr = cpu_addr_out;
const VAddr end_adr = base_adr + range_size;
const IntervalType add_interval{base_adr, end_adr};
found_sets.add(add_interval);
});
if (found_sets.empty()) {
return true;
}
const IntervalType search_interval{cpu_addr, cpu_addr + size};
auto it = common_ranges.lower_bound(search_interval);
auto it_end = common_ranges.upper_bound(search_interval);
if (it == common_ranges.end()) {
make_copies();
return false;
}
while (it != it_end) {
found_sets.subtract(*it);
it++;
}
make_copies();
return false;
}
template <class P> template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies) { std::span<BufferCopy> copies) {
@ -1586,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
// Apply the staging offset // Apply the staging offset
copy.src_offset += upload_staging.offset; copy.src_offset += upload_staging.offset;
} }
runtime.CopyBuffer(buffer, upload_staging.buffer, copies); const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
} }
} }
@ -1628,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_
}}; }};
u8* const src_pointer = upload_staging.mapped_span.data(); u8* const src_pointer = upload_staging.mapped_span.data();
std::memcpy(src_pointer, inlined_buffer.data(), copy_size); std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies); const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
} else { } else {
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
} }
@ -1681,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
for (BufferCopy& copy : copies) { for (BufferCopy& copy : copies) {
// Modify copies to have the staging offset in mind // Modify copies to have the staging offset in mind
copy.dst_offset += download_staging.offset; copy.dst_offset += download_staging.offset;
buffer.MarkUsage(copy.src_offset, copy.size);
} }
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
runtime.Finish(); runtime.Finish();
for (const BufferCopy& copy : copies) { for (const BufferCopy& copy : copies) {
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;

View File

@ -529,10 +529,6 @@ private:
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies); std::span<BufferCopy> copies);

View File

@ -0,0 +1,79 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include "common/alignment.h"
#include "common/common_types.h"
namespace VideoCommon {
class UsageTracker {
static constexpr size_t BYTES_PER_BIT_SHIFT = 6;
static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT;
static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT;
public:
explicit UsageTracker(size_t size) {
const size_t num_pages = (size >> PAGE_SHIFT) + 1;
pages.resize(num_pages, 0ULL);
}
void Reset() noexcept {
std::ranges::fill(pages, 0ULL);
}
void Track(u64 offset, u64 size) noexcept {
const size_t page = offset >> PAGE_SHIFT;
const size_t page_end = (offset + size) >> PAGE_SHIFT;
TrackPage(page, offset, size);
if (page == page_end) {
return;
}
for (size_t i = page + 1; i < page_end; i++) {
pages[i] = ~u64{0};
}
const size_t offset_end = offset + size;
const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
}
[[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept {
const size_t page = offset >> PAGE_SHIFT;
const size_t page_end = (offset + size) >> PAGE_SHIFT;
if (IsPageUsed(page, offset, size)) {
return true;
}
for (size_t i = page + 1; i < page_end; i++) {
if (pages[i] != 0) {
return true;
}
}
const size_t offset_end = offset + size;
const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
}
private:
void TrackPage(u64 page, u64 offset, u64 size) noexcept {
const size_t offset_in_page = offset % PAGE_BYTES;
const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
const size_t mask = ~u64{0} >> (64 - num_bits);
pages[page] |= (~u64{0} & mask) << first_bit;
}
bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept {
const size_t offset_in_page = offset % PAGE_BYTES;
const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
const size_t mask = ~u64{0} >> (64 - num_bits);
const size_t mask2 = (~u64{0} & mask) << first_bit;
return (pages[page] & mask2) != 0;
}
private:
std::vector<u64> pages;
};
} // namespace VideoCommon

View File

@ -1,11 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <fstream>
#include <vector>
#include "common/assert.h" #include "common/assert.h"
#include "common/scope_exit.h"
#include "common/settings.h" #include "common/settings.h"
#include "video_core/host1x/codecs/codec.h" #include "video_core/host1x/codecs/codec.h"
#include "video_core/host1x/codecs/h264.h" #include "video_core/host1x/codecs/h264.h"
@ -14,242 +10,17 @@
#include "video_core/host1x/host1x.h" #include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
extern "C" {
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#ifdef LIBVA_FOUND
// for querying VAAPI driver information
#include <libavutil/hwcontext_vaapi.h>
#endif
}
namespace Tegra { namespace Tegra {
namespace {
constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
constexpr std::array PREFERRED_GPU_DECODERS = {
AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
AV_HWDEVICE_TYPE_DXVA2,
#elif defined(__unix__)
AV_HWDEVICE_TYPE_VAAPI,
AV_HWDEVICE_TYPE_VDPAU,
#endif
// last resort for Linux Flatpak (w/ NVIDIA)
AV_HWDEVICE_TYPE_VULKAN,
};
void AVPacketDeleter(AVPacket* ptr) {
av_packet_free(&ptr);
}
using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == av_codec_ctx->pix_fmt) {
return av_codec_ctx->pix_fmt;
}
}
LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
av_buffer_unref(&av_codec_ctx->hw_device_ctx);
av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
return PREFERRED_CPU_FMT;
}
// List all the currently available hwcontext in ffmpeg
std::vector<AVHWDeviceType> ListSupportedContexts() {
std::vector<AVHWDeviceType> contexts{};
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
do {
current_device_type = av_hwdevice_iterate_types(current_device_type);
contexts.push_back(current_device_type);
} while (current_device_type != AV_HWDEVICE_TYPE_NONE);
return contexts;
}
} // namespace
void AVFrameDeleter(AVFrame* ptr) {
av_frame_free(&ptr);
}
Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs) Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
: host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)), : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
vp8_decoder(std::make_unique<Decoder::VP8>(host1x)), vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {} vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
Codec::~Codec() { Codec::~Codec() = default;
if (!initialized) {
return;
}
// Free libav memory
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
if (filters_initialized) {
avfilter_graph_free(&av_filter_graph);
}
}
bool Codec::CreateGpuAvDevice() {
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
static const auto supported_contexts = ListSupportedContexts();
for (const auto& type : PREFERRED_GPU_DECODERS) {
if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
[&type](const auto& context) { return context == type; })) {
LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
continue;
}
// Avoid memory leak from not cleaning up after av_hwdevice_ctx_create
av_buffer_unref(&av_gpu_decoder);
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
if (hwdevice_res < 0) {
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
av_hwdevice_get_type_name(type), hwdevice_res);
continue;
}
#ifdef LIBVA_FOUND
if (type == AV_HWDEVICE_TYPE_VAAPI) {
// we need to determine if this is an impersonated VAAPI driver
AVHWDeviceContext* hwctx =
static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data));
AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
const char* vendor_name = vaQueryVendorString(vactx->display);
if (strstr(vendor_name, "VDPAU backend")) {
// VDPAU impersonated VAAPI impl's are super buggy, we need to skip them
LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver");
continue;
} else {
// according to some user testing, certain vaapi driver (Intel?) could be buggy
// so let's log the driver name which may help the developers/supporters
LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name);
}
}
#endif
for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
if (!config) {
LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
av_codec->name, av_hwdevice_get_type_name(type));
break;
}
if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
av_codec_ctx->pix_fmt = config->pix_fmt;
return true;
}
}
}
return false;
}
void Codec::InitializeAvCodecContext() {
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
av_codec_ctx->thread_count = 0;
av_codec_ctx->thread_type &= ~FF_THREAD_FRAME;
}
void Codec::InitializeGpuDecoder() {
if (!CreateGpuAvDevice()) {
av_buffer_unref(&av_gpu_decoder);
return;
}
auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetGpuFormat;
}
void Codec::InitializeAvFilters(AVFrame* frame) {
const AVFilter* buffer_src = avfilter_get_by_name("buffer");
const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
AVFilterInOut* inputs = avfilter_inout_alloc();
AVFilterInOut* outputs = avfilter_inout_alloc();
SCOPE_EXIT({
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
});
// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
// so just use 1/1 to make buffer filter happy
std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame->width,
frame->height, frame->format);
av_filter_graph = avfilter_graph_alloc();
int ret = avfilter_graph_create_filter(&av_filter_src_ctx, buffer_src, "in", args.c_str(),
nullptr, av_filter_graph);
if (ret < 0) {
LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter source error: {}", ret);
return;
}
ret = avfilter_graph_create_filter(&av_filter_sink_ctx, buffer_sink, "out", nullptr, nullptr,
av_filter_graph);
if (ret < 0) {
LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter sink error: {}", ret);
return;
}
inputs->name = av_strdup("out");
inputs->filter_ctx = av_filter_sink_ctx;
inputs->pad_idx = 0;
inputs->next = nullptr;
outputs->name = av_strdup("in");
outputs->filter_ctx = av_filter_src_ctx;
outputs->pad_idx = 0;
outputs->next = nullptr;
const char* description = "yadif=1:-1:0";
ret = avfilter_graph_parse_ptr(av_filter_graph, description, &inputs, &outputs, nullptr);
if (ret < 0) {
LOG_ERROR(Service_NVDRV, "avfilter_graph_parse_ptr error: {}", ret);
return;
}
ret = avfilter_graph_config(av_filter_graph, nullptr);
if (ret < 0) {
LOG_ERROR(Service_NVDRV, "avfilter_graph_config error: {}", ret);
return;
}
filters_initialized = true;
}
void Codec::Initialize() { void Codec::Initialize() {
const AVCodecID codec = [&] { initialized = decode_api.Initialize(current_codec);
switch (current_codec) {
case Host1x::NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
case Host1x::NvdecCommon::VideoCodec::VP8:
return AV_CODEC_ID_VP8;
case Host1x::NvdecCommon::VideoCodec::VP9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
return AV_CODEC_ID_NONE;
}
}();
av_codec = avcodec_find_decoder(codec);
InitializeAvCodecContext();
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
InitializeGpuDecoder();
}
if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
return;
}
if (!av_codec_ctx->hw_device_ctx) {
LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
}
initialized = true;
} }
void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) { void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
@ -264,14 +35,18 @@ void Codec::Decode() {
if (is_first_frame) { if (is_first_frame) {
Initialize(); Initialize();
} }
if (!initialized) { if (!initialized) {
return; return;
} }
// Assemble bitstream.
bool vp9_hidden_frame = false; bool vp9_hidden_frame = false;
const auto& frame_data = [&]() { size_t configuration_size = 0;
const auto packet_data = [&]() {
switch (current_codec) { switch (current_codec) {
case Tegra::Host1x::NvdecCommon::VideoCodec::H264: case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
return h264_decoder->ComposeFrame(state, is_first_frame); return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame);
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
return vp8_decoder->ComposeFrame(state); return vp8_decoder->ComposeFrame(state);
case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
@ -283,89 +58,35 @@ void Codec::Decode() {
return std::span<const u8>{}; return std::span<const u8>{};
} }
}(); }();
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
if (!packet) { // Send assembled bitstream to decoder.
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); if (!decode_api.SendPacket(packet_data, configuration_size)) {
return; return;
} }
packet->data = const_cast<u8*>(frame_data.data());
packet->size = static_cast<s32>(frame_data.size()); // Only receive/store visible frames.
if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
return;
}
// Only receive/store visible frames
if (vp9_hidden_frame) { if (vp9_hidden_frame) {
return; return;
} }
AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
AVFramePtr final_frame{nullptr, AVFrameDeleter};
ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
return;
}
if (initial_frame->width == 0 || initial_frame->height == 0) {
LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
return;
}
bool is_interlaced = initial_frame->interlaced_frame != 0;
if (av_codec_ctx->hw_device_ctx) {
final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
// Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
// because Intel drivers crash unless using AV_PIX_FMT_NV12
final_frame->format = PREFERRED_GPU_FMT;
const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
} else {
final_frame = std::move(initial_frame);
}
if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
return;
}
if (!is_interlaced) {
av_frames.push(std::move(final_frame));
} else {
if (!filters_initialized) {
InitializeAvFilters(final_frame.get());
}
if (const int ret = av_buffersrc_add_frame_flags(av_filter_src_ctx, final_frame.get(),
AV_BUFFERSRC_FLAG_KEEP_REF);
ret) {
LOG_DEBUG(Service_NVDRV, "av_buffersrc_add_frame_flags error {}", ret);
return;
}
while (true) {
auto filter_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
int ret = av_buffersink_get_frame(av_filter_sink_ctx, filter_frame.get()); // Receive output frames from decoder.
decode_api.ReceiveFrames(frames);
if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) while (frames.size() > 10) {
break; LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame");
if (ret < 0) { frames.pop();
LOG_DEBUG(Service_NVDRV, "av_buffersink_get_frame error {}", ret);
return;
}
av_frames.push(std::move(filter_frame));
}
}
while (av_frames.size() > 10) {
LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
av_frames.pop();
} }
} }
AVFramePtr Codec::GetCurrentFrame() { std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() {
// Sometimes VIC will request more frames than have been decoded. // Sometimes VIC will request more frames than have been decoded.
// in this case, return a nullptr and don't overwrite previous frame data // in this case, return a blank frame and don't overwrite previous data.
if (av_frames.empty()) { if (frames.empty()) {
return AVFramePtr{nullptr, AVFrameDeleter}; return {};
} }
AVFramePtr frame = std::move(av_frames.front());
av_frames.pop(); auto frame = std::move(frames.front());
frames.pop();
return frame; return frame;
} }

View File

@ -4,28 +4,15 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <optional>
#include <string_view> #include <string_view>
#include <queue> #include <queue>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h"
#include "video_core/host1x/nvdec_common.h" #include "video_core/host1x/nvdec_common.h"
extern "C" {
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
}
namespace Tegra { namespace Tegra {
void AVFrameDeleter(AVFrame* ptr);
using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
namespace Decoder { namespace Decoder {
class H264; class H264;
class VP8; class VP8;
@ -51,7 +38,7 @@ public:
void Decode(); void Decode();
/// Returns next decoded frame /// Returns next decoded frame
[[nodiscard]] AVFramePtr GetCurrentFrame(); [[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetCurrentFrame();
/// Returns the value of current_codec /// Returns the value of current_codec
[[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
@ -60,25 +47,9 @@ public:
[[nodiscard]] std::string_view GetCurrentCodecName() const; [[nodiscard]] std::string_view GetCurrentCodecName() const;
private: private:
void InitializeAvCodecContext();
void InitializeAvFilters(AVFrame* frame);
void InitializeGpuDecoder();
bool CreateGpuAvDevice();
bool initialized{}; bool initialized{};
bool filters_initialized{};
Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
FFmpeg::DecodeApi decode_api;
const AVCodec* av_codec{nullptr};
AVCodecContext* av_codec_ctx{nullptr};
AVBufferRef* av_gpu_decoder{nullptr};
AVFilterContext* av_filter_src_ctx{nullptr};
AVFilterContext* av_filter_sink_ctx{nullptr};
AVFilterGraph* av_filter_graph{nullptr};
Host1x::Host1x& host1x; Host1x::Host1x& host1x;
const Host1x::NvdecCommon::NvdecRegisters& state; const Host1x::NvdecCommon::NvdecRegisters& state;
@ -86,7 +57,7 @@ private:
std::unique_ptr<Decoder::VP8> vp8_decoder; std::unique_ptr<Decoder::VP8> vp8_decoder;
std::unique_ptr<Decoder::VP9> vp9_decoder; std::unique_ptr<Decoder::VP9> vp9_decoder;
std::queue<AVFramePtr> av_frames{}; std::queue<std::unique_ptr<FFmpeg::Frame>> frames{};
}; };
} // namespace Tegra } // namespace Tegra

View File

@ -30,7 +30,7 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
H264::~H264() = default; H264::~H264() = default;
std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
bool is_first_frame) { size_t* out_configuration_size, bool is_first_frame) {
H264DecoderContext context; H264DecoderContext context;
host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
sizeof(H264DecoderContext)); sizeof(H264DecoderContext));
@ -39,6 +39,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
if (!is_first_frame && frame_number != 0) { if (!is_first_frame && frame_number != 0) {
frame.resize_destructive(context.stream_len); frame.resize_destructive(context.stream_len);
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
*out_configuration_size = 0;
return frame; return frame;
} }
@ -157,6 +158,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
frame.resize(encoded_header.size() + context.stream_len); frame.resize(encoded_header.size() + context.stream_len);
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
*out_configuration_size = encoded_header.size();
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
frame.data() + encoded_header.size(), context.stream_len); frame.data() + encoded_header.size(), context.stream_len);

View File

@ -67,6 +67,7 @@ public:
/// Compose the H264 frame for FFmpeg decoding /// Compose the H264 frame for FFmpeg decoding
[[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, [[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
size_t* out_configuration_size,
bool is_first_frame = false); bool is_first_frame = false);
private: private:

View File

@ -0,0 +1,419 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h"
extern "C" {
#ifdef LIBVA_FOUND
// for querying VAAPI driver information
#include <libavutil/hwcontext_vaapi.h>
#endif
}
namespace FFmpeg {
namespace {
constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12;
constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P;
constexpr std::array PreferredGpuDecoders = {
AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
AV_HWDEVICE_TYPE_DXVA2,
#elif defined(__unix__)
AV_HWDEVICE_TYPE_VAAPI,
AV_HWDEVICE_TYPE_VDPAU,
#endif
// last resort for Linux Flatpak (w/ NVIDIA)
AV_HWDEVICE_TYPE_VULKAN,
};
AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == codec_context->pix_fmt) {
return codec_context->pix_fmt;
}
}
LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU");
av_buffer_unref(&codec_context->hw_device_ctx);
codec_context->pix_fmt = PreferredCpuFormat;
return codec_context->pix_fmt;
}
std::string AVError(int errnum) {
char errbuf[AV_ERROR_MAX_STRING_SIZE] = {};
av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum);
return errbuf;
}
} // namespace
Packet::Packet(std::span<const u8> data) {
m_packet = av_packet_alloc();
m_packet->data = const_cast<u8*>(data.data());
m_packet->size = static_cast<s32>(data.size());
}
Packet::~Packet() {
av_packet_free(&m_packet);
}
Frame::Frame() {
m_frame = av_frame_alloc();
}
Frame::~Frame() {
av_frame_free(&m_frame);
}
Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
const AVCodecID av_codec = [&] {
switch (codec) {
case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
return AV_CODEC_ID_VP8;
case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", codec);
return AV_CODEC_ID_NONE;
}
}();
m_codec = avcodec_find_decoder(av_codec);
}
bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i);
if (!config) {
LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name,
av_hwdevice_get_type_name(type));
break;
}
if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 &&
config->device_type == type) {
LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
*out_pix_fmt = config->pix_fmt;
return true;
}
}
return false;
}
std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
std::vector<AVHWDeviceType> types;
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
while (true) {
current_device_type = av_hwdevice_iterate_types(current_device_type);
if (current_device_type == AV_HWDEVICE_TYPE_NONE) {
return types;
}
types.push_back(current_device_type);
}
}
HardwareContext::~HardwareContext() {
av_buffer_unref(&m_gpu_decoder);
}
bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
const Decoder& decoder) {
const auto supported_types = GetSupportedDeviceTypes();
for (const auto type : PreferredGpuDecoders) {
AVPixelFormat hw_pix_fmt;
if (std::ranges::find(supported_types, type) == supported_types.end()) {
LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
continue;
}
if (!this->InitializeWithType(type)) {
continue;
}
if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) {
decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt);
return true;
}
}
return false;
}
bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
av_buffer_unref(&m_gpu_decoder);
if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0);
ret < 0) {
LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type),
AVError(ret));
return false;
}
#ifdef LIBVA_FOUND
if (type == AV_HWDEVICE_TYPE_VAAPI) {
// We need to determine if this is an impersonated VAAPI driver.
auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data);
auto* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
const char* vendor_name = vaQueryVendorString(vactx->display);
if (strstr(vendor_name, "VDPAU backend")) {
// VDPAU impersonated VAAPI impls are super buggy, we need to skip them.
LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver");
return false;
} else {
// According to some user testing, certain VAAPI drivers (Intel?) could be buggy.
// Log the driver name just in case.
LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name);
}
}
#endif
return true;
}
DecoderContext::DecoderContext(const Decoder& decoder) {
m_codec_context = avcodec_alloc_context3(decoder.GetCodec());
av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
m_codec_context->thread_count = 0;
m_codec_context->thread_type &= ~FF_THREAD_FRAME;
}
DecoderContext::~DecoderContext() {
av_buffer_unref(&m_codec_context->hw_device_ctx);
avcodec_free_context(&m_codec_context);
}
void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context,
AVPixelFormat hw_pix_fmt) {
m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef());
m_codec_context->get_format = GetGpuFormat;
m_codec_context->pix_fmt = hw_pix_fmt;
}
bool DecoderContext::OpenContext(const Decoder& decoder) {
if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
return false;
}
if (!m_codec_context->hw_device_ctx) {
LOG_INFO(HW_GPU, "Using FFmpeg software decoding");
}
return true;
}
bool DecoderContext::SendPacket(const Packet& packet) {
if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
return false;
}
return true;
}
std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) {
auto dst_frame = std::make_unique<Frame>();
const auto ReceiveImpl = [&](AVFrame* frame) {
if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
return false;
}
*out_is_interlaced = frame->interlaced_frame != 0;
return true;
};
if (m_codec_context->hw_device_ctx) {
// If we have a hardware context, make a separate frame here to receive the
// hardware result before sending it to the output.
Frame intermediate_frame;
if (!ReceiveImpl(intermediate_frame.GetFrame())) {
return {};
}
dst_frame->SetFormat(PreferredGpuFormat);
if (const int ret =
av_hwframe_transfer_data(dst_frame->GetFrame(), intermediate_frame.GetFrame(), 0);
ret < 0) {
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
return {};
}
} else {
// Otherwise, decode the frame as normal.
if (!ReceiveImpl(dst_frame->GetFrame())) {
return {};
}
}
return dst_frame;
}
DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) {
const AVFilter* buffer_src = avfilter_get_by_name("buffer");
const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
AVFilterInOut* inputs = avfilter_inout_alloc();
AVFilterInOut* outputs = avfilter_inout_alloc();
SCOPE_EXIT({
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
});
// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
// so just use 1/1 to make buffer filter happy
std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(),
frame.GetHeight(), static_cast<int>(frame.GetPixelFormat()));
m_filter_graph = avfilter_graph_alloc();
int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(),
nullptr, m_filter_graph);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret));
return;
}
ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr,
m_filter_graph);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret));
return;
}
inputs->name = av_strdup("out");
inputs->filter_ctx = m_sink_context;
inputs->pad_idx = 0;
inputs->next = nullptr;
outputs->name = av_strdup("in");
outputs->filter_ctx = m_source_context;
outputs->pad_idx = 0;
outputs->next = nullptr;
const char* description = "yadif=1:-1:0";
ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret));
return;
}
ret = avfilter_graph_config(m_filter_graph, nullptr);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret));
return;
}
m_initialized = true;
}
bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) {
if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(),
AV_BUFFERSRC_FLAG_KEEP_REF);
ret < 0) {
LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret));
return false;
}
return true;
}
std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() {
auto dst_frame = std::make_unique<Frame>();
const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame());
if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) {
return {};
}
if (ret < 0) {
LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret));
return {};
}
return dst_frame;
}
DeinterlaceFilter::~DeinterlaceFilter() {
avfilter_graph_free(&m_filter_graph);
}
void DecodeApi::Reset() {
m_deinterlace_filter.reset();
m_hardware_context.reset();
m_decoder_context.reset();
m_decoder.reset();
}
bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
this->Reset();
m_decoder.emplace(codec);
m_decoder_context.emplace(*m_decoder);
// Enable GPU decoding if requested.
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
m_hardware_context.emplace();
m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
}
// Open the decoder context.
if (!m_decoder_context->OpenContext(*m_decoder)) {
this->Reset();
return false;
}
return true;
}
bool DecodeApi::SendPacket(std::span<const u8> packet_data, size_t configuration_size) {
FFmpeg::Packet packet(packet_data);
return m_decoder_context->SendPacket(packet);
}
void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) {
// Receive raw frame from decoder.
bool is_interlaced;
auto frame = m_decoder_context->ReceiveFrame(&is_interlaced);
if (!frame) {
return;
}
if (!is_interlaced) {
// If the frame is not interlaced, we can pend it now.
frame_queue.push(std::move(frame));
} else {
// Create the deinterlacer if needed.
if (!m_deinterlace_filter) {
m_deinterlace_filter.emplace(*frame);
}
// Add the frame we just received.
if (!m_deinterlace_filter->AddSourceFrame(*frame)) {
return;
}
// Pend output fields.
while (true) {
auto filter_frame = m_deinterlace_filter->DrainSinkFrame();
if (!filter_frame) {
break;
}
frame_queue.push(std::move(filter_frame));
}
}
}
} // namespace FFmpeg

View File

@ -0,0 +1,213 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <optional>
#include <span>
#include <vector>
#include <queue>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/host1x/nvdec_common.h"
extern "C" {
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
}
namespace FFmpeg {
class Packet;
class Frame;
class Decoder;
class HardwareContext;
class DecoderContext;
class DeinterlaceFilter;
// Wraps an AVPacket, a container for compressed bitstream data.
class Packet {
public:
YUZU_NON_COPYABLE(Packet);
YUZU_NON_MOVEABLE(Packet);
explicit Packet(std::span<const u8> data);
~Packet();
AVPacket* GetPacket() const {
return m_packet;
}
private:
AVPacket* m_packet{};
};
// Wraps an AVFrame, a container for audio and video stream data.
class Frame {
public:
YUZU_NON_COPYABLE(Frame);
YUZU_NON_MOVEABLE(Frame);
explicit Frame();
~Frame();
int GetWidth() const {
return m_frame->width;
}
int GetHeight() const {
return m_frame->height;
}
AVPixelFormat GetPixelFormat() const {
return static_cast<AVPixelFormat>(m_frame->format);
}
int GetStride(int plane) const {
return m_frame->linesize[plane];
}
int* GetStrides() const {
return m_frame->linesize;
}
u8* GetData(int plane) const {
return m_frame->data[plane];
}
u8** GetPlanes() const {
return m_frame->data;
}
void SetFormat(int format) {
m_frame->format = format;
}
AVFrame* GetFrame() const {
return m_frame;
}
private:
AVFrame* m_frame{};
};
// Wraps an AVCodec, a type containing information about a codec.
class Decoder {
public:
YUZU_NON_COPYABLE(Decoder);
YUZU_NON_MOVEABLE(Decoder);
explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec);
~Decoder() = default;
bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const;
const AVCodec* GetCodec() const {
return m_codec;
}
private:
const AVCodec* m_codec{};
};
// Wraps AVBufferRef for an accelerated decoder.
class HardwareContext {
public:
YUZU_NON_COPYABLE(HardwareContext);
YUZU_NON_MOVEABLE(HardwareContext);
static std::vector<AVHWDeviceType> GetSupportedDeviceTypes();
explicit HardwareContext() = default;
~HardwareContext();
bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder);
AVBufferRef* GetBufferRef() const {
return m_gpu_decoder;
}
private:
bool InitializeWithType(AVHWDeviceType type);
AVBufferRef* m_gpu_decoder{};
};
// Wraps an AVCodecContext.
class DecoderContext {
public:
YUZU_NON_COPYABLE(DecoderContext);
YUZU_NON_MOVEABLE(DecoderContext);
explicit DecoderContext(const Decoder& decoder);
~DecoderContext();
void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
bool OpenContext(const Decoder& decoder);
bool SendPacket(const Packet& packet);
std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced);
AVCodecContext* GetCodecContext() const {
return m_codec_context;
}
private:
AVCodecContext* m_codec_context{};
};
// Wraps an AVFilterGraph.
class DeinterlaceFilter {
public:
YUZU_NON_COPYABLE(DeinterlaceFilter);
YUZU_NON_MOVEABLE(DeinterlaceFilter);
explicit DeinterlaceFilter(const Frame& frame);
~DeinterlaceFilter();
bool AddSourceFrame(const Frame& frame);
std::unique_ptr<Frame> DrainSinkFrame();
private:
AVFilterGraph* m_filter_graph{};
AVFilterContext* m_source_context{};
AVFilterContext* m_sink_context{};
bool m_initialized{};
};
class DecodeApi {
public:
YUZU_NON_COPYABLE(DecodeApi);
YUZU_NON_MOVEABLE(DecodeApi);
DecodeApi() = default;
~DecodeApi() = default;
bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
void Reset();
bool SendPacket(std::span<const u8> packet_data, size_t configuration_size);
void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue);
private:
std::optional<FFmpeg::Decoder> m_decoder;
std::optional<FFmpeg::DecoderContext> m_decoder_context;
std::optional<FFmpeg::HardwareContext> m_hardware_context;
std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter;
};
} // namespace FFmpeg

View File

@ -28,7 +28,7 @@ void Nvdec::ProcessMethod(u32 method, u32 argument) {
} }
} }
AVFramePtr Nvdec::GetFrame() { std::unique_ptr<FFmpeg::Frame> Nvdec::GetFrame() {
return codec->GetCurrentFrame(); return codec->GetCurrentFrame();
} }

View File

@ -23,7 +23,7 @@ public:
void ProcessMethod(u32 method, u32 argument); void ProcessMethod(u32 method, u32 argument);
/// Return most recently decoded frame /// Return most recently decoded frame
[[nodiscard]] AVFramePtr GetFrame(); [[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetFrame();
private: private:
/// Invoke codec to decode a frame /// Invoke codec to decode a frame

View File

@ -82,27 +82,26 @@ void Vic::Execute() {
return; return;
} }
const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)}; const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)};
const AVFramePtr frame_ptr = nvdec_processor->GetFrame(); auto frame = nvdec_processor->GetFrame();
const auto* frame = frame_ptr.get();
if (!frame) { if (!frame) {
return; return;
} }
const u64 surface_width = config.surface_width_minus1 + 1; const u64 surface_width = config.surface_width_minus1 + 1;
const u64 surface_height = config.surface_height_minus1 + 1; const u64 surface_height = config.surface_height_minus1 + 1;
if (static_cast<u64>(frame->width) != surface_width || if (static_cast<u64>(frame->GetWidth()) != surface_width ||
static_cast<u64>(frame->height) != surface_height) { static_cast<u64>(frame->GetHeight()) != surface_height) {
// TODO: Properly support multiple video streams with differing frame dimensions // TODO: Properly support multiple video streams with differing frame dimensions
LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}", LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}",
frame->width, frame->height, surface_width, surface_height); frame->GetWidth(), frame->GetHeight(), surface_width, surface_height);
} }
switch (config.pixel_format) { switch (config.pixel_format) {
case VideoPixelFormat::RGBA8: case VideoPixelFormat::RGBA8:
case VideoPixelFormat::BGRA8: case VideoPixelFormat::BGRA8:
case VideoPixelFormat::RGBX8: case VideoPixelFormat::RGBX8:
WriteRGBFrame(frame, config); WriteRGBFrame(std::move(frame), config);
break; break;
case VideoPixelFormat::YUV420: case VideoPixelFormat::YUV420:
WriteYUVFrame(frame, config); WriteYUVFrame(std::move(frame), config);
break; break;
default: default:
UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value()); UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
@ -110,10 +109,14 @@ void Vic::Execute() {
} }
} }
void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { void Vic::WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) {
LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) { const auto frame_width = frame->GetWidth();
const auto frame_height = frame->GetHeight();
const auto frame_format = frame->GetPixelFormat();
if (!scaler_ctx || frame_width != scaler_width || frame_height != scaler_height) {
const AVPixelFormat target_format = [pixel_format = config.pixel_format]() { const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
switch (pixel_format) { switch (pixel_format) {
case VideoPixelFormat::RGBA8: case VideoPixelFormat::RGBA8:
@ -129,27 +132,26 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
sws_freeContext(scaler_ctx); sws_freeContext(scaler_ctx);
// Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format
scaler_ctx = sws_getContext(frame->width, frame->height, scaler_ctx = sws_getContext(frame_width, frame_height, frame_format, frame_width,
static_cast<AVPixelFormat>(frame->format), frame->width, frame_height, target_format, 0, nullptr, nullptr, nullptr);
frame->height, target_format, 0, nullptr, nullptr, nullptr); scaler_width = frame_width;
scaler_width = frame->width; scaler_height = frame_height;
scaler_height = frame->height;
converted_frame_buffer.reset(); converted_frame_buffer.reset();
} }
if (!converted_frame_buffer) { if (!converted_frame_buffer) {
const size_t frame_size = frame->width * frame->height * 4; const size_t frame_size = frame_width * frame_height * 4;
converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free}; converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free};
} }
const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0}; const std::array<int, 4> converted_stride{frame_width * 4, frame_height * 4, 0, 0};
u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr, sws_scale(scaler_ctx, frame->GetPlanes(), frame->GetStrides(), 0, frame_height,
converted_stride.data()); &converted_frame_buf_addr, converted_stride.data());
// Use the minimum of surface/frame dimensions to avoid buffer overflow. // Use the minimum of surface/frame dimensions to avoid buffer overflow.
const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1; const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1;
const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1; const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1;
const u32 width = std::min(surface_width, static_cast<u32>(frame->width)); const u32 width = std::min(surface_width, static_cast<u32>(frame_width));
const u32 height = std::min(surface_height, static_cast<u32>(frame->height)); const u32 height = std::min(surface_height, static_cast<u32>(frame_height));
const u32 blk_kind = static_cast<u32>(config.block_linear_kind); const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
if (blk_kind != 0) { if (blk_kind != 0) {
// swizzle pitch linear to block linear // swizzle pitch linear to block linear
@ -169,23 +171,23 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
} }
} }
void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) {
LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
const std::size_t surface_width = config.surface_width_minus1 + 1; const std::size_t surface_width = config.surface_width_minus1 + 1;
const std::size_t surface_height = config.surface_height_minus1 + 1; const std::size_t surface_height = config.surface_height_minus1 + 1;
const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL; const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
// Use the minimum of surface/frame dimensions to avoid buffer overflow. // Use the minimum of surface/frame dimensions to avoid buffer overflow.
const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width)); const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->GetWidth()));
const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height)); const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->GetHeight()));
const auto stride = static_cast<size_t>(frame->linesize[0]); const auto stride = static_cast<size_t>(frame->GetStride(0));
luma_buffer.resize_destructive(aligned_width * surface_height); luma_buffer.resize_destructive(aligned_width * surface_height);
chroma_buffer.resize_destructive(aligned_width * surface_height / 2); chroma_buffer.resize_destructive(aligned_width * surface_height / 2);
// Populate luma buffer // Populate luma buffer
const u8* luma_src = frame->data[0]; const u8* luma_src = frame->GetData(0);
for (std::size_t y = 0; y < frame_height; ++y) { for (std::size_t y = 0; y < frame_height; ++y) {
const std::size_t src = y * stride; const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width; const std::size_t dst = y * aligned_width;
@ -196,16 +198,16 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
// Chroma // Chroma
const std::size_t half_height = frame_height / 2; const std::size_t half_height = frame_height / 2;
const auto half_stride = static_cast<size_t>(frame->linesize[1]); const auto half_stride = static_cast<size_t>(frame->GetStride(1));
switch (frame->format) { switch (frame->GetPixelFormat()) {
case AV_PIX_FMT_YUV420P: { case AV_PIX_FMT_YUV420P: {
// Frame from FFmpeg software // Frame from FFmpeg software
// Populate chroma buffer from both channels with interleaving. // Populate chroma buffer from both channels with interleaving.
const std::size_t half_width = frame_width / 2; const std::size_t half_width = frame_width / 2;
u8* chroma_buffer_data = chroma_buffer.data(); u8* chroma_buffer_data = chroma_buffer.data();
const u8* chroma_b_src = frame->data[1]; const u8* chroma_b_src = frame->GetData(1);
const u8* chroma_r_src = frame->data[2]; const u8* chroma_r_src = frame->GetData(2);
for (std::size_t y = 0; y < half_height; ++y) { for (std::size_t y = 0; y < half_height; ++y) {
const std::size_t src = y * half_stride; const std::size_t src = y * half_stride;
const std::size_t dst = y * aligned_width; const std::size_t dst = y * aligned_width;
@ -219,7 +221,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
case AV_PIX_FMT_NV12: { case AV_PIX_FMT_NV12: {
// Frame from VA-API hardware // Frame from VA-API hardware
// This is already interleaved so just copy // This is already interleaved so just copy
const u8* chroma_src = frame->data[1]; const u8* chroma_src = frame->GetData(1);
for (std::size_t y = 0; y < half_height; ++y) { for (std::size_t y = 0; y < half_height; ++y) {
const std::size_t src = y * stride; const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width; const std::size_t dst = y * aligned_width;

View File

@ -39,9 +39,9 @@ public:
private: private:
void Execute(); void Execute();
void WriteRGBFrame(const AVFrame* frame, const VicConfig& config); void WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
void WriteYUVFrame(const AVFrame* frame, const VicConfig& config); void WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
Host1x& host1x; Host1x& host1x;
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;

View File

@ -178,13 +178,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
} }
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) { std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool) {
CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
} }
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies) { std::span<const VideoCommon::BufferCopy> copies, bool) {
CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true);
} }
void BufferCacheRuntime::PreCopyBarrier() { void BufferCacheRuntime::PreCopyBarrier() {

View File

@ -30,6 +30,8 @@ public:
void MakeResident(GLenum access) noexcept; void MakeResident(GLenum access) noexcept;
void MarkUsage(u64 offset, u64 size) {}
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
@ -66,22 +68,29 @@ public:
[[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
return false;
}
void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); std::span<const VideoCommon::BufferCopy> copies, bool barrier);
void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); std::span<const VideoCommon::BufferCopy> copies, bool barrier);
void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool can_reorder_upload = false);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies); std::span<const VideoCommon::BufferCopy> copies, bool);
void PreCopyBarrier(); void PreCopyBarrier();
void PostCopyBarrier(); void PostCopyBarrier();
void Finish(); void Finish();
void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);

View File

@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
} // Anonymous namespace } // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_) VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
device{&runtime.device}, buffer{ device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} { tracker{SizeBytes()} {
if (runtime.device.HasDebuggingToolAttached()) { if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
} }
@ -355,12 +355,31 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage(); return device.CanReportMemoryUsage();
} }
void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
it->ResetUsageTracking();
}
}
void BufferCacheRuntime::Finish() { void BufferCacheRuntime::Finish() {
scheduler.Finish(); scheduler.Finish();
} }
bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
std::span<const VideoCommon::BufferCopy> copies) {
if (Settings::values.disable_buffer_reorder) {
return false;
}
const bool can_use_upload_cmdbuf =
std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) {
return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
});
return can_use_upload_cmdbuf;
}
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) { std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool can_reorder_upload) {
if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
return; return;
} }
@ -376,9 +395,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
}; };
// Measuring a popular game, this number never exceeds the specified size once data is warmed up // Measuring a popular game, this number never exceeds the specified size once data is warmed up
boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
});
return;
}
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
if (barrier) { if (barrier) {

View File

@ -5,6 +5,7 @@
#include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/usage_tracker.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@ -34,6 +35,18 @@ public:
return *buffer; return *buffer;
} }
[[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
return tracker.IsUsed(offset, size);
}
void MarkUsage(u64 offset, u64 size) noexcept {
tracker.Track(offset, size);
}
void ResetUsageTracking() noexcept {
tracker.Reset();
}
operator VkBuffer() const noexcept { operator VkBuffer() const noexcept {
return *buffer; return *buffer;
} }
@ -49,6 +62,7 @@ private:
const Device* device{}; const Device* device{};
vk::Buffer buffer; vk::Buffer buffer;
std::vector<BufferView> views; std::vector<BufferView> views;
VideoCommon::UsageTracker tracker;
}; };
class QuadArrayIndexBuffer; class QuadArrayIndexBuffer;
@ -67,6 +81,8 @@ public:
ComputePassDescriptorQueue& compute_pass_descriptor_queue, ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool); DescriptorPool& descriptor_pool);
void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
void Finish(); void Finish();
u64 GetDeviceLocalMemory() const; u64 GetDeviceLocalMemory() const;
@ -79,12 +95,15 @@ public:
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
void FreeDeferredStagingBuffer(StagingBufferRef& ref); void FreeDeferredStagingBuffer(StagingBufferRef& ref);
void PreCopyBarrier(); void PreCopyBarrier();
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool can_reorder_upload = false);
void PostCopyBarrier(); void PostCopyBarrier();

View File

@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
Refresh(); Refresh();
} }
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore wait_semaphore, u64 host_tick) { VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick) {
if (semaphore) { if (semaphore) {
return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick); return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
host_tick);
} else { } else {
return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick); return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
} }
} }
@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
}; };
VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick) { VkSemaphore wait_semaphore, u64 host_tick) {
const VkSemaphore timeline_semaphore = *semaphore; const VkSemaphore timeline_semaphore = *semaphore;
@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
const std::array signal_values{host_tick, u64(0)}; const std::array signal_values{host_tick, u64(0)};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
const VkTimelineSemaphoreSubmitInfo timeline_si{ const VkTimelineSemaphoreSubmitInfo timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
.waitSemaphoreCount = num_wait_semaphores, .waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait_semaphore, .pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(), .pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1, .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
.pCommandBuffers = cmdbuf.address(), .pCommandBuffers = cmdbuffers.data(),
.signalSemaphoreCount = num_signal_semaphores, .signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(), .pSignalSemaphores = signal_semaphores.data(),
}; };
@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
return device.GetGraphicsQueue().Submit(submit_info); return device.GetGraphicsQueue().Submit(submit_info);
} }
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
VkSemaphore wait_semaphore, u64 host_tick) { vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick) {
const u32 num_signal_semaphores = signal_semaphore ? 1 : 0; const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
const VkSubmitInfo submit_info{ const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = nullptr, .pNext = nullptr,
.waitSemaphoreCount = num_wait_semaphores, .waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait_semaphore, .pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(), .pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1, .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
.pCommandBuffers = cmdbuf.address(), .pCommandBuffers = cmdbuffers.data(),
.signalSemaphoreCount = num_signal_semaphores, .signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = &signal_semaphore, .pSignalSemaphores = &signal_semaphore,
}; };

View File

@ -52,14 +52,16 @@ public:
void Wait(u64 tick); void Wait(u64 tick);
/// Submits the device graphics queue, updating the tick as necessary /// Submits the device graphics queue, updating the tick as necessary
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore wait_semaphore, u64 host_tick); VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);
private: private:
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore wait_semaphore, u64 host_tick); VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, u64 host_tick);
VkSemaphore wait_semaphore, u64 host_tick); VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick);
void WaitThread(std::stop_token token); void WaitThread(std::stop_token token);

View File

@ -22,11 +22,12 @@ namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_WaitForWorker); MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
vk::CommandBuffer upload_cmdbuf) {
auto command = first; auto command = first;
while (command != nullptr) { while (command != nullptr) {
auto next = command->GetNext(); auto next = command->GetNext();
command->Execute(cmdbuf); command->Execute(cmdbuf, upload_cmdbuf);
command->~Command(); command->~Command();
command = next; command = next;
} }
@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
// Perform the work, tracking whether the chunk was a submission // Perform the work, tracking whether the chunk was a submission
// before executing. // before executing.
const bool has_submit = work->HasSubmit(); const bool has_submit = work->HasSubmit();
work->ExecuteAll(current_cmdbuf); work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf);
// If the chunk was a submission, reallocate the command buffer. // If the chunk was a submission, reallocate the command buffer.
if (has_submit) { if (has_submit) {
@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() {
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr, .pInheritanceInfo = nullptr,
}); });
current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
current_upload_cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
} }
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
InvalidateState(); InvalidateState();
const u64 signal_value = master_semaphore->NextTick(); const u64 signal_value = master_semaphore->NextTick();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
upload_cmdbuf.End();
cmdbuf.End(); cmdbuf.End();
if (on_submit) { if (on_submit) {
@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
std::scoped_lock lock{submit_mutex}; std::scoped_lock lock{submit_mutex};
switch (const VkResult result = master_semaphore->SubmitQueue( switch (const VkResult result = master_semaphore->SubmitQueue(
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS: case VK_SUCCESS:
break; break;
case VK_ERROR_DEVICE_LOST: case VK_ERROR_DEVICE_LOST:

View File

@ -80,7 +80,8 @@ public:
/// Send work to a separate thread. /// Send work to a separate thread.
template <typename T> template <typename T>
void Record(T&& command) { requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
void RecordWithUploadBuffer(T&& command) {
if (chunk->Record(command)) { if (chunk->Record(command)) {
return; return;
} }
@ -88,6 +89,15 @@ public:
(void)chunk->Record(command); (void)chunk->Record(command);
} }
template <typename T>
requires std::is_invocable_v<T, vk::CommandBuffer>
void Record(T&& c) {
this->RecordWithUploadBuffer(
[command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
command(cmdbuf);
});
}
/// Returns the current command buffer tick. /// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept { [[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick(); return master_semaphore->CurrentTick();
@ -119,7 +129,7 @@ private:
public: public:
virtual ~Command() = default; virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;
Command* GetNext() const { Command* GetNext() const {
return next; return next;
@ -142,8 +152,8 @@ private:
TypedCommand(TypedCommand&&) = delete; TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete; TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer cmdbuf) const override { void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
command(cmdbuf); command(cmdbuf, upload_cmdbuf);
} }
private: private:
@ -152,7 +162,7 @@ private:
class CommandChunk final { class CommandChunk final {
public: public:
void ExecuteAll(vk::CommandBuffer cmdbuf); void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf);
template <typename T> template <typename T>
bool Record(T& command) { bool Record(T& command) {
@ -228,6 +238,7 @@ private:
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf; vk::CommandBuffer current_cmdbuf;
vk::CommandBuffer current_upload_cmdbuf;
std::unique_ptr<CommandChunk> chunk; std::unique_ptr<CommandChunk> chunk;
std::function<void()> on_submit; std::function<void()> on_submit;

View File

@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) {
UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));
scheduler.Record([&](vk::CommandBuffer& cmdbuf) { scheduler.Record([&](vk::CommandBuffer cmdbuf) {
for (auto& images : m_dynamic_images) { for (auto& images : m_dynamic_images) {
for (size_t i = 0; i < MaxDynamicImage; i++) { for (size_t i = 0; i < MaxDynamicImage; i++) {
ClearColorImage(cmdbuf, *images.images[i]); ClearColorImage(cmdbuf, *images.images[i]);
@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
UpdateDescriptorSets(source_image_view, image_index); UpdateDescriptorSets(source_image_view, image_index);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) { scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,

View File

@ -36,6 +36,10 @@ public:
StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
void FreeDeferred(StagingBufferRef& ref); void FreeDeferred(StagingBufferRef& ref);
[[nodiscard]] VkBuffer StreamBuf() const noexcept {
return *stream_buffer;
}
void TickFrame(); void TickFrame();
private: private:

View File

@ -138,6 +138,10 @@ public:
return Iterator(this, SlotId{SlotId::INVALID_INDEX}); return Iterator(this, SlotId{SlotId::INVALID_INDEX});
} }
[[nodiscard]] size_t size() const noexcept {
return values_capacity - free_list.size();
}
private: private:
struct NonTrivialDummy { struct NonTrivialDummy {
NonTrivialDummy() noexcept {} NonTrivialDummy() noexcept {}

View File

@ -1101,6 +1101,10 @@ public:
return &handle; return &handle;
} }
VkCommandBuffer operator*() const noexcept {
return handle;
}
void Begin(const VkCommandBufferBeginInfo& begin_info) const { void Begin(const VkCommandBufferBeginInfo& begin_info) const {
Check(dld->vkBeginCommandBuffer(handle, &begin_info)); Check(dld->vkBeginCommandBuffer(handle, &begin_info));
} }

View File

@ -252,6 +252,7 @@ file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*)
if (ENABLE_QT_TRANSLATION) if (ENABLE_QT_TRANSLATION)
set(YUZU_QT_LANGUAGES "${PROJECT_SOURCE_DIR}/dist/languages" CACHE PATH "Path to the translation bundle for the Qt frontend") set(YUZU_QT_LANGUAGES "${PROJECT_SOURCE_DIR}/dist/languages" CACHE PATH "Path to the translation bundle for the Qt frontend")
option(GENERATE_QT_TRANSLATION "Generate en.ts as the translation source file" OFF) option(GENERATE_QT_TRANSLATION "Generate en.ts as the translation source file" OFF)
option(WORKAROUND_BROKEN_LUPDATE "Run lupdate directly through CMake if Qt's convenience wrappers don't work" OFF)
# Update source TS file if enabled # Update source TS file if enabled
if (GENERATE_QT_TRANSLATION) if (GENERATE_QT_TRANSLATION)
@ -259,19 +260,51 @@ if (ENABLE_QT_TRANSLATION)
# these calls to qt_create_translation also creates a rule to generate en.qm which conflicts with providing english plurals # these calls to qt_create_translation also creates a rule to generate en.qm which conflicts with providing english plurals
# so we have to set a OUTPUT_LOCATION so that we don't have multiple rules to generate en.qm # so we have to set a OUTPUT_LOCATION so that we don't have multiple rules to generate en.qm
set_source_files_properties(${YUZU_QT_LANGUAGES}/en.ts PROPERTIES OUTPUT_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/translations") set_source_files_properties(${YUZU_QT_LANGUAGES}/en.ts PROPERTIES OUTPUT_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/translations")
qt_create_translation(QM_FILES if (WORKAROUND_BROKEN_LUPDATE)
${SRCS} add_custom_command(OUTPUT ${YUZU_QT_LANGUAGES}/en.ts
${UIS} COMMAND lupdate
${YUZU_QT_LANGUAGES}/en.ts -source-language en_US
OPTIONS -target-language en_US
-source-language en_US ${SRCS}
-target-language en_US ${UIS}
) -ts ${YUZU_QT_LANGUAGES}/en.ts
DEPENDS
${SRCS}
${UIS}
WORKING_DIRECTORY
${CMAKE_CURRENT_SOURCE_DIR}
)
else()
qt_create_translation(QM_FILES
${SRCS}
${UIS}
${YUZU_QT_LANGUAGES}/en.ts
OPTIONS
-source-language en_US
-target-language en_US
)
endif()
# Generate plurals into dist/english_plurals/generated_en.ts so it can be used to revise dist/english_plurals/en.ts # Generate plurals into dist/english_plurals/generated_en.ts so it can be used to revise dist/english_plurals/en.ts
set(GENERATED_PLURALS_FILE ${PROJECT_SOURCE_DIR}/dist/english_plurals/generated_en.ts) set(GENERATED_PLURALS_FILE ${PROJECT_SOURCE_DIR}/dist/english_plurals/generated_en.ts)
set_source_files_properties(${GENERATED_PLURALS_FILE} PROPERTIES OUTPUT_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/plurals") set_source_files_properties(${GENERATED_PLURALS_FILE} PROPERTIES OUTPUT_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/plurals")
qt_create_translation(QM_FILES ${SRCS} ${UIS} ${GENERATED_PLURALS_FILE} OPTIONS -pluralonly -source-language en_US -target-language en_US) if (WORKAROUND_BROKEN_LUPDATE)
add_custom_command(OUTPUT ${GENERATED_PLURALS_FILE}
COMMAND lupdate
-source-language en_US
-target-language en_US
${SRCS}
${UIS}
-ts ${GENERATED_PLURALS_FILE}
DEPENDS
${SRCS}
${UIS}
WORKING_DIRECTORY
${CMAKE_CURRENT_SOURCE_DIR}
)
else()
qt_create_translation(QM_FILES ${SRCS} ${UIS} ${GENERATED_PLURALS_FILE} OPTIONS -pluralonly -source-language en_US -target-language en_US)
endif()
add_custom_target(translation ALL DEPENDS ${YUZU_QT_LANGUAGES}/en.ts ${GENERATED_PLURALS_FILE}) add_custom_target(translation ALL DEPENDS ${YUZU_QT_LANGUAGES}/en.ts ${GENERATED_PLURALS_FILE})
endif() endif()

View File

@ -51,6 +51,8 @@ void ConfigureDebug::SetConfiguration() {
ui->enable_all_controllers->setChecked(Settings::values.enable_all_controllers.GetValue()); ui->enable_all_controllers->setChecked(Settings::values.enable_all_controllers.GetValue());
ui->enable_renderdoc_hotkey->setEnabled(runtime_lock); ui->enable_renderdoc_hotkey->setEnabled(runtime_lock);
ui->enable_renderdoc_hotkey->setChecked(Settings::values.enable_renderdoc_hotkey.GetValue()); ui->enable_renderdoc_hotkey->setChecked(Settings::values.enable_renderdoc_hotkey.GetValue());
ui->disable_buffer_reorder->setEnabled(runtime_lock);
ui->disable_buffer_reorder->setChecked(Settings::values.disable_buffer_reorder.GetValue());
ui->enable_graphics_debugging->setEnabled(runtime_lock); ui->enable_graphics_debugging->setEnabled(runtime_lock);
ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue());
ui->enable_shader_feedback->setEnabled(runtime_lock); ui->enable_shader_feedback->setEnabled(runtime_lock);
@ -96,6 +98,7 @@ void ConfigureDebug::ApplyConfiguration() {
Settings::values.enable_all_controllers = ui->enable_all_controllers->isChecked(); Settings::values.enable_all_controllers = ui->enable_all_controllers->isChecked();
Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
Settings::values.enable_renderdoc_hotkey = ui->enable_renderdoc_hotkey->isChecked(); Settings::values.enable_renderdoc_hotkey = ui->enable_renderdoc_hotkey->isChecked();
Settings::values.disable_buffer_reorder = ui->disable_buffer_reorder->isChecked();
Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked(); Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked();
Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();

View File

@ -271,19 +271,6 @@
</widget> </widget>
</item> </item>
<item row="8" column="0"> <item row="8" column="0">
<widget class="QCheckBox" name="disable_macro_hle">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro HLE functions. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro HLE</string>
</property>
</widget>
</item>
<item row="7" column="0">
<widget class="QCheckBox" name="dump_macros"> <widget class="QCheckBox" name="dump_macros">
<property name="enabled"> <property name="enabled">
<bool>true</bool> <bool>true</bool>
@ -306,59 +293,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0">
<widget class="QCheckBox" name="enable_shader_feedback">
<property name="toolTip">
<string>When checked, yuzu will log statistics about the compiled pipeline cache</string>
</property>
<property name="text">
<string>Enable Shader Feedback</string>
</property>
</widget>
</item>
<item row="6" column="0"> <item row="6" column="0">
<widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro Just In Time compiler. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro JIT</string>
</property>
</widget>
</item>
<item row="9" column="0">
<spacer name="verticalSpacer_5">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Preferred</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="enable_graphics_debugging">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, the graphics API enters a slower debugging mode</string>
</property>
<property name="text">
<string>Enable Graphics Debugging</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="dump_shaders"> <widget class="QCheckBox" name="dump_shaders">
<property name="enabled"> <property name="enabled">
<bool>true</bool> <bool>true</bool>
@ -378,6 +313,81 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="7" column="0">
<widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro Just In Time compiler. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro JIT</string>
</property>
</widget>
</item>
<item row="9" column="0">
<widget class="QCheckBox" name="disable_macro_hle">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro HLE functions. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro HLE</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="enable_graphics_debugging">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, the graphics API enters a slower debugging mode</string>
</property>
<property name="text">
<string>Enable Graphics Debugging</string>
</property>
</widget>
</item>
<item row="10" column="0">
<spacer name="verticalSpacer_5">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Preferred</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="enable_shader_feedback">
<property name="toolTip">
<string>When checked, yuzu will log statistics about the compiled pipeline cache</string>
</property>
<property name="text">
<string>Enable Shader Feedback</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="disable_buffer_reorder">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;When checked, disables reording of mapped memory uploads which allows to associate uploads with specific draws. May reduce performance in some cases.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Disable Buffer Reorder</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>

View File

@ -1,17 +1,18 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/time_zone.h"
#include "yuzu/configuration/shared_translation.h" #include "yuzu/configuration/shared_translation.h"
#include <map> #include <map>
#include <memory> #include <memory>
#include <tuple> #include <tuple>
#include <utility> #include <utility>
#include <QCoreApplication>
#include <QWidget> #include <QWidget>
#include "common/settings.h" #include "common/settings.h"
#include "common/settings_enums.h" #include "common/settings_enums.h"
#include "common/settings_setting.h" #include "common/settings_setting.h"
#include "common/time_zone.h"
#include "yuzu/uisettings.h" #include "yuzu/uisettings.h"
namespace ConfigurationShared { namespace ConfigurationShared {
@ -21,123 +22,135 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent) {
const auto& tr = [parent](const char* text) -> QString { return parent->tr(text); }; const auto& tr = [parent](const char* text) -> QString { return parent->tr(text); };
#define INSERT(SETTINGS, ID, NAME, TOOLTIP) \ #define INSERT(SETTINGS, ID, NAME, TOOLTIP) \
translations->insert(std::pair{SETTINGS::values.ID.Id(), std::pair{tr((NAME)), tr((TOOLTIP))}}) translations->insert(std::pair{SETTINGS::values.ID.Id(), std::pair{(NAME), (TOOLTIP)}})
// A setting can be ignored by giving it a blank name // A setting can be ignored by giving it a blank name
// Audio // Audio
INSERT(Settings, sink_id, "Output Engine:", ""); INSERT(Settings, sink_id, tr("Output Engine:"), QStringLiteral());
INSERT(Settings, audio_output_device_id, "Output Device:", ""); INSERT(Settings, audio_output_device_id, tr("Output Device:"), QStringLiteral());
INSERT(Settings, audio_input_device_id, "Input Device:", ""); INSERT(Settings, audio_input_device_id, tr("Input Device:"), QStringLiteral());
INSERT(Settings, audio_muted, "Mute audio", ""); INSERT(Settings, audio_muted, tr("Mute audio"), QStringLiteral());
INSERT(Settings, volume, "Volume:", ""); INSERT(Settings, volume, tr("Volume:"), QStringLiteral());
INSERT(Settings, dump_audio_commands, "", ""); INSERT(Settings, dump_audio_commands, QStringLiteral(), QStringLiteral());
INSERT(UISettings, mute_when_in_background, "Mute audio when in background", ""); INSERT(UISettings, mute_when_in_background, tr("Mute audio when in background"),
QStringLiteral());
// Core // Core
INSERT(Settings, use_multi_core, "Multicore CPU Emulation", ""); INSERT(Settings, use_multi_core, tr("Multicore CPU Emulation"), QStringLiteral());
INSERT(Settings, memory_layout_mode, "Memory Layout", ""); INSERT(Settings, memory_layout_mode, tr("Memory Layout"), QStringLiteral());
INSERT(Settings, use_speed_limit, "", ""); INSERT(Settings, use_speed_limit, QStringLiteral(), QStringLiteral());
INSERT(Settings, speed_limit, "Limit Speed Percent", ""); INSERT(Settings, speed_limit, tr("Limit Speed Percent"), QStringLiteral());
// Cpu // Cpu
INSERT(Settings, cpu_accuracy, "Accuracy:", ""); INSERT(Settings, cpu_accuracy, tr("Accuracy:"), QStringLiteral());
// Cpu Debug // Cpu Debug
// Cpu Unsafe // Cpu Unsafe
INSERT(Settings, cpuopt_unsafe_unfuse_fma,
"Unfuse FMA (improve performance on CPUs without FMA)",
"This option improves speed by reducing accuracy of fused-multiply-add instructions on "
"CPUs without native FMA support.");
INSERT(Settings, cpuopt_unsafe_reduce_fp_error, "Faster FRSQRTE and FRECPE",
"This option improves the speed of some approximate floating-point functions by using "
"less accurate native approximations.");
INSERT(Settings, cpuopt_unsafe_ignore_standard_fpcr, "Faster ASIMD instructions (32 bits only)",
"This option improves the speed of 32 bits ASIMD floating-point functions by running "
"with incorrect rounding modes.");
INSERT(Settings, cpuopt_unsafe_inaccurate_nan, "Inaccurate NaN handling",
"This option improves speed by removing NaN checking. Please note this also reduces "
"accuracy of certain floating-point instructions.");
INSERT( INSERT(
Settings, cpuopt_unsafe_fastmem_check, "Disable address space checks", Settings, cpuopt_unsafe_unfuse_fma,
"This option improves speed by eliminating a safety check before every memory read/write " tr("Unfuse FMA (improve performance on CPUs without FMA)"),
"in guest. Disabling it may allow a game to read/write the emulator's memory."); tr("This option improves speed by reducing accuracy of fused-multiply-add instructions on "
INSERT(Settings, cpuopt_unsafe_ignore_global_monitor, "Ignore global monitor", "CPUs without native FMA support."));
"This option improves speed by relying only on the semantics of cmpxchg to ensure " INSERT(
Settings, cpuopt_unsafe_reduce_fp_error, tr("Faster FRSQRTE and FRECPE"),
tr("This option improves the speed of some approximate floating-point functions by using "
"less accurate native approximations."));
INSERT(Settings, cpuopt_unsafe_ignore_standard_fpcr,
tr("Faster ASIMD instructions (32 bits only)"),
tr("This option improves the speed of 32 bits ASIMD floating-point functions by running "
"with incorrect rounding modes."));
INSERT(Settings, cpuopt_unsafe_inaccurate_nan, tr("Inaccurate NaN handling"),
tr("This option improves speed by removing NaN checking. Please note this also reduces "
"accuracy of certain floating-point instructions."));
INSERT(Settings, cpuopt_unsafe_fastmem_check, tr("Disable address space checks"),
tr("This option improves speed by eliminating a safety check before every memory "
"read/write "
"in guest. Disabling it may allow a game to read/write the emulator's memory."));
INSERT(
Settings, cpuopt_unsafe_ignore_global_monitor, tr("Ignore global monitor"),
tr("This option improves speed by relying only on the semantics of cmpxchg to ensure "
"safety of exclusive access instructions. Please note this may result in deadlocks and " "safety of exclusive access instructions. Please note this may result in deadlocks and "
"other race conditions."); "other race conditions."));
// Renderer // Renderer
INSERT(Settings, renderer_backend, "API:", ""); INSERT(Settings, renderer_backend, tr("API:"), QStringLiteral());
INSERT(Settings, vulkan_device, "Device:", ""); INSERT(Settings, vulkan_device, tr("Device:"), QStringLiteral());
INSERT(Settings, shader_backend, "Shader Backend:", ""); INSERT(Settings, shader_backend, tr("Shader Backend:"), QStringLiteral());
INSERT(Settings, resolution_setup, "Resolution:", ""); INSERT(Settings, resolution_setup, tr("Resolution:"), QStringLiteral());
INSERT(Settings, scaling_filter, "Window Adapting Filter:", ""); INSERT(Settings, scaling_filter, tr("Window Adapting Filter:"), QStringLiteral());
INSERT(Settings, fsr_sharpening_slider, "FSR Sharpness:", ""); INSERT(Settings, fsr_sharpening_slider, tr("FSR Sharpness:"), QStringLiteral());
INSERT(Settings, anti_aliasing, "Anti-Aliasing Method:", ""); INSERT(Settings, anti_aliasing, tr("Anti-Aliasing Method:"), QStringLiteral());
INSERT(Settings, fullscreen_mode, "Fullscreen Mode:", ""); INSERT(Settings, fullscreen_mode, tr("Fullscreen Mode:"), QStringLiteral());
INSERT(Settings, aspect_ratio, "Aspect Ratio:", ""); INSERT(Settings, aspect_ratio, tr("Aspect Ratio:"), QStringLiteral());
INSERT(Settings, use_disk_shader_cache, "Use disk pipeline cache", ""); INSERT(Settings, use_disk_shader_cache, tr("Use disk pipeline cache"), QStringLiteral());
INSERT(Settings, use_asynchronous_gpu_emulation, "Use asynchronous GPU emulation", ""); INSERT(Settings, use_asynchronous_gpu_emulation, tr("Use asynchronous GPU emulation"),
INSERT(Settings, nvdec_emulation, "NVDEC emulation:", ""); QStringLiteral());
INSERT(Settings, accelerate_astc, "ASTC Decoding Method:", ""); INSERT(Settings, nvdec_emulation, tr("NVDEC emulation:"), QStringLiteral());
INSERT(Settings, astc_recompression, "ASTC Recompression Method:", ""); INSERT(Settings, accelerate_astc, tr("ASTC Decoding Method:"), QStringLiteral());
INSERT(Settings, vsync_mode, "VSync Mode:", INSERT(Settings, astc_recompression, tr("ASTC Recompression Method:"), QStringLiteral());
"FIFO (VSync) does not drop frames or exhibit tearing but is limited by the screen " INSERT(
Settings, vsync_mode, tr("VSync Mode:"),
tr("FIFO (VSync) does not drop frames or exhibit tearing but is limited by the screen "
"refresh rate.\nFIFO Relaxed is similar to FIFO but allows tearing as it recovers from " "refresh rate.\nFIFO Relaxed is similar to FIFO but allows tearing as it recovers from "
"a slow down.\nMailbox can have lower latency than FIFO and does not tear but may drop " "a slow down.\nMailbox can have lower latency than FIFO and does not tear but may drop "
"frames.\nImmediate (no synchronization) just presents whatever is available and can " "frames.\nImmediate (no synchronization) just presents whatever is available and can "
"exhibit tearing."); "exhibit tearing."));
INSERT(Settings, bg_red, "", ""); INSERT(Settings, bg_red, QStringLiteral(), QStringLiteral());
INSERT(Settings, bg_green, "", ""); INSERT(Settings, bg_green, QStringLiteral(), QStringLiteral());
INSERT(Settings, bg_blue, "", ""); INSERT(Settings, bg_blue, QStringLiteral(), QStringLiteral());
// Renderer (Advanced Graphics) // Renderer (Advanced Graphics)
INSERT(Settings, async_presentation, "Enable asynchronous presentation (Vulkan only)", ""); INSERT(Settings, async_presentation, tr("Enable asynchronous presentation (Vulkan only)"),
INSERT(Settings, renderer_force_max_clock, "Force maximum clocks (Vulkan only)", QStringLiteral());
"Runs work in the background while waiting for graphics commands to keep the GPU from " INSERT(
"lowering its clock speed."); Settings, renderer_force_max_clock, tr("Force maximum clocks (Vulkan only)"),
INSERT(Settings, max_anisotropy, "Anisotropic Filtering:", ""); tr("Runs work in the background while waiting for graphics commands to keep the GPU from "
INSERT(Settings, gpu_accuracy, "Accuracy Level:", ""); "lowering its clock speed."));
INSERT(Settings, use_asynchronous_shaders, "Use asynchronous shader building (Hack)", INSERT(Settings, max_anisotropy, tr("Anisotropic Filtering:"), QStringLiteral());
"Enables asynchronous shader compilation, which may reduce shader stutter. This feature " INSERT(Settings, gpu_accuracy, tr("Accuracy Level:"), QStringLiteral());
"is experimental."); INSERT(
INSERT(Settings, use_fast_gpu_time, "Use Fast GPU Time (Hack)", Settings, use_asynchronous_shaders, tr("Use asynchronous shader building (Hack)"),
"Enables Fast GPU Time. This option will force most games to run at their highest " tr("Enables asynchronous shader compilation, which may reduce shader stutter. This feature "
"native resolution."); "is experimental."));
INSERT(Settings, use_vulkan_driver_pipeline_cache, "Use Vulkan pipeline cache", INSERT(Settings, use_fast_gpu_time, tr("Use Fast GPU Time (Hack)"),
"Enables GPU vendor-specific pipeline cache. This option can improve shader loading " tr("Enables Fast GPU Time. This option will force most games to run at their highest "
"time significantly in cases where the Vulkan driver does not store pipeline cache " "native resolution."));
"files internally."); INSERT(Settings, use_vulkan_driver_pipeline_cache, tr("Use Vulkan pipeline cache"),
INSERT(Settings, enable_compute_pipelines, "Enable Compute Pipelines (Intel Vulkan Only)", tr("Enables GPU vendor-specific pipeline cache. This option can improve shader loading "
"Enable compute pipelines, required by some games.\nThis setting only exists for Intel " "time significantly in cases where the Vulkan driver does not store pipeline cache "
"files internally."));
INSERT(
Settings, enable_compute_pipelines, tr("Enable Compute Pipelines (Intel Vulkan Only)"),
tr("Enable compute pipelines, required by some games.\nThis setting only exists for Intel "
"proprietary drivers, and may crash if enabled.\nCompute pipelines are always enabled " "proprietary drivers, and may crash if enabled.\nCompute pipelines are always enabled "
"on all other drivers."); "on all other drivers."));
INSERT(Settings, use_reactive_flushing, "Enable Reactive Flushing", INSERT(
"Uses reactive flushing instead of predictive flushing, allowing more accurate memory " Settings, use_reactive_flushing, tr("Enable Reactive Flushing"),
"syncing."); tr("Uses reactive flushing instead of predictive flushing, allowing more accurate memory "
INSERT(Settings, use_video_framerate, "Sync to framerate of video playback", "syncing."));
"Run the game at normal speed during video playback, even when the framerate is " INSERT(Settings, use_video_framerate, tr("Sync to framerate of video playback"),
"unlocked."); tr("Run the game at normal speed during video playback, even when the framerate is "
INSERT(Settings, barrier_feedback_loops, "Barrier feedback loops", "unlocked."));
"Improves rendering of transparency effects in specific games."); INSERT(Settings, barrier_feedback_loops, tr("Barrier feedback loops"),
tr("Improves rendering of transparency effects in specific games."));
// Renderer (Debug) // Renderer (Debug)
// System // System
INSERT(Settings, rng_seed, "RNG Seed", ""); INSERT(Settings, rng_seed, tr("RNG Seed"), QStringLiteral());
INSERT(Settings, rng_seed_enabled, "", ""); INSERT(Settings, rng_seed_enabled, QStringLiteral(), QStringLiteral());
INSERT(Settings, device_name, "Device Name", ""); INSERT(Settings, device_name, tr("Device Name"), QStringLiteral());
INSERT(Settings, custom_rtc, "Custom RTC", ""); INSERT(Settings, custom_rtc, tr("Custom RTC"), QStringLiteral());
INSERT(Settings, custom_rtc_enabled, "", ""); INSERT(Settings, custom_rtc_enabled, QStringLiteral(), QStringLiteral());
INSERT(Settings, language_index, INSERT(Settings, language_index, tr("Language:"),
"Language:", "Note: this can be overridden when region setting is auto-select"); tr("Note: this can be overridden when region setting is auto-select"));
INSERT(Settings, region_index, "Region:", ""); INSERT(Settings, region_index, tr("Region:"), QStringLiteral());
INSERT(Settings, time_zone_index, "Time Zone:", ""); INSERT(Settings, time_zone_index, tr("Time Zone:"), QStringLiteral());
INSERT(Settings, sound_index, "Sound Output Mode:", ""); INSERT(Settings, sound_index, tr("Sound Output Mode:"), QStringLiteral());
INSERT(Settings, use_docked_mode, "Console Mode:", ""); INSERT(Settings, use_docked_mode, tr("Console Mode:"), QStringLiteral());
INSERT(Settings, current_user, "", ""); INSERT(Settings, current_user, QStringLiteral(), QStringLiteral());
// Controls // Controls
@ -154,11 +167,14 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent) {
// Ui // Ui
// Ui General // Ui General
INSERT(UISettings, select_user_on_boot, "Prompt for user on game boot", ""); INSERT(UISettings, select_user_on_boot, tr("Prompt for user on game boot"), QStringLiteral());
INSERT(UISettings, pause_when_in_background, "Pause emulation when in background", ""); INSERT(UISettings, pause_when_in_background, tr("Pause emulation when in background"),
INSERT(UISettings, confirm_before_stopping, "Confirm before stopping emulation", ""); QStringLiteral());
INSERT(UISettings, hide_mouse, "Hide mouse on inactivity", ""); INSERT(UISettings, confirm_before_stopping, tr("Confirm before stopping emulation"),
INSERT(UISettings, controller_applet_disabled, "Disable controller applet", ""); QStringLiteral());
INSERT(UISettings, hide_mouse, tr("Hide mouse on inactivity"), QStringLiteral());
INSERT(UISettings, controller_applet_disabled, tr("Disable controller applet"),
QStringLiteral());
// Ui Debugging // Ui Debugging
@ -178,140 +194,141 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
return parent->tr(text, context); return parent->tr(text, context);
}; };
#define PAIR(ENUM, VALUE, TRANSLATION) \ #define PAIR(ENUM, VALUE, TRANSLATION) {static_cast<u32>(Settings::ENUM::VALUE), (TRANSLATION)}
{ static_cast<u32>(Settings::ENUM::VALUE), tr(TRANSLATION) }
#define CTX_PAIR(ENUM, VALUE, TRANSLATION, CONTEXT) \
{ static_cast<u32>(Settings::ENUM::VALUE), tr(TRANSLATION, CONTEXT) }
// Intentionally skipping VSyncMode to let the UI fill that one out // Intentionally skipping VSyncMode to let the UI fill that one out
translations->insert({Settings::EnumMetadata<Settings::AstcDecodeMode>::Index(), translations->insert({Settings::EnumMetadata<Settings::AstcDecodeMode>::Index(),
{ {
PAIR(AstcDecodeMode, Cpu, "CPU"), PAIR(AstcDecodeMode, Cpu, tr("CPU")),
PAIR(AstcDecodeMode, Gpu, "GPU"), PAIR(AstcDecodeMode, Gpu, tr("GPU")),
PAIR(AstcDecodeMode, CpuAsynchronous, "CPU Asynchronous"), PAIR(AstcDecodeMode, CpuAsynchronous, tr("CPU Asynchronous")),
}});
translations->insert({Settings::EnumMetadata<Settings::AstcRecompression>::Index(),
{
PAIR(AstcRecompression, Uncompressed, "Uncompressed (Best quality)"),
PAIR(AstcRecompression, Bc1, "BC1 (Low quality)"),
PAIR(AstcRecompression, Bc3, "BC3 (Medium quality)"),
}}); }});
translations->insert(
{Settings::EnumMetadata<Settings::AstcRecompression>::Index(),
{
PAIR(AstcRecompression, Uncompressed, tr("Uncompressed (Best quality)")),
PAIR(AstcRecompression, Bc1, tr("BC1 (Low quality)")),
PAIR(AstcRecompression, Bc3, tr("BC3 (Medium quality)")),
}});
translations->insert({Settings::EnumMetadata<Settings::RendererBackend>::Index(), translations->insert({Settings::EnumMetadata<Settings::RendererBackend>::Index(),
{ {
#ifdef HAS_OPENGL #ifdef HAS_OPENGL
PAIR(RendererBackend, OpenGL, "OpenGL"), PAIR(RendererBackend, OpenGL, tr("OpenGL")),
#endif #endif
PAIR(RendererBackend, Vulkan, "Vulkan"), PAIR(RendererBackend, Vulkan, tr("Vulkan")),
PAIR(RendererBackend, Null, "Null"), PAIR(RendererBackend, Null, tr("Null")),
}});
translations->insert({Settings::EnumMetadata<Settings::ShaderBackend>::Index(),
{
PAIR(ShaderBackend, Glsl, "GLSL"),
PAIR(ShaderBackend, Glasm, "GLASM (Assembly Shaders, NVIDIA Only)"),
PAIR(ShaderBackend, SpirV, "SPIR-V (Experimental, Mesa Only)"),
}}); }});
translations->insert(
{Settings::EnumMetadata<Settings::ShaderBackend>::Index(),
{
PAIR(ShaderBackend, Glsl, tr("GLSL")),
PAIR(ShaderBackend, Glasm, tr("GLASM (Assembly Shaders, NVIDIA Only)")),
PAIR(ShaderBackend, SpirV, tr("SPIR-V (Experimental, Mesa Only)")),
}});
translations->insert({Settings::EnumMetadata<Settings::GpuAccuracy>::Index(), translations->insert({Settings::EnumMetadata<Settings::GpuAccuracy>::Index(),
{ {
PAIR(GpuAccuracy, Normal, "Normal"), PAIR(GpuAccuracy, Normal, tr("Normal")),
PAIR(GpuAccuracy, High, "High"), PAIR(GpuAccuracy, High, tr("High")),
PAIR(GpuAccuracy, Extreme, "Extreme"), PAIR(GpuAccuracy, Extreme, tr("Extreme")),
}});
translations->insert({Settings::EnumMetadata<Settings::CpuAccuracy>::Index(),
{
PAIR(CpuAccuracy, Auto, "Auto"),
PAIR(CpuAccuracy, Accurate, "Accurate"),
PAIR(CpuAccuracy, Unsafe, "Unsafe"),
PAIR(CpuAccuracy, Paranoid, "Paranoid (disables most optimizations)"),
}}); }});
translations->insert(
{Settings::EnumMetadata<Settings::CpuAccuracy>::Index(),
{
PAIR(CpuAccuracy, Auto, tr("Auto")),
PAIR(CpuAccuracy, Accurate, tr("Accurate")),
PAIR(CpuAccuracy, Unsafe, tr("Unsafe")),
PAIR(CpuAccuracy, Paranoid, tr("Paranoid (disables most optimizations)")),
}});
translations->insert({Settings::EnumMetadata<Settings::FullscreenMode>::Index(), translations->insert({Settings::EnumMetadata<Settings::FullscreenMode>::Index(),
{ {
PAIR(FullscreenMode, Borderless, "Borderless Windowed"), PAIR(FullscreenMode, Borderless, tr("Borderless Windowed")),
PAIR(FullscreenMode, Exclusive, "Exclusive Fullscreen"), PAIR(FullscreenMode, Exclusive, tr("Exclusive Fullscreen")),
}}); }});
translations->insert({Settings::EnumMetadata<Settings::NvdecEmulation>::Index(), translations->insert({Settings::EnumMetadata<Settings::NvdecEmulation>::Index(),
{ {
PAIR(NvdecEmulation, Off, "No Video Output"), PAIR(NvdecEmulation, Off, tr("No Video Output")),
PAIR(NvdecEmulation, Cpu, "CPU Video Decoding"), PAIR(NvdecEmulation, Cpu, tr("CPU Video Decoding")),
PAIR(NvdecEmulation, Gpu, "GPU Video Decoding (Default)"), PAIR(NvdecEmulation, Gpu, tr("GPU Video Decoding (Default)")),
}});
translations->insert({Settings::EnumMetadata<Settings::ResolutionSetup>::Index(),
{
PAIR(ResolutionSetup, Res1_2X, "0.5X (360p/540p) [EXPERIMENTAL]"),
PAIR(ResolutionSetup, Res3_4X, "0.75X (540p/810p) [EXPERIMENTAL]"),
PAIR(ResolutionSetup, Res1X, "1X (720p/1080p)"),
PAIR(ResolutionSetup, Res3_2X, "1.5X (1080p/1620p) [EXPERIMENTAL]"),
PAIR(ResolutionSetup, Res2X, "2X (1440p/2160p)"),
PAIR(ResolutionSetup, Res3X, "3X (2160p/3240p)"),
PAIR(ResolutionSetup, Res4X, "4X (2880p/4320p)"),
PAIR(ResolutionSetup, Res5X, "5X (3600p/5400p)"),
PAIR(ResolutionSetup, Res6X, "6X (4320p/6480p)"),
PAIR(ResolutionSetup, Res7X, "7X (5040p/7560p)"),
PAIR(ResolutionSetup, Res8X, "8X (5760p/8640p)"),
}}); }});
translations->insert(
{Settings::EnumMetadata<Settings::ResolutionSetup>::Index(),
{
PAIR(ResolutionSetup, Res1_2X, tr("0.5X (360p/540p) [EXPERIMENTAL]")),
PAIR(ResolutionSetup, Res3_4X, tr("0.75X (540p/810p) [EXPERIMENTAL]")),
PAIR(ResolutionSetup, Res1X, tr("1X (720p/1080p)")),
PAIR(ResolutionSetup, Res3_2X, tr("1.5X (1080p/1620p) [EXPERIMENTAL]")),
PAIR(ResolutionSetup, Res2X, tr("2X (1440p/2160p)")),
PAIR(ResolutionSetup, Res3X, tr("3X (2160p/3240p)")),
PAIR(ResolutionSetup, Res4X, tr("4X (2880p/4320p)")),
PAIR(ResolutionSetup, Res5X, tr("5X (3600p/5400p)")),
PAIR(ResolutionSetup, Res6X, tr("6X (4320p/6480p)")),
PAIR(ResolutionSetup, Res7X, tr("7X (5040p/7560p)")),
PAIR(ResolutionSetup, Res8X, tr("8X (5760p/8640p)")),
}});
translations->insert({Settings::EnumMetadata<Settings::ScalingFilter>::Index(), translations->insert({Settings::EnumMetadata<Settings::ScalingFilter>::Index(),
{ {
PAIR(ScalingFilter, NearestNeighbor, "Nearest Neighbor"), PAIR(ScalingFilter, NearestNeighbor, tr("Nearest Neighbor")),
PAIR(ScalingFilter, Bilinear, "Bilinear"), PAIR(ScalingFilter, Bilinear, tr("Bilinear")),
PAIR(ScalingFilter, Bicubic, "Bicubic"), PAIR(ScalingFilter, Bicubic, tr("Bicubic")),
PAIR(ScalingFilter, Gaussian, "Gaussian"), PAIR(ScalingFilter, Gaussian, tr("Gaussian")),
PAIR(ScalingFilter, ScaleForce, "ScaleForce"), PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")),
PAIR(ScalingFilter, Fsr, "AMD FidelityFX™ Super Resolution"), PAIR(ScalingFilter, Fsr, tr("AMD FidelityFX™ Super Resolution")),
}}); }});
translations->insert({Settings::EnumMetadata<Settings::AntiAliasing>::Index(), translations->insert({Settings::EnumMetadata<Settings::AntiAliasing>::Index(),
{ {
PAIR(AntiAliasing, None, "None"), PAIR(AntiAliasing, None, tr("None")),
PAIR(AntiAliasing, Fxaa, "FXAA"), PAIR(AntiAliasing, Fxaa, tr("FXAA")),
PAIR(AntiAliasing, Smaa, "SMAA"), PAIR(AntiAliasing, Smaa, tr("SMAA")),
}}); }});
translations->insert({Settings::EnumMetadata<Settings::AspectRatio>::Index(), translations->insert({Settings::EnumMetadata<Settings::AspectRatio>::Index(),
{ {
PAIR(AspectRatio, R16_9, "Default (16:9)"), PAIR(AspectRatio, R16_9, tr("Default (16:9)")),
PAIR(AspectRatio, R4_3, "Force 4:3"), PAIR(AspectRatio, R4_3, tr("Force 4:3")),
PAIR(AspectRatio, R21_9, "Force 21:9"), PAIR(AspectRatio, R21_9, tr("Force 21:9")),
PAIR(AspectRatio, R16_10, "Force 16:10"), PAIR(AspectRatio, R16_10, tr("Force 16:10")),
PAIR(AspectRatio, Stretch, "Stretch to Window"), PAIR(AspectRatio, Stretch, tr("Stretch to Window")),
}}); }});
translations->insert({Settings::EnumMetadata<Settings::AnisotropyMode>::Index(), translations->insert({Settings::EnumMetadata<Settings::AnisotropyMode>::Index(),
{ {
PAIR(AnisotropyMode, Automatic, "Automatic"), PAIR(AnisotropyMode, Automatic, tr("Automatic")),
PAIR(AnisotropyMode, Default, "Default"), PAIR(AnisotropyMode, Default, tr("Default")),
PAIR(AnisotropyMode, X2, "2x"), PAIR(AnisotropyMode, X2, tr("2x")),
PAIR(AnisotropyMode, X4, "4x"), PAIR(AnisotropyMode, X4, tr("4x")),
PAIR(AnisotropyMode, X8, "8x"), PAIR(AnisotropyMode, X8, tr("8x")),
PAIR(AnisotropyMode, X16, "16x"), PAIR(AnisotropyMode, X16, tr("16x")),
}}); }});
translations->insert( translations->insert(
{Settings::EnumMetadata<Settings::Language>::Index(), {Settings::EnumMetadata<Settings::Language>::Index(),
{ {
PAIR(Language, Japanese, "Japanese (日本語)"), PAIR(Language, Japanese, tr("Japanese (日本語)")),
PAIR(Language, EnglishAmerican, "American English"), PAIR(Language, EnglishAmerican, tr("American English")),
PAIR(Language, French, "French (français)"), PAIR(Language, French, tr("French (français)")),
PAIR(Language, German, "German (Deutsch)"), PAIR(Language, German, tr("German (Deutsch)")),
PAIR(Language, Italian, "Italian (italiano)"), PAIR(Language, Italian, tr("Italian (italiano)")),
PAIR(Language, Spanish, "Spanish (español)"), PAIR(Language, Spanish, tr("Spanish (español)")),
PAIR(Language, Chinese, "Chinese"), PAIR(Language, Chinese, tr("Chinese")),
PAIR(Language, Korean, "Korean (한국어)"), PAIR(Language, Korean, tr("Korean (한국어)")),
PAIR(Language, Dutch, "Dutch (Nederlands)"), PAIR(Language, Dutch, tr("Dutch (Nederlands)")),
PAIR(Language, Portuguese, "Portuguese (português)"), PAIR(Language, Portuguese, tr("Portuguese (português)")),
PAIR(Language, Russian, "Russian (Русский)"), PAIR(Language, Russian, tr("Russian (Русский)")),
PAIR(Language, Taiwanese, "Taiwanese"), PAIR(Language, Taiwanese, tr("Taiwanese")),
PAIR(Language, EnglishBritish, "British English"), PAIR(Language, EnglishBritish, tr("British English")),
PAIR(Language, FrenchCanadian, "Canadian French"), PAIR(Language, FrenchCanadian, tr("Canadian French")),
PAIR(Language, SpanishLatin, "Latin American Spanish"), PAIR(Language, SpanishLatin, tr("Latin American Spanish")),
PAIR(Language, ChineseSimplified, "Simplified Chinese"), PAIR(Language, ChineseSimplified, tr("Simplified Chinese")),
PAIR(Language, ChineseTraditional, "Traditional Chinese (正體中文)"), PAIR(Language, ChineseTraditional, tr("Traditional Chinese (正體中文)")),
PAIR(Language, PortugueseBrazilian, "Brazilian Portuguese (português do Brasil)"), PAIR(Language, PortugueseBrazilian, tr("Brazilian Portuguese (português do Brasil)")),
}}); }});
translations->insert({Settings::EnumMetadata<Settings::Region>::Index(), translations->insert({Settings::EnumMetadata<Settings::Region>::Index(),
{ {
PAIR(Region, Japan, "Japan"), PAIR(Region, Japan, tr("Japan")),
PAIR(Region, Usa, "USA"), PAIR(Region, Usa, tr("USA")),
PAIR(Region, Europe, "Europe"), PAIR(Region, Europe, tr("Europe")),
PAIR(Region, Australia, "Australia"), PAIR(Region, Australia, tr("Australia")),
PAIR(Region, China, "China"), PAIR(Region, China, tr("China")),
PAIR(Region, Korea, "Korea"), PAIR(Region, Korea, tr("Korea")),
PAIR(Region, Taiwan, "Taiwan"), PAIR(Region, Taiwan, tr("Taiwan")),
}}); }});
translations->insert( translations->insert(
{Settings::EnumMetadata<Settings::TimeZone>::Index(), {Settings::EnumMetadata<Settings::TimeZone>::Index(),
@ -323,72 +340,74 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
{static_cast<u32>(Settings::TimeZone::Default), {static_cast<u32>(Settings::TimeZone::Default),
tr("Default (%1)", "Default time zone") tr("Default (%1)", "Default time zone")
.arg(QString::fromStdString(Common::TimeZone::GetDefaultTimeZone()))}, .arg(QString::fromStdString(Common::TimeZone::GetDefaultTimeZone()))},
PAIR(TimeZone, Cet, "CET"), PAIR(TimeZone, Cet, tr("CET")),
PAIR(TimeZone, Cst6Cdt, "CST6CDT"), PAIR(TimeZone, Cst6Cdt, tr("CST6CDT")),
PAIR(TimeZone, Cuba, "Cuba"), PAIR(TimeZone, Cuba, tr("Cuba")),
PAIR(TimeZone, Eet, "EET"), PAIR(TimeZone, Eet, tr("EET")),
PAIR(TimeZone, Egypt, "Egypt"), PAIR(TimeZone, Egypt, tr("Egypt")),
PAIR(TimeZone, Eire, "Eire"), PAIR(TimeZone, Eire, tr("Eire")),
PAIR(TimeZone, Est, "EST"), PAIR(TimeZone, Est, tr("EST")),
PAIR(TimeZone, Est5Edt, "EST5EDT"), PAIR(TimeZone, Est5Edt, tr("EST5EDT")),
PAIR(TimeZone, Gb, "GB"), PAIR(TimeZone, Gb, tr("GB")),
PAIR(TimeZone, GbEire, "GB-Eire"), PAIR(TimeZone, GbEire, tr("GB-Eire")),
PAIR(TimeZone, Gmt, "GMT"), PAIR(TimeZone, Gmt, tr("GMT")),
PAIR(TimeZone, GmtPlusZero, "GMT+0"), PAIR(TimeZone, GmtPlusZero, tr("GMT+0")),
PAIR(TimeZone, GmtMinusZero, "GMT-0"), PAIR(TimeZone, GmtMinusZero, tr("GMT-0")),
PAIR(TimeZone, GmtZero, "GMT0"), PAIR(TimeZone, GmtZero, tr("GMT0")),
PAIR(TimeZone, Greenwich, "Greenwich"), PAIR(TimeZone, Greenwich, tr("Greenwich")),
PAIR(TimeZone, Hongkong, "Hongkong"), PAIR(TimeZone, Hongkong, tr("Hongkong")),
PAIR(TimeZone, Hst, "HST"), PAIR(TimeZone, Hst, tr("HST")),
PAIR(TimeZone, Iceland, "Iceland"), PAIR(TimeZone, Iceland, tr("Iceland")),
PAIR(TimeZone, Iran, "Iran"), PAIR(TimeZone, Iran, tr("Iran")),
PAIR(TimeZone, Israel, "Israel"), PAIR(TimeZone, Israel, tr("Israel")),
PAIR(TimeZone, Jamaica, "Jamaica"), PAIR(TimeZone, Jamaica, tr("Jamaica")),
PAIR(TimeZone, Japan, "Japan"), PAIR(TimeZone, Japan, tr("Japan")),
PAIR(TimeZone, Kwajalein, "Kwajalein"), PAIR(TimeZone, Kwajalein, tr("Kwajalein")),
PAIR(TimeZone, Libya, "Libya"), PAIR(TimeZone, Libya, tr("Libya")),
PAIR(TimeZone, Met, "MET"), PAIR(TimeZone, Met, tr("MET")),
PAIR(TimeZone, Mst, "MST"), PAIR(TimeZone, Mst, tr("MST")),
PAIR(TimeZone, Mst7Mdt, "MST7MDT"), PAIR(TimeZone, Mst7Mdt, tr("MST7MDT")),
PAIR(TimeZone, Navajo, "Navajo"), PAIR(TimeZone, Navajo, tr("Navajo")),
PAIR(TimeZone, Nz, "NZ"), PAIR(TimeZone, Nz, tr("NZ")),
PAIR(TimeZone, NzChat, "NZ-CHAT"), PAIR(TimeZone, NzChat, tr("NZ-CHAT")),
PAIR(TimeZone, Poland, "Poland"), PAIR(TimeZone, Poland, tr("Poland")),
PAIR(TimeZone, Portugal, "Portugal"), PAIR(TimeZone, Portugal, tr("Portugal")),
PAIR(TimeZone, Prc, "PRC"), PAIR(TimeZone, Prc, tr("PRC")),
PAIR(TimeZone, Pst8Pdt, "PST8PDT"), PAIR(TimeZone, Pst8Pdt, tr("PST8PDT")),
PAIR(TimeZone, Roc, "ROC"), PAIR(TimeZone, Roc, tr("ROC")),
PAIR(TimeZone, Rok, "ROK"), PAIR(TimeZone, Rok, tr("ROK")),
PAIR(TimeZone, Singapore, "Singapore"), PAIR(TimeZone, Singapore, tr("Singapore")),
PAIR(TimeZone, Turkey, "Turkey"), PAIR(TimeZone, Turkey, tr("Turkey")),
PAIR(TimeZone, Uct, "UCT"), PAIR(TimeZone, Uct, tr("UCT")),
PAIR(TimeZone, Universal, "Universal"), PAIR(TimeZone, Universal, tr("Universal")),
PAIR(TimeZone, Utc, "UTC"), PAIR(TimeZone, Utc, tr("UTC")),
PAIR(TimeZone, WSu, "W-SU"), PAIR(TimeZone, WSu, tr("W-SU")),
PAIR(TimeZone, Wet, "WET"), PAIR(TimeZone, Wet, tr("WET")),
PAIR(TimeZone, Zulu, "Zulu"), PAIR(TimeZone, Zulu, tr("Zulu")),
}}); }});
translations->insert({Settings::EnumMetadata<Settings::AudioMode>::Index(), translations->insert({Settings::EnumMetadata<Settings::AudioMode>::Index(),
{ {
PAIR(AudioMode, Mono, "Mono"), PAIR(AudioMode, Mono, tr("Mono")),
PAIR(AudioMode, Stereo, "Stereo"), PAIR(AudioMode, Stereo, tr("Stereo")),
PAIR(AudioMode, Surround, "Surround"), PAIR(AudioMode, Surround, tr("Surround")),
}}); }});
translations->insert({Settings::EnumMetadata<Settings::MemoryLayout>::Index(), translations->insert({Settings::EnumMetadata<Settings::MemoryLayout>::Index(),
{ {
PAIR(MemoryLayout, Memory_4Gb, "4GB DRAM (Default)"), PAIR(MemoryLayout, Memory_4Gb, tr("4GB DRAM (Default)")),
PAIR(MemoryLayout, Memory_6Gb, "6GB DRAM (Unsafe)"), PAIR(MemoryLayout, Memory_6Gb, tr("6GB DRAM (Unsafe)")),
PAIR(MemoryLayout, Memory_8Gb, "8GB DRAM (Unsafe)"), PAIR(MemoryLayout, Memory_8Gb, tr("8GB DRAM (Unsafe)")),
}});
translations->insert({Settings::EnumMetadata<Settings::ConsoleMode>::Index(),
{
PAIR(ConsoleMode, Docked, tr("Docked")),
PAIR(ConsoleMode, Handheld, tr("Handheld")),
}}); }});
translations->insert(
{Settings::EnumMetadata<Settings::ConsoleMode>::Index(),
{PAIR(ConsoleMode, Docked, "Docked"), PAIR(ConsoleMode, Handheld, "Handheld")}});
translations->insert( translations->insert(
{Settings::EnumMetadata<Settings::ConfirmStop>::Index(), {Settings::EnumMetadata<Settings::ConfirmStop>::Index(),
{ {
PAIR(ConfirmStop, Ask_Always, "Always ask (Default)"), PAIR(ConfirmStop, Ask_Always, tr("Always ask (Default)")),
PAIR(ConfirmStop, Ask_Based_On_Game, "Only if game specifies not to stop"), PAIR(ConfirmStop, Ask_Based_On_Game, tr("Only if game specifies not to stop")),
PAIR(ConfirmStop, Ask_Never, "Never ask"), PAIR(ConfirmStop, Ask_Never, tr("Never ask")),
}}); }});
#undef PAIR #undef PAIR

View File

@ -194,7 +194,7 @@ QWidget* Widget::CreateRadioGroup(std::function<std::string()>& serializer,
return group; return group;
} }
const auto get_selected = [=]() -> int { const auto get_selected = [this]() -> int {
for (const auto& [id, button] : radio_buttons) { for (const auto& [id, button] : radio_buttons) {
if (button->isChecked()) { if (button->isChecked()) {
return id; return id;
@ -203,7 +203,7 @@ QWidget* Widget::CreateRadioGroup(std::function<std::string()>& serializer,
return -1; return -1;
}; };
const auto set_index = [=](u32 value) { const auto set_index = [this](u32 value) {
for (const auto& [id, button] : radio_buttons) { for (const auto& [id, button] : radio_buttons) {
button->setChecked(id == value); button->setChecked(id == value);
} }

View File

@ -479,6 +479,6 @@ void GameListWorker::run() {
} }
} }
RecordEvent([=](GameList* game_list) { game_list->DonePopulating(watch_list); }); RecordEvent([this](GameList* game_list) { game_list->DonePopulating(watch_list); });
processing_completed.Set(); processing_completed.Set();
} }