Compare commits

...

15 Commits

Author SHA1 Message Date
e62da0afe1 Android #139 2023-11-23 00:57:23 +00:00
4047e9071f Merge PR 12074 2023-11-23 00:57:23 +00:00
d8f125ab10 Merge PR 11535 2023-11-23 00:57:23 +00:00
91c12db070 Merge pull request #12123 from merryhime/explicit-this
Explicit this
2023-11-21 22:17:42 -05:00
b088a448cd game_list_worker: Explicit caputure of 'this' 2023-11-21 22:57:47 +00:00
c4f6c3b00b shared_widget: Explicit capture of 'this' 2023-11-21 22:57:09 +00:00
cddb28cf26 Merge pull request #12107 from daisymlleung/patch-1
Stub CheckBlockedUserListAvailability for Super Bomberman R 2
2023-11-21 09:19:41 -05:00
538e137bca Merge pull request #12100 from t895/android-translations
translations: Add android translations to transifex config
2023-11-21 09:19:34 -05:00
e69118042f Merge pull request #12045 from liamwhite/codec-refactor
video_core: refactor video frame and packet parsing
2023-11-21 09:19:26 -05:00
a6b8d85b34 Merge pull request #11984 from lat9nq/lupdate
shared_translation: Call tr more directly
2023-11-21 09:19:13 -05:00
0216aa55b4 Stub CheckBlockedUserListAvailability 2023-11-21 01:57:58 +08:00
0298c2cc5d translations: Add android translations to transifex config 2023-11-20 10:37:09 -05:00
4055a476aa video_core: refactor video frame and packet parsing 2023-11-16 17:01:38 -05:00
cb3559539a CMakeLists: Add option to call lupdate directly
qt_create_translation silently fails to run at all on my system. Since
there is no error, I was unable to determine a fix. This sidesteps the
convenience function by setting up the rules ourselves.

This is left as an option since this path likely does not work on
Windows.
2023-11-08 11:54:05 -05:00
71cdfa6ad5 shared_translation: Call tr for each string
Qt can't parse tr called within a macro, so we must call it on each
string.

shared_translation: Remove redundant include
2023-11-08 11:54:01 -05:00
89 changed files with 3673 additions and 947 deletions

View File

@ -3,4 +3,4 @@
[codespell]
skip = ./.git,./build,./dist,./Doxyfile,./externals,./LICENSES,./src/android/app/src/main/res
ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink
ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nce,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink

3
.gitmodules vendored
View File

@ -61,3 +61,6 @@
[submodule "breakpad"]
path = externals/breakpad
url = https://github.com/yuzu-emu/breakpad.git
[submodule "oaknut"]
path = externals/oaknut
url = https://github.com/merryhime/oaknut

View File

@ -1,3 +1,13 @@
| Pull Request | Commit | Title | Author | Merged? |
|----|----|----|----|----|
| [11535](https://github.com/yuzu-emu/yuzu//pull/11535) | [`50bcfa5fb`](https://github.com/yuzu-emu/yuzu//pull/11535/files) | renderer_vulkan: Introduce separate cmd buffer for uploads | [GPUCode](https://github.com/GPUCode/) | Yes |
| [12074](https://github.com/yuzu-emu/yuzu//pull/12074) | [`20b671baa`](https://github.com/yuzu-emu/yuzu//pull/12074/files) | Implement Native Code Execution (NCE) | [GPUCode](https://github.com/GPUCode/) | Yes |
End of merge log. You can find the original README.md below the break.
-----
<!--
SPDX-FileCopyrightText: 2018 yuzu Emulator Project
SPDX-License-Identifier: GPL-2.0-or-later

View File

@ -6,3 +6,8 @@ file_filter = <lang>.ts
source_file = en.ts
source_lang = en
type = QT
[o:yuzu-emulator:p:yuzu:r:yuzu-android]
file_filter = ../../src/android/app/src/main/res/values-<lang>/strings.xml
source_file = ../../src/android/app/src/main/res/values/strings.xml
type = ANDROID

View File

@ -20,6 +20,10 @@ if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak)
endif()
# Dynarmic
if (ARCHITECTURE_arm64 AND NOT TARGET merry::oaknut)
add_subdirectory(oaknut)
endif()
if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic)
set(DYNARMIC_IGNORE_ASSERTS ON)
add_subdirectory(dynarmic)

1
externals/oaknut vendored Submodule

Submodule externals/oaknut added at 316d8869e8

View File

@ -301,6 +301,11 @@ object NativeLibrary {
*/
external fun getPerfStats(): DoubleArray
/**
* Returns the current CPU backend.
*/
external fun getCpuBackend(): String
/**
* Notifies the core emulation that the orientation has changed.
*/

View File

@ -10,6 +10,7 @@ enum class IntSetting(
override val category: Settings.Category,
override val androidDefault: Int? = null
) : AbstractIntSetting {
CPU_BACKEND("cpu_backend", Settings.Category.Cpu),
CPU_ACCURACY("cpu_accuracy", Settings.Category.Cpu),
REGION_INDEX("region_index", Settings.Category.System),
LANGUAGE_INDEX("language_index", Settings.Category.System),

View File

@ -77,6 +77,15 @@ abstract class SettingsItem(
"%"
)
)
put(
SingleChoiceSetting(
IntSetting.CPU_BACKEND,
R.string.cpu_backend,
0,
R.array.cpuBackendNames,
R.array.cpuBackendValues
)
)
put(
SingleChoiceSetting(
IntSetting.CPU_ACCURACY,

View File

@ -269,6 +269,7 @@ class SettingsFragmentPresenter(
add(BooleanSetting.RENDERER_DEBUG.key)
add(HeaderSetting(R.string.cpu))
add(IntSetting.CPU_BACKEND.key)
add(IntSetting.CPU_ACCURACY.key)
add(BooleanSetting.CPU_DEBUG_MODE.key)
add(SettingsItem.FASTMEM_COMBINED)

View File

@ -414,8 +414,10 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback {
perfStatsUpdater = {
if (emulationViewModel.emulationStarted.value) {
val perfStats = NativeLibrary.getPerfStats()
val cpuBackend = NativeLibrary.getCpuBackend()
if (_binding != null) {
binding.showFpsText.text = String.format("FPS: %.1f", perfStats[FPS])
binding.showFpsText.text =
String.format("FPS: %.1f\n%s", perfStats[FPS], cpuBackend)
}
perfStatsUpdateHandler.postDelayed(perfStatsUpdater!!, 800)
}

View File

@ -177,6 +177,7 @@ void Config::ReadValues() {
ReadSetting("Core", Settings::values.memory_layout_mode);
// Cpu
ReadSetting("Cpu", Settings::values.cpu_backend);
ReadSetting("Cpu", Settings::values.cpu_accuracy);
ReadSetting("Cpu", Settings::values.cpu_debug_mode);
ReadSetting("Cpu", Settings::values.cpuopt_page_tables);

View File

@ -153,6 +153,10 @@ use_multi_core =
use_unsafe_extended_memory_layout =
[Cpu]
Selects the preferred CPU backend for executing ARM instructions
# 0 (default): Dynarmic, 1: NCE
cpu_backend =
# Adjusts various optimizations.
# Auto-select mode enables choice unsafe optimizations.
# Accurate enables only safe optimizations.

View File

@ -707,6 +707,14 @@ jdoubleArray Java_org_yuzu_yuzu_1emu_NativeLibrary_getPerfStats(JNIEnv* env, jcl
return j_stats;
}
jstring Java_org_yuzu_yuzu_1emu_NativeLibrary_getCpuBackend(JNIEnv* env, jclass clazz) {
if (Settings::IsNceEnabled()) {
return ToJString(env, "NCE");
}
return ToJString(env, "JIT");
}
void Java_org_yuzu_yuzu_1emu_utils_DirectoryInitialization_setSysDirectory(JNIEnv* env,
jclass clazz,
jstring j_path) {}

View File

@ -175,6 +175,16 @@
<item>2</item>
</integer-array>
<string-array name="cpuBackendNames">
<item>@string/cpu_backend_dynarmic</item>
<item>@string/cpu_backend_nce</item>
</string-array>
<integer-array name="cpuBackendValues">
<item>0</item>
<item>1</item>
</integer-array>
<string-array name="cpuAccuracyNames">
<item>@string/auto</item>
<item>@string/cpu_accuracy_accurate</item>

View File

@ -185,6 +185,7 @@
<string name="frame_limit_enable_description">Limits emulation speed to a specified percentage of normal speed.</string>
<string name="frame_limit_slider">Limit speed percent</string>
<string name="frame_limit_slider_description">Specifies the percentage to limit emulation speed. 100% is the normal speed. Values higher or lower will increase or decrease the speed limit.</string>
<string name="cpu_backend">CPU Backend</string>
<string name="cpu_accuracy">CPU accuracy</string>
<string name="value_with_units">%1$s%2$s</string>
@ -416,6 +417,10 @@
<string name="ratio_force_sixteen_ten">Force 16:10</string>
<string name="ratio_stretch">Stretch to window</string>
<!-- CPU Backend -->
<string name="cpu_backend_dynarmic">Dynarmic (Slow)</string>
<string name="cpu_backend_nce">Native code execution (NCE)</string>
<!-- CPU Accuracy -->
<string name="cpu_accuracy_accurate">Accurate</string>
<string name="cpu_accuracy_unsafe">Unsafe</string>

View File

@ -52,6 +52,7 @@ add_library(common STATIC
fiber.cpp
fiber.h
fixed_point.h
free_region_manager.h
fs/file.cpp
fs/file.h
fs/fs.cpp
@ -166,6 +167,13 @@ if (WIN32)
target_link_libraries(common PRIVATE ntdll)
endif()
if (NOT WIN32)
target_sources(common PRIVATE
signal_chain.cpp
signal_chain.h
)
endif()
if(ANDROID)
target_sources(common
PRIVATE

View File

@ -0,0 +1,55 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <boost/icl/interval_set.hpp>
namespace Common {
class FreeRegionManager {
public:
explicit FreeRegionManager() = default;
~FreeRegionManager() = default;
void SetAddressSpace(void* start, size_t size) {
this->FreeBlock(start, size);
}
std::pair<void*, size_t> FreeBlock(void* block_ptr, size_t size) {
std::scoped_lock lk(m_mutex);
// Check to see if we are adjacent to any regions.
auto start_address = reinterpret_cast<uintptr_t>(block_ptr);
auto end_address = start_address + size;
auto it = m_free_regions.find({start_address - 1, end_address + 1});
// If we are, join with them, ensuring we stay in bounds.
if (it != m_free_regions.end()) {
start_address = std::min(start_address, it->lower());
end_address = std::max(end_address, it->upper());
}
// Free the relevant region.
m_free_regions.insert({start_address, end_address});
// Return the adjusted pointers.
block_ptr = reinterpret_cast<void*>(start_address);
size = end_address - start_address;
return {block_ptr, size};
}
void AllocateBlock(void* block_ptr, size_t size) {
std::scoped_lock lk(m_mutex);
auto address = reinterpret_cast<uintptr_t>(block_ptr);
m_free_regions.subtract({address, address + size});
}
private:
std::mutex m_mutex;
boost::icl::interval_set<uintptr_t> m_free_regions;
};
} // namespace Common

View File

@ -21,15 +21,18 @@
#include <boost/icl/interval_set.hpp>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/random.h>
#include <unistd.h>
#include "common/scope_exit.h"
#endif // ^^^ Linux ^^^
#include <mutex>
#include <random>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/free_region_manager.h"
#include "common/host_memory.h"
#include "common/logging/log.h"
@ -141,7 +144,7 @@ public:
Release();
}
void Map(size_t virtual_offset, size_t host_offset, size_t length) {
void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) {
std::unique_lock lock{placeholder_mutex};
if (!IsNiechePlaceholder(virtual_offset, length)) {
Split(virtual_offset, length);
@ -160,7 +163,7 @@ public:
}
}
void Protect(size_t virtual_offset, size_t length, bool read, bool write) {
void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {
DWORD new_flags{};
if (read && write) {
new_flags = PAGE_READWRITE;
@ -186,6 +189,11 @@ public:
}
}
void EnableDirectMappedAddress() {
// TODO
UNREACHABLE();
}
const size_t backing_size; ///< Size of the backing memory in bytes
const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
@ -353,6 +361,64 @@ private:
#elif defined(__linux__) || defined(__FreeBSD__) // ^^^ Windows ^^^ vvv Linux vvv
#ifdef ARCHITECTURE_arm64
static uint64_t GetRandomU64() {
uint64_t ret;
ASSERT(getrandom(&ret, sizeof(ret), 0) == 0);
return ret;
}
static void* ChooseVirtualBase(size_t virtual_size) {
constexpr uintptr_t Map39BitSize = (1ULL << 39);
constexpr uintptr_t Map36BitSize = (1ULL << 36);
// Seed the MT with some initial strong randomness.
//
// This is not a cryptographic application, we just want something more
// random than the current time.
std::mt19937_64 rng(GetRandomU64());
// We want to ensure we are allocating at an address aligned to the L2 block size.
// For Qualcomm devices, we must also allocate memory above 36 bits.
const size_t lower = Map36BitSize / HugePageSize;
const size_t upper = (Map39BitSize - virtual_size) / HugePageSize;
const size_t range = upper - lower;
// Try up to 64 times to allocate memory at random addresses in the range.
for (int i = 0; i < 64; i++) {
// Calculate a possible location.
uintptr_t hint_address = ((rng() % range) + lower) * HugePageSize;
// Try to map.
// Note: we may be able to take advantage of MAP_FIXED_NOREPLACE here.
void* map_pointer =
mmap(reinterpret_cast<void*>(hint_address), virtual_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
// If we successfully mapped, we're done.
if (reinterpret_cast<uintptr_t>(map_pointer) == hint_address) {
return map_pointer;
}
// Unmap if necessary, and try again.
if (map_pointer != MAP_FAILED) {
munmap(map_pointer, virtual_size);
}
}
return MAP_FAILED;
}
#else
static void* ChooseVirtualBase(size_t virtual_size) {
return mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
}
#endif
class HostMemory::Impl {
public:
explicit Impl(size_t backing_size_, size_t virtual_size_)
@ -415,8 +481,7 @@ public:
}
}
#else
virtual_base = static_cast<u8*>(mmap(nullptr, virtual_size, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0));
virtual_base = virtual_map_base = static_cast<u8*>(ChooseVirtualBase(virtual_size));
if (virtual_base == MAP_FAILED) {
LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
throw std::bad_alloc{};
@ -424,7 +489,7 @@ public:
madvise(virtual_base, virtual_size, MADV_HUGEPAGE);
#endif
placeholders.add({0, virtual_size});
free_manager.SetAddressSpace(virtual_base, virtual_size);
good = true;
}
@ -432,14 +497,29 @@ public:
Release();
}
void Map(size_t virtual_offset, size_t host_offset, size_t length) {
{
std::scoped_lock lock{placeholder_mutex};
placeholders.subtract({virtual_offset, virtual_offset + length});
}
void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) {
// Intersect the range with our address space.
AdjustMap(&virtual_offset, &length);
void* ret = mmap(virtual_base + virtual_offset, length, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, fd, host_offset);
// We are removing a placeholder.
free_manager.AllocateBlock(virtual_base + virtual_offset, length);
// Deduce mapping protection flags.
int flags = PROT_NONE;
if (True(perms & MemoryPermission::Read)) {
flags |= PROT_READ;
}
if (True(perms & MemoryPermission::Write)) {
flags |= PROT_WRITE;
}
#ifdef ARCHITECTURE_arm64
if (True(perms & MemoryPermission::Execute)) {
flags |= PROT_EXEC;
}
#endif
void* ret = mmap(virtual_base + virtual_offset, length, flags, MAP_SHARED | MAP_FIXED, fd,
host_offset);
ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
}
@ -447,47 +527,54 @@ public:
// The method name is wrong. We're still talking about the virtual range.
// We don't want to unmap, we want to reserve this memory.
{
std::scoped_lock lock{placeholder_mutex};
auto it = placeholders.find({virtual_offset - 1, virtual_offset + length + 1});
// Intersect the range with our address space.
AdjustMap(&virtual_offset, &length);
if (it != placeholders.end()) {
size_t prev_upper = virtual_offset + length;
virtual_offset = std::min(virtual_offset, it->lower());
length = std::max(it->upper(), prev_upper) - virtual_offset;
}
// Merge with any adjacent placeholder mappings.
auto [merged_pointer, merged_size] =
free_manager.FreeBlock(virtual_base + virtual_offset, length);
placeholders.add({virtual_offset, virtual_offset + length});
}
void* ret = mmap(virtual_base + virtual_offset, length, PROT_NONE,
void* ret = mmap(merged_pointer, merged_size, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
}
void Protect(size_t virtual_offset, size_t length, bool read, bool write) {
int flags = 0;
void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {
// Intersect the range with our address space.
AdjustMap(&virtual_offset, &length);
int flags = PROT_NONE;
if (read) {
flags |= PROT_READ;
}
if (write) {
flags |= PROT_WRITE;
}
#ifdef ARCHITECTURE_arm64
if (execute) {
flags |= PROT_EXEC;
}
#endif
int ret = mprotect(virtual_base + virtual_offset, length, flags);
ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
}
void EnableDirectMappedAddress() {
virtual_base = nullptr;
}
const size_t backing_size; ///< Size of the backing memory in bytes
const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
u8* backing_base{reinterpret_cast<u8*>(MAP_FAILED)};
u8* virtual_base{reinterpret_cast<u8*>(MAP_FAILED)};
u8* virtual_map_base{reinterpret_cast<u8*>(MAP_FAILED)};
private:
/// Release all resources in the object
void Release() {
if (virtual_base != MAP_FAILED) {
int ret = munmap(virtual_base, virtual_size);
if (virtual_map_base != MAP_FAILED) {
int ret = munmap(virtual_map_base, virtual_size);
ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno));
}
@ -502,10 +589,29 @@ private:
}
}
int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create
void AdjustMap(size_t* virtual_offset, size_t* length) {
if (virtual_base != nullptr) {
return;
}
boost::icl::interval_set<size_t> placeholders; ///< Mapped placeholders
std::mutex placeholder_mutex; ///< Mutex for placeholders
// If we are direct mapped, we want to make sure we are operating on a region
// that is in range of our virtual mapping.
size_t intended_start = *virtual_offset;
size_t intended_end = intended_start + *length;
size_t address_space_start = reinterpret_cast<size_t>(virtual_map_base);
size_t address_space_end = address_space_start + virtual_size;
if (address_space_start > intended_end || intended_start > address_space_end) {
*virtual_offset = 0;
*length = 0;
} else {
*virtual_offset = std::max(intended_start, address_space_start);
*length = std::min(intended_end, address_space_end) - *virtual_offset;
}
}
int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create
FreeRegionManager free_manager{};
};
#else // ^^^ Linux ^^^ vvv Generic vvv
@ -518,11 +624,11 @@ public:
throw std::bad_alloc{};
}
void Map(size_t virtual_offset, size_t host_offset, size_t length) {}
void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm) {}
void Unmap(size_t virtual_offset, size_t length) {}
void Protect(size_t virtual_offset, size_t length, bool read, bool write) {}
void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {}
u8* backing_base{nullptr};
u8* virtual_base{nullptr};
@ -535,15 +641,16 @@ HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_)
try {
// Try to allocate a fastmem arena.
// The implementation will fail with std::bad_alloc on errors.
impl = std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment),
AlignUp(virtual_size, PageAlignment) +
3 * HugePageSize);
impl =
std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment),
AlignUp(virtual_size, PageAlignment) + HugePageSize);
backing_base = impl->backing_base;
virtual_base = impl->virtual_base;
if (virtual_base) {
virtual_base += 2 * HugePageSize - 1;
virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1);
// Ensure the virtual base is aligned to the L2 block size.
virtual_base = reinterpret_cast<u8*>(
Common::AlignUp(reinterpret_cast<uintptr_t>(virtual_base), HugePageSize));
virtual_base_offset = virtual_base - impl->virtual_base;
}
@ -562,7 +669,8 @@ HostMemory::HostMemory(HostMemory&&) noexcept = default;
HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default;
void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) {
void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length,
MemoryPermission perms) {
ASSERT(virtual_offset % PageAlignment == 0);
ASSERT(host_offset % PageAlignment == 0);
ASSERT(length % PageAlignment == 0);
@ -571,7 +679,7 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) {
if (length == 0 || !virtual_base || !impl) {
return;
}
impl->Map(virtual_offset + virtual_base_offset, host_offset, length);
impl->Map(virtual_offset + virtual_base_offset, host_offset, length, perms);
}
void HostMemory::Unmap(size_t virtual_offset, size_t length) {
@ -584,14 +692,22 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length) {
impl->Unmap(virtual_offset + virtual_base_offset, length);
}
void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write) {
void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write,
bool execute) {
ASSERT(virtual_offset % PageAlignment == 0);
ASSERT(length % PageAlignment == 0);
ASSERT(virtual_offset + length <= virtual_size);
if (length == 0 || !virtual_base || !impl) {
return;
}
impl->Protect(virtual_offset + virtual_base_offset, length, read, write);
impl->Protect(virtual_offset + virtual_base_offset, length, read, write, execute);
}
void HostMemory::EnableDirectMappedAddress() {
if (impl) {
impl->EnableDirectMappedAddress();
virtual_size += reinterpret_cast<uintptr_t>(virtual_base);
}
}
} // namespace Common

View File

@ -4,11 +4,20 @@
#pragma once
#include <memory>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/virtual_buffer.h"
namespace Common {
enum class MemoryPermission : u32 {
Read = 1 << 0,
Write = 1 << 1,
ReadWrite = Read | Write,
Execute = 1 << 2,
};
DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission)
/**
* A low level linear memory buffer, which supports multiple mappings
* Its purpose is to rebuild a given sparse memory layout, including mirrors.
@ -31,11 +40,13 @@ public:
HostMemory(HostMemory&& other) noexcept;
HostMemory& operator=(HostMemory&& other) noexcept;
void Map(size_t virtual_offset, size_t host_offset, size_t length);
void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms);
void Unmap(size_t virtual_offset, size_t length);
void Protect(size_t virtual_offset, size_t length, bool read, bool write);
void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute = false);
void EnableDirectMappedAddress();
[[nodiscard]] u8* BackingBasePointer() noexcept {
return backing_base;

View File

@ -41,6 +41,7 @@ SWITCHABLE(AspectRatio, true);
SWITCHABLE(AstcDecodeMode, true);
SWITCHABLE(AstcRecompression, true);
SWITCHABLE(AudioMode, true);
SWITCHABLE(CpuBackend, true);
SWITCHABLE(CpuAccuracy, true);
SWITCHABLE(FullscreenMode, true);
SWITCHABLE(GpuAccuracy, true);
@ -155,6 +156,22 @@ bool IsFastmemEnabled() {
return true;
}
static bool is_nce_enabled = false;
void SetNceEnabled(bool is_39bit) {
const bool is_nce_selected = values.cpu_backend.GetValue() == CpuBackend::Nce;
is_nce_enabled = IsFastmemEnabled() && is_nce_selected && is_39bit;
if (is_nce_selected && !is_nce_enabled) {
LOG_WARNING(
Common,
"Program does not utilize 39-bit address space, unable to natively execute code");
}
}
bool IsNceEnabled() {
return is_nce_enabled;
}
bool IsDockedMode() {
return values.use_docked_mode.GetValue() == Settings::ConsoleMode::Docked;
}

View File

@ -63,6 +63,7 @@ SWITCHABLE(AspectRatio, true);
SWITCHABLE(AstcDecodeMode, true);
SWITCHABLE(AstcRecompression, true);
SWITCHABLE(AudioMode, true);
SWITCHABLE(CpuBackend, true);
SWITCHABLE(CpuAccuracy, true);
SWITCHABLE(FullscreenMode, true);
SWITCHABLE(GpuAccuracy, true);
@ -179,6 +180,14 @@ struct Values {
&use_speed_limit};
// Cpu
SwitchableSetting<CpuBackend, true> cpu_backend{
linkage, CpuBackend::Dynarmic, CpuBackend::Dynarmic,
#ifdef ARCHITECTURE_arm64
CpuBackend::Nce,
#else
CpuBackend::Dynarmic,
#endif
"cpu_backend", Category::Cpu};
SwitchableSetting<CpuAccuracy, true> cpu_accuracy{linkage, CpuAccuracy::Auto,
CpuAccuracy::Auto, CpuAccuracy::Paranoid,
"cpu_accuracy", Category::Cpu};
@ -358,6 +367,8 @@ struct Values {
Category::RendererDebug};
// TODO: remove this once AMDVLK supports VK_EXT_depth_bias_control
bool renderer_amdvlk_depth_bias_workaround{};
Setting<bool> disable_buffer_reorder{linkage, false, "disable_buffer_reorder",
Category::RendererDebug};
// System
SwitchableSetting<Language, true> language_index{linkage,
@ -534,6 +545,8 @@ bool IsGPULevelExtreme();
bool IsGPULevelHigh();
bool IsFastmemEnabled();
void SetNceEnabled(bool is_64bit);
bool IsNceEnabled();
bool IsDockedMode();

View File

@ -129,6 +129,8 @@ ENUM(ShaderBackend, Glsl, Glasm, SpirV);
ENUM(GpuAccuracy, Normal, High, Extreme);
ENUM(CpuBackend, Dynarmic, Nce);
ENUM(CpuAccuracy, Auto, Accurate, Unsafe, Paranoid);
ENUM(MemoryLayout, Memory_4Gb, Memory_6Gb, Memory_8Gb);

View File

@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <dlfcn.h>
#include "common/assert.h"
#include "common/dynamic_library.h"
#include "common/scope_exit.h"
#include "common/signal_chain.h"
namespace Common {
template <typename T>
T* LookupLibcSymbol(const char* name) {
#if defined(__BIONIC__)
Common::DynamicLibrary provider("libc.so");
if (!provider.IsOpen()) {
UNREACHABLE_MSG("Failed to open libc!");
}
#else
// For other operating environments, we assume the symbol is not overridden.
const char* base = nullptr;
Common::DynamicLibrary provider(base);
#endif
void* sym = provider.GetSymbolAddress(name);
if (sym == nullptr) {
sym = dlsym(RTLD_DEFAULT, name);
}
if (sym == nullptr) {
UNREACHABLE_MSG("Unable to find symbol {}!", name);
}
return reinterpret_cast<T*>(sym);
}
int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact) {
static auto libc_sigaction = LookupLibcSymbol<decltype(sigaction)>("sigaction");
return libc_sigaction(signum, act, oldact);
}
} // namespace Common

19
src/common/signal_chain.h Normal file
View File

@ -0,0 +1,19 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#ifndef _WIN32
#include <signal.h>
namespace Common {
// Android's ART overrides sigaction with its own wrapper. This is problematic for SIGSEGV
// in particular, because ART's handler accesses tpidr_el0, which conflicts with NCE.
// This extracts the libc symbol and calls it directly.
int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact);
} // namespace Common
#endif

View File

@ -921,6 +921,23 @@ if (ENABLE_WEB_SERVICE)
target_link_libraries(core PRIVATE web_service)
endif()
if (ARCHITECTURE_arm64 AND (ANDROID OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux"))
target_compile_definitions(core PRIVATE -DHAS_NCE)
enable_language(C ASM)
set(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp")
target_sources(core PRIVATE
arm/nce/arm_nce.cpp
arm/nce/arm_nce.h
arm/nce/arm_nce.s
arm/nce/guest_context.h
arm/nce/patch.cpp
arm/nce/patch.h
arm/nce/instructions.h
)
target_link_libraries(core PRIVATE merry::oaknut)
endif()
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
target_sources(core PRIVATE
arm/dynarmic/arm_dynarmic.h

View File

@ -201,6 +201,8 @@ void ARM_Interface::Run() {
if (True(hr & HaltReason::DataAbort)) {
if (system.DebuggerEnabled()) {
system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint());
} else {
LogBacktrace();
}
current_thread->RequestSuspend(SuspendType::Debug);
break;

View File

@ -81,6 +81,9 @@ public:
// thread context to be 800 bytes in size.
static_assert(sizeof(ThreadContext64) == 0x320);
/// Perform any backend-specific initialization.
virtual void Initialize() {}
/// Runs the CPU until an event happens
void Run();

View File

@ -0,0 +1,400 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cinttypes>
#include <memory>
#include "common/signal_chain.h"
#include "core/arm/nce/arm_nce.h"
#include "core/arm/nce/patch.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/hle/kernel/k_process.h"
#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>
namespace Core {
namespace {
struct sigaction g_orig_action;
// Verify assembly offsets.
using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0NativeContext);
static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
_aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
while (header->magic != FPSIMD_MAGIC) {
header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
}
return reinterpret_cast<fpsimd_context*>(header);
}
} // namespace
void* ARM_NCE::RestoreGuestContext(void* raw_context) {
// Retrieve the host context.
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
// Thread-local parameters will be located in x9.
auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_ctx.regs[9]);
auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);
// Retrieve the host floating point state.
auto* fpctx = GetFloatingPointState(host_ctx);
// Save host callee-saved registers.
std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->vregs[8],
sizeof(guest_ctx->host_ctx.host_saved_vregs));
std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_ctx.regs[19],
sizeof(guest_ctx->host_ctx.host_saved_regs));
// Save stack pointer.
guest_ctx->host_ctx.host_sp = host_ctx.sp;
// Restore all guest state except tpidr_el0.
host_ctx.sp = guest_ctx->sp;
host_ctx.pc = guest_ctx->pc;
host_ctx.pstate = guest_ctx->pstate;
fpctx->fpcr = guest_ctx->fpcr;
fpctx->fpsr = guest_ctx->fpsr;
std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs));
std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs));
// Return the new thread-local storage pointer.
return tpidr;
}
void ARM_NCE::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
// Retrieve the host context.
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
// Retrieve the host floating point state.
auto* fpctx = GetFloatingPointState(host_ctx);
// Save all guest registers except tpidr_el0.
std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs));
std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs));
guest_ctx->fpsr = fpctx->fpsr;
guest_ctx->fpcr = fpctx->fpcr;
guest_ctx->pstate = static_cast<u32>(host_ctx.pstate);
guest_ctx->pc = host_ctx.pc;
guest_ctx->sp = host_ctx.sp;
// Restore stack pointer.
host_ctx.sp = guest_ctx->host_ctx.host_sp;
// Restore host callee-saved registers.
std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(),
sizeof(guest_ctx->host_ctx.host_saved_regs));
std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(),
sizeof(guest_ctx->host_ctx.host_saved_vregs));
// Return from the call on exit by setting pc to x30.
host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11];
// Clear esr_el1 and return it.
host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
}
bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
auto* info = static_cast<siginfo_t*>(raw_info);
// Try to handle an invalid access.
// TODO: handle accesses which split a page?
const Common::ProcessAddress addr =
(reinterpret_cast<u64>(info->si_addr) & ~Memory::YUZU_PAGEMASK);
if (guest_ctx->system->ApplicationMemory().InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) {
// We handled the access successfully and are returning to guest code.
return true;
}
// We can't handle the access, so determine why we crashed.
const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast<u64>(info->si_addr);
// For data aborts, skip the instruction and return to guest code.
// This will allow games to continue in many scenarios where they would otherwise crash.
if (!is_prefetch_abort) {
host_ctx.pc += 4;
return true;
}
// This is a prefetch abort.
guest_ctx->esr_el1.fetch_or(static_cast<u64>(HaltReason::PrefetchAbort));
// Forcibly mark the context as locked. We are still running.
// We may race with SignalInterrupt here:
// - If we lose the race, then SignalInterrupt will send us a signal we are masking,
// and it will do nothing when it is unmasked, as we have already left guest code.
// - If we win the race, then SignalInterrupt will wait for us to unlock first.
auto& thread_params = guest_ctx->parent->running_thread->GetNativeExecutionParameters();
thread_params.lock.store(SpinLockLocked);
// Return to host.
SaveGuestContext(guest_ctx, raw_context);
return false;
}
void ARM_NCE::HandleHostFault(int sig, void* raw_info, void* raw_context) {
return g_orig_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context);
}
HaltReason ARM_NCE::RunJit() {
// Get the thread parameters.
// TODO: pass the current thread down from ::Run
auto* thread = Kernel::GetCurrentThreadPointer(system.Kernel());
auto* thread_params = &thread->GetNativeExecutionParameters();
{
// Lock our core context.
std::scoped_lock lk{lock};
// We should not be running.
ASSERT(running_thread == nullptr);
// Check if we need to run. If we have already been halted, we are done.
u64 halt = guest_ctx.esr_el1.exchange(0);
if (halt != 0) {
return static_cast<HaltReason>(halt);
}
// Mark that we are running.
running_thread = thread;
// Acquire the lock on the thread parameters.
// This allows us to force synchronization with SignalInterrupt.
LockThreadParameters(thread_params);
}
// Assign current members.
guest_ctx.parent = this;
thread_params->native_context = &guest_ctx;
thread_params->tpidr_el0 = guest_ctx.tpidr_el0;
thread_params->tpidrro_el0 = guest_ctx.tpidrro_el0;
thread_params->is_running = true;
HaltReason halt{};
// TODO: finding and creating the post handler needs to be locked
// to deal with dynamic loading of NROs.
const auto& post_handlers = system.ApplicationProcess()->GetPostHandlers();
if (auto it = post_handlers.find(guest_ctx.pc); it != post_handlers.end()) {
halt = ReturnToRunCodeByTrampoline(thread_params, &guest_ctx, it->second);
} else {
halt = ReturnToRunCodeByExceptionLevelChange(thread_id, thread_params);
}
// Unload members.
// The thread does not change, so we can persist the old reference.
guest_ctx.tpidr_el0 = thread_params->tpidr_el0;
thread_params->native_context = nullptr;
thread_params->is_running = false;
// Unlock the thread parameters.
UnlockThreadParameters(thread_params);
{
// Lock the core context.
std::scoped_lock lk{lock};
// On exit, we no longer have an active thread.
running_thread = nullptr;
}
// Return the halt reason.
return halt;
}
HaltReason ARM_NCE::StepJit() {
return HaltReason::StepThread;
}
u32 ARM_NCE::GetSvcNumber() const {
return guest_ctx.svc_swi;
}
ARM_NCE::ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_)
: ARM_Interface{system_, uses_wall_clock_}, core_index{core_index_} {
guest_ctx.system = &system_;
}
ARM_NCE::~ARM_NCE() = default;
void ARM_NCE::Initialize() {
thread_id = gettid();
// Setup our signals
static std::once_flag flag;
std::call_once(flag, [] {
using HandlerType = decltype(sigaction::sa_sigaction);
sigset_t signal_mask;
sigemptyset(&signal_mask);
sigaddset(&signal_mask, ReturnToRunCodeByExceptionLevelChangeSignal);
sigaddset(&signal_mask, BreakFromRunCodeSignal);
sigaddset(&signal_mask, GuestFaultSignal);
struct sigaction return_to_run_code_action {};
return_to_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
return_to_run_code_action.sa_sigaction = reinterpret_cast<HandlerType>(
&ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
return_to_run_code_action.sa_mask = signal_mask;
Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
nullptr);
struct sigaction break_from_run_code_action {};
break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
break_from_run_code_action.sa_sigaction =
reinterpret_cast<HandlerType>(&ARM_NCE::BreakFromRunCodeSignalHandler);
break_from_run_code_action.sa_mask = signal_mask;
Common::SigAction(BreakFromRunCodeSignal, &break_from_run_code_action, nullptr);
struct sigaction fault_action {};
fault_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
fault_action.sa_sigaction =
reinterpret_cast<HandlerType>(&ARM_NCE::GuestFaultSignalHandler);
fault_action.sa_mask = signal_mask;
Common::SigAction(GuestFaultSignal, &fault_action, &g_orig_action);
// Simplify call for g_orig_action.
// These fields occupy the same space in memory, so this should be a no-op in practice.
if (!(g_orig_action.sa_flags & SA_SIGINFO)) {
g_orig_action.sa_sigaction =
reinterpret_cast<decltype(g_orig_action.sa_sigaction)>(g_orig_action.sa_handler);
}
});
}
void ARM_NCE::SetPC(u64 pc) {
guest_ctx.pc = pc;
}
u64 ARM_NCE::GetPC() const {
return guest_ctx.pc;
}
u64 ARM_NCE::GetSP() const {
return guest_ctx.sp;
}
u64 ARM_NCE::GetReg(int index) const {
return guest_ctx.cpu_registers[index];
}
void ARM_NCE::SetReg(int index, u64 value) {
guest_ctx.cpu_registers[index] = value;
}
u128 ARM_NCE::GetVectorReg(int index) const {
return guest_ctx.vector_registers[index];
}
void ARM_NCE::SetVectorReg(int index, u128 value) {
guest_ctx.vector_registers[index] = value;
}
u32 ARM_NCE::GetPSTATE() const {
return guest_ctx.pstate;
}
void ARM_NCE::SetPSTATE(u32 pstate) {
guest_ctx.pstate = pstate;
}
u64 ARM_NCE::GetTlsAddress() const {
return guest_ctx.tpidrro_el0;
}
void ARM_NCE::SetTlsAddress(u64 address) {
guest_ctx.tpidrro_el0 = address;
}
u64 ARM_NCE::GetTPIDR_EL0() const {
return guest_ctx.tpidr_el0;
}
void ARM_NCE::SetTPIDR_EL0(u64 value) {
guest_ctx.tpidr_el0 = value;
}
void ARM_NCE::SaveContext(ThreadContext64& ctx) const {
ctx.cpu_registers = guest_ctx.cpu_registers;
ctx.sp = guest_ctx.sp;
ctx.pc = guest_ctx.pc;
ctx.pstate = guest_ctx.pstate;
ctx.vector_registers = guest_ctx.vector_registers;
ctx.fpcr = guest_ctx.fpcr;
ctx.fpsr = guest_ctx.fpsr;
ctx.tpidr = guest_ctx.tpidr_el0;
}
void ARM_NCE::LoadContext(const ThreadContext64& ctx) {
guest_ctx.cpu_registers = ctx.cpu_registers;
guest_ctx.sp = ctx.sp;
guest_ctx.pc = ctx.pc;
guest_ctx.pstate = ctx.pstate;
guest_ctx.vector_registers = ctx.vector_registers;
guest_ctx.fpcr = ctx.fpcr;
guest_ctx.fpsr = ctx.fpsr;
guest_ctx.tpidr_el0 = ctx.tpidr;
}
void ARM_NCE::SignalInterrupt() {
// Lock core context.
std::scoped_lock lk{lock};
// Add break loop condition.
guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));
// If there is no thread running, we are done.
if (running_thread == nullptr) {
return;
}
// Lock the thread context.
auto* params = &running_thread->GetNativeExecutionParameters();
LockThreadParameters(params);
if (params->is_running) {
// We should signal to the running thread.
// The running thread will unlock the thread context.
syscall(SYS_tkill, thread_id, BreakFromRunCodeSignal);
} else {
// If the thread is no longer running, we have nothing to do.
UnlockThreadParameters(params);
}
}
void ARM_NCE::ClearInterrupt() {
guest_ctx.esr_el1 = {};
}
void ARM_NCE::ClearInstructionCache() {
// TODO: This is not possible to implement correctly on Linux because
// we do not have any access to ic iallu.
// Require accesses to complete.
std::atomic_thread_fence(std::memory_order_seq_cst);
}
void ARM_NCE::InvalidateCacheRange(u64 addr, std::size_t size) {
this->ClearInstructionCache();
}
void ARM_NCE::ClearExclusiveState() {
// No-op.
}
void ARM_NCE::PageTableChanged(Common::PageTable& page_table,
std::size_t new_address_space_size_in_bits) {
// No-op. Page table is never used.
}
} // namespace Core

108
src/core/arm/nce/arm_nce.h Normal file
View File

@ -0,0 +1,108 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <memory>
#include <span>
#include <unordered_map>
#include <vector>
#include "core/arm/arm_interface.h"
#include "core/arm/nce/guest_context.h"
namespace Core::Memory {
class Memory;
}
namespace Core {
class System;
class ARM_NCE final : public ARM_Interface {
public:
ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_);
~ARM_NCE() override;
void Initialize() override;
void SetPC(u64 pc) override;
u64 GetPC() const override;
u64 GetSP() const override;
u64 GetReg(int index) const override;
void SetReg(int index, u64 value) override;
u128 GetVectorReg(int index) const override;
void SetVectorReg(int index, u128 value) override;
u32 GetPSTATE() const override;
void SetPSTATE(u32 pstate) override;
u64 GetTlsAddress() const override;
void SetTlsAddress(u64 address) override;
void SetTPIDR_EL0(u64 value) override;
u64 GetTPIDR_EL0() const override;
Architecture GetArchitecture() const override {
return Architecture::Aarch64;
}
void SaveContext(ThreadContext32& ctx) const override {}
void SaveContext(ThreadContext64& ctx) const override;
void LoadContext(const ThreadContext32& ctx) override {}
void LoadContext(const ThreadContext64& ctx) override;
void SignalInterrupt() override;
void ClearInterrupt() override;
void ClearExclusiveState() override;
void ClearInstructionCache() override;
void InvalidateCacheRange(u64 addr, std::size_t size) override;
void PageTableChanged(Common::PageTable& new_page_table,
std::size_t new_address_space_size_in_bits) override;
protected:
HaltReason RunJit() override;
HaltReason StepJit() override;
u32 GetSvcNumber() const override;
const Kernel::DebugWatchpoint* HaltedWatchpoint() const override {
return nullptr;
}
void RewindBreakpointInstruction() override {}
private:
// Assembly definitions.
static HaltReason ReturnToRunCodeByTrampoline(void* tpidr, GuestContext* ctx,
u64 trampoline_addr);
static HaltReason ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr);
static void ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info,
void* raw_context);
static void BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context);
static void GuestFaultSignalHandler(int sig, void* info, void* raw_context);
static void LockThreadParameters(void* tpidr);
static void UnlockThreadParameters(void* tpidr);
private:
// C++ implementation functions for assembly definitions.
static void* RestoreGuestContext(void* raw_context);
static void SaveGuestContext(GuestContext* ctx, void* raw_context);
static bool HandleGuestFault(GuestContext* ctx, void* info, void* raw_context);
static void HandleHostFault(int sig, void* info, void* raw_context);
public:
// Members set on initialization.
std::size_t core_index{};
pid_t thread_id{-1};
// Core context.
GuestContext guest_ctx;
// Thread and invalidation info.
std::mutex lock;
Kernel::KThread* running_thread{};
};
} // namespace Core

222
src/core/arm/nce/arm_nce.s Normal file
View File

@ -0,0 +1,222 @@
/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "core/arm/nce/arm_nce_asm_definitions.h"
#define LOAD_IMMEDIATE_32(reg, val) \
mov reg, #(((val) >> 0x00) & 0xFFFF); \
movk reg, #(((val) >> 0x10) & 0xFFFF), lsl #16
/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
.section .text._ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
.global _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
.type _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
_ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
/* Back up host sp to x3. */
/* Back up host tpidr_el0 to x4. */
mov x3, sp
mrs x4, tpidr_el0
/* Load guest sp. x5 is used as a scratch register. */
ldr x5, [x1, #(GuestContextSp)]
mov sp, x5
/* Offset GuestContext pointer to the host member. */
add x5, x1, #(GuestContextHostContext)
/* Save original host sp and tpidr_el0 (x3, x4) to host context. */
stp x3, x4, [x5, #(HostContextSpTpidrEl0)]
/* Save all callee-saved host GPRs. */
stp x19, x20, [x5, #(HostContextRegs+0x0)]
stp x21, x22, [x5, #(HostContextRegs+0x10)]
stp x23, x24, [x5, #(HostContextRegs+0x20)]
stp x25, x26, [x5, #(HostContextRegs+0x30)]
stp x27, x28, [x5, #(HostContextRegs+0x40)]
stp x29, x30, [x5, #(HostContextRegs+0x50)]
/* Save all callee-saved host FPRs. */
stp q8, q9, [x5, #(HostContextVregs+0x0)]
stp q10, q11, [x5, #(HostContextVregs+0x20)]
stp q12, q13, [x5, #(HostContextVregs+0x40)]
stp q14, q15, [x5, #(HostContextVregs+0x60)]
/* Load guest tpidr_el0 from argument. */
msr tpidr_el0, x0
/* Tail call the trampoline to restore guest state. */
br x2
/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
.section .text._ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
.global _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv
.type _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
_ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv:
/* This jumps to the signal handler, which will restore the entire context. */
/* On entry, x0 = thread id, which is already in the right place. */
/* Move tpidr to x9 so it is not trampled. */
mov x9, x1
/* Set up arguments. */
mov x8, #(__NR_tkill)
mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal)
/* Tail call the signal handler. */
svc #0
/* Block execution from flowing here. */
brk #1000
/* static void Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
.section .text._ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
stp x29, x30, [sp, #-0x10]!
mov x29, sp
/* Call the context restorer with the raw context. */
mov x0, x2
bl _ZN4Core7ARM_NCE19RestoreGuestContextEPv
/* Save the old value of tpidr_el0. */
mrs x8, tpidr_el0
ldr x9, [x0, #(TpidrEl0NativeContext)]
str x8, [x9, #(GuestContextHostContext + HostContextTpidrEl0)]
/* Set our new tpidr_el0. */
msr tpidr_el0, x0
/* Unlock the context. */
bl _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
/* Returning from here will enter the guest. */
ldp x29, x30, [sp], #0x10
ret
/* static void Core::ARM_NCE::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
.section .text._ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_:
/* Check to see if we have the correct TLS magic. */
mrs x8, tpidr_el0
ldr w9, [x8, #(TpidrEl0TlsMagic)]
LOAD_IMMEDIATE_32(w10, TlsMagic)
cmp w9, w10
b.ne 1f
/* Correct TLS magic, so this is a guest interrupt. */
/* Restore host tpidr_el0. */
ldr x0, [x8, #(TpidrEl0NativeContext)]
ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
msr tpidr_el0, x3
/* Tail call the restorer. */
mov x1, x2
b _ZN4Core7ARM_NCE16SaveGuestContextEPNS_12GuestContextEPv
/* Returning from here will enter host code. */
1:
/* Incorrect TLS magic, so this is a spurious signal. */
ret
/* static void Core::ARM_NCE::GuestFaultSignalHandler(int sig, void* info, void* raw_context) */
.section .text._ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_:
/* Check to see if we have the correct TLS magic. */
mrs x8, tpidr_el0
ldr w9, [x8, #(TpidrEl0TlsMagic)]
LOAD_IMMEDIATE_32(w10, TlsMagic)
cmp w9, w10
b.eq 1f
/* Incorrect TLS magic, so this is a host fault. */
/* Tail call the handler. */
b _ZN4Core7ARM_NCE15HandleHostFaultEiPvS1_
1:
/* Correct TLS magic, so this is a guest fault. */
stp x29, x30, [sp, #-0x20]!
str x19, [sp, #0x10]
mov x29, sp
/* Save the old tpidr_el0. */
mov x19, x8
/* Restore host tpidr_el0. */
ldr x0, [x8, #(TpidrEl0NativeContext)]
ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
msr tpidr_el0, x3
/* Call the handler. */
bl _ZN4Core7ARM_NCE16HandleGuestFaultEPNS_12GuestContextEPvS3_
/* If the handler returned false, we want to preserve the host tpidr_el0. */
cbz x0, 2f
/* Otherwise, restore guest tpidr_el0. */
msr tpidr_el0, x19
2:
ldr x19, [sp, #0x10]
ldp x29, x30, [sp], #0x20
ret
/* static void Core::ARM_NCE::LockThreadParameters(void* tpidr) */
.section .text._ZN4Core7ARM_NCE20LockThreadParametersEPv, "ax", %progbits
.global _ZN4Core7ARM_NCE20LockThreadParametersEPv
.type _ZN4Core7ARM_NCE20LockThreadParametersEPv, %function
_ZN4Core7ARM_NCE20LockThreadParametersEPv:
/* Offset to lock member. */
add x0, x0, #(TpidrEl0Lock)
1:
/* Clear the monitor. */
clrex
2:
/* Load-linked with acquire ordering. */
ldaxr w1, [x0]
/* If the value was SpinLockLocked, clear monitor and retry. */
cbz w1, 1b
/* Store-conditional SpinLockLocked with relaxed ordering. */
stxr w1, wzr, [x0]
/* If we failed to store, retry. */
cbnz w1, 2b
ret
/* static void Core::ARM_NCE::UnlockThreadParameters(void* tpidr) */
.section .text._ZN4Core7ARM_NCE22UnlockThreadParametersEPv, "ax", %progbits
.global _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
.type _ZN4Core7ARM_NCE22UnlockThreadParametersEPv, %function
_ZN4Core7ARM_NCE22UnlockThreadParametersEPv:
/* Offset to lock member. */
add x0, x0, #(TpidrEl0Lock)
/* Load SpinLockUnlocked. */
mov w1, #(SpinLockUnlocked)
/* Store value with release ordering. */
stlr w1, [x0]
ret

View File

@ -0,0 +1,29 @@
/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#define __ASSEMBLY__
#include <asm-generic/signal.h>
#include <asm-generic/unistd.h>
#define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
#define BreakFromRunCodeSignal SIGURG
#define GuestFaultSignal SIGSEGV
#define GuestContextSp 0xF8
#define GuestContextHostContext 0x320
#define HostContextSpTpidrEl0 0xE0
#define HostContextTpidrEl0 0xE8
#define HostContextRegs 0x0
#define HostContextVregs 0x60
#define TpidrEl0NativeContext 0x10
#define TpidrEl0Lock 0x18
#define TpidrEl0TlsMagic 0x20
#define TlsMagic 0x555a5559
#define SpinLockLocked 0
#define SpinLockUnlocked 1

View File

@ -0,0 +1,50 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "core/arm/arm_interface.h"
#include "core/arm/nce/arm_nce_asm_definitions.h"
namespace Core {
class ARM_NCE;
class System;
struct HostContext {
alignas(16) std::array<u64, 12> host_saved_regs{};
alignas(16) std::array<u128, 8> host_saved_vregs{};
u64 host_sp{};
void* host_tpidr_el0{};
};
struct GuestContext {
std::array<u64, 31> cpu_registers{};
u64 sp{};
u64 pc{};
u32 fpcr{};
u32 fpsr{};
std::array<u128, 32> vector_registers{};
u32 pstate{};
alignas(16) HostContext host_ctx{};
u64 tpidrro_el0{};
u64 tpidr_el0{};
std::atomic<u64> esr_el1{};
u32 nzcv{};
u32 svc_swi{};
System* system{};
ARM_NCE* parent{};
};
// Verify assembly offsets.
static_assert(offsetof(GuestContext, sp) == GuestContextSp);
static_assert(offsetof(GuestContext, host_ctx) == GuestContextHostContext);
static_assert(offsetof(HostContext, host_sp) == HostContextSpTpidrEl0);
static_assert(offsetof(HostContext, host_tpidr_el0) - 8 == HostContextSpTpidrEl0);
static_assert(offsetof(HostContext, host_tpidr_el0) == HostContextTpidrEl0);
static_assert(offsetof(HostContext, host_saved_regs) == HostContextRegs);
static_assert(offsetof(HostContext, host_saved_vregs) == HostContextVregs);
} // namespace Core

View File

@ -0,0 +1,147 @@
// SPDX-FileCopyrightText: Copyright © 2020 Skyline Team and Contributors
// SPDX-License-Identifier: MPL-2.0
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Core::NCE {
enum SystemRegister : u32 {
TpidrEl0 = 0x5E82,
TpidrroEl0 = 0x5E83,
CntfrqEl0 = 0x5F00,
CntpctEl0 = 0x5F01,
};
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SVC--Supervisor-Call-
union SVC {
constexpr explicit SVC(u32 raw_) : raw{raw_} {}
constexpr bool Verify() {
return (this->GetSig0() == 0x1 && this->GetSig1() == 0x6A0);
}
constexpr u32 GetSig0() {
return decltype(sig0)::ExtractValue(raw);
}
constexpr u32 GetValue() {
return decltype(value)::ExtractValue(raw);
}
constexpr u32 GetSig1() {
return decltype(sig1)::ExtractValue(raw);
}
u32 raw;
private:
BitField<0, 5, u32> sig0; // 0x1
BitField<5, 16, u32> value; // 16-bit immediate
BitField<21, 11, u32> sig1; // 0x6A0
};
static_assert(sizeof(SVC) == sizeof(u32));
static_assert(SVC(0xD40000C1).Verify());
static_assert(SVC(0xD40000C1).GetValue() == 0x6);
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MRS--Move-System-Register-
union MRS {
constexpr explicit MRS(u32 raw_) : raw{raw_} {}
constexpr bool Verify() {
return (this->GetSig() == 0xD53);
}
constexpr u32 GetRt() {
return decltype(rt)::ExtractValue(raw);
}
constexpr u32 GetSystemReg() {
return decltype(system_reg)::ExtractValue(raw);
}
constexpr u32 GetSig() {
return decltype(sig)::ExtractValue(raw);
}
u32 raw;
private:
BitField<0, 5, u32> rt; // destination register
BitField<5, 15, u32> system_reg; // source system register
BitField<20, 12, u32> sig; // 0xD53
};
static_assert(sizeof(MRS) == sizeof(u32));
static_assert(MRS(0xD53BE020).Verify());
static_assert(MRS(0xD53BE020).GetSystemReg() == CntpctEl0);
static_assert(MRS(0xD53BE020).GetRt() == 0x0);
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-
union MSR {
constexpr explicit MSR(u32 raw_) : raw{raw_} {}
constexpr bool Verify() {
return this->GetSig() == 0xD51;
}
constexpr u32 GetRt() {
return decltype(rt)::ExtractValue(raw);
}
constexpr u32 GetSystemReg() {
return decltype(system_reg)::ExtractValue(raw);
}
constexpr u32 GetSig() {
return decltype(sig)::ExtractValue(raw);
}
u32 raw;
private:
BitField<0, 5, u32> rt; // source register
BitField<5, 15, u32> system_reg; // destination system register
BitField<20, 12, u32> sig; // 0xD51
};
static_assert(sizeof(MSR) == sizeof(u32));
static_assert(MSR(0xD51BD040).Verify());
static_assert(MSR(0xD51BD040).GetSystemReg() == TpidrEl0);
static_assert(MSR(0xD51BD040).GetRt() == 0x0);
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXR--Load-Exclusive-Register-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXR--Store-Exclusive-Register-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers-
union Exclusive {
constexpr explicit Exclusive(u32 raw_) : raw{raw_} {}
constexpr bool Verify() {
return this->GetSig() == 0x10;
}
constexpr u32 GetSig() {
return decltype(sig)::ExtractValue(raw);
}
constexpr u32 AsOrdered() {
return raw | decltype(o0)::FormatValue(1);
}
u32 raw;
private:
BitField<0, 5, u32> rt; // memory operand
BitField<5, 5, u32> rn; // register operand 1
BitField<10, 5, u32> rt2; // register operand 2
BitField<15, 1, u32> o0; // ordered
BitField<16, 5, u32> rs; // status register
BitField<21, 2, u32> l; // operation type
BitField<23, 7, u32> sig; // 0x10
BitField<30, 2, u32> size; // size
};
static_assert(Exclusive(0xC85FFC00).Verify());
static_assert(Exclusive(0xC85FFC00).AsOrdered() == 0xC85FFC00);
static_assert(Exclusive(0xC85F7C00).AsOrdered() == 0xC85FFC00);
static_assert(Exclusive(0xC8200440).AsOrdered() == 0xC8208440);
} // namespace Core::NCE

472
src/core/arm/nce/patch.cpp Normal file
View File

@ -0,0 +1,472 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/arm64/native_clock.h"
#include "common/bit_cast.h"
#include "common/literals.h"
#include "core/arm/nce/arm_nce.h"
#include "core/arm/nce/guest_context.h"
#include "core/arm/nce/instructions.h"
#include "core/arm/nce/patch.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/svc.h"
namespace Core::NCE {
using namespace Common::Literals;
using namespace oaknut::util;
using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
constexpr size_t MaxRelativeBranch = 128_MiB;
constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
Patcher::Patcher() : c(m_patch_instructions) {}
Patcher::~Patcher() = default;
void Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
const Kernel::CodeSet::Segment& code) {
// Write save context helper function.
c.l(m_save_context);
WriteSaveContext();
// Write load context helper function.
c.l(m_load_context);
WriteLoadContext();
// Retrieve text segment data.
const auto text = std::span{program_image}.subspan(code.offset, code.size);
const auto text_words =
std::span<const u32>{reinterpret_cast<const u32*>(text.data()), text.size() / sizeof(u32)};
// Loop through instructions, patching as needed.
for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) {
const u32 inst = text_words[i];
const auto AddRelocations = [&] {
const uintptr_t this_offset = i * sizeof(u32);
const uintptr_t next_offset = this_offset + sizeof(u32);
// Relocate from here to patch.
this->BranchToPatch(this_offset);
// Relocate from patch to next instruction.
return next_offset;
};
// SVC
if (auto svc = SVC{inst}; svc.Verify()) {
WriteSvcTrampoline(AddRelocations(), svc.GetValue());
continue;
}
// MRS Xn, TPIDR_EL0
// MRS Xn, TPIDRRO_EL0
if (auto mrs = MRS{inst};
mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) {
const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0
: oaknut::SystemReg::TPIDR_EL0;
const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())};
WriteMrsHandler(AddRelocations(), dest_reg, src_reg);
continue;
}
// MRS Xn, CNTPCT_EL0
if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) {
WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast<int>(mrs.GetRt())});
continue;
}
// MRS Xn, CNTFRQ_EL0
if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) {
UNREACHABLE();
}
// MSR TPIDR_EL0, Xn
if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) {
WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast<int>(msr.GetRt())});
continue;
}
}
// Determine patching mode for the final relocation step
const size_t image_size = program_image.size();
this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData;
}
void Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
const Kernel::CodeSet::Segment& code,
Kernel::PhysicalMemory& program_image,
EntryTrampolines* out_trampolines) {
const size_t patch_size = GetSectionSize();
const size_t image_size = program_image.size();
// Retrieve text segment data.
const auto text = std::span{program_image}.subspan(code.offset, code.size);
const auto text_words =
std::span<u32>{reinterpret_cast<u32*>(text.data()), text.size() / sizeof(u32)};
const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) {
oaknut::CodeGenerator rc{target};
if (mode == PatchMode::PreText) {
rc.B(rel.patch_offset - patch_size - rel.module_offset);
} else {
rc.B(image_size - rel.module_offset + rel.patch_offset);
}
};
const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) {
oaknut::CodeGenerator rc{target};
if (mode == PatchMode::PreText) {
rc.B(patch_size - rel.patch_offset + rel.module_offset);
} else {
rc.B(rel.module_offset - image_size - rel.patch_offset);
}
};
const auto RebasePatch = [&](ptrdiff_t patch_offset) {
if (mode == PatchMode::PreText) {
return GetInteger(load_base) + patch_offset;
} else {
return GetInteger(load_base) + image_size + patch_offset;
}
};
const auto RebasePc = [&](uintptr_t module_offset) {
if (mode == PatchMode::PreText) {
return GetInteger(load_base) + patch_size + module_offset;
} else {
return GetInteger(load_base) + module_offset;
}
};
// We are now ready to relocate!
for (const Relocation& rel : m_branch_to_patch_relocations) {
ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel);
}
for (const Relocation& rel : m_branch_to_module_relocations) {
ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32),
rel);
}
// Rewrite PC constants and record post trampolines
for (const Relocation& rel : m_write_module_pc_relocations) {
oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
rc.dx(RebasePc(rel.module_offset));
}
for (const Trampoline& rel : m_trampolines) {
out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)});
}
// Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not.
// Convert to ordered to preserve this assumption.
for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) {
const u32 inst = text_words[i];
if (auto exclusive = Exclusive{inst}; exclusive.Verify()) {
text_words[i] = exclusive.AsOrdered();
}
}
// Copy to program image
if (this->mode == PatchMode::PreText) {
std::memcpy(program_image.data(), m_patch_instructions.data(),
m_patch_instructions.size() * sizeof(u32));
} else {
program_image.resize(image_size + patch_size);
std::memcpy(program_image.data() + image_size, m_patch_instructions.data(),
m_patch_instructions.size() * sizeof(u32));
}
}
size_t Patcher::GetSectionSize() const noexcept {
return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE);
}
void Patcher::WriteLoadContext() {
// This function was called, which modifies X30, so use that as a scratch register.
// SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes
// of stack.
c.STR(X30, SP, 8);
c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
// Load system registers.
c.LDR(W0, X30, offsetof(GuestContext, fpsr));
c.MSR(oaknut::SystemReg::FPSR, X0);
c.LDR(W0, X30, offsetof(GuestContext, fpcr));
c.MSR(oaknut::SystemReg::FPCR, X0);
c.LDR(W0, X30, offsetof(GuestContext, nzcv));
c.MSR(oaknut::SystemReg::NZCV, X0);
// Load all vector registers.
static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
for (int i = 0; i <= 30; i += 2) {
c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
}
// Load all general-purpose registers except X30.
for (int i = 0; i <= 28; i += 2) {
c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
}
// Reload our return X30 from the stack and return.
// The patch code will reload the guest X30 for us.
c.LDR(X30, SP, 8);
c.RET();
}
void Patcher::WriteSaveContext() {
// This function was called, which modifies X30, so use that as a scratch register.
// SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of
// stack.
c.STR(X30, SP, 8);
c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
// Store all general-purpose registers except X30.
for (int i = 0; i <= 28; i += 2) {
c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
}
// Store all vector registers.
static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
for (int i = 0; i <= 30; i += 2) {
c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
}
// Store guest system registers, X30 and SP, using X0 as a scratch register.
c.STR(X0, SP, PRE_INDEXED, -16);
c.LDR(X0, SP, 16);
c.STR(X0, X30, 8 * 30);
c.ADD(X0, SP, 32);
c.STR(X0, X30, offsetof(GuestContext, sp));
c.MRS(X0, oaknut::SystemReg::FPSR);
c.STR(W0, X30, offsetof(GuestContext, fpsr));
c.MRS(X0, oaknut::SystemReg::FPCR);
c.STR(W0, X30, offsetof(GuestContext, fpcr));
c.MRS(X0, oaknut::SystemReg::NZCV);
c.STR(W0, X30, offsetof(GuestContext, nzcv));
c.LDR(X0, SP, POST_INDEXED, 16);
// Reload our return X30 from the stack, and return.
c.LDR(X30, SP, 8);
c.RET();
}
void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
// We are about to start saving state, so we need to lock the context.
this->LockContext();
// Store guest X30 to the stack. Then, save the context and restore the stack.
// This will save all registers except PC, but we know PC at patch time.
c.STR(X30, SP, PRE_INDEXED, -16);
c.BL(m_save_context);
c.LDR(X30, SP, POST_INDEXED, 16);
// Now that we've saved all registers, we can use any registers as scratch.
// Store PC + 4 to arm interface, since we know the instruction offset from the entry point.
oaknut::Label pc_after_svc;
c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
c.LDR(X2, pc_after_svc);
c.STR(X2, X1, offsetof(GuestContext, pc));
// Store SVC number to execute when we return
c.MOV(X2, svc_id);
c.STR(W2, X1, offsetof(GuestContext, svc_swi));
// We are calling a SVC. Clear esr_el1 and return it.
static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
oaknut::Label retry;
c.ADD(X2, X1, offsetof(GuestContext, esr_el1));
c.l(retry);
c.LDAXR(X0, X2);
c.STLXR(W3, XZR, X2);
c.CBNZ(W3, retry);
// Add "calling SVC" flag. Since this is X0, this is now our return value.
c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall));
// Offset the GuestContext pointer to the HostContext member.
// STP has limited range of [-512, 504] which we can't reach otherwise
// NB: Due to this all offsets below are from the start of HostContext.
c.ADD(X1, X1, offsetof(GuestContext, host_ctx));
// Reload host TPIDR_EL0 and SP.
static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
c.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
c.MOV(SP, X2);
c.MSR(oaknut::SystemReg::TPIDR_EL0, X3);
// Load callee-saved host registers and return to host.
static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
c.LDP(X19, X20, X1, HOST_REGS_OFF);
c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
c.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
c.RET();
// Write the post-SVC trampoline address, which will jump back to the guest after restoring its
// state.
m_trampolines.push_back({c.offset(), module_dest});
// Host called this location. Save the return address so we can
// unwind the stack properly when jumping back.
c.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
c.ADD(X0, X2, offsetof(GuestContext, host_ctx));
c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));
// Reload all guest registers except X30 and PC.
// The function also expects 16 bytes of stack already allocated.
c.STR(X30, SP, PRE_INDEXED, -16);
c.BL(m_load_context);
c.LDR(X30, SP, POST_INDEXED, 16);
// Use X1 as a scratch register to restore X30.
c.STR(X1, SP, PRE_INDEXED, -16);
c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
c.LDR(X1, SP, POST_INDEXED, 16);
// Unlock the context.
this->UnlockContext();
// Jump back to the instruction after the emulated SVC.
this->BranchToModule(module_dest);
// Store PC after call.
c.l(pc_after_svc);
this->WriteModulePc(module_dest);
}
void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
oaknut::SystemReg src_reg) {
// Retrieve emulated TLS register from GuestContext.
c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0);
if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) {
c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0));
} else {
c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0));
}
// Jump back to the instruction after the emulated MRS.
this->BranchToModule(module_dest);
}
void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) {
const auto scratch_reg = src_reg.index() == 0 ? X1 : X0;
c.STR(scratch_reg, SP, PRE_INDEXED, -16);
// Save guest value to NativeExecutionParameters::tpidr_el0.
c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0);
c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0));
// Restore scratch register.
c.LDR(scratch_reg, SP, POST_INDEXED, 16);
// Jump back to the instruction after the emulated MSR.
this->BranchToModule(module_dest);
}
void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) {
static Common::Arm64::NativeClock clock{};
const auto factor = clock.GetGuestCNTFRQFactor();
const auto raw_factor = Common::BitCast<std::array<u64, 2>>(factor);
const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1;
oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0;
oaknut::XReg scratch1 = use_x2_x3 ? X3 : X1;
oaknut::Label factorlo;
oaknut::Label factorhi;
// Save scratches.
c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16);
// Load counter value.
c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0);
// Load scaling factor.
c.LDR(scratch0, factorlo);
c.LDR(scratch1, factorhi);
// Multiply low bits and get result.
c.UMULH(scratch0, dest_reg, scratch0);
// Multiply high bits and add low bit result.
c.MADD(dest_reg, dest_reg, scratch1, scratch0);
// Reload scratches.
c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16);
// Jump back to the instruction after the emulated MRS.
this->BranchToModule(module_dest);
// Scaling factor constant values.
c.l(factorlo);
c.dx(raw_factor[0]);
c.l(factorhi);
c.dx(raw_factor[1]);
}
void Patcher::LockContext() {
oaknut::Label retry;
// Save scratches.
c.STP(X0, X1, SP, PRE_INDEXED, -16);
// Reload lock pointer.
c.l(retry);
c.CLREX();
c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
static_assert(SpinLockLocked == 0);
// Load-linked with acquire ordering.
c.LDAXR(W1, X0);
// If the value was SpinLockLocked, clear monitor and retry.
c.CBZ(W1, retry);
// Store-conditional SpinLockLocked with relaxed ordering.
c.STXR(W1, WZR, X0);
// If we failed to store, retry.
c.CBNZ(W1, retry);
// We succeeded! Reload scratches.
c.LDP(X0, X1, SP, POST_INDEXED, 16);
}
void Patcher::UnlockContext() {
// Save scratches.
c.STP(X0, X1, SP, PRE_INDEXED, -16);
// Load lock pointer.
c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
// Load SpinLockUnlocked.
c.MOV(W1, SpinLockUnlocked);
// Store value with release ordering.
c.STLR(W1, X0);
// Load scratches.
c.LDP(X0, X1, SP, POST_INDEXED, 16);
}
} // namespace Core::NCE

101
src/core/arm/nce/patch.h Normal file
View File

@ -0,0 +1,101 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <unordered_map>
#include <vector>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
#include <oaknut/code_block.hpp>
#include <oaknut/oaknut.hpp>
#pragma GCC diagnostic pop
#include "common/common_types.h"
#include "core/hle/kernel/code_set.h"
#include "core/hle/kernel/k_typed_address.h"
#include "core/hle/kernel/physical_memory.h"
namespace Core::NCE {
enum class PatchMode : u32 {
None,
PreText, ///< Patch section is inserted before .text
PostData, ///< Patch section is inserted after .data
};
using ModuleTextAddress = u64;
using PatchTextAddress = u64;
using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>;
class Patcher {
public:
explicit Patcher();
~Patcher();
void PatchText(const Kernel::PhysicalMemory& program_image,
const Kernel::CodeSet::Segment& code);
void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,
Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines);
size_t GetSectionSize() const noexcept;
[[nodiscard]] PatchMode GetPatchMode() const noexcept {
return mode;
}
private:
using ModuleDestLabel = uintptr_t;
struct Trampoline {
ptrdiff_t patch_offset;
uintptr_t module_offset;
};
void WriteLoadContext();
void WriteSaveContext();
void LockContext();
void UnlockContext();
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id);
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
oaknut::SystemReg src_reg);
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg);
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg);
private:
void BranchToPatch(uintptr_t module_dest) {
m_branch_to_patch_relocations.push_back({c.offset(), module_dest});
}
void BranchToModule(uintptr_t module_dest) {
m_branch_to_module_relocations.push_back({c.offset(), module_dest});
c.dw(0);
}
void WriteModulePc(uintptr_t module_dest) {
m_write_module_pc_relocations.push_back({c.offset(), module_dest});
c.dx(0);
}
private:
// List of patch instructions we have generated.
std::vector<u32> m_patch_instructions{};
// Relocation type for relative branch from module to patch.
struct Relocation {
ptrdiff_t patch_offset; ///< Offset in bytes from the start of the patch section.
uintptr_t module_offset; ///< Offset in bytes from the start of the text section.
};
oaknut::VectorCodeGenerator c;
std::vector<Trampoline> m_trampolines;
std::vector<Relocation> m_branch_to_patch_relocations{};
std::vector<Relocation> m_branch_to_module_relocations{};
std::vector<Relocation> m_write_module_pc_relocations{};
oaknut::Label m_save_context{};
oaknut::Label m_load_context{};
PatchMode mode{PatchMode::None};
};
} // namespace Core::NCE

View File

@ -211,6 +211,8 @@ void CpuManager::RunThread(std::stop_token token, std::size_t core) {
system.GPU().ObtainContext();
}
system.ArmInterface(core).Initialize();
auto& kernel = system.Kernel();
auto& scheduler = *kernel.CurrentScheduler();
auto* thread = scheduler.GetSchedulerCurrentThread();

View File

@ -6,7 +6,7 @@
namespace Core {
#ifdef ANDROID
#ifdef HAS_NCE
constexpr size_t VirtualReserveSize = 1ULL << 38;
#else
constexpr size_t VirtualReserveSize = 1ULL << 39;
@ -15,6 +15,7 @@ constexpr size_t VirtualReserveSize = 1ULL << 39;
DeviceMemory::DeviceMemory()
: buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(),
VirtualReserveSize} {}
DeviceMemory::~DeviceMemory() = default;
} // namespace Core

View File

@ -75,12 +75,26 @@ struct CodeSet final {
return segments[2];
}
#ifdef HAS_NCE
Segment& PatchSegment() {
return patch_segment;
}
const Segment& PatchSegment() const {
return patch_segment;
}
#endif
/// The overall data that backs this code set.
Kernel::PhysicalMemory memory;
/// The segments that comprise this code set.
std::array<Segment, 3> segments;
#ifdef HAS_NCE
Segment patch_segment;
#endif
/// The entry point address for this code set.
KProcessAddress entrypoint = 0;
};

View File

@ -25,8 +25,8 @@ constexpr std::array<KAddressSpaceInfo, 13> AddressSpaceInfos{{
{ .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, },
{ .bit_width = 36, .address = Size_Invalid, .size = 8_GiB , .type = KAddressSpaceInfo::Type::Heap, },
{ .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, },
#ifdef ANDROID
// With Android, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region.
#ifdef HAS_NCE
// With NCE, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region.
{ .bit_width = 39, .address = 128_MiB , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, },
#else
{ .bit_width = 39, .address = 128_MiB , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, },

View File

@ -88,6 +88,22 @@ Result FlushDataCache(AddressType addr, u64 size) {
R_SUCCEED();
}
constexpr Common::MemoryPermission ConvertToMemoryPermission(KMemoryPermission perm) {
Common::MemoryPermission perms{};
if (True(perm & KMemoryPermission::UserRead)) {
perms |= Common::MemoryPermission::Read;
}
if (True(perm & KMemoryPermission::UserWrite)) {
perms |= Common::MemoryPermission::Write;
}
#ifdef HAS_NCE
if (True(perm & KMemoryPermission::UserExecute)) {
perms |= Common::MemoryPermission::Execute;
}
#endif
return perms;
}
} // namespace
void KPageTableBase::MemoryRange::Open() {
@ -170,7 +186,8 @@ Result KPageTableBase::InitializeForProcess(Svc::CreateProcessFlag as_type, bool
KMemoryManager::Pool pool, KProcessAddress code_address,
size_t code_size, KSystemResource* system_resource,
KResourceLimit* resource_limit,
Core::Memory::Memory& memory) {
Core::Memory::Memory& memory,
KProcessAddress aslr_space_start) {
// Calculate region extents.
const size_t as_width = GetAddressSpaceWidth(as_type);
const KProcessAddress start = 0;
@ -211,7 +228,8 @@ Result KPageTableBase::InitializeForProcess(Svc::CreateProcessFlag as_type, bool
heap_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Heap);
stack_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Stack);
kernel_map_region_size = GetSpaceSize(KAddressSpaceInfo::Type::MapSmall);
m_code_region_start = GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit);
m_code_region_start = m_address_space_start + aslr_space_start +
GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit);
m_code_region_end = m_code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::Map39Bit);
m_alias_code_region_start = m_code_region_start;
m_alias_code_region_end = m_code_region_end;
@ -5643,7 +5661,8 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a
case OperationType::Map: {
ASSERT(virt_addr != 0);
ASSERT(Common::IsAligned(GetInteger(virt_addr), PageSize));
m_memory->MapMemoryRegion(*m_impl, virt_addr, num_pages * PageSize, phys_addr);
m_memory->MapMemoryRegion(*m_impl, virt_addr, num_pages * PageSize, phys_addr,
ConvertToMemoryPermission(properties.perm));
// Open references to pages, if we should.
if (this->IsHeapPhysicalAddress(phys_addr)) {
@ -5658,8 +5677,18 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a
}
case OperationType::ChangePermissions:
case OperationType::ChangePermissionsAndRefresh:
case OperationType::ChangePermissionsAndRefreshAndFlush:
case OperationType::ChangePermissionsAndRefreshAndFlush: {
const bool read = True(properties.perm & Kernel::KMemoryPermission::UserRead);
const bool write = True(properties.perm & Kernel::KMemoryPermission::UserWrite);
// todo: this doesn't really belong here and should go into m_memory to handle rasterizer
// access todo: ignore exec on non-direct-mapped case
const bool exec = True(properties.perm & Kernel::KMemoryPermission::UserExecute);
if (Settings::IsFastmemEnabled()) {
m_system.DeviceMemory().buffer.Protect(GetInteger(virt_addr), num_pages * PageSize,
read, write, exec);
}
R_SUCCEED();
}
default:
UNREACHABLE();
}
@ -5687,7 +5716,8 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a
const size_t size{node.GetNumPages() * PageSize};
// Map the pages.
m_memory->MapMemoryRegion(*m_impl, virt_addr, size, node.GetAddress());
m_memory->MapMemoryRegion(*m_impl, virt_addr, size, node.GetAddress(),
ConvertToMemoryPermission(properties.perm));
virt_addr += size;
}

View File

@ -235,7 +235,8 @@ public:
bool enable_device_address_space_merge, bool from_back,
KMemoryManager::Pool pool, KProcessAddress code_address,
size_t code_size, KSystemResource* system_resource,
KResourceLimit* resource_limit, Core::Memory::Memory& memory);
KResourceLimit* resource_limit, Core::Memory::Memory& memory,
KProcessAddress aslr_space_start);
void Finalize();

View File

@ -300,7 +300,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
params.code_address, params.code_num_pages * PageSize,
m_system_resource, res_limit, this->GetMemory()));
m_system_resource, res_limit, this->GetMemory(), 0));
}
ON_RESULT_FAILURE_2 {
m_page_table.Finalize();
@ -332,7 +332,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
std::span<const u32> user_caps, KResourceLimit* res_limit,
KMemoryManager::Pool pool) {
KMemoryManager::Pool pool, KProcessAddress aslr_space_start) {
ASSERT(res_limit != nullptr);
// Set members.
@ -393,7 +393,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
params.code_address, code_size, m_system_resource, res_limit,
this->GetMemory()));
this->GetMemory(), aslr_space_start));
}
ON_RESULT_FAILURE_2 {
m_page_table.Finalize();
@ -1128,7 +1128,7 @@ KProcess::KProcess(KernelCore& kernel)
KProcess::~KProcess() = default;
Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
bool is_hbl) {
KProcessAddress aslr_space_start, bool is_hbl) {
// Create a resource limit for the process.
const auto physical_memory_size =
m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application);
@ -1179,7 +1179,7 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
.name = {},
.version = {},
.program_id = metadata.GetTitleID(),
.code_address = code_address,
.code_address = code_address + GetInteger(aslr_space_start),
.code_num_pages = static_cast<s32>(code_size / PageSize),
.flags = flag,
.reslimit = Svc::InvalidHandle,
@ -1193,7 +1193,7 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
// Initialize for application process.
R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit,
KMemoryManager::Pool::Application));
KMemoryManager::Pool::Application, aslr_space_start));
// Assign remaining properties.
m_is_hbl = is_hbl;
@ -1214,6 +1214,17 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute);
ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read);
ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite);
#ifdef ARCHITECTURE_arm64
if (Settings::IsNceEnabled()) {
auto& buffer = m_kernel.System().DeviceMemory().buffer;
const auto& code = code_set.CodeSegment();
const auto& patch = code_set.PatchSegment();
buffer.Protect(GetInteger(base_addr + code.addr), code.size, true, true, true);
buffer.Protect(GetInteger(base_addr + patch.addr), patch.size, true, true, true);
ReprotectSegment(code_set.PatchSegment(), Svc::MemoryPermission::None);
}
#endif
}
bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) {

View File

@ -120,6 +120,9 @@ private:
std::atomic<s64> m_num_ipc_messages{};
std::atomic<s64> m_num_ipc_replies{};
std::atomic<s64> m_num_ipc_receives{};
#ifdef HAS_NCE
std::unordered_map<u64, u64> m_post_handlers{};
#endif
private:
Result StartTermination();
@ -150,7 +153,8 @@ public:
std::span<const u32> caps, KResourceLimit* res_limit,
KMemoryManager::Pool pool, bool immortal);
Result Initialize(const Svc::CreateProcessParameter& params, std::span<const u32> user_caps,
KResourceLimit* res_limit, KMemoryManager::Pool pool);
KResourceLimit* res_limit, KMemoryManager::Pool pool,
KProcessAddress aslr_space_start);
void Exit();
const char* GetName() const {
@ -466,6 +470,12 @@ public:
static void Switch(KProcess* cur_process, KProcess* next_process);
#ifdef HAS_NCE
std::unordered_map<u64, u64>& GetPostHandlers() noexcept {
return m_post_handlers;
}
#endif
public:
// Attempts to insert a watchpoint into a free slot. Returns false if none are available.
bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type);
@ -479,7 +489,7 @@ public:
public:
Result LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
bool is_hbl);
KProcessAddress aslr_space_start, bool is_hbl);
void LoadModule(CodeSet code_set, KProcessAddress base_addr);

View File

@ -23,10 +23,11 @@ public:
Result Initialize(Svc::CreateProcessFlag as_type, bool enable_aslr, bool enable_das_merge,
bool from_back, KMemoryManager::Pool pool, KProcessAddress code_address,
size_t code_size, KSystemResource* system_resource,
KResourceLimit* resource_limit, Core::Memory::Memory& memory) {
R_RETURN(m_page_table.InitializeForProcess(as_type, enable_aslr, enable_das_merge,
from_back, pool, code_address, code_size,
system_resource, resource_limit, memory));
KResourceLimit* resource_limit, Core::Memory::Memory& memory,
KProcessAddress aslr_space_start) {
R_RETURN(m_page_table.InitializeForProcess(
as_type, enable_aslr, enable_das_merge, from_back, pool, code_address, code_size,
system_resource, resource_limit, memory, aslr_space_start));
}
void Finalize() {

View File

@ -655,6 +655,21 @@ public:
return m_stack_top;
}
public:
// TODO: This shouldn't be defined in kernel namespace
struct NativeExecutionParameters {
u64 tpidr_el0{};
u64 tpidrro_el0{};
void* native_context{};
std::atomic<u32> lock{1};
bool is_running{};
u32 magic{Common::MakeMagic('Y', 'U', 'Z', 'U')};
};
NativeExecutionParameters& GetNativeExecutionParameters() {
return m_native_execution_parameters;
}
private:
KThread* RemoveWaiterByKey(bool* out_has_waiters, KProcessAddress key,
bool is_kernel_address_key);
@ -914,6 +929,7 @@ private:
ThreadWaitReasonForDebugging m_wait_reason_for_debugging{};
uintptr_t m_argument{};
KProcessAddress m_stack_top{};
NativeExecutionParameters m_native_execution_parameters{};
public:
using ConditionVariableThreadTreeType = ConditionVariableThreadTree;

View File

@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "core/arm/dynarmic/arm_dynarmic_32.h"
#include "core/arm/dynarmic/arm_dynarmic_64.h"
#ifdef HAS_NCE
#include "core/arm/nce/arm_nce.h"
#endif
#include "core/core.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/kernel.h"
@ -14,7 +18,8 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu
: m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} {
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
// TODO(bunnei): Initialization relies on a core being available. We may later replace this with
// a 32-bit instance of Dynarmic. This should be abstracted out to a CPU manager.
// an NCE interface or a 32-bit instance of Dynarmic. This should be abstracted out to a CPU
// manager.
auto& kernel = system.Kernel();
m_arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
system, kernel.IsMulticore(),
@ -28,6 +33,13 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu
PhysicalCore::~PhysicalCore() = default;
void PhysicalCore::Initialize(bool is_64_bit) {
#if defined(HAS_NCE)
if (Settings::IsNceEnabled()) {
m_arm_interface = std::make_unique<Core::ARM_NCE>(m_system, m_system.Kernel().IsMulticore(),
m_core_index);
return;
}
#endif
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
auto& kernel = m_system.Kernel();
if (!is_64_bit) {

View File

@ -32,7 +32,7 @@ public:
{10200, nullptr, "SendFriendRequestForApplication"},
{10211, nullptr, "AddFacedFriendRequestForApplication"},
{10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
{10420, nullptr, "IsBlockedUserListCacheAvailable"},
{10420, &IFriendService::CheckBlockedUserListAvailability, "CheckBlockedUserListAvailability"},
{10421, nullptr, "EnsureBlockedUserListAvailable"},
{10500, nullptr, "GetProfileList"},
{10600, nullptr, "DeclareOpenOnlinePlaySession"},
@ -206,6 +206,17 @@ private:
rb.Push(true);
}
void CheckBlockedUserListAvailability(HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto uuid{rp.PopRaw<Common::UUID>()};
LOG_WARNING(Service_Friend, "(STUBBED) called, uuid=0x{}", uuid.RawString());
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
rb.Push(true);
}
KernelHelpers::ServiceContext service_context;
Kernel::KEvent* completion_event;

View File

@ -3,6 +3,7 @@
#include <cstring>
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/file_sys/content_archive.h"
#include "core/file_sys/control_metadata.h"
@ -14,6 +15,10 @@
#include "core/loader/deconstructed_rom_directory.h"
#include "core/loader/nso.h"
#ifdef HAS_NCE
#include "core/arm/nce/patch.h"
#endif
namespace Loader {
AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_,
@ -124,21 +129,43 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
}
metadata.Print();
const auto static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2",
// Enable NCE only for programs with 39-bit address space.
const bool is_39bit =
metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is39Bit;
Settings::SetNceEnabled(is_39bit);
const std::array static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2",
"subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7",
"subsdk8", "subsdk9", "sdk"};
// Use the NSO module loader to figure out the code layout
std::size_t code_size{};
for (const auto& module : static_modules) {
// Define an nce patch context for each potential module.
#ifdef HAS_NCE
std::array<Core::NCE::Patcher, 13> module_patchers;
#endif
const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* {
#ifdef HAS_NCE
if (Settings::IsNceEnabled()) {
return &module_patchers[i];
}
#endif
return nullptr;
};
// Use the NSO module loader to figure out the code layout
for (size_t i = 0; i < static_modules.size(); i++) {
const auto& module = static_modules[i];
const FileSys::VirtualFile module_file{dir->GetFile(module)};
if (!module_file) {
continue;
}
const bool should_pass_arguments = std::strcmp(module, "rtld") == 0;
const auto tentative_next_load_addr = AppLoader_NSO::LoadModule(
process, system, *module_file, code_size, should_pass_arguments, false);
const auto tentative_next_load_addr =
AppLoader_NSO::LoadModule(process, system, *module_file, code_size,
should_pass_arguments, false, {}, GetPatcher(i));
if (!tentative_next_load_addr) {
return {ResultStatus::ErrorLoadingNSO, {}};
}
@ -146,8 +173,18 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
code_size = *tentative_next_load_addr;
}
// Enable direct memory mapping in case of NCE.
const u64 fastmem_base = [&]() -> size_t {
if (Settings::IsNceEnabled()) {
auto& buffer = system.DeviceMemory().buffer;
buffer.EnableDirectMappedAddress();
return reinterpret_cast<u64>(buffer.VirtualBasePointer());
}
return 0;
}();
// Setup the process code layout
if (process.LoadFromMetadata(metadata, code_size, is_hbl).IsError()) {
if (process.LoadFromMetadata(metadata, code_size, fastmem_base, is_hbl).IsError()) {
return {ResultStatus::ErrorUnableToParseKernelMetadata, {}};
}
@ -157,7 +194,8 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
VAddr next_load_addr{base_address};
const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(),
system.GetContentProvider()};
for (const auto& module : static_modules) {
for (size_t i = 0; i < static_modules.size(); i++) {
const auto& module = static_modules[i];
const FileSys::VirtualFile module_file{dir->GetFile(module)};
if (!module_file) {
continue;
@ -165,15 +203,16 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
const VAddr load_addr{next_load_addr};
const bool should_pass_arguments = std::strcmp(module, "rtld") == 0;
const auto tentative_next_load_addr = AppLoader_NSO::LoadModule(
process, system, *module_file, load_addr, should_pass_arguments, true, pm);
const auto tentative_next_load_addr =
AppLoader_NSO::LoadModule(process, system, *module_file, load_addr,
should_pass_arguments, true, pm, GetPatcher(i));
if (!tentative_next_load_addr) {
return {ResultStatus::ErrorLoadingNSO, {}};
}
next_load_addr = *tentative_next_load_addr;
modules.insert_or_assign(load_addr, module);
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr);
LOG_DEBUG(Loader, "loaded module {} @ {:#X}", module, load_addr);
}
// Find the RomFS by searching for a ".romfs" file in this directory

View File

@ -91,7 +91,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::KProcess& process,
// Setup the process code layout
if (process
.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), false)
.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), 0,
false)
.IsError()) {
return {ResultStatus::ErrorNotInitialized, {}};
}

View File

@ -22,6 +22,10 @@
#include "core/loader/nso.h"
#include "core/memory.h"
#ifdef HAS_NCE
#include "core/arm/nce/patch.h"
#endif
namespace Loader {
struct NroSegmentHeader {
@ -139,7 +143,8 @@ static constexpr u32 PageAlignSize(u32 size) {
return static_cast<u32>((size + Core::Memory::YUZU_PAGEMASK) & ~Core::Memory::YUZU_PAGEMASK);
}
static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data) {
static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process,
const std::vector<u8>& data) {
if (data.size() < sizeof(NroHeader)) {
return {};
}
@ -194,14 +199,61 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
codeset.DataSegment().size += bss_size;
program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
size_t image_size = program_image.size();
#ifdef HAS_NCE
const auto& code = codeset.CodeSegment();
// NROs always have a 39-bit address space.
Settings::SetNceEnabled(true);
// Create NCE patcher
Core::NCE::Patcher patch{};
if (Settings::IsNceEnabled()) {
// Patch SVCs and MRS calls in the guest code
patch.PatchText(program_image, code);
// We only support PostData patching for NROs.
ASSERT(patch.GetPatchMode() == Core::NCE::PatchMode::PostData);
// Update patch section.
auto& patch_segment = codeset.PatchSegment();
patch_segment.addr = image_size;
patch_segment.size = static_cast<u32>(patch.GetSectionSize());
// Add patch section size to the module size.
image_size += patch_segment.size;
}
#endif
// Enable direct memory mapping in case of NCE.
const u64 fastmem_base = [&]() -> size_t {
if (Settings::IsNceEnabled()) {
auto& buffer = system.DeviceMemory().buffer;
buffer.EnableDirectMappedAddress();
return reinterpret_cast<u64>(buffer.VirtualBasePointer());
}
return 0;
}();
// Setup the process code layout
if (process
.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), false)
.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), image_size, fastmem_base,
false)
.IsError()) {
return false;
}
// Relocate code patch and copy to the program_image if running under NCE.
// This needs to be after LoadFromMetadata so we can use the process entry point.
#ifdef HAS_NCE
if (Settings::IsNceEnabled()) {
patch.RelocateAndCopy(process.GetEntryPoint(), code, program_image,
&process.GetPostHandlers());
}
#endif
// Load codeset for current process
codeset.memory = std::move(program_image);
process.LoadModule(std::move(codeset), process.GetEntryPoint());
@ -209,8 +261,9 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
return true;
}
bool AppLoader_NRO::LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file) {
return LoadNroImpl(process, nro_file.ReadAllBytes());
bool AppLoader_NRO::LoadNro(Core::System& system, Kernel::KProcess& process,
const FileSys::VfsFile& nro_file) {
return LoadNroImpl(system, process, nro_file.ReadAllBytes());
}
AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::System& system) {
@ -218,7 +271,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::S
return {ResultStatus::ErrorAlreadyLoaded, {}};
}
if (!LoadNro(process, *file)) {
if (!LoadNro(system, process, *file)) {
return {ResultStatus::ErrorLoadingNRO, {}};
}

View File

@ -54,7 +54,7 @@ public:
bool IsRomFSUpdatable() const override;
private:
bool LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file);
bool LoadNro(Core::System& system, Kernel::KProcess& process, const FileSys::VfsFile& nro_file);
std::vector<u8> icon_data;
std::unique_ptr<FileSys::NACP> nacp;

View File

@ -20,6 +20,10 @@
#include "core/loader/nso.h"
#include "core/memory.h"
#ifdef HAS_NCE
#include "core/arm/nce/patch.h"
#endif
namespace Loader {
namespace {
struct MODHeader {
@ -72,7 +76,8 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& in_file) {
std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::System& system,
const FileSys::VfsFile& nso_file, VAddr load_base,
bool should_pass_arguments, bool load_into_process,
std::optional<FileSys::PatchManager> pm) {
std::optional<FileSys::PatchManager> pm,
Core::NCE::Patcher* patch) {
if (nso_file.GetSize() < sizeof(NSOHeader)) {
return std::nullopt;
}
@ -86,6 +91,16 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
return std::nullopt;
}
// Allocate some space at the beginning if we are patching in PreText mode.
const size_t module_start = [&]() -> size_t {
#ifdef HAS_NCE
if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PreText) {
return patch->GetSectionSize();
}
#endif
return 0;
}();
// Build program image
Kernel::CodeSet codeset;
Kernel::PhysicalMemory program_image;
@ -95,11 +110,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
if (nso_header.IsSegmentCompressed(i)) {
data = DecompressSegment(data, nso_header.segments[i]);
}
program_image.resize(nso_header.segments[i].location + static_cast<u32>(data.size()));
std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(),
data.size());
codeset.segments[i].addr = nso_header.segments[i].location;
codeset.segments[i].offset = nso_header.segments[i].location;
program_image.resize(module_start + nso_header.segments[i].location +
static_cast<u32>(data.size()));
std::memcpy(program_image.data() + module_start + nso_header.segments[i].location,
data.data(), data.size());
codeset.segments[i].addr = module_start + nso_header.segments[i].location;
codeset.segments[i].offset = module_start + nso_header.segments[i].location;
codeset.segments[i].size = nso_header.segments[i].size;
}
@ -118,7 +134,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
}
codeset.DataSegment().size += nso_header.segments[2].bss_size;
const u32 image_size{
u32 image_size{
PageAlignSize(static_cast<u32>(program_image.size()) + nso_header.segments[2].bss_size)};
program_image.resize(image_size);
@ -129,16 +145,45 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
// Apply patches if necessary
const auto name = nso_file.GetName();
if (pm && (pm->HasNSOPatch(nso_header.build_id, name) || Settings::values.dump_nso)) {
std::vector<u8> pi_header(sizeof(NSOHeader) + program_image.size());
std::span<u8> patchable_section(program_image.data() + module_start,
program_image.size() - module_start);
std::vector<u8> pi_header(sizeof(NSOHeader) + patchable_section.size());
std::memcpy(pi_header.data(), &nso_header, sizeof(NSOHeader));
std::memcpy(pi_header.data() + sizeof(NSOHeader), program_image.data(),
program_image.size());
std::memcpy(pi_header.data() + sizeof(NSOHeader), patchable_section.data(),
patchable_section.size());
pi_header = pm->PatchNSO(pi_header, name);
std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), patchable_section.data());
}
#ifdef HAS_NCE
// If we are computing the process code layout and using nce backend, patch.
const auto& code = codeset.CodeSegment();
if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::None) {
// Patch SVCs and MRS calls in the guest code
patch->PatchText(program_image, code);
// Add patch section size to the module size.
image_size += patch->GetSectionSize();
} else if (patch) {
// Relocate code patch and copy to the program_image.
patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers());
// Update patch section.
auto& patch_segment = codeset.PatchSegment();
patch_segment.addr =
patch->GetPatchMode() == Core::NCE::PatchMode::PreText ? 0 : image_size;
patch_segment.size = static_cast<u32>(patch->GetSectionSize());
// Add patch section size to the module size. In PreText mode image_size
// already contains the patch segment as part of module_start.
if (patch->GetPatchMode() == Core::NCE::PatchMode::PostData) {
image_size += patch_segment.size;
}
}
#endif
// If we aren't actually loading (i.e. just computing the process code layout), we are done
if (!load_into_process) {
return load_base + image_size;

View File

@ -15,6 +15,10 @@ namespace Core {
class System;
}
namespace Core::NCE {
class Patcher;
}
namespace Kernel {
class KProcess;
}
@ -88,7 +92,8 @@ public:
static std::optional<VAddr> LoadModule(Kernel::KProcess& process, Core::System& system,
const FileSys::VfsFile& nso_file, VAddr load_base,
bool should_pass_arguments, bool load_into_process,
std::optional<FileSys::PatchManager> pm = {});
std::optional<FileSys::PatchManager> pm = {},
Core::NCE::Patcher* patch = nullptr);
LoadResult Load(Kernel::KProcess& process, Core::System& system) override;

View File

@ -53,7 +53,7 @@ struct Memory::Impl {
}
void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
Common::PhysicalAddress target) {
Common::PhysicalAddress target, Common::MemoryPermission perms) {
ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", GetInteger(base));
ASSERT_MSG(target >= DramMemoryMap::Base, "Out of bounds target: {:016X}",
@ -63,7 +63,7 @@ struct Memory::Impl {
if (Settings::IsFastmemEnabled()) {
system.DeviceMemory().buffer.Map(GetInteger(base),
GetInteger(target) - DramMemoryMap::Base, size);
GetInteger(target) - DramMemoryMap::Base, size, perms);
}
}
@ -831,8 +831,8 @@ void Memory::SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) {
}
void Memory::MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
Common::PhysicalAddress target) {
impl->MapMemoryRegion(page_table, base, size, target);
Common::PhysicalAddress target, Common::MemoryPermission perms) {
impl->MapMemoryRegion(page_table, base, size, target, perms);
}
void Memory::UnmapRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size) {
@ -1001,4 +1001,17 @@ void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
impl->FlushRegion(dest_addr, size);
}
bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
bool mapped = true;
u8* const ptr = impl->GetPointerImpl(
GetInteger(vaddr),
[&] {
LOG_ERROR(HW_Memory, "Unmapped InvalidateNCE for {} bytes @ {:#x}", size,
GetInteger(vaddr));
mapped = false;
},
[&] { impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); });
return mapped && ptr != nullptr;
}
} // namespace Core::Memory

View File

@ -15,8 +15,9 @@
#include "core/hle/result.h"
namespace Common {
enum class MemoryPermission : u32;
struct PageTable;
}
} // namespace Common
namespace Core {
class System;
@ -82,9 +83,10 @@ public:
* @param size The amount of bytes to map. Must be page-aligned.
* @param target Buffer with the memory backing the mapping. Must be of length at least
* `size`.
* @param perms The permissions to map the memory with.
*/
void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
Common::PhysicalAddress target);
Common::PhysicalAddress target, Common::MemoryPermission perms);
/**
* Unmaps a region of the emulated process address space.
@ -472,6 +474,7 @@ public:
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
private:

View File

@ -11,6 +11,7 @@ using namespace Common::Literals;
static constexpr size_t VIRTUAL_SIZE = 1ULL << 39;
static constexpr size_t BACKING_SIZE = 4_GiB;
static constexpr auto PERMS = Common::MemoryPermission::ReadWrite;
TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") {
{ HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); }
@ -19,7 +20,7 @@ TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") {
TEST_CASE("HostMemory: Simple map", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x5000, 0x8000, 0x1000);
mem.Map(0x5000, 0x8000, 0x1000, PERMS);
volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
data[0] = 50;
@ -28,8 +29,8 @@ TEST_CASE("HostMemory: Simple map", "[common]") {
TEST_CASE("HostMemory: Simple mirror map", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x5000, 0x3000, 0x2000);
mem.Map(0x8000, 0x4000, 0x1000);
mem.Map(0x5000, 0x3000, 0x2000, PERMS);
mem.Map(0x8000, 0x4000, 0x1000, PERMS);
volatile u8* const mirror_a = mem.VirtualBasePointer() + 0x5000;
volatile u8* const mirror_b = mem.VirtualBasePointer() + 0x8000;
@ -39,7 +40,7 @@ TEST_CASE("HostMemory: Simple mirror map", "[common]") {
TEST_CASE("HostMemory: Simple unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x5000, 0x3000, 0x2000);
mem.Map(0x5000, 0x3000, 0x2000, PERMS);
volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
data[75] = 50;
@ -50,7 +51,7 @@ TEST_CASE("HostMemory: Simple unmap", "[common]") {
TEST_CASE("HostMemory: Simple unmap and remap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x5000, 0x3000, 0x2000);
mem.Map(0x5000, 0x3000, 0x2000, PERMS);
volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
data[0] = 50;
@ -58,79 +59,79 @@ TEST_CASE("HostMemory: Simple unmap and remap", "[common]") {
mem.Unmap(0x5000, 0x2000);
mem.Map(0x5000, 0x3000, 0x2000);
mem.Map(0x5000, 0x3000, 0x2000, PERMS);
REQUIRE(data[0] == 50);
mem.Map(0x7000, 0x2000, 0x5000);
mem.Map(0x7000, 0x2000, 0x5000, PERMS);
REQUIRE(data[0x3000] == 50);
}
TEST_CASE("HostMemory: Nieche allocation", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x0000, 0, 0x20000);
mem.Map(0x0000, 0, 0x20000, PERMS);
mem.Unmap(0x0000, 0x4000);
mem.Map(0x1000, 0, 0x2000);
mem.Map(0x3000, 0, 0x1000);
mem.Map(0, 0, 0x1000);
mem.Map(0x1000, 0, 0x2000, PERMS);
mem.Map(0x3000, 0, 0x1000, PERMS);
mem.Map(0, 0, 0x1000, PERMS);
}
TEST_CASE("HostMemory: Full unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x8000, 0, 0x4000);
mem.Map(0x8000, 0, 0x4000, PERMS);
mem.Unmap(0x8000, 0x4000);
mem.Map(0x6000, 0, 0x16000);
mem.Map(0x6000, 0, 0x16000, PERMS);
}
TEST_CASE("HostMemory: Right out of bounds unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x0000, 0, 0x4000);
mem.Map(0x0000, 0, 0x4000, PERMS);
mem.Unmap(0x2000, 0x4000);
mem.Map(0x2000, 0x80000, 0x4000);
mem.Map(0x2000, 0x80000, 0x4000, PERMS);
}
TEST_CASE("HostMemory: Left out of bounds unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x8000, 0, 0x4000);
mem.Map(0x8000, 0, 0x4000, PERMS);
mem.Unmap(0x6000, 0x4000);
mem.Map(0x8000, 0, 0x2000);
mem.Map(0x8000, 0, 0x2000, PERMS);
}
TEST_CASE("HostMemory: Multiple placeholder unmap", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x0000, 0, 0x4000);
mem.Map(0x4000, 0, 0x1b000);
mem.Map(0x0000, 0, 0x4000, PERMS);
mem.Map(0x4000, 0, 0x1b000, PERMS);
mem.Unmap(0x3000, 0x1c000);
mem.Map(0x3000, 0, 0x20000);
mem.Map(0x3000, 0, 0x20000, PERMS);
}
TEST_CASE("HostMemory: Unmap between placeholders", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x0000, 0, 0x4000);
mem.Map(0x4000, 0, 0x4000);
mem.Map(0x0000, 0, 0x4000, PERMS);
mem.Map(0x4000, 0, 0x4000, PERMS);
mem.Unmap(0x2000, 0x4000);
mem.Map(0x2000, 0, 0x4000);
mem.Map(0x2000, 0, 0x4000, PERMS);
}
TEST_CASE("HostMemory: Unmap to origin", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0, 0x4000);
mem.Map(0x8000, 0, 0x4000);
mem.Map(0x4000, 0, 0x4000, PERMS);
mem.Map(0x8000, 0, 0x4000, PERMS);
mem.Unmap(0x4000, 0x4000);
mem.Map(0, 0, 0x4000);
mem.Map(0x4000, 0, 0x4000);
mem.Map(0, 0, 0x4000, PERMS);
mem.Map(0x4000, 0, 0x4000, PERMS);
}
TEST_CASE("HostMemory: Unmap to right", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0, 0x4000);
mem.Map(0x8000, 0, 0x4000);
mem.Map(0x4000, 0, 0x4000, PERMS);
mem.Map(0x8000, 0, 0x4000, PERMS);
mem.Unmap(0x8000, 0x4000);
mem.Map(0x8000, 0, 0x4000);
mem.Map(0x8000, 0, 0x4000, PERMS);
}
TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0x10000, 0x4000);
mem.Map(0x4000, 0x10000, 0x4000, PERMS);
volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
ptr[0x1000] = 17;
@ -142,7 +143,7 @@ TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") {
TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0x10000, 0x4000);
mem.Map(0x4000, 0x10000, 0x4000, PERMS);
volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
ptr[0x3000] = 19;
@ -156,7 +157,7 @@ TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") {
TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0x10000, 0x4000);
mem.Map(0x4000, 0x10000, 0x4000, PERMS);
volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
ptr[0x0000] = 19;
@ -170,8 +171,8 @@ TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") {
TEST_CASE("HostMemory: Partial sparse middle unmap and check bindings", "[common]") {
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
mem.Map(0x4000, 0x10000, 0x2000);
mem.Map(0x6000, 0x20000, 0x2000);
mem.Map(0x4000, 0x10000, 0x2000, PERMS);
mem.Map(0x6000, 0x20000, 0x2000, PERMS);
volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
ptr[0x0000] = 19;

View File

@ -4,7 +4,7 @@
add_subdirectory(host_shaders)
if(LIBVA_FOUND)
set_source_files_properties(host1x/codecs/codec.cpp
set_source_files_properties(host1x/ffmpeg/ffmpeg.cpp
PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
endif()
@ -15,6 +15,7 @@ add_library(video_core STATIC
buffer_cache/buffer_cache.cpp
buffer_cache/buffer_cache.h
buffer_cache/memory_tracker_base.h
buffer_cache/usage_tracker.h
buffer_cache/word_manager.h
cache_types.h
cdma_pusher.cpp
@ -66,6 +67,8 @@ add_library(video_core STATIC
host1x/codecs/vp9.cpp
host1x/codecs/vp9.h
host1x/codecs/vp9_types.h
host1x/ffmpeg/ffmpeg.cpp
host1x/ffmpeg/ffmpeg.h
host1x/control.cpp
host1x/control.h
host1x/host1x.cpp

View File

@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() {
if (!channel_state) {
return;
}
runtime.TickFrame(slot_buffers);
// Calculate hits and shots and move hit bits to the right
const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
for (const IntervalType& add_interval : tmp_intervals) {
common_ranges.add(add_interval);
}
runtime.CopyBuffer(dest_buffer, src_buffer, copies);
const auto& copy = copies[0];
src_buffer.MarkUsage(copy.src_offset, copy.size);
dest_buffer.MarkUsage(copy.dst_offset, copy.size);
runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
if (has_new_downloads) {
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
}
@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
common_ranges.subtract(subtract_interval);
const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
auto& dest_buffer = slot_buffers[buffer];
Buffer& dest_buffer = slot_buffers[buffer];
const u32 offset = dest_buffer.Offset(*cpu_dst_address);
runtime.ClearBuffer(dest_buffer, offset, size, value);
dest_buffer.MarkUsage(offset, size);
return true;
}
@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
async_downloads += std::make_pair(base_interval, 1);
buffer.MarkUsage(copy.src_offset, copy.size);
runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
normalized_copies.push_back(second_copy);
}
@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
// Have in mind the staging buffer offset for the copy
copy.dst_offset += download_staging.offset;
const std::array copies{copy};
runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
false);
Buffer& buffer = slot_buffers[buffer_id];
buffer.MarkUsage(copy.src_offset, copy.size);
runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
}
runtime.PostCopyBarrier();
runtime.Finish();
@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
std::memcpy(upload_staging.mapped_span.data(),
draw_state.inline_index_draw_indexes.data(), size);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
} else {
buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
}
@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
runtime.BindIndexBuffer(buffer, new_offset, size);
} else {
buffer.MarkUsage(offset, size);
runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
draw_state.index_buffer.first, draw_state.index_buffer.count,
buffer, offset, size);
@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, binding.size);
host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset);
@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
}
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
} else {
@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
if (is_written) {
@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
buffer.MarkUsage(offset, size);
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format);
@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset);
host_bindings.sizes.push_back(binding.size);
host_bindings.sizes.push_back(size);
}
if (host_bindings.buffers.size() > 0) {
runtime.BindTransformFeedbackBuffers(host_bindings);
@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
++binding_index;
@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
const bool is_written =
((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
buffer.MarkUsage(offset, size);
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format);
@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
}
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
channel_state->vertex_buffers[index] = Binding{
.cpu_addr = *cpu_addr,
.size = size,
.buffer_id = FindBuffer(*cpu_addr, size),
.buffer_id = buffer_id,
};
}
@ -1401,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
.dst_offset = dst_base_offset,
.size = overlap.SizeBytes(),
});
runtime.CopyBuffer(new_buffer, overlap, copies);
new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
runtime.CopyBuffer(new_buffer, overlap, copies, true);
DeleteBuffer(overlap_id, true);
}
@ -1414,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
auto& new_buffer = slot_buffers[new_buffer_id];
runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
const size_t size_bytes = new_buffer.SizeBytes();
runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
new_buffer.MarkUsage(0, size_bytes);
for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
}
@ -1467,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
template <class P>
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
return SynchronizeBufferImpl(buffer, cpu_addr, size);
}
template <class P>
bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
boost::container::small_vector<BufferCopy, 4> copies;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
@ -1493,51 +1508,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
return false;
}
template <class P>
bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
boost::container::small_vector<BufferCopy, 4> copies;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
IntervalSet found_sets{};
auto make_copies = [&] {
for (auto& interval : found_sets) {
const std::size_t sub_size = interval.upper() - interval.lower();
const VAddr cpu_addr_ = interval.lower();
copies.push_back(BufferCopy{
.src_offset = total_size_bytes,
.dst_offset = cpu_addr_ - buffer.CpuAddr(),
.size = sub_size,
});
total_size_bytes += sub_size;
largest_copy = std::max<u64>(largest_copy, sub_size);
}
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
};
memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
const VAddr base_adr = cpu_addr_out;
const VAddr end_adr = base_adr + range_size;
const IntervalType add_interval{base_adr, end_adr};
found_sets.add(add_interval);
});
if (found_sets.empty()) {
return true;
}
const IntervalType search_interval{cpu_addr, cpu_addr + size};
auto it = common_ranges.lower_bound(search_interval);
auto it_end = common_ranges.upper_bound(search_interval);
if (it == common_ranges.end()) {
make_copies();
return false;
}
while (it != it_end) {
found_sets.subtract(*it);
it++;
}
make_copies();
return false;
}
template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies) {
@ -1586,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
// Apply the staging offset
copy.src_offset += upload_staging.offset;
}
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
}
}
@ -1628,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_
}};
u8* const src_pointer = upload_staging.mapped_span.data();
std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
} else {
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
}
@ -1681,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
for (BufferCopy& copy : copies) {
// Modify copies to have the staging offset in mind
copy.dst_offset += download_staging.offset;
buffer.MarkUsage(copy.src_offset, copy.size);
}
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
runtime.Finish();
for (const BufferCopy& copy : copies) {
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;

View File

@ -529,10 +529,6 @@ private:
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies);

View File

@ -0,0 +1,79 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include "common/alignment.h"
#include "common/common_types.h"
namespace VideoCommon {
class UsageTracker {
static constexpr size_t BYTES_PER_BIT_SHIFT = 6;
static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT;
static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT;
public:
explicit UsageTracker(size_t size) {
const size_t num_pages = (size >> PAGE_SHIFT) + 1;
pages.resize(num_pages, 0ULL);
}
void Reset() noexcept {
std::ranges::fill(pages, 0ULL);
}
void Track(u64 offset, u64 size) noexcept {
const size_t page = offset >> PAGE_SHIFT;
const size_t page_end = (offset + size) >> PAGE_SHIFT;
TrackPage(page, offset, size);
if (page == page_end) {
return;
}
for (size_t i = page + 1; i < page_end; i++) {
pages[i] = ~u64{0};
}
const size_t offset_end = offset + size;
const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
}
[[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept {
const size_t page = offset >> PAGE_SHIFT;
const size_t page_end = (offset + size) >> PAGE_SHIFT;
if (IsPageUsed(page, offset, size)) {
return true;
}
for (size_t i = page + 1; i < page_end; i++) {
if (pages[i] != 0) {
return true;
}
}
const size_t offset_end = offset + size;
const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
}
private:
void TrackPage(u64 page, u64 offset, u64 size) noexcept {
const size_t offset_in_page = offset % PAGE_BYTES;
const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
const size_t mask = ~u64{0} >> (64 - num_bits);
pages[page] |= (~u64{0} & mask) << first_bit;
}
bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept {
const size_t offset_in_page = offset % PAGE_BYTES;
const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
const size_t mask = ~u64{0} >> (64 - num_bits);
const size_t mask2 = (~u64{0} & mask) << first_bit;
return (pages[page] & mask2) != 0;
}
private:
std::vector<u64> pages;
};
} // namespace VideoCommon

View File

@ -1,11 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <fstream>
#include <vector>
#include "common/assert.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/codec.h"
#include "video_core/host1x/codecs/h264.h"
@ -14,242 +10,17 @@
#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
extern "C" {
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#ifdef LIBVA_FOUND
// for querying VAAPI driver information
#include <libavutil/hwcontext_vaapi.h>
#endif
}
namespace Tegra {
namespace {
constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
constexpr std::array PREFERRED_GPU_DECODERS = {
AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
AV_HWDEVICE_TYPE_DXVA2,
#elif defined(__unix__)
AV_HWDEVICE_TYPE_VAAPI,
AV_HWDEVICE_TYPE_VDPAU,
#endif
// last resort for Linux Flatpak (w/ NVIDIA)
AV_HWDEVICE_TYPE_VULKAN,
};
void AVPacketDeleter(AVPacket* ptr) {
av_packet_free(&ptr);
}
using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == av_codec_ctx->pix_fmt) {
return av_codec_ctx->pix_fmt;
}
}
LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
av_buffer_unref(&av_codec_ctx->hw_device_ctx);
av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
return PREFERRED_CPU_FMT;
}
// List all the currently available hwcontext in ffmpeg
std::vector<AVHWDeviceType> ListSupportedContexts() {
std::vector<AVHWDeviceType> contexts{};
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
do {
current_device_type = av_hwdevice_iterate_types(current_device_type);
contexts.push_back(current_device_type);
} while (current_device_type != AV_HWDEVICE_TYPE_NONE);
return contexts;
}
} // namespace
void AVFrameDeleter(AVFrame* ptr) {
av_frame_free(&ptr);
}
Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
: host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
Codec::~Codec() {
if (!initialized) {
return;
}
// Free libav memory
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
if (filters_initialized) {
avfilter_graph_free(&av_filter_graph);
}
}
bool Codec::CreateGpuAvDevice() {
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
static const auto supported_contexts = ListSupportedContexts();
for (const auto& type : PREFERRED_GPU_DECODERS) {
if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
[&type](const auto& context) { return context == type; })) {
LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
continue;
}
// Avoid memory leak from not cleaning up after av_hwdevice_ctx_create
av_buffer_unref(&av_gpu_decoder);
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
if (hwdevice_res < 0) {
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
av_hwdevice_get_type_name(type), hwdevice_res);
continue;
}
#ifdef LIBVA_FOUND
if (type == AV_HWDEVICE_TYPE_VAAPI) {
// we need to determine if this is an impersonated VAAPI driver
AVHWDeviceContext* hwctx =
static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data));
AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
const char* vendor_name = vaQueryVendorString(vactx->display);
if (strstr(vendor_name, "VDPAU backend")) {
// VDPAU impersonated VAAPI impl's are super buggy, we need to skip them
LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver");
continue;
} else {
// according to some user testing, certain vaapi driver (Intel?) could be buggy
// so let's log the driver name which may help the developers/supporters
LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name);
}
}
#endif
for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
if (!config) {
LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
av_codec->name, av_hwdevice_get_type_name(type));
break;
}
if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
av_codec_ctx->pix_fmt = config->pix_fmt;
return true;
}
}
}
return false;
}
void Codec::InitializeAvCodecContext() {
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
av_codec_ctx->thread_count = 0;
av_codec_ctx->thread_type &= ~FF_THREAD_FRAME;
}
void Codec::InitializeGpuDecoder() {
if (!CreateGpuAvDevice()) {
av_buffer_unref(&av_gpu_decoder);
return;
}
auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetGpuFormat;
}
void Codec::InitializeAvFilters(AVFrame* frame) {
const AVFilter* buffer_src = avfilter_get_by_name("buffer");
const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
AVFilterInOut* inputs = avfilter_inout_alloc();
AVFilterInOut* outputs = avfilter_inout_alloc();
SCOPE_EXIT({
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
});
// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
// so just use 1/1 to make buffer filter happy
std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame->width,
frame->height, frame->format);
av_filter_graph = avfilter_graph_alloc();
int ret = avfilter_graph_create_filter(&av_filter_src_ctx, buffer_src, "in", args.c_str(),
nullptr, av_filter_graph);
if (ret < 0) {
LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter source error: {}", ret);
return;
}
ret = avfilter_graph_create_filter(&av_filter_sink_ctx, buffer_sink, "out", nullptr, nullptr,
av_filter_graph);
if (ret < 0) {
LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter sink error: {}", ret);
return;
}
inputs->name = av_strdup("out");
inputs->filter_ctx = av_filter_sink_ctx;
inputs->pad_idx = 0;
inputs->next = nullptr;
outputs->name = av_strdup("in");
outputs->filter_ctx = av_filter_src_ctx;
outputs->pad_idx = 0;
outputs->next = nullptr;
const char* description = "yadif=1:-1:0";
ret = avfilter_graph_parse_ptr(av_filter_graph, description, &inputs, &outputs, nullptr);
if (ret < 0) {
LOG_ERROR(Service_NVDRV, "avfilter_graph_parse_ptr error: {}", ret);
return;
}
ret = avfilter_graph_config(av_filter_graph, nullptr);
if (ret < 0) {
LOG_ERROR(Service_NVDRV, "avfilter_graph_config error: {}", ret);
return;
}
filters_initialized = true;
}
Codec::~Codec() = default;
void Codec::Initialize() {
const AVCodecID codec = [&] {
switch (current_codec) {
case Host1x::NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
case Host1x::NvdecCommon::VideoCodec::VP8:
return AV_CODEC_ID_VP8;
case Host1x::NvdecCommon::VideoCodec::VP9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
return AV_CODEC_ID_NONE;
}
}();
av_codec = avcodec_find_decoder(codec);
InitializeAvCodecContext();
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
InitializeGpuDecoder();
}
if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
return;
}
if (!av_codec_ctx->hw_device_ctx) {
LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
}
initialized = true;
initialized = decode_api.Initialize(current_codec);
}
void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
@ -264,14 +35,18 @@ void Codec::Decode() {
if (is_first_frame) {
Initialize();
}
if (!initialized) {
return;
}
// Assemble bitstream.
bool vp9_hidden_frame = false;
const auto& frame_data = [&]() {
size_t configuration_size = 0;
const auto packet_data = [&]() {
switch (current_codec) {
case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
return h264_decoder->ComposeFrame(state, is_first_frame);
return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame);
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
return vp8_decoder->ComposeFrame(state);
case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
@ -283,89 +58,35 @@ void Codec::Decode() {
return std::span<const u8>{};
}
}();
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
if (!packet) {
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
// Send assembled bitstream to decoder.
if (!decode_api.SendPacket(packet_data, configuration_size)) {
return;
}
packet->data = const_cast<u8*>(frame_data.data());
packet->size = static_cast<s32>(frame_data.size());
if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
return;
}
// Only receive/store visible frames
// Only receive/store visible frames.
if (vp9_hidden_frame) {
return;
}
AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
AVFramePtr final_frame{nullptr, AVFrameDeleter};
ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
return;
}
if (initial_frame->width == 0 || initial_frame->height == 0) {
LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
return;
}
bool is_interlaced = initial_frame->interlaced_frame != 0;
if (av_codec_ctx->hw_device_ctx) {
final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
// Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
// because Intel drivers crash unless using AV_PIX_FMT_NV12
final_frame->format = PREFERRED_GPU_FMT;
const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
} else {
final_frame = std::move(initial_frame);
}
if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
return;
}
if (!is_interlaced) {
av_frames.push(std::move(final_frame));
} else {
if (!filters_initialized) {
InitializeAvFilters(final_frame.get());
}
if (const int ret = av_buffersrc_add_frame_flags(av_filter_src_ctx, final_frame.get(),
AV_BUFFERSRC_FLAG_KEEP_REF);
ret) {
LOG_DEBUG(Service_NVDRV, "av_buffersrc_add_frame_flags error {}", ret);
return;
}
while (true) {
auto filter_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
int ret = av_buffersink_get_frame(av_filter_sink_ctx, filter_frame.get());
// Receive output frames from decoder.
decode_api.ReceiveFrames(frames);
if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF))
break;
if (ret < 0) {
LOG_DEBUG(Service_NVDRV, "av_buffersink_get_frame error {}", ret);
return;
}
av_frames.push(std::move(filter_frame));
}
}
while (av_frames.size() > 10) {
LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
av_frames.pop();
while (frames.size() > 10) {
LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame");
frames.pop();
}
}
AVFramePtr Codec::GetCurrentFrame() {
std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() {
// Sometimes VIC will request more frames than have been decoded.
// in this case, return a nullptr and don't overwrite previous frame data
if (av_frames.empty()) {
return AVFramePtr{nullptr, AVFrameDeleter};
// in this case, return a blank frame and don't overwrite previous data.
if (frames.empty()) {
return {};
}
AVFramePtr frame = std::move(av_frames.front());
av_frames.pop();
auto frame = std::move(frames.front());
frames.pop();
return frame;
}

View File

@ -4,28 +4,15 @@
#pragma once
#include <memory>
#include <optional>
#include <string_view>
#include <queue>
#include "common/common_types.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h"
#include "video_core/host1x/nvdec_common.h"
extern "C" {
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
}
namespace Tegra {
void AVFrameDeleter(AVFrame* ptr);
using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
namespace Decoder {
class H264;
class VP8;
@ -51,7 +38,7 @@ public:
void Decode();
/// Returns next decoded frame
[[nodiscard]] AVFramePtr GetCurrentFrame();
[[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetCurrentFrame();
/// Returns the value of current_codec
[[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
@ -60,25 +47,9 @@ public:
[[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
void InitializeAvCodecContext();
void InitializeAvFilters(AVFrame* frame);
void InitializeGpuDecoder();
bool CreateGpuAvDevice();
bool initialized{};
bool filters_initialized{};
Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
const AVCodec* av_codec{nullptr};
AVCodecContext* av_codec_ctx{nullptr};
AVBufferRef* av_gpu_decoder{nullptr};
AVFilterContext* av_filter_src_ctx{nullptr};
AVFilterContext* av_filter_sink_ctx{nullptr};
AVFilterGraph* av_filter_graph{nullptr};
FFmpeg::DecodeApi decode_api;
Host1x::Host1x& host1x;
const Host1x::NvdecCommon::NvdecRegisters& state;
@ -86,7 +57,7 @@ private:
std::unique_ptr<Decoder::VP8> vp8_decoder;
std::unique_ptr<Decoder::VP9> vp9_decoder;
std::queue<AVFramePtr> av_frames{};
std::queue<std::unique_ptr<FFmpeg::Frame>> frames{};
};
} // namespace Tegra

View File

@ -30,7 +30,7 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
H264::~H264() = default;
std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
bool is_first_frame) {
size_t* out_configuration_size, bool is_first_frame) {
H264DecoderContext context;
host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
sizeof(H264DecoderContext));
@ -39,6 +39,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
if (!is_first_frame && frame_number != 0) {
frame.resize_destructive(context.stream_len);
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
*out_configuration_size = 0;
return frame;
}
@ -157,6 +158,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
frame.resize(encoded_header.size() + context.stream_len);
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
*out_configuration_size = encoded_header.size();
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
frame.data() + encoded_header.size(), context.stream_len);

View File

@ -67,6 +67,7 @@ public:
/// Compose the H264 frame for FFmpeg decoding
[[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
size_t* out_configuration_size,
bool is_first_frame = false);
private:

View File

@ -0,0 +1,419 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h"
extern "C" {
#ifdef LIBVA_FOUND
// for querying VAAPI driver information
#include <libavutil/hwcontext_vaapi.h>
#endif
}
namespace FFmpeg {
namespace {
constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12;
constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P;
constexpr std::array PreferredGpuDecoders = {
AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
AV_HWDEVICE_TYPE_DXVA2,
#elif defined(__unix__)
AV_HWDEVICE_TYPE_VAAPI,
AV_HWDEVICE_TYPE_VDPAU,
#endif
// last resort for Linux Flatpak (w/ NVIDIA)
AV_HWDEVICE_TYPE_VULKAN,
};
AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == codec_context->pix_fmt) {
return codec_context->pix_fmt;
}
}
LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU");
av_buffer_unref(&codec_context->hw_device_ctx);
codec_context->pix_fmt = PreferredCpuFormat;
return codec_context->pix_fmt;
}
std::string AVError(int errnum) {
char errbuf[AV_ERROR_MAX_STRING_SIZE] = {};
av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum);
return errbuf;
}
} // namespace
Packet::Packet(std::span<const u8> data) {
m_packet = av_packet_alloc();
m_packet->data = const_cast<u8*>(data.data());
m_packet->size = static_cast<s32>(data.size());
}
Packet::~Packet() {
av_packet_free(&m_packet);
}
Frame::Frame() {
m_frame = av_frame_alloc();
}
Frame::~Frame() {
av_frame_free(&m_frame);
}
Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
const AVCodecID av_codec = [&] {
switch (codec) {
case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
return AV_CODEC_ID_VP8;
case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", codec);
return AV_CODEC_ID_NONE;
}
}();
m_codec = avcodec_find_decoder(av_codec);
}
bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i);
if (!config) {
LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name,
av_hwdevice_get_type_name(type));
break;
}
if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 &&
config->device_type == type) {
LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
*out_pix_fmt = config->pix_fmt;
return true;
}
}
return false;
}
std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
std::vector<AVHWDeviceType> types;
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
while (true) {
current_device_type = av_hwdevice_iterate_types(current_device_type);
if (current_device_type == AV_HWDEVICE_TYPE_NONE) {
return types;
}
types.push_back(current_device_type);
}
}
HardwareContext::~HardwareContext() {
av_buffer_unref(&m_gpu_decoder);
}
bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
const Decoder& decoder) {
const auto supported_types = GetSupportedDeviceTypes();
for (const auto type : PreferredGpuDecoders) {
AVPixelFormat hw_pix_fmt;
if (std::ranges::find(supported_types, type) == supported_types.end()) {
LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
continue;
}
if (!this->InitializeWithType(type)) {
continue;
}
if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) {
decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt);
return true;
}
}
return false;
}
bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
av_buffer_unref(&m_gpu_decoder);
if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0);
ret < 0) {
LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type),
AVError(ret));
return false;
}
#ifdef LIBVA_FOUND
if (type == AV_HWDEVICE_TYPE_VAAPI) {
// We need to determine if this is an impersonated VAAPI driver.
auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data);
auto* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
const char* vendor_name = vaQueryVendorString(vactx->display);
if (strstr(vendor_name, "VDPAU backend")) {
// VDPAU impersonated VAAPI impls are super buggy, we need to skip them.
LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver");
return false;
} else {
// According to some user testing, certain VAAPI drivers (Intel?) could be buggy.
// Log the driver name just in case.
LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name);
}
}
#endif
return true;
}
DecoderContext::DecoderContext(const Decoder& decoder) {
m_codec_context = avcodec_alloc_context3(decoder.GetCodec());
av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
m_codec_context->thread_count = 0;
m_codec_context->thread_type &= ~FF_THREAD_FRAME;
}
DecoderContext::~DecoderContext() {
av_buffer_unref(&m_codec_context->hw_device_ctx);
avcodec_free_context(&m_codec_context);
}
void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context,
AVPixelFormat hw_pix_fmt) {
m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef());
m_codec_context->get_format = GetGpuFormat;
m_codec_context->pix_fmt = hw_pix_fmt;
}
bool DecoderContext::OpenContext(const Decoder& decoder) {
if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
return false;
}
if (!m_codec_context->hw_device_ctx) {
LOG_INFO(HW_GPU, "Using FFmpeg software decoding");
}
return true;
}
bool DecoderContext::SendPacket(const Packet& packet) {
if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
return false;
}
return true;
}
std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) {
auto dst_frame = std::make_unique<Frame>();
const auto ReceiveImpl = [&](AVFrame* frame) {
if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
return false;
}
*out_is_interlaced = frame->interlaced_frame != 0;
return true;
};
if (m_codec_context->hw_device_ctx) {
// If we have a hardware context, make a separate frame here to receive the
// hardware result before sending it to the output.
Frame intermediate_frame;
if (!ReceiveImpl(intermediate_frame.GetFrame())) {
return {};
}
dst_frame->SetFormat(PreferredGpuFormat);
if (const int ret =
av_hwframe_transfer_data(dst_frame->GetFrame(), intermediate_frame.GetFrame(), 0);
ret < 0) {
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
return {};
}
} else {
// Otherwise, decode the frame as normal.
if (!ReceiveImpl(dst_frame->GetFrame())) {
return {};
}
}
return dst_frame;
}
DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) {
const AVFilter* buffer_src = avfilter_get_by_name("buffer");
const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
AVFilterInOut* inputs = avfilter_inout_alloc();
AVFilterInOut* outputs = avfilter_inout_alloc();
SCOPE_EXIT({
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
});
// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
// so just use 1/1 to make buffer filter happy
std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(),
frame.GetHeight(), static_cast<int>(frame.GetPixelFormat()));
m_filter_graph = avfilter_graph_alloc();
int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(),
nullptr, m_filter_graph);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret));
return;
}
ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr,
m_filter_graph);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret));
return;
}
inputs->name = av_strdup("out");
inputs->filter_ctx = m_sink_context;
inputs->pad_idx = 0;
inputs->next = nullptr;
outputs->name = av_strdup("in");
outputs->filter_ctx = m_source_context;
outputs->pad_idx = 0;
outputs->next = nullptr;
const char* description = "yadif=1:-1:0";
ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret));
return;
}
ret = avfilter_graph_config(m_filter_graph, nullptr);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret));
return;
}
m_initialized = true;
}
bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) {
if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(),
AV_BUFFERSRC_FLAG_KEEP_REF);
ret < 0) {
LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret));
return false;
}
return true;
}
std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() {
auto dst_frame = std::make_unique<Frame>();
const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame());
if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) {
return {};
}
if (ret < 0) {
LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret));
return {};
}
return dst_frame;
}
DeinterlaceFilter::~DeinterlaceFilter() {
avfilter_graph_free(&m_filter_graph);
}
void DecodeApi::Reset() {
m_deinterlace_filter.reset();
m_hardware_context.reset();
m_decoder_context.reset();
m_decoder.reset();
}
bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
this->Reset();
m_decoder.emplace(codec);
m_decoder_context.emplace(*m_decoder);
// Enable GPU decoding if requested.
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
m_hardware_context.emplace();
m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
}
// Open the decoder context.
if (!m_decoder_context->OpenContext(*m_decoder)) {
this->Reset();
return false;
}
return true;
}
bool DecodeApi::SendPacket(std::span<const u8> packet_data, size_t configuration_size) {
FFmpeg::Packet packet(packet_data);
return m_decoder_context->SendPacket(packet);
}
void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) {
// Receive raw frame from decoder.
bool is_interlaced;
auto frame = m_decoder_context->ReceiveFrame(&is_interlaced);
if (!frame) {
return;
}
if (!is_interlaced) {
// If the frame is not interlaced, we can pend it now.
frame_queue.push(std::move(frame));
} else {
// Create the deinterlacer if needed.
if (!m_deinterlace_filter) {
m_deinterlace_filter.emplace(*frame);
}
// Add the frame we just received.
if (!m_deinterlace_filter->AddSourceFrame(*frame)) {
return;
}
// Pend output fields.
while (true) {
auto filter_frame = m_deinterlace_filter->DrainSinkFrame();
if (!filter_frame) {
break;
}
frame_queue.push(std::move(filter_frame));
}
}
}
} // namespace FFmpeg

View File

@ -0,0 +1,213 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <optional>
#include <span>
#include <vector>
#include <queue>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/host1x/nvdec_common.h"
extern "C" {
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
}
namespace FFmpeg {
class Packet;
class Frame;
class Decoder;
class HardwareContext;
class DecoderContext;
class DeinterlaceFilter;
// Wraps an AVPacket, a container for compressed bitstream data.
class Packet {
public:
YUZU_NON_COPYABLE(Packet);
YUZU_NON_MOVEABLE(Packet);
explicit Packet(std::span<const u8> data);
~Packet();
AVPacket* GetPacket() const {
return m_packet;
}
private:
AVPacket* m_packet{};
};
// Wraps an AVFrame, a container for audio and video stream data.
class Frame {
public:
YUZU_NON_COPYABLE(Frame);
YUZU_NON_MOVEABLE(Frame);
explicit Frame();
~Frame();
int GetWidth() const {
return m_frame->width;
}
int GetHeight() const {
return m_frame->height;
}
AVPixelFormat GetPixelFormat() const {
return static_cast<AVPixelFormat>(m_frame->format);
}
int GetStride(int plane) const {
return m_frame->linesize[plane];
}
int* GetStrides() const {
return m_frame->linesize;
}
u8* GetData(int plane) const {
return m_frame->data[plane];
}
u8** GetPlanes() const {
return m_frame->data;
}
void SetFormat(int format) {
m_frame->format = format;
}
AVFrame* GetFrame() const {
return m_frame;
}
private:
AVFrame* m_frame{};
};
// Wraps an AVCodec, a type containing information about a codec.
class Decoder {
public:
YUZU_NON_COPYABLE(Decoder);
YUZU_NON_MOVEABLE(Decoder);
explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec);
~Decoder() = default;
bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const;
const AVCodec* GetCodec() const {
return m_codec;
}
private:
const AVCodec* m_codec{};
};
// Wraps AVBufferRef for an accelerated decoder.
class HardwareContext {
public:
YUZU_NON_COPYABLE(HardwareContext);
YUZU_NON_MOVEABLE(HardwareContext);
static std::vector<AVHWDeviceType> GetSupportedDeviceTypes();
explicit HardwareContext() = default;
~HardwareContext();
bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder);
AVBufferRef* GetBufferRef() const {
return m_gpu_decoder;
}
private:
bool InitializeWithType(AVHWDeviceType type);
AVBufferRef* m_gpu_decoder{};
};
// Wraps an AVCodecContext.
class DecoderContext {
public:
YUZU_NON_COPYABLE(DecoderContext);
YUZU_NON_MOVEABLE(DecoderContext);
explicit DecoderContext(const Decoder& decoder);
~DecoderContext();
void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
bool OpenContext(const Decoder& decoder);
bool SendPacket(const Packet& packet);
std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced);
AVCodecContext* GetCodecContext() const {
return m_codec_context;
}
private:
AVCodecContext* m_codec_context{};
};
// Wraps an AVFilterGraph.
class DeinterlaceFilter {
public:
YUZU_NON_COPYABLE(DeinterlaceFilter);
YUZU_NON_MOVEABLE(DeinterlaceFilter);
explicit DeinterlaceFilter(const Frame& frame);
~DeinterlaceFilter();
bool AddSourceFrame(const Frame& frame);
std::unique_ptr<Frame> DrainSinkFrame();
private:
AVFilterGraph* m_filter_graph{};
AVFilterContext* m_source_context{};
AVFilterContext* m_sink_context{};
bool m_initialized{};
};
class DecodeApi {
public:
YUZU_NON_COPYABLE(DecodeApi);
YUZU_NON_MOVEABLE(DecodeApi);
DecodeApi() = default;
~DecodeApi() = default;
bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
void Reset();
bool SendPacket(std::span<const u8> packet_data, size_t configuration_size);
void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue);
private:
std::optional<FFmpeg::Decoder> m_decoder;
std::optional<FFmpeg::DecoderContext> m_decoder_context;
std::optional<FFmpeg::HardwareContext> m_hardware_context;
std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter;
};
} // namespace FFmpeg

View File

@ -28,7 +28,7 @@ void Nvdec::ProcessMethod(u32 method, u32 argument) {
}
}
AVFramePtr Nvdec::GetFrame() {
std::unique_ptr<FFmpeg::Frame> Nvdec::GetFrame() {
return codec->GetCurrentFrame();
}

View File

@ -23,7 +23,7 @@ public:
void ProcessMethod(u32 method, u32 argument);
/// Return most recently decoded frame
[[nodiscard]] AVFramePtr GetFrame();
[[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetFrame();
private:
/// Invoke codec to decode a frame

View File

@ -82,27 +82,26 @@ void Vic::Execute() {
return;
}
const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)};
const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
const auto* frame = frame_ptr.get();
auto frame = nvdec_processor->GetFrame();
if (!frame) {
return;
}
const u64 surface_width = config.surface_width_minus1 + 1;
const u64 surface_height = config.surface_height_minus1 + 1;
if (static_cast<u64>(frame->width) != surface_width ||
static_cast<u64>(frame->height) != surface_height) {
if (static_cast<u64>(frame->GetWidth()) != surface_width ||
static_cast<u64>(frame->GetHeight()) != surface_height) {
// TODO: Properly support multiple video streams with differing frame dimensions
LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}",
frame->width, frame->height, surface_width, surface_height);
frame->GetWidth(), frame->GetHeight(), surface_width, surface_height);
}
switch (config.pixel_format) {
case VideoPixelFormat::RGBA8:
case VideoPixelFormat::BGRA8:
case VideoPixelFormat::RGBX8:
WriteRGBFrame(frame, config);
WriteRGBFrame(std::move(frame), config);
break;
case VideoPixelFormat::YUV420:
WriteYUVFrame(frame, config);
WriteYUVFrame(std::move(frame), config);
break;
default:
UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
@ -110,10 +109,14 @@ void Vic::Execute() {
}
}
void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
void Vic::WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) {
LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) {
const auto frame_width = frame->GetWidth();
const auto frame_height = frame->GetHeight();
const auto frame_format = frame->GetPixelFormat();
if (!scaler_ctx || frame_width != scaler_width || frame_height != scaler_height) {
const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
switch (pixel_format) {
case VideoPixelFormat::RGBA8:
@ -129,27 +132,26 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
sws_freeContext(scaler_ctx);
// Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format
scaler_ctx = sws_getContext(frame->width, frame->height,
static_cast<AVPixelFormat>(frame->format), frame->width,
frame->height, target_format, 0, nullptr, nullptr, nullptr);
scaler_width = frame->width;
scaler_height = frame->height;
scaler_ctx = sws_getContext(frame_width, frame_height, frame_format, frame_width,
frame_height, target_format, 0, nullptr, nullptr, nullptr);
scaler_width = frame_width;
scaler_height = frame_height;
converted_frame_buffer.reset();
}
if (!converted_frame_buffer) {
const size_t frame_size = frame->width * frame->height * 4;
const size_t frame_size = frame_width * frame_height * 4;
converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free};
}
const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
const std::array<int, 4> converted_stride{frame_width * 4, frame_height * 4, 0, 0};
u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr,
converted_stride.data());
sws_scale(scaler_ctx, frame->GetPlanes(), frame->GetStrides(), 0, frame_height,
&converted_frame_buf_addr, converted_stride.data());
// Use the minimum of surface/frame dimensions to avoid buffer overflow.
const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1;
const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1;
const u32 width = std::min(surface_width, static_cast<u32>(frame->width));
const u32 height = std::min(surface_height, static_cast<u32>(frame->height));
const u32 width = std::min(surface_width, static_cast<u32>(frame_width));
const u32 height = std::min(surface_height, static_cast<u32>(frame_height));
const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
if (blk_kind != 0) {
// swizzle pitch linear to block linear
@ -169,23 +171,23 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
}
}
void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) {
LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
const std::size_t surface_width = config.surface_width_minus1 + 1;
const std::size_t surface_height = config.surface_height_minus1 + 1;
const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
// Use the minimum of surface/frame dimensions to avoid buffer overflow.
const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->GetWidth()));
const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->GetHeight()));
const auto stride = static_cast<size_t>(frame->linesize[0]);
const auto stride = static_cast<size_t>(frame->GetStride(0));
luma_buffer.resize_destructive(aligned_width * surface_height);
chroma_buffer.resize_destructive(aligned_width * surface_height / 2);
// Populate luma buffer
const u8* luma_src = frame->data[0];
const u8* luma_src = frame->GetData(0);
for (std::size_t y = 0; y < frame_height; ++y) {
const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width;
@ -196,16 +198,16 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
// Chroma
const std::size_t half_height = frame_height / 2;
const auto half_stride = static_cast<size_t>(frame->linesize[1]);
const auto half_stride = static_cast<size_t>(frame->GetStride(1));
switch (frame->format) {
switch (frame->GetPixelFormat()) {
case AV_PIX_FMT_YUV420P: {
// Frame from FFmpeg software
// Populate chroma buffer from both channels with interleaving.
const std::size_t half_width = frame_width / 2;
u8* chroma_buffer_data = chroma_buffer.data();
const u8* chroma_b_src = frame->data[1];
const u8* chroma_r_src = frame->data[2];
const u8* chroma_b_src = frame->GetData(1);
const u8* chroma_r_src = frame->GetData(2);
for (std::size_t y = 0; y < half_height; ++y) {
const std::size_t src = y * half_stride;
const std::size_t dst = y * aligned_width;
@ -219,7 +221,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
case AV_PIX_FMT_NV12: {
// Frame from VA-API hardware
// This is already interleaved so just copy
const u8* chroma_src = frame->data[1];
const u8* chroma_src = frame->GetData(1);
for (std::size_t y = 0; y < half_height; ++y) {
const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width;

View File

@ -39,9 +39,9 @@ public:
private:
void Execute();
void WriteRGBFrame(const AVFrame* frame, const VicConfig& config);
void WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
void WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
Host1x& host1x;
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;

View File

@ -178,13 +178,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool) {
CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies) {
CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
std::span<const VideoCommon::BufferCopy> copies, bool) {
CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true);
}
void BufferCacheRuntime::PreCopyBarrier() {

View File

@ -30,6 +30,8 @@ public:
void MakeResident(GLenum access) noexcept;
void MarkUsage(u64 offset, u64 size) {}
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
@ -66,22 +68,29 @@ public:
[[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
return false;
}
void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
std::span<const VideoCommon::BufferCopy> copies, bool barrier);
void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
std::span<const VideoCommon::BufferCopy> copies, bool barrier);
void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool can_reorder_upload = false);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
std::span<const VideoCommon::BufferCopy> copies, bool);
void PreCopyBarrier();
void PostCopyBarrier();
void Finish();
void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);

View File

@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
} // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
device{&runtime.device}, buffer{
CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
tracker{SizeBytes()} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
@ -355,12 +355,31 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
it->ResetUsageTracking();
}
}
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
std::span<const VideoCommon::BufferCopy> copies) {
if (Settings::values.disable_buffer_reorder) {
return false;
}
const bool can_use_upload_cmdbuf =
std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) {
return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
});
return can_use_upload_cmdbuf;
}
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool can_reorder_upload) {
if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
return;
}
@ -376,9 +395,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
});
return;
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
if (barrier) {

View File

@ -5,6 +5,7 @@
#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/usage_tracker.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@ -34,6 +35,18 @@ public:
return *buffer;
}
[[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
return tracker.IsUsed(offset, size);
}
void MarkUsage(u64 offset, u64 size) noexcept {
tracker.Track(offset, size);
}
void ResetUsageTracking() noexcept {
tracker.Reset();
}
operator VkBuffer() const noexcept {
return *buffer;
}
@ -49,6 +62,7 @@ private:
const Device* device{};
vk::Buffer buffer;
std::vector<BufferView> views;
VideoCommon::UsageTracker tracker;
};
class QuadArrayIndexBuffer;
@ -67,6 +81,8 @@ public:
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool);
void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
void Finish();
u64 GetDeviceLocalMemory() const;
@ -79,12 +95,15 @@ public:
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
void PreCopyBarrier();
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool can_reorder_upload = false);
void PostCopyBarrier();

View File

@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
Refresh();
}
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick) {
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick) {
if (semaphore) {
return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
host_tick);
} else {
return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
}
}
@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
};
VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick) {
const VkSemaphore timeline_semaphore = *semaphore;
@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
const std::array signal_values{host_tick, u64(0)};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
const VkTimelineSemaphoreSubmitInfo timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.commandBufferCount = static_cast<u32>(cmdbuffers.size()),
.pCommandBuffers = cmdbuffers.data(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};
@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
return device.GetGraphicsQueue().Submit(submit_info);
}
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick) {
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick) {
const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.commandBufferCount = static_cast<u32>(cmdbuffers.size()),
.pCommandBuffers = cmdbuffers.data(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = &signal_semaphore,
};

View File

@ -52,14 +52,16 @@ public:
void Wait(u64 tick);
/// Submits the device graphics queue, updating the tick as necessary
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick);
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);
private:
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick);
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick);
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick);
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick);
void WaitThread(std::stop_token token);

View File

@ -22,11 +22,12 @@ namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
vk::CommandBuffer upload_cmdbuf) {
auto command = first;
while (command != nullptr) {
auto next = command->GetNext();
command->Execute(cmdbuf);
command->Execute(cmdbuf, upload_cmdbuf);
command->~Command();
command = next;
}
@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
// Perform the work, tracking whether the chunk was a submission
// before executing.
const bool has_submit = work->HasSubmit();
work->ExecuteAll(current_cmdbuf);
work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf);
// If the chunk was a submission, reallocate the command buffer.
if (has_submit) {
@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() {
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
current_upload_cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
}
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
InvalidateState();
const u64 signal_value = master_semaphore->NextTick();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
upload_cmdbuf.End();
cmdbuf.End();
if (on_submit) {
@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
std::scoped_lock lock{submit_mutex};
switch (const VkResult result = master_semaphore->SubmitQueue(
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:

View File

@ -80,7 +80,8 @@ public:
/// Send work to a separate thread.
template <typename T>
void Record(T&& command) {
requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
void RecordWithUploadBuffer(T&& command) {
if (chunk->Record(command)) {
return;
}
@ -88,6 +89,15 @@ public:
(void)chunk->Record(command);
}
template <typename T>
requires std::is_invocable_v<T, vk::CommandBuffer>
void Record(T&& c) {
this->RecordWithUploadBuffer(
[command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
command(cmdbuf);
});
}
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
@ -119,7 +129,7 @@ private:
public:
virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;
Command* GetNext() const {
return next;
@ -142,8 +152,8 @@ private:
TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer cmdbuf) const override {
command(cmdbuf);
void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
command(cmdbuf, upload_cmdbuf);
}
private:
@ -152,7 +162,7 @@ private:
class CommandChunk final {
public:
void ExecuteAll(vk::CommandBuffer cmdbuf);
void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf);
template <typename T>
bool Record(T& command) {
@ -228,6 +238,7 @@ private:
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
vk::CommandBuffer current_upload_cmdbuf;
std::unique_ptr<CommandChunk> chunk;
std::function<void()> on_submit;

View File

@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) {
UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));
scheduler.Record([&](vk::CommandBuffer& cmdbuf) {
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
for (auto& images : m_dynamic_images) {
for (size_t i = 0; i < MaxDynamicImage; i++) {
ClearColorImage(cmdbuf, *images.images[i]);
@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
UpdateDescriptorSets(source_image_view, image_index);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) {
scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,

View File

@ -36,6 +36,10 @@ public:
StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
void FreeDeferred(StagingBufferRef& ref);
[[nodiscard]] VkBuffer StreamBuf() const noexcept {
return *stream_buffer;
}
void TickFrame();
private:

View File

@ -138,6 +138,10 @@ public:
return Iterator(this, SlotId{SlotId::INVALID_INDEX});
}
[[nodiscard]] size_t size() const noexcept {
return values_capacity - free_list.size();
}
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}

View File

@ -1101,6 +1101,10 @@ public:
return &handle;
}
VkCommandBuffer operator*() const noexcept {
return handle;
}
void Begin(const VkCommandBufferBeginInfo& begin_info) const {
Check(dld->vkBeginCommandBuffer(handle, &begin_info));
}

View File

@ -252,6 +252,7 @@ file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*)
if (ENABLE_QT_TRANSLATION)
set(YUZU_QT_LANGUAGES "${PROJECT_SOURCE_DIR}/dist/languages" CACHE PATH "Path to the translation bundle for the Qt frontend")
option(GENERATE_QT_TRANSLATION "Generate en.ts as the translation source file" OFF)
option(WORKAROUND_BROKEN_LUPDATE "Run lupdate directly through CMake if Qt's convenience wrappers don't work" OFF)
# Update source TS file if enabled
if (GENERATE_QT_TRANSLATION)
@ -259,6 +260,21 @@ if (ENABLE_QT_TRANSLATION)
# these calls to qt_create_translation also creates a rule to generate en.qm which conflicts with providing english plurals
# so we have to set a OUTPUT_LOCATION so that we don't have multiple rules to generate en.qm
set_source_files_properties(${YUZU_QT_LANGUAGES}/en.ts PROPERTIES OUTPUT_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/translations")
if (WORKAROUND_BROKEN_LUPDATE)
add_custom_command(OUTPUT ${YUZU_QT_LANGUAGES}/en.ts
COMMAND lupdate
-source-language en_US
-target-language en_US
${SRCS}
${UIS}
-ts ${YUZU_QT_LANGUAGES}/en.ts
DEPENDS
${SRCS}
${UIS}
WORKING_DIRECTORY
${CMAKE_CURRENT_SOURCE_DIR}
)
else()
qt_create_translation(QM_FILES
${SRCS}
${UIS}
@ -267,11 +283,28 @@ if (ENABLE_QT_TRANSLATION)
-source-language en_US
-target-language en_US
)
endif()
# Generate plurals into dist/english_plurals/generated_en.ts so it can be used to revise dist/english_plurals/en.ts
set(GENERATED_PLURALS_FILE ${PROJECT_SOURCE_DIR}/dist/english_plurals/generated_en.ts)
set_source_files_properties(${GENERATED_PLURALS_FILE} PROPERTIES OUTPUT_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/plurals")
if (WORKAROUND_BROKEN_LUPDATE)
add_custom_command(OUTPUT ${GENERATED_PLURALS_FILE}
COMMAND lupdate
-source-language en_US
-target-language en_US
${SRCS}
${UIS}
-ts ${GENERATED_PLURALS_FILE}
DEPENDS
${SRCS}
${UIS}
WORKING_DIRECTORY
${CMAKE_CURRENT_SOURCE_DIR}
)
else()
qt_create_translation(QM_FILES ${SRCS} ${UIS} ${GENERATED_PLURALS_FILE} OPTIONS -pluralonly -source-language en_US -target-language en_US)
endif()
add_custom_target(translation ALL DEPENDS ${YUZU_QT_LANGUAGES}/en.ts ${GENERATED_PLURALS_FILE})
endif()

View File

@ -51,6 +51,8 @@ void ConfigureDebug::SetConfiguration() {
ui->enable_all_controllers->setChecked(Settings::values.enable_all_controllers.GetValue());
ui->enable_renderdoc_hotkey->setEnabled(runtime_lock);
ui->enable_renderdoc_hotkey->setChecked(Settings::values.enable_renderdoc_hotkey.GetValue());
ui->disable_buffer_reorder->setEnabled(runtime_lock);
ui->disable_buffer_reorder->setChecked(Settings::values.disable_buffer_reorder.GetValue());
ui->enable_graphics_debugging->setEnabled(runtime_lock);
ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue());
ui->enable_shader_feedback->setEnabled(runtime_lock);
@ -96,6 +98,7 @@ void ConfigureDebug::ApplyConfiguration() {
Settings::values.enable_all_controllers = ui->enable_all_controllers->isChecked();
Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
Settings::values.enable_renderdoc_hotkey = ui->enable_renderdoc_hotkey->isChecked();
Settings::values.disable_buffer_reorder = ui->disable_buffer_reorder->isChecked();
Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked();
Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();

View File

@ -271,19 +271,6 @@
</widget>
</item>
<item row="8" column="0">
<widget class="QCheckBox" name="disable_macro_hle">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro HLE functions. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro HLE</string>
</property>
</widget>
</item>
<item row="7" column="0">
<widget class="QCheckBox" name="dump_macros">
<property name="enabled">
<bool>true</bool>
@ -306,59 +293,7 @@
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="enable_shader_feedback">
<property name="toolTip">
<string>When checked, yuzu will log statistics about the compiled pipeline cache</string>
</property>
<property name="text">
<string>Enable Shader Feedback</string>
</property>
</widget>
</item>
<item row="6" column="0">
<widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro Just In Time compiler. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro JIT</string>
</property>
</widget>
</item>
<item row="9" column="0">
<spacer name="verticalSpacer_5">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Preferred</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="enable_graphics_debugging">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, the graphics API enters a slower debugging mode</string>
</property>
<property name="text">
<string>Enable Graphics Debugging</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="dump_shaders">
<property name="enabled">
<bool>true</bool>
@ -378,6 +313,81 @@
</property>
</widget>
</item>
<item row="7" column="0">
<widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro Just In Time compiler. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro JIT</string>
</property>
</widget>
</item>
<item row="9" column="0">
<widget class="QCheckBox" name="disable_macro_hle">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro HLE functions. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro HLE</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="enable_graphics_debugging">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, the graphics API enters a slower debugging mode</string>
</property>
<property name="text">
<string>Enable Graphics Debugging</string>
</property>
</widget>
</item>
<item row="10" column="0">
<spacer name="verticalSpacer_5">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Preferred</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="enable_shader_feedback">
<property name="toolTip">
<string>When checked, yuzu will log statistics about the compiled pipeline cache</string>
</property>
<property name="text">
<string>Enable Shader Feedback</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="disable_buffer_reorder">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;When checked, disables reording of mapped memory uploads which allows to associate uploads with specific draws. May reduce performance in some cases.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Disable Buffer Reorder</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>

View File

@ -1,17 +1,18 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/time_zone.h"
#include "yuzu/configuration/shared_translation.h"
#include <map>
#include <memory>
#include <tuple>
#include <utility>
#include <QCoreApplication>
#include <QWidget>
#include "common/settings.h"
#include "common/settings_enums.h"
#include "common/settings_setting.h"
#include "common/time_zone.h"
#include "yuzu/uisettings.h"
namespace ConfigurationShared {
@ -21,123 +22,135 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent) {
const auto& tr = [parent](const char* text) -> QString { return parent->tr(text); };
#define INSERT(SETTINGS, ID, NAME, TOOLTIP) \
translations->insert(std::pair{SETTINGS::values.ID.Id(), std::pair{tr((NAME)), tr((TOOLTIP))}})
translations->insert(std::pair{SETTINGS::values.ID.Id(), std::pair{(NAME), (TOOLTIP)}})
// A setting can be ignored by giving it a blank name
// Audio
INSERT(Settings, sink_id, "Output Engine:", "");
INSERT(Settings, audio_output_device_id, "Output Device:", "");
INSERT(Settings, audio_input_device_id, "Input Device:", "");
INSERT(Settings, audio_muted, "Mute audio", "");
INSERT(Settings, volume, "Volume:", "");
INSERT(Settings, dump_audio_commands, "", "");
INSERT(UISettings, mute_when_in_background, "Mute audio when in background", "");
INSERT(Settings, sink_id, tr("Output Engine:"), QStringLiteral());
INSERT(Settings, audio_output_device_id, tr("Output Device:"), QStringLiteral());
INSERT(Settings, audio_input_device_id, tr("Input Device:"), QStringLiteral());
INSERT(Settings, audio_muted, tr("Mute audio"), QStringLiteral());
INSERT(Settings, volume, tr("Volume:"), QStringLiteral());
INSERT(Settings, dump_audio_commands, QStringLiteral(), QStringLiteral());
INSERT(UISettings, mute_when_in_background, tr("Mute audio when in background"),
QStringLiteral());
// Core
INSERT(Settings, use_multi_core, "Multicore CPU Emulation", "");
INSERT(Settings, memory_layout_mode, "Memory Layout", "");
INSERT(Settings, use_speed_limit, "", "");
INSERT(Settings, speed_limit, "Limit Speed Percent", "");
INSERT(Settings, use_multi_core, tr("Multicore CPU Emulation"), QStringLiteral());
INSERT(Settings, memory_layout_mode, tr("Memory Layout"), QStringLiteral());
INSERT(Settings, use_speed_limit, QStringLiteral(), QStringLiteral());
INSERT(Settings, speed_limit, tr("Limit Speed Percent"), QStringLiteral());
// Cpu
INSERT(Settings, cpu_accuracy, "Accuracy:", "");
INSERT(Settings, cpu_accuracy, tr("Accuracy:"), QStringLiteral());
// Cpu Debug
// Cpu Unsafe
INSERT(Settings, cpuopt_unsafe_unfuse_fma,
"Unfuse FMA (improve performance on CPUs without FMA)",
"This option improves speed by reducing accuracy of fused-multiply-add instructions on "
"CPUs without native FMA support.");
INSERT(Settings, cpuopt_unsafe_reduce_fp_error, "Faster FRSQRTE and FRECPE",
"This option improves the speed of some approximate floating-point functions by using "
"less accurate native approximations.");
INSERT(Settings, cpuopt_unsafe_ignore_standard_fpcr, "Faster ASIMD instructions (32 bits only)",
"This option improves the speed of 32 bits ASIMD floating-point functions by running "
"with incorrect rounding modes.");
INSERT(Settings, cpuopt_unsafe_inaccurate_nan, "Inaccurate NaN handling",
"This option improves speed by removing NaN checking. Please note this also reduces "
"accuracy of certain floating-point instructions.");
INSERT(
Settings, cpuopt_unsafe_fastmem_check, "Disable address space checks",
"This option improves speed by eliminating a safety check before every memory read/write "
"in guest. Disabling it may allow a game to read/write the emulator's memory.");
INSERT(Settings, cpuopt_unsafe_ignore_global_monitor, "Ignore global monitor",
"This option improves speed by relying only on the semantics of cmpxchg to ensure "
Settings, cpuopt_unsafe_unfuse_fma,
tr("Unfuse FMA (improve performance on CPUs without FMA)"),
tr("This option improves speed by reducing accuracy of fused-multiply-add instructions on "
"CPUs without native FMA support."));
INSERT(
Settings, cpuopt_unsafe_reduce_fp_error, tr("Faster FRSQRTE and FRECPE"),
tr("This option improves the speed of some approximate floating-point functions by using "
"less accurate native approximations."));
INSERT(Settings, cpuopt_unsafe_ignore_standard_fpcr,
tr("Faster ASIMD instructions (32 bits only)"),
tr("This option improves the speed of 32 bits ASIMD floating-point functions by running "
"with incorrect rounding modes."));
INSERT(Settings, cpuopt_unsafe_inaccurate_nan, tr("Inaccurate NaN handling"),
tr("This option improves speed by removing NaN checking. Please note this also reduces "
"accuracy of certain floating-point instructions."));
INSERT(Settings, cpuopt_unsafe_fastmem_check, tr("Disable address space checks"),
tr("This option improves speed by eliminating a safety check before every memory "
"read/write "
"in guest. Disabling it may allow a game to read/write the emulator's memory."));
INSERT(
Settings, cpuopt_unsafe_ignore_global_monitor, tr("Ignore global monitor"),
tr("This option improves speed by relying only on the semantics of cmpxchg to ensure "
"safety of exclusive access instructions. Please note this may result in deadlocks and "
"other race conditions.");
"other race conditions."));
// Renderer
INSERT(Settings, renderer_backend, "API:", "");
INSERT(Settings, vulkan_device, "Device:", "");
INSERT(Settings, shader_backend, "Shader Backend:", "");
INSERT(Settings, resolution_setup, "Resolution:", "");
INSERT(Settings, scaling_filter, "Window Adapting Filter:", "");
INSERT(Settings, fsr_sharpening_slider, "FSR Sharpness:", "");
INSERT(Settings, anti_aliasing, "Anti-Aliasing Method:", "");
INSERT(Settings, fullscreen_mode, "Fullscreen Mode:", "");
INSERT(Settings, aspect_ratio, "Aspect Ratio:", "");
INSERT(Settings, use_disk_shader_cache, "Use disk pipeline cache", "");
INSERT(Settings, use_asynchronous_gpu_emulation, "Use asynchronous GPU emulation", "");
INSERT(Settings, nvdec_emulation, "NVDEC emulation:", "");
INSERT(Settings, accelerate_astc, "ASTC Decoding Method:", "");
INSERT(Settings, astc_recompression, "ASTC Recompression Method:", "");
INSERT(Settings, vsync_mode, "VSync Mode:",
"FIFO (VSync) does not drop frames or exhibit tearing but is limited by the screen "
INSERT(Settings, renderer_backend, tr("API:"), QStringLiteral());
INSERT(Settings, vulkan_device, tr("Device:"), QStringLiteral());
INSERT(Settings, shader_backend, tr("Shader Backend:"), QStringLiteral());
INSERT(Settings, resolution_setup, tr("Resolution:"), QStringLiteral());
INSERT(Settings, scaling_filter, tr("Window Adapting Filter:"), QStringLiteral());
INSERT(Settings, fsr_sharpening_slider, tr("FSR Sharpness:"), QStringLiteral());
INSERT(Settings, anti_aliasing, tr("Anti-Aliasing Method:"), QStringLiteral());
INSERT(Settings, fullscreen_mode, tr("Fullscreen Mode:"), QStringLiteral());
INSERT(Settings, aspect_ratio, tr("Aspect Ratio:"), QStringLiteral());
INSERT(Settings, use_disk_shader_cache, tr("Use disk pipeline cache"), QStringLiteral());
INSERT(Settings, use_asynchronous_gpu_emulation, tr("Use asynchronous GPU emulation"),
QStringLiteral());
INSERT(Settings, nvdec_emulation, tr("NVDEC emulation:"), QStringLiteral());
INSERT(Settings, accelerate_astc, tr("ASTC Decoding Method:"), QStringLiteral());
INSERT(Settings, astc_recompression, tr("ASTC Recompression Method:"), QStringLiteral());
INSERT(
Settings, vsync_mode, tr("VSync Mode:"),
tr("FIFO (VSync) does not drop frames or exhibit tearing but is limited by the screen "
"refresh rate.\nFIFO Relaxed is similar to FIFO but allows tearing as it recovers from "
"a slow down.\nMailbox can have lower latency than FIFO and does not tear but may drop "
"frames.\nImmediate (no synchronization) just presents whatever is available and can "
"exhibit tearing.");
INSERT(Settings, bg_red, "", "");
INSERT(Settings, bg_green, "", "");
INSERT(Settings, bg_blue, "", "");
"exhibit tearing."));
INSERT(Settings, bg_red, QStringLiteral(), QStringLiteral());
INSERT(Settings, bg_green, QStringLiteral(), QStringLiteral());
INSERT(Settings, bg_blue, QStringLiteral(), QStringLiteral());
// Renderer (Advanced Graphics)
INSERT(Settings, async_presentation, "Enable asynchronous presentation (Vulkan only)", "");
INSERT(Settings, renderer_force_max_clock, "Force maximum clocks (Vulkan only)",
"Runs work in the background while waiting for graphics commands to keep the GPU from "
"lowering its clock speed.");
INSERT(Settings, max_anisotropy, "Anisotropic Filtering:", "");
INSERT(Settings, gpu_accuracy, "Accuracy Level:", "");
INSERT(Settings, use_asynchronous_shaders, "Use asynchronous shader building (Hack)",
"Enables asynchronous shader compilation, which may reduce shader stutter. This feature "
"is experimental.");
INSERT(Settings, use_fast_gpu_time, "Use Fast GPU Time (Hack)",
"Enables Fast GPU Time. This option will force most games to run at their highest "
"native resolution.");
INSERT(Settings, use_vulkan_driver_pipeline_cache, "Use Vulkan pipeline cache",
"Enables GPU vendor-specific pipeline cache. This option can improve shader loading "
INSERT(Settings, async_presentation, tr("Enable asynchronous presentation (Vulkan only)"),
QStringLiteral());
INSERT(
Settings, renderer_force_max_clock, tr("Force maximum clocks (Vulkan only)"),
tr("Runs work in the background while waiting for graphics commands to keep the GPU from "
"lowering its clock speed."));
INSERT(Settings, max_anisotropy, tr("Anisotropic Filtering:"), QStringLiteral());
INSERT(Settings, gpu_accuracy, tr("Accuracy Level:"), QStringLiteral());
INSERT(
Settings, use_asynchronous_shaders, tr("Use asynchronous shader building (Hack)"),
tr("Enables asynchronous shader compilation, which may reduce shader stutter. This feature "
"is experimental."));
INSERT(Settings, use_fast_gpu_time, tr("Use Fast GPU Time (Hack)"),
tr("Enables Fast GPU Time. This option will force most games to run at their highest "
"native resolution."));
INSERT(Settings, use_vulkan_driver_pipeline_cache, tr("Use Vulkan pipeline cache"),
tr("Enables GPU vendor-specific pipeline cache. This option can improve shader loading "
"time significantly in cases where the Vulkan driver does not store pipeline cache "
"files internally.");
INSERT(Settings, enable_compute_pipelines, "Enable Compute Pipelines (Intel Vulkan Only)",
"Enable compute pipelines, required by some games.\nThis setting only exists for Intel "
"files internally."));
INSERT(
Settings, enable_compute_pipelines, tr("Enable Compute Pipelines (Intel Vulkan Only)"),
tr("Enable compute pipelines, required by some games.\nThis setting only exists for Intel "
"proprietary drivers, and may crash if enabled.\nCompute pipelines are always enabled "
"on all other drivers.");
INSERT(Settings, use_reactive_flushing, "Enable Reactive Flushing",
"Uses reactive flushing instead of predictive flushing, allowing more accurate memory "
"syncing.");
INSERT(Settings, use_video_framerate, "Sync to framerate of video playback",
"Run the game at normal speed during video playback, even when the framerate is "
"unlocked.");
INSERT(Settings, barrier_feedback_loops, "Barrier feedback loops",
"Improves rendering of transparency effects in specific games.");
"on all other drivers."));
INSERT(
Settings, use_reactive_flushing, tr("Enable Reactive Flushing"),
tr("Uses reactive flushing instead of predictive flushing, allowing more accurate memory "
"syncing."));
INSERT(Settings, use_video_framerate, tr("Sync to framerate of video playback"),
tr("Run the game at normal speed during video playback, even when the framerate is "
"unlocked."));
INSERT(Settings, barrier_feedback_loops, tr("Barrier feedback loops"),
tr("Improves rendering of transparency effects in specific games."));
// Renderer (Debug)
// System
INSERT(Settings, rng_seed, "RNG Seed", "");
INSERT(Settings, rng_seed_enabled, "", "");
INSERT(Settings, device_name, "Device Name", "");
INSERT(Settings, custom_rtc, "Custom RTC", "");
INSERT(Settings, custom_rtc_enabled, "", "");
INSERT(Settings, language_index,
"Language:", "Note: this can be overridden when region setting is auto-select");
INSERT(Settings, region_index, "Region:", "");
INSERT(Settings, time_zone_index, "Time Zone:", "");
INSERT(Settings, sound_index, "Sound Output Mode:", "");
INSERT(Settings, use_docked_mode, "Console Mode:", "");
INSERT(Settings, current_user, "", "");
INSERT(Settings, rng_seed, tr("RNG Seed"), QStringLiteral());
INSERT(Settings, rng_seed_enabled, QStringLiteral(), QStringLiteral());
INSERT(Settings, device_name, tr("Device Name"), QStringLiteral());
INSERT(Settings, custom_rtc, tr("Custom RTC"), QStringLiteral());
INSERT(Settings, custom_rtc_enabled, QStringLiteral(), QStringLiteral());
INSERT(Settings, language_index, tr("Language:"),
tr("Note: this can be overridden when region setting is auto-select"));
INSERT(Settings, region_index, tr("Region:"), QStringLiteral());
INSERT(Settings, time_zone_index, tr("Time Zone:"), QStringLiteral());
INSERT(Settings, sound_index, tr("Sound Output Mode:"), QStringLiteral());
INSERT(Settings, use_docked_mode, tr("Console Mode:"), QStringLiteral());
INSERT(Settings, current_user, QStringLiteral(), QStringLiteral());
// Controls
@ -154,11 +167,14 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent) {
// Ui
// Ui General
INSERT(UISettings, select_user_on_boot, "Prompt for user on game boot", "");
INSERT(UISettings, pause_when_in_background, "Pause emulation when in background", "");
INSERT(UISettings, confirm_before_stopping, "Confirm before stopping emulation", "");
INSERT(UISettings, hide_mouse, "Hide mouse on inactivity", "");
INSERT(UISettings, controller_applet_disabled, "Disable controller applet", "");
INSERT(UISettings, select_user_on_boot, tr("Prompt for user on game boot"), QStringLiteral());
INSERT(UISettings, pause_when_in_background, tr("Pause emulation when in background"),
QStringLiteral());
INSERT(UISettings, confirm_before_stopping, tr("Confirm before stopping emulation"),
QStringLiteral());
INSERT(UISettings, hide_mouse, tr("Hide mouse on inactivity"), QStringLiteral());
INSERT(UISettings, controller_applet_disabled, tr("Disable controller applet"),
QStringLiteral());
// Ui Debugging
@ -178,140 +194,141 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
return parent->tr(text, context);
};
#define PAIR(ENUM, VALUE, TRANSLATION) \
{ static_cast<u32>(Settings::ENUM::VALUE), tr(TRANSLATION) }
#define CTX_PAIR(ENUM, VALUE, TRANSLATION, CONTEXT) \
{ static_cast<u32>(Settings::ENUM::VALUE), tr(TRANSLATION, CONTEXT) }
#define PAIR(ENUM, VALUE, TRANSLATION) {static_cast<u32>(Settings::ENUM::VALUE), (TRANSLATION)}
// Intentionally skipping VSyncMode to let the UI fill that one out
translations->insert({Settings::EnumMetadata<Settings::AstcDecodeMode>::Index(),
{
PAIR(AstcDecodeMode, Cpu, "CPU"),
PAIR(AstcDecodeMode, Gpu, "GPU"),
PAIR(AstcDecodeMode, CpuAsynchronous, "CPU Asynchronous"),
PAIR(AstcDecodeMode, Cpu, tr("CPU")),
PAIR(AstcDecodeMode, Gpu, tr("GPU")),
PAIR(AstcDecodeMode, CpuAsynchronous, tr("CPU Asynchronous")),
}});
translations->insert({Settings::EnumMetadata<Settings::AstcRecompression>::Index(),
translations->insert(
{Settings::EnumMetadata<Settings::AstcRecompression>::Index(),
{
PAIR(AstcRecompression, Uncompressed, "Uncompressed (Best quality)"),
PAIR(AstcRecompression, Bc1, "BC1 (Low quality)"),
PAIR(AstcRecompression, Bc3, "BC3 (Medium quality)"),
PAIR(AstcRecompression, Uncompressed, tr("Uncompressed (Best quality)")),
PAIR(AstcRecompression, Bc1, tr("BC1 (Low quality)")),
PAIR(AstcRecompression, Bc3, tr("BC3 (Medium quality)")),
}});
translations->insert({Settings::EnumMetadata<Settings::RendererBackend>::Index(),
{
#ifdef HAS_OPENGL
PAIR(RendererBackend, OpenGL, "OpenGL"),
PAIR(RendererBackend, OpenGL, tr("OpenGL")),
#endif
PAIR(RendererBackend, Vulkan, "Vulkan"),
PAIR(RendererBackend, Null, "Null"),
PAIR(RendererBackend, Vulkan, tr("Vulkan")),
PAIR(RendererBackend, Null, tr("Null")),
}});
translations->insert({Settings::EnumMetadata<Settings::ShaderBackend>::Index(),
translations->insert(
{Settings::EnumMetadata<Settings::ShaderBackend>::Index(),
{
PAIR(ShaderBackend, Glsl, "GLSL"),
PAIR(ShaderBackend, Glasm, "GLASM (Assembly Shaders, NVIDIA Only)"),
PAIR(ShaderBackend, SpirV, "SPIR-V (Experimental, Mesa Only)"),
PAIR(ShaderBackend, Glsl, tr("GLSL")),
PAIR(ShaderBackend, Glasm, tr("GLASM (Assembly Shaders, NVIDIA Only)")),
PAIR(ShaderBackend, SpirV, tr("SPIR-V (Experimental, Mesa Only)")),
}});
translations->insert({Settings::EnumMetadata<Settings::GpuAccuracy>::Index(),
{
PAIR(GpuAccuracy, Normal, "Normal"),
PAIR(GpuAccuracy, High, "High"),
PAIR(GpuAccuracy, Extreme, "Extreme"),
PAIR(GpuAccuracy, Normal, tr("Normal")),
PAIR(GpuAccuracy, High, tr("High")),
PAIR(GpuAccuracy, Extreme, tr("Extreme")),
}});
translations->insert({Settings::EnumMetadata<Settings::CpuAccuracy>::Index(),
translations->insert(
{Settings::EnumMetadata<Settings::CpuAccuracy>::Index(),
{
PAIR(CpuAccuracy, Auto, "Auto"),
PAIR(CpuAccuracy, Accurate, "Accurate"),
PAIR(CpuAccuracy, Unsafe, "Unsafe"),
PAIR(CpuAccuracy, Paranoid, "Paranoid (disables most optimizations)"),
PAIR(CpuAccuracy, Auto, tr("Auto")),
PAIR(CpuAccuracy, Accurate, tr("Accurate")),
PAIR(CpuAccuracy, Unsafe, tr("Unsafe")),
PAIR(CpuAccuracy, Paranoid, tr("Paranoid (disables most optimizations)")),
}});
translations->insert({Settings::EnumMetadata<Settings::FullscreenMode>::Index(),
{
PAIR(FullscreenMode, Borderless, "Borderless Windowed"),
PAIR(FullscreenMode, Exclusive, "Exclusive Fullscreen"),
PAIR(FullscreenMode, Borderless, tr("Borderless Windowed")),
PAIR(FullscreenMode, Exclusive, tr("Exclusive Fullscreen")),
}});
translations->insert({Settings::EnumMetadata<Settings::NvdecEmulation>::Index(),
{
PAIR(NvdecEmulation, Off, "No Video Output"),
PAIR(NvdecEmulation, Cpu, "CPU Video Decoding"),
PAIR(NvdecEmulation, Gpu, "GPU Video Decoding (Default)"),
PAIR(NvdecEmulation, Off, tr("No Video Output")),
PAIR(NvdecEmulation, Cpu, tr("CPU Video Decoding")),
PAIR(NvdecEmulation, Gpu, tr("GPU Video Decoding (Default)")),
}});
translations->insert({Settings::EnumMetadata<Settings::ResolutionSetup>::Index(),
translations->insert(
{Settings::EnumMetadata<Settings::ResolutionSetup>::Index(),
{
PAIR(ResolutionSetup, Res1_2X, "0.5X (360p/540p) [EXPERIMENTAL]"),
PAIR(ResolutionSetup, Res3_4X, "0.75X (540p/810p) [EXPERIMENTAL]"),
PAIR(ResolutionSetup, Res1X, "1X (720p/1080p)"),
PAIR(ResolutionSetup, Res3_2X, "1.5X (1080p/1620p) [EXPERIMENTAL]"),
PAIR(ResolutionSetup, Res2X, "2X (1440p/2160p)"),
PAIR(ResolutionSetup, Res3X, "3X (2160p/3240p)"),
PAIR(ResolutionSetup, Res4X, "4X (2880p/4320p)"),
PAIR(ResolutionSetup, Res5X, "5X (3600p/5400p)"),
PAIR(ResolutionSetup, Res6X, "6X (4320p/6480p)"),
PAIR(ResolutionSetup, Res7X, "7X (5040p/7560p)"),
PAIR(ResolutionSetup, Res8X, "8X (5760p/8640p)"),
PAIR(ResolutionSetup, Res1_2X, tr("0.5X (360p/540p) [EXPERIMENTAL]")),
PAIR(ResolutionSetup, Res3_4X, tr("0.75X (540p/810p) [EXPERIMENTAL]")),
PAIR(ResolutionSetup, Res1X, tr("1X (720p/1080p)")),
PAIR(ResolutionSetup, Res3_2X, tr("1.5X (1080p/1620p) [EXPERIMENTAL]")),
PAIR(ResolutionSetup, Res2X, tr("2X (1440p/2160p)")),
PAIR(ResolutionSetup, Res3X, tr("3X (2160p/3240p)")),
PAIR(ResolutionSetup, Res4X, tr("4X (2880p/4320p)")),
PAIR(ResolutionSetup, Res5X, tr("5X (3600p/5400p)")),
PAIR(ResolutionSetup, Res6X, tr("6X (4320p/6480p)")),
PAIR(ResolutionSetup, Res7X, tr("7X (5040p/7560p)")),
PAIR(ResolutionSetup, Res8X, tr("8X (5760p/8640p)")),
}});
translations->insert({Settings::EnumMetadata<Settings::ScalingFilter>::Index(),
{
PAIR(ScalingFilter, NearestNeighbor, "Nearest Neighbor"),
PAIR(ScalingFilter, Bilinear, "Bilinear"),
PAIR(ScalingFilter, Bicubic, "Bicubic"),
PAIR(ScalingFilter, Gaussian, "Gaussian"),
PAIR(ScalingFilter, ScaleForce, "ScaleForce"),
PAIR(ScalingFilter, Fsr, "AMD FidelityFX™ Super Resolution"),
PAIR(ScalingFilter, NearestNeighbor, tr("Nearest Neighbor")),
PAIR(ScalingFilter, Bilinear, tr("Bilinear")),
PAIR(ScalingFilter, Bicubic, tr("Bicubic")),
PAIR(ScalingFilter, Gaussian, tr("Gaussian")),
PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")),
PAIR(ScalingFilter, Fsr, tr("AMD FidelityFX™ Super Resolution")),
}});
translations->insert({Settings::EnumMetadata<Settings::AntiAliasing>::Index(),
{
PAIR(AntiAliasing, None, "None"),
PAIR(AntiAliasing, Fxaa, "FXAA"),
PAIR(AntiAliasing, Smaa, "SMAA"),
PAIR(AntiAliasing, None, tr("None")),
PAIR(AntiAliasing, Fxaa, tr("FXAA")),
PAIR(AntiAliasing, Smaa, tr("SMAA")),
}});
translations->insert({Settings::EnumMetadata<Settings::AspectRatio>::Index(),
{
PAIR(AspectRatio, R16_9, "Default (16:9)"),
PAIR(AspectRatio, R4_3, "Force 4:3"),
PAIR(AspectRatio, R21_9, "Force 21:9"),
PAIR(AspectRatio, R16_10, "Force 16:10"),
PAIR(AspectRatio, Stretch, "Stretch to Window"),
PAIR(AspectRatio, R16_9, tr("Default (16:9)")),
PAIR(AspectRatio, R4_3, tr("Force 4:3")),
PAIR(AspectRatio, R21_9, tr("Force 21:9")),
PAIR(AspectRatio, R16_10, tr("Force 16:10")),
PAIR(AspectRatio, Stretch, tr("Stretch to Window")),
}});
translations->insert({Settings::EnumMetadata<Settings::AnisotropyMode>::Index(),
{
PAIR(AnisotropyMode, Automatic, "Automatic"),
PAIR(AnisotropyMode, Default, "Default"),
PAIR(AnisotropyMode, X2, "2x"),
PAIR(AnisotropyMode, X4, "4x"),
PAIR(AnisotropyMode, X8, "8x"),
PAIR(AnisotropyMode, X16, "16x"),
PAIR(AnisotropyMode, Automatic, tr("Automatic")),
PAIR(AnisotropyMode, Default, tr("Default")),
PAIR(AnisotropyMode, X2, tr("2x")),
PAIR(AnisotropyMode, X4, tr("4x")),
PAIR(AnisotropyMode, X8, tr("8x")),
PAIR(AnisotropyMode, X16, tr("16x")),
}});
translations->insert(
{Settings::EnumMetadata<Settings::Language>::Index(),
{
PAIR(Language, Japanese, "Japanese (日本語)"),
PAIR(Language, EnglishAmerican, "American English"),
PAIR(Language, French, "French (français)"),
PAIR(Language, German, "German (Deutsch)"),
PAIR(Language, Italian, "Italian (italiano)"),
PAIR(Language, Spanish, "Spanish (español)"),
PAIR(Language, Chinese, "Chinese"),
PAIR(Language, Korean, "Korean (한국어)"),
PAIR(Language, Dutch, "Dutch (Nederlands)"),
PAIR(Language, Portuguese, "Portuguese (português)"),
PAIR(Language, Russian, "Russian (Русский)"),
PAIR(Language, Taiwanese, "Taiwanese"),
PAIR(Language, EnglishBritish, "British English"),
PAIR(Language, FrenchCanadian, "Canadian French"),
PAIR(Language, SpanishLatin, "Latin American Spanish"),
PAIR(Language, ChineseSimplified, "Simplified Chinese"),
PAIR(Language, ChineseTraditional, "Traditional Chinese (正體中文)"),
PAIR(Language, PortugueseBrazilian, "Brazilian Portuguese (português do Brasil)"),
PAIR(Language, Japanese, tr("Japanese (日本語)")),
PAIR(Language, EnglishAmerican, tr("American English")),
PAIR(Language, French, tr("French (français)")),
PAIR(Language, German, tr("German (Deutsch)")),
PAIR(Language, Italian, tr("Italian (italiano)")),
PAIR(Language, Spanish, tr("Spanish (español)")),
PAIR(Language, Chinese, tr("Chinese")),
PAIR(Language, Korean, tr("Korean (한국어)")),
PAIR(Language, Dutch, tr("Dutch (Nederlands)")),
PAIR(Language, Portuguese, tr("Portuguese (português)")),
PAIR(Language, Russian, tr("Russian (Русский)")),
PAIR(Language, Taiwanese, tr("Taiwanese")),
PAIR(Language, EnglishBritish, tr("British English")),
PAIR(Language, FrenchCanadian, tr("Canadian French")),
PAIR(Language, SpanishLatin, tr("Latin American Spanish")),
PAIR(Language, ChineseSimplified, tr("Simplified Chinese")),
PAIR(Language, ChineseTraditional, tr("Traditional Chinese (正體中文)")),
PAIR(Language, PortugueseBrazilian, tr("Brazilian Portuguese (português do Brasil)")),
}});
translations->insert({Settings::EnumMetadata<Settings::Region>::Index(),
{
PAIR(Region, Japan, "Japan"),
PAIR(Region, Usa, "USA"),
PAIR(Region, Europe, "Europe"),
PAIR(Region, Australia, "Australia"),
PAIR(Region, China, "China"),
PAIR(Region, Korea, "Korea"),
PAIR(Region, Taiwan, "Taiwan"),
PAIR(Region, Japan, tr("Japan")),
PAIR(Region, Usa, tr("USA")),
PAIR(Region, Europe, tr("Europe")),
PAIR(Region, Australia, tr("Australia")),
PAIR(Region, China, tr("China")),
PAIR(Region, Korea, tr("Korea")),
PAIR(Region, Taiwan, tr("Taiwan")),
}});
translations->insert(
{Settings::EnumMetadata<Settings::TimeZone>::Index(),
@ -323,72 +340,74 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
{static_cast<u32>(Settings::TimeZone::Default),
tr("Default (%1)", "Default time zone")
.arg(QString::fromStdString(Common::TimeZone::GetDefaultTimeZone()))},
PAIR(TimeZone, Cet, "CET"),
PAIR(TimeZone, Cst6Cdt, "CST6CDT"),
PAIR(TimeZone, Cuba, "Cuba"),
PAIR(TimeZone, Eet, "EET"),
PAIR(TimeZone, Egypt, "Egypt"),
PAIR(TimeZone, Eire, "Eire"),
PAIR(TimeZone, Est, "EST"),
PAIR(TimeZone, Est5Edt, "EST5EDT"),
PAIR(TimeZone, Gb, "GB"),
PAIR(TimeZone, GbEire, "GB-Eire"),
PAIR(TimeZone, Gmt, "GMT"),
PAIR(TimeZone, GmtPlusZero, "GMT+0"),
PAIR(TimeZone, GmtMinusZero, "GMT-0"),
PAIR(TimeZone, GmtZero, "GMT0"),
PAIR(TimeZone, Greenwich, "Greenwich"),
PAIR(TimeZone, Hongkong, "Hongkong"),
PAIR(TimeZone, Hst, "HST"),
PAIR(TimeZone, Iceland, "Iceland"),
PAIR(TimeZone, Iran, "Iran"),
PAIR(TimeZone, Israel, "Israel"),
PAIR(TimeZone, Jamaica, "Jamaica"),
PAIR(TimeZone, Japan, "Japan"),
PAIR(TimeZone, Kwajalein, "Kwajalein"),
PAIR(TimeZone, Libya, "Libya"),
PAIR(TimeZone, Met, "MET"),
PAIR(TimeZone, Mst, "MST"),
PAIR(TimeZone, Mst7Mdt, "MST7MDT"),
PAIR(TimeZone, Navajo, "Navajo"),
PAIR(TimeZone, Nz, "NZ"),
PAIR(TimeZone, NzChat, "NZ-CHAT"),
PAIR(TimeZone, Poland, "Poland"),
PAIR(TimeZone, Portugal, "Portugal"),
PAIR(TimeZone, Prc, "PRC"),
PAIR(TimeZone, Pst8Pdt, "PST8PDT"),
PAIR(TimeZone, Roc, "ROC"),
PAIR(TimeZone, Rok, "ROK"),
PAIR(TimeZone, Singapore, "Singapore"),
PAIR(TimeZone, Turkey, "Turkey"),
PAIR(TimeZone, Uct, "UCT"),
PAIR(TimeZone, Universal, "Universal"),
PAIR(TimeZone, Utc, "UTC"),
PAIR(TimeZone, WSu, "W-SU"),
PAIR(TimeZone, Wet, "WET"),
PAIR(TimeZone, Zulu, "Zulu"),
PAIR(TimeZone, Cet, tr("CET")),
PAIR(TimeZone, Cst6Cdt, tr("CST6CDT")),
PAIR(TimeZone, Cuba, tr("Cuba")),
PAIR(TimeZone, Eet, tr("EET")),
PAIR(TimeZone, Egypt, tr("Egypt")),
PAIR(TimeZone, Eire, tr("Eire")),
PAIR(TimeZone, Est, tr("EST")),
PAIR(TimeZone, Est5Edt, tr("EST5EDT")),
PAIR(TimeZone, Gb, tr("GB")),
PAIR(TimeZone, GbEire, tr("GB-Eire")),
PAIR(TimeZone, Gmt, tr("GMT")),
PAIR(TimeZone, GmtPlusZero, tr("GMT+0")),
PAIR(TimeZone, GmtMinusZero, tr("GMT-0")),
PAIR(TimeZone, GmtZero, tr("GMT0")),
PAIR(TimeZone, Greenwich, tr("Greenwich")),
PAIR(TimeZone, Hongkong, tr("Hongkong")),
PAIR(TimeZone, Hst, tr("HST")),
PAIR(TimeZone, Iceland, tr("Iceland")),
PAIR(TimeZone, Iran, tr("Iran")),
PAIR(TimeZone, Israel, tr("Israel")),
PAIR(TimeZone, Jamaica, tr("Jamaica")),
PAIR(TimeZone, Japan, tr("Japan")),
PAIR(TimeZone, Kwajalein, tr("Kwajalein")),
PAIR(TimeZone, Libya, tr("Libya")),
PAIR(TimeZone, Met, tr("MET")),
PAIR(TimeZone, Mst, tr("MST")),
PAIR(TimeZone, Mst7Mdt, tr("MST7MDT")),
PAIR(TimeZone, Navajo, tr("Navajo")),
PAIR(TimeZone, Nz, tr("NZ")),
PAIR(TimeZone, NzChat, tr("NZ-CHAT")),
PAIR(TimeZone, Poland, tr("Poland")),
PAIR(TimeZone, Portugal, tr("Portugal")),
PAIR(TimeZone, Prc, tr("PRC")),
PAIR(TimeZone, Pst8Pdt, tr("PST8PDT")),
PAIR(TimeZone, Roc, tr("ROC")),
PAIR(TimeZone, Rok, tr("ROK")),
PAIR(TimeZone, Singapore, tr("Singapore")),
PAIR(TimeZone, Turkey, tr("Turkey")),
PAIR(TimeZone, Uct, tr("UCT")),
PAIR(TimeZone, Universal, tr("Universal")),
PAIR(TimeZone, Utc, tr("UTC")),
PAIR(TimeZone, WSu, tr("W-SU")),
PAIR(TimeZone, Wet, tr("WET")),
PAIR(TimeZone, Zulu, tr("Zulu")),
}});
translations->insert({Settings::EnumMetadata<Settings::AudioMode>::Index(),
{
PAIR(AudioMode, Mono, "Mono"),
PAIR(AudioMode, Stereo, "Stereo"),
PAIR(AudioMode, Surround, "Surround"),
PAIR(AudioMode, Mono, tr("Mono")),
PAIR(AudioMode, Stereo, tr("Stereo")),
PAIR(AudioMode, Surround, tr("Surround")),
}});
translations->insert({Settings::EnumMetadata<Settings::MemoryLayout>::Index(),
{
PAIR(MemoryLayout, Memory_4Gb, "4GB DRAM (Default)"),
PAIR(MemoryLayout, Memory_6Gb, "6GB DRAM (Unsafe)"),
PAIR(MemoryLayout, Memory_8Gb, "8GB DRAM (Unsafe)"),
PAIR(MemoryLayout, Memory_4Gb, tr("4GB DRAM (Default)")),
PAIR(MemoryLayout, Memory_6Gb, tr("6GB DRAM (Unsafe)")),
PAIR(MemoryLayout, Memory_8Gb, tr("8GB DRAM (Unsafe)")),
}});
translations->insert({Settings::EnumMetadata<Settings::ConsoleMode>::Index(),
{
PAIR(ConsoleMode, Docked, tr("Docked")),
PAIR(ConsoleMode, Handheld, tr("Handheld")),
}});
translations->insert(
{Settings::EnumMetadata<Settings::ConsoleMode>::Index(),
{PAIR(ConsoleMode, Docked, "Docked"), PAIR(ConsoleMode, Handheld, "Handheld")}});
translations->insert(
{Settings::EnumMetadata<Settings::ConfirmStop>::Index(),
{
PAIR(ConfirmStop, Ask_Always, "Always ask (Default)"),
PAIR(ConfirmStop, Ask_Based_On_Game, "Only if game specifies not to stop"),
PAIR(ConfirmStop, Ask_Never, "Never ask"),
PAIR(ConfirmStop, Ask_Always, tr("Always ask (Default)")),
PAIR(ConfirmStop, Ask_Based_On_Game, tr("Only if game specifies not to stop")),
PAIR(ConfirmStop, Ask_Never, tr("Never ask")),
}});
#undef PAIR

View File

@ -194,7 +194,7 @@ QWidget* Widget::CreateRadioGroup(std::function<std::string()>& serializer,
return group;
}
const auto get_selected = [=]() -> int {
const auto get_selected = [this]() -> int {
for (const auto& [id, button] : radio_buttons) {
if (button->isChecked()) {
return id;
@ -203,7 +203,7 @@ QWidget* Widget::CreateRadioGroup(std::function<std::string()>& serializer,
return -1;
};
const auto set_index = [=](u32 value) {
const auto set_index = [this](u32 value) {
for (const auto& [id, button] : radio_buttons) {
button->setChecked(id == value);
}

View File

@ -479,6 +479,6 @@ void GameListWorker::run() {
}
}
RecordEvent([=](GameList* game_list) { game_list->DonePopulating(watch_list); });
RecordEvent([this](GameList* game_list) { game_list->DonePopulating(watch_list); });
processing_completed.Set();
}