arm: Implement native code execution backend
This commit is contained in:
		
							
								
								
									
										8
									
								
								externals/CMakeLists.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								externals/CMakeLists.txt
									
									
									
									
										vendored
									
									
								
							| @@ -20,16 +20,16 @@ if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak) | |||||||
| endif() | endif() | ||||||
|  |  | ||||||
| # Dynarmic | # Dynarmic | ||||||
|  | if (ARCHITECTURE_arm64 AND NOT TARGET merry::oaknut) | ||||||
|  |     add_subdirectory(oaknut) | ||||||
|  | endif() | ||||||
|  |  | ||||||
| if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic) | if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic) | ||||||
|     set(DYNARMIC_IGNORE_ASSERTS ON) |     set(DYNARMIC_IGNORE_ASSERTS ON) | ||||||
|     add_subdirectory(dynarmic) |     add_subdirectory(dynarmic) | ||||||
|     add_library(dynarmic::dynarmic ALIAS dynarmic) |     add_library(dynarmic::dynarmic ALIAS dynarmic) | ||||||
| endif() | endif() | ||||||
|  |  | ||||||
| if (ARCHITECTURE_arm64 AND NOT TARGET merry::oaknut) |  | ||||||
|     add_subdirectory(oaknut) |  | ||||||
| endif() |  | ||||||
|  |  | ||||||
| # getopt | # getopt | ||||||
| if (MSVC) | if (MSVC) | ||||||
|     add_subdirectory(getopt) |     add_subdirectory(getopt) | ||||||
|   | |||||||
| @@ -189,6 +189,11 @@ public: | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     void EnableDirectMappedAddress() { | ||||||
|  |         // TODO | ||||||
|  |         UNREACHABLE(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     const size_t backing_size; ///< Size of the backing memory in bytes |     const size_t backing_size; ///< Size of the backing memory in bytes | ||||||
|     const size_t virtual_size; ///< Size of the virtual address placeholder in bytes |     const size_t virtual_size; ///< Size of the virtual address placeholder in bytes | ||||||
|  |  | ||||||
| @@ -340,11 +345,6 @@ private: | |||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void EnableDirectMappedAddress() { |  | ||||||
|         // TODO |  | ||||||
|         UNREACHABLE(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     HANDLE process{};        ///< Current process handle |     HANDLE process{};        ///< Current process handle | ||||||
|     HANDLE backing_handle{}; ///< File based backing memory |     HANDLE backing_handle{}; ///< File based backing memory | ||||||
|  |  | ||||||
|   | |||||||
| @@ -158,8 +158,8 @@ bool IsFastmemEnabled() { | |||||||
|  |  | ||||||
| static bool is_nce_enabled = false; | static bool is_nce_enabled = false; | ||||||
|  |  | ||||||
| void SetNceEnabled(bool is_64bit) { | void SetNceEnabled(bool is_39bit) { | ||||||
|     is_nce_enabled = values.cpu_backend.GetValue() == CpuBackend::Nce && is_64bit; |     is_nce_enabled = values.cpu_backend.GetValue() == CpuBackend::Nce && is_39bit; | ||||||
| } | } | ||||||
|  |  | ||||||
| bool IsNceEnabled() { | bool IsNceEnabled() { | ||||||
|   | |||||||
| @@ -181,7 +181,7 @@ struct Values { | |||||||
|  |  | ||||||
|     // Cpu |     // Cpu | ||||||
|     SwitchableSetting<CpuBackend, true> cpu_backend{ |     SwitchableSetting<CpuBackend, true> cpu_backend{ | ||||||
|         linkage,         CpuBackend::Dynarmic, CpuBackend::Dynarmic, |         linkage,         CpuBackend::Nce, CpuBackend::Dynarmic, | ||||||
| #ifdef ARCHITECTURE_arm64 | #ifdef ARCHITECTURE_arm64 | ||||||
|         CpuBackend::Nce, |         CpuBackend::Nce, | ||||||
| #else | #else | ||||||
|   | |||||||
| @@ -926,6 +926,22 @@ if (ENABLE_WEB_SERVICE) | |||||||
|     target_link_libraries(core PRIVATE web_service) |     target_link_libraries(core PRIVATE web_service) | ||||||
| endif() | endif() | ||||||
|  |  | ||||||
|  | if (ARCHITECTURE_arm64) | ||||||
|  |     enable_language(C ASM) | ||||||
|  |     set(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp") | ||||||
|  |  | ||||||
|  |     target_sources(core PRIVATE | ||||||
|  |         arm/nce/arm_nce.cpp | ||||||
|  |         arm/nce/arm_nce.h | ||||||
|  |         arm/nce/arm_nce.s | ||||||
|  |         arm/nce/guest_context.h | ||||||
|  |         arm/nce/patch.cpp | ||||||
|  |         arm/nce/patch.h | ||||||
|  |         arm/nce/instructions.h | ||||||
|  |     ) | ||||||
|  |     target_link_libraries(core PRIVATE merry::oaknut) | ||||||
|  | endif() | ||||||
|  |  | ||||||
| if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) | if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) | ||||||
|     target_sources(core PRIVATE |     target_sources(core PRIVATE | ||||||
|         arm/dynarmic/arm_dynarmic.h |         arm/dynarmic/arm_dynarmic.h | ||||||
|   | |||||||
| @@ -81,6 +81,9 @@ public: | |||||||
|     // thread context to be 800 bytes in size. |     // thread context to be 800 bytes in size. | ||||||
|     static_assert(sizeof(ThreadContext64) == 0x320); |     static_assert(sizeof(ThreadContext64) == 0x320); | ||||||
|  |  | ||||||
|  |     /// Perform any backend-specific initialization. | ||||||
|  |     virtual void Initialize() {} | ||||||
|  |  | ||||||
|     /// Runs the CPU until an event happens |     /// Runs the CPU until an event happens | ||||||
|     void Run(); |     void Run(); | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										395
									
								
								src/core/arm/nce/arm_nce.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										395
									
								
								src/core/arm/nce/arm_nce.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,395 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later | ||||||
|  |  | ||||||
|  | #include <cinttypes> | ||||||
|  | #include <memory> | ||||||
|  |  | ||||||
|  | #include "common/scope_exit.h" | ||||||
|  | #include "common/signal_chain.h" | ||||||
|  | #include "core/arm/nce/arm_nce.h" | ||||||
|  | #include "core/arm/nce/patch.h" | ||||||
|  | #include "core/core.h" | ||||||
|  | #include "core/memory.h" | ||||||
|  |  | ||||||
|  | #include "core/hle/kernel/k_process.h" | ||||||
|  |  | ||||||
|  | #include <signal.h> | ||||||
|  | #include <sys/syscall.h> | ||||||
|  | #include <unistd.h> | ||||||
|  |  | ||||||
|  | namespace Core { | ||||||
|  |  | ||||||
|  | namespace { | ||||||
|  |  | ||||||
|  | struct sigaction g_orig_action; | ||||||
|  |  | ||||||
|  | // Verify assembly offsets. | ||||||
|  | using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; | ||||||
|  | static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0NativeContext); | ||||||
|  | static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock); | ||||||
|  | static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic); | ||||||
|  |  | ||||||
|  | // Walks the records stored in the reserved area of the host machine context and returns | ||||||
|  | // the first one whose magic is FPSIMD_MAGIC, viewed as an fpsimd_context. | ||||||
|  | // NOTE(review): the walk has no terminator or bounds check, so it assumes such a record | ||||||
|  | // is always present - confirm against the kernel's signal frame layout. | ||||||
|  | fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { | ||||||
|  |     auto* record = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved); | ||||||
|  |     for (;;) { | ||||||
|  |         if (record->magic == FPSIMD_MAGIC) { | ||||||
|  |             return reinterpret_cast<fpsimd_context*>(record); | ||||||
|  |         } | ||||||
|  |         // Each record header carries its own size; step over it to reach the next one. | ||||||
|  |         auto* const raw = reinterpret_cast<char*>(record); | ||||||
|  |         record = reinterpret_cast<_aarch64_ctx*>(raw + record->size); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // namespace | ||||||
|  |  | ||||||
|  | void* ARM_NCE::RestoreGuestContext(void* raw_context) { | ||||||
|  |     // View raw_context as a ucontext_t and work on its machine context (uc_mcontext). | ||||||
|  |     auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext; | ||||||
|  |  | ||||||
|  |     // x9 in the host context holds the thread's NativeExecutionParameters pointer. | ||||||
|  |     auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_ctx.regs[9]); | ||||||
|  |     auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context); | ||||||
|  |  | ||||||
|  |     // Locate the FPSIMD record inside the host context. | ||||||
|  |     auto* fpctx = GetFloatingPointState(host_ctx); | ||||||
|  |  | ||||||
|  |     // Stash host callee-saved registers (vector regs from v8, GPRs from x19) in the guest context. | ||||||
|  |     std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->vregs[8], | ||||||
|  |                 sizeof(guest_ctx->host_ctx.host_saved_vregs)); | ||||||
|  |     std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_ctx.regs[19], | ||||||
|  |                 sizeof(guest_ctx->host_ctx.host_saved_regs)); | ||||||
|  |  | ||||||
|  |     // Remember the host stack pointer so SaveGuestContext can put it back. | ||||||
|  |     guest_ctx->host_ctx.host_sp = host_ctx.sp; | ||||||
|  |  | ||||||
|  |     // Overwrite the host context with guest sp, pc, pstate, fpcr/fpsr, GPRs and vector regs (not tpidr_el0). | ||||||
|  |     host_ctx.sp = guest_ctx->sp; | ||||||
|  |     host_ctx.pc = guest_ctx->pc; | ||||||
|  |     host_ctx.pstate = guest_ctx->pstate; | ||||||
|  |     fpctx->fpcr = guest_ctx->fpcr; | ||||||
|  |     fpctx->fpsr = guest_ctx->fpsr; | ||||||
|  |     std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs)); | ||||||
|  |     std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs)); | ||||||
|  |  | ||||||
|  |     // Return the parameters pointer read from x9 (the new thread-local storage pointer). | ||||||
|  |     return tpidr; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) { | ||||||
|  |     // View raw_context as a ucontext_t and work on its machine context (uc_mcontext). | ||||||
|  |     auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext; | ||||||
|  |  | ||||||
|  |     // Locate the FPSIMD record inside the host context. | ||||||
|  |     auto* fpctx = GetFloatingPointState(host_ctx); | ||||||
|  |  | ||||||
|  |     // Copy GPRs, vector regs, fpsr/fpcr, pstate, pc and sp into guest_ctx (tpidr_el0 is not saved here). | ||||||
|  |     std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs)); | ||||||
|  |     std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs)); | ||||||
|  |     guest_ctx->fpsr = fpctx->fpsr; | ||||||
|  |     guest_ctx->fpcr = fpctx->fpcr; | ||||||
|  |     guest_ctx->pstate = static_cast<u32>(host_ctx.pstate); | ||||||
|  |     guest_ctx->pc = host_ctx.pc; | ||||||
|  |     guest_ctx->sp = host_ctx.sp; | ||||||
|  |  | ||||||
|  |     // Put back the host stack pointer recorded in the guest context's host area. | ||||||
|  |     host_ctx.sp = guest_ctx->host_ctx.host_sp; | ||||||
|  |  | ||||||
|  |     // Put back the host callee-saved registers (GPRs from x19, vector regs from v8). | ||||||
|  |     std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(), | ||||||
|  |                 sizeof(guest_ctx->host_ctx.host_saved_regs)); | ||||||
|  |     std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(), | ||||||
|  |                 sizeof(guest_ctx->host_ctx.host_saved_vregs)); | ||||||
|  |  | ||||||
|  |     // host_saved_regs starts at x19, so index 11 is the saved x30: resume at the host return address. | ||||||
|  |     host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11]; | ||||||
|  |  | ||||||
|  |     // Atomically swap esr_el1 with 0 and place the previous value in x0 of the host context. | ||||||
|  |     host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) { | ||||||
|  |     auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext; | ||||||
|  |     auto* info = static_cast<siginfo_t*>(raw_info); | ||||||
|  |  | ||||||
|  |     // Mask the faulting address with ~YUZU_PAGEMASK and ask InvalidateNCE to handle YUZU_PAGESIZE bytes there. | ||||||
|  |     // TODO: handle accesses which split a page? | ||||||
|  |     const Common::ProcessAddress addr = | ||||||
|  |         (reinterpret_cast<u64>(info->si_addr) & ~Memory::YUZU_PAGEMASK); | ||||||
|  |     if (guest_ctx->system->ApplicationMemory().InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) { | ||||||
|  |         // The access was handled successfully; return true so we go back to guest code. | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Otherwise raise an exception: prefetch abort if the fault address equals pc, else data abort. | ||||||
|  |     const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast<u64>(info->si_addr); | ||||||
|  |     guest_ctx->esr_el1.fetch_or( | ||||||
|  |         static_cast<u64>(is_prefetch_abort ? HaltReason::PrefetchAbort : HaltReason::DataAbort)); | ||||||
|  |  | ||||||
|  |     // Forcibly mark the context as locked. We are still running. | ||||||
|  |     // We may race with SignalInterrupt here: | ||||||
|  |     // - If we lose the race, then SignalInterrupt will send us a signal which we are masking, | ||||||
|  |     //   and it will do nothing when it is unmasked, as we have already left guest code. | ||||||
|  |     // - If we win the race, then SignalInterrupt will wait for us to unlock first. | ||||||
|  |     auto& thread_params = guest_ctx->parent->running_thread->GetNativeExecutionParameters(); | ||||||
|  |     thread_params.lock.store(SpinLockLocked); | ||||||
|  |  | ||||||
|  |     // Save guest state and rewrite the context so execution returns to the host; report false. | ||||||
|  |     SaveGuestContext(guest_ctx, raw_context); | ||||||
|  |     return false; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Forwards a fault to the handler recorded in g_orig_action through its sa_sigaction entry. | ||||||
|  | void ARM_NCE::HandleHostFault(int sig, void* raw_info, void* raw_context) { | ||||||
|  |     auto* const info = static_cast<siginfo_t*>(raw_info); | ||||||
|  |     g_orig_action.sa_sigaction(sig, info, raw_context); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | HaltReason ARM_NCE::RunJit() { | ||||||
|  |     // Look up the current kernel thread and its native execution parameters. | ||||||
|  |     // TODO: pass the current thread down from ::Run | ||||||
|  |     auto* thread = Kernel::GetCurrentThreadPointer(system.Kernel()); | ||||||
|  |     auto* thread_params = &thread->GetNativeExecutionParameters(); | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         // Hold the core mutex while checking and publishing the run state. | ||||||
|  |         std::scoped_lock lk{lock}; | ||||||
|  |  | ||||||
|  |         // No thread may already be registered as running on this core. | ||||||
|  |         ASSERT(running_thread == nullptr); | ||||||
|  |  | ||||||
|  |         // Consume any pending halt bits; if some were set, return them without entering guest code. | ||||||
|  |         u64 halt = guest_ctx.esr_el1.exchange(0); | ||||||
|  |         if (halt != 0) { | ||||||
|  |             return static_cast<HaltReason>(halt); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Publish the thread so SignalInterrupt can see that this core is running. | ||||||
|  |         running_thread = thread; | ||||||
|  |  | ||||||
|  |         // Take the thread-parameter lock before releasing the core mutex. | ||||||
|  |         // SignalInterrupt takes the same lock, which forces synchronization between the two. | ||||||
|  |         LockThreadParameters(thread_params); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Hand the guest context and the guest TLS register values to the thread parameters. | ||||||
|  |     guest_ctx.parent = this; | ||||||
|  |     thread_params->native_context = &guest_ctx; | ||||||
|  |     thread_params->tpidr_el0 = guest_ctx.tpidr_el0; | ||||||
|  |     thread_params->tpidrro_el0 = guest_ctx.tpidrro_el0; | ||||||
|  |     thread_params->is_running = true; | ||||||
|  |  | ||||||
|  |     HaltReason halt{}; | ||||||
|  |  | ||||||
|  |     // TODO: finding and creating the post handler needs to be locked | ||||||
|  |     // to deal with dynamic loading of NROs. | ||||||
|  |     const auto& post_handlers = system.ApplicationProcess()->GetPostHandlers(); | ||||||
|  |     if (auto it = post_handlers.find(guest_ctx.pc); it != post_handlers.end()) { | ||||||
|  |         halt = ReturnToRunCodeByTrampoline(thread_params, &guest_ctx, it->second); | ||||||
|  |     } else { | ||||||
|  |         halt = ReturnToRunCodeByExceptionLevelChange(thread_id, thread_params); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Copy tpidr_el0 back and detach the guest context from the thread parameters. | ||||||
|  |     // The thread does not change, so thread_params still refers to the same thread here. | ||||||
|  |     guest_ctx.tpidr_el0 = thread_params->tpidr_el0; | ||||||
|  |     thread_params->native_context = nullptr; | ||||||
|  |     thread_params->is_running = false; | ||||||
|  |  | ||||||
|  |     // Release the thread-parameter lock. | ||||||
|  |     UnlockThreadParameters(thread_params); | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         // Re-take the core mutex to update running_thread. | ||||||
|  |         std::scoped_lock lk{lock}; | ||||||
|  |  | ||||||
|  |         // We have left guest code, so no thread is active on this core any more. | ||||||
|  |         running_thread = nullptr; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Report why guest execution stopped. | ||||||
|  |     return halt; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | HaltReason ARM_NCE::StepJit() { | ||||||
|  |     return HaltReason::StepThread; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | u32 ARM_NCE::GetSvcNumber() const { | ||||||
|  |     return guest_ctx.svc_swi; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ARM_NCE::ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_) | ||||||
|  |     : ARM_Interface{system_, uses_wall_clock_}, core_index{core_index_} { | ||||||
|  |     guest_ctx.system = &system_; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ARM_NCE::~ARM_NCE() = default; | ||||||
|  |  | ||||||
|  | void ARM_NCE::Initialize() { | ||||||
|  |     thread_id = gettid(); | ||||||
|  |  | ||||||
|  |     // Install the three signal handlers exactly once, guarded by a static std::once_flag. | ||||||
|  |     static std::once_flag flag; | ||||||
|  |     std::call_once(flag, [] { | ||||||
|  |         using HandlerType = decltype(sigaction::sa_sigaction); | ||||||
|  |  | ||||||
|  |         sigset_t signal_mask; | ||||||
|  |         sigemptyset(&signal_mask); | ||||||
|  |         sigaddset(&signal_mask, ReturnToRunCodeByExceptionLevelChangeSignal); | ||||||
|  |         sigaddset(&signal_mask, BreakFromRunCodeSignal); | ||||||
|  |         sigaddset(&signal_mask, GuestFaultSignal); | ||||||
|  |  | ||||||
|  |         struct sigaction return_to_run_code_action {}; | ||||||
|  |         return_to_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK; | ||||||
|  |         return_to_run_code_action.sa_sigaction = reinterpret_cast<HandlerType>( | ||||||
|  |             &ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler); | ||||||
|  |         return_to_run_code_action.sa_mask = signal_mask; | ||||||
|  |         Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action, | ||||||
|  |                           nullptr); | ||||||
|  |  | ||||||
|  |         struct sigaction break_from_run_code_action {}; | ||||||
|  |         break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK; | ||||||
|  |         break_from_run_code_action.sa_sigaction = | ||||||
|  |             reinterpret_cast<HandlerType>(&ARM_NCE::BreakFromRunCodeSignalHandler); | ||||||
|  |         break_from_run_code_action.sa_mask = signal_mask; | ||||||
|  |         Common::SigAction(BreakFromRunCodeSignal, &break_from_run_code_action, nullptr); | ||||||
|  |  | ||||||
|  |         struct sigaction fault_action {}; | ||||||
|  |         fault_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART; | ||||||
|  |         fault_action.sa_sigaction = | ||||||
|  |             reinterpret_cast<HandlerType>(&ARM_NCE::GuestFaultSignalHandler); | ||||||
|  |         fault_action.sa_mask = signal_mask; | ||||||
|  |         Common::SigAction(GuestFaultSignal, &fault_action, &g_orig_action); | ||||||
|  |  | ||||||
|  |         // If the previous fault handler lacked SA_SIGINFO, copy sa_handler into sa_sigaction so that | ||||||
|  |         // HandleHostFault can always call sa_sigaction; the fields share storage, so this should be a no-op. | ||||||
|  |         if (!(g_orig_action.sa_flags & SA_SIGINFO)) { | ||||||
|  |             g_orig_action.sa_sigaction = | ||||||
|  |                 reinterpret_cast<decltype(g_orig_action.sa_sigaction)>(g_orig_action.sa_handler); | ||||||
|  |         } | ||||||
|  |     }); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::SetPC(u64 pc) { | ||||||
|  |     guest_ctx.pc = pc; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | u64 ARM_NCE::GetPC() const { | ||||||
|  |     return guest_ctx.pc; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | u64 ARM_NCE::GetSP() const { | ||||||
|  |     return guest_ctx.sp; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | u64 ARM_NCE::GetReg(int index) const { | ||||||
|  |     return guest_ctx.cpu_registers[index]; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::SetReg(int index, u64 value) { | ||||||
|  |     guest_ctx.cpu_registers[index] = value; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | u128 ARM_NCE::GetVectorReg(int index) const { | ||||||
|  |     return guest_ctx.vector_registers[index]; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::SetVectorReg(int index, u128 value) { | ||||||
|  |     guest_ctx.vector_registers[index] = value; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | u32 ARM_NCE::GetPSTATE() const { | ||||||
|  |     return guest_ctx.pstate; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::SetPSTATE(u32 pstate) { | ||||||
|  |     guest_ctx.pstate = pstate; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | u64 ARM_NCE::GetTlsAddress() const { | ||||||
|  |     return guest_ctx.tpidrro_el0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::SetTlsAddress(u64 address) { | ||||||
|  |     guest_ctx.tpidrro_el0 = address; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | u64 ARM_NCE::GetTPIDR_EL0() const { | ||||||
|  |     return guest_ctx.tpidr_el0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::SetTPIDR_EL0(u64 value) { | ||||||
|  |     guest_ctx.tpidr_el0 = value; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Copies the guest register state held in guest_ctx into the supplied ThreadContext64. | ||||||
|  | void ARM_NCE::SaveContext(ThreadContext64& ctx) const { | ||||||
|  |     const auto& guest = guest_ctx; | ||||||
|  |  | ||||||
|  |     // Register files. | ||||||
|  |     ctx.cpu_registers = guest.cpu_registers; | ||||||
|  |     ctx.vector_registers = guest.vector_registers; | ||||||
|  |  | ||||||
|  |     // Scalar control and status values. | ||||||
|  |     ctx.sp = guest.sp; | ||||||
|  |     ctx.pc = guest.pc; | ||||||
|  |     ctx.pstate = guest.pstate; | ||||||
|  |     ctx.fpcr = guest.fpcr; | ||||||
|  |     ctx.fpsr = guest.fpsr; | ||||||
|  |     ctx.tpidr = guest.tpidr_el0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Overwrites the guest register state held in guest_ctx with the supplied ThreadContext64. | ||||||
|  | void ARM_NCE::LoadContext(const ThreadContext64& ctx) { | ||||||
|  |     auto& guest = guest_ctx; | ||||||
|  |  | ||||||
|  |     // Register files. | ||||||
|  |     guest.cpu_registers = ctx.cpu_registers; | ||||||
|  |     guest.vector_registers = ctx.vector_registers; | ||||||
|  |  | ||||||
|  |     // Scalar control and status values. | ||||||
|  |     guest.sp = ctx.sp; | ||||||
|  |     guest.pc = ctx.pc; | ||||||
|  |     guest.pstate = ctx.pstate; | ||||||
|  |     guest.fpcr = ctx.fpcr; | ||||||
|  |     guest.fpsr = ctx.fpsr; | ||||||
|  |     guest.tpidr_el0 = ctx.tpidr; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::SignalInterrupt() { | ||||||
|  |     // Hold the core mutex for the whole call. | ||||||
|  |     std::scoped_lock lk{lock}; | ||||||
|  |  | ||||||
|  |     // Set the BreakLoop bit in esr_el1; RunJit checks these bits before entering guest code. | ||||||
|  |     guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop)); | ||||||
|  |  | ||||||
|  |     // If no thread is registered as running on this core, the flag alone is enough. | ||||||
|  |     if (running_thread == nullptr) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Take the running thread's parameter lock. | ||||||
|  |     auto* params = &running_thread->GetNativeExecutionParameters(); | ||||||
|  |     LockThreadParameters(params); | ||||||
|  |  | ||||||
|  |     if (params->is_running) { | ||||||
|  |         // Send BreakFromRunCodeSignal to the host thread recorded in thread_id. | ||||||
|  |         // The lock is intentionally not released here: the running thread will unlock it. | ||||||
|  |         syscall(SYS_tkill, thread_id, BreakFromRunCodeSignal); | ||||||
|  |     } else { | ||||||
|  |         // The thread is no longer running, so just release the lock again. | ||||||
|  |         UnlockThreadParameters(params); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::ClearInterrupt() { | ||||||
|  |     guest_ctx.esr_el1 = {}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::ClearInstructionCache() { | ||||||
|  |     // TODO: This cannot be implemented correctly on Linux because | ||||||
|  |     // we do not have any access to ic iallu. | ||||||
|  |  | ||||||
|  |     // As a substitute, issue a sequentially consistent fence so prior accesses complete. | ||||||
|  |     std::atomic_thread_fence(std::memory_order_seq_cst); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Passes the byte range [addr, addr + size) to the compiler builtin __builtin___clear_cache. | ||||||
|  | void ARM_NCE::InvalidateCacheRange(u64 addr, std::size_t size) { | ||||||
|  |     char* const begin = reinterpret_cast<char*>(addr); | ||||||
|  |     char* const end = begin + size; | ||||||
|  |     __builtin___clear_cache(begin, end); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::ClearExclusiveState() { | ||||||
|  |     // No-op. | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void ARM_NCE::PageTableChanged(Common::PageTable& page_table, | ||||||
|  |                                std::size_t new_address_space_size_in_bits) { | ||||||
|  |     // No-op. Page table is never used. | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // namespace Core | ||||||
							
								
								
									
										108
									
								
								src/core/arm/nce/arm_nce.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								src/core/arm/nce/arm_nce.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,108 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <atomic> | ||||||
|  | #include <memory> | ||||||
|  | #include <mutex> | ||||||
|  | #include <span> | ||||||
|  | #include <unordered_map> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
|  | #include "core/arm/arm_interface.h" | ||||||
|  | #include "core/arm/nce/guest_context.h" | ||||||
|  |  | ||||||
|  | namespace Core::Memory { | ||||||
|  | class Memory; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | namespace Core { | ||||||
|  |  | ||||||
|  | class System; | ||||||
|  |  | ||||||
|  | class ARM_NCE final : public ARM_Interface { | ||||||
|  | public: | ||||||
|  |     ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_); | ||||||
|  |  | ||||||
|  |     ~ARM_NCE() override; | ||||||
|  |  | ||||||
|  |     void Initialize() override; | ||||||
|  |     void SetPC(u64 pc) override; | ||||||
|  |     u64 GetPC() const override; | ||||||
|  |     u64 GetSP() const override; | ||||||
|  |     u64 GetReg(int index) const override; | ||||||
|  |     void SetReg(int index, u64 value) override; | ||||||
|  |     u128 GetVectorReg(int index) const override; | ||||||
|  |     void SetVectorReg(int index, u128 value) override; | ||||||
|  |  | ||||||
|  |     u32 GetPSTATE() const override; | ||||||
|  |     void SetPSTATE(u32 pstate) override; | ||||||
|  |     u64 GetTlsAddress() const override; | ||||||
|  |     void SetTlsAddress(u64 address) override; | ||||||
|  |     void SetTPIDR_EL0(u64 value) override; | ||||||
|  |     u64 GetTPIDR_EL0() const override; | ||||||
|  |  | ||||||
|  |     Architecture GetArchitecture() const override { | ||||||
|  |         return Architecture::Aarch64; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     void SaveContext(ThreadContext32& ctx) const override {} | ||||||
|  |     void SaveContext(ThreadContext64& ctx) const override; | ||||||
|  |     void LoadContext(const ThreadContext32& ctx) override {} | ||||||
|  |     void LoadContext(const ThreadContext64& ctx) override; | ||||||
|  |  | ||||||
|  |     void SignalInterrupt() override; | ||||||
|  |     void ClearInterrupt() override; | ||||||
|  |     void ClearExclusiveState() override; | ||||||
|  |     void ClearInstructionCache() override; | ||||||
|  |     void InvalidateCacheRange(u64 addr, std::size_t size) override; | ||||||
|  |     void PageTableChanged(Common::PageTable& new_page_table, | ||||||
|  |                           std::size_t new_address_space_size_in_bits) override; | ||||||
|  |  | ||||||
|  | protected: | ||||||
|  |     HaltReason RunJit() override; | ||||||
|  |     HaltReason StepJit() override; | ||||||
|  |  | ||||||
|  |     u32 GetSvcNumber() const override; | ||||||
|  |  | ||||||
|  |     const Kernel::DebugWatchpoint* HaltedWatchpoint() const override { | ||||||
|  |         return nullptr; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     void RewindBreakpointInstruction() override {} | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     // Declarations whose definitions live in assembly. | ||||||
|  |     static HaltReason ReturnToRunCodeByTrampoline(void* tpidr, GuestContext* ctx, | ||||||
|  |                                                   u64 trampoline_addr); | ||||||
|  |     static HaltReason ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr); | ||||||
|  |  | ||||||
|  |     static void ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, | ||||||
|  |                                                                    void* raw_context); | ||||||
|  |     static void BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context); | ||||||
|  |     static void GuestFaultSignalHandler(int sig, void* info, void* raw_context); | ||||||
|  |  | ||||||
|  |     static void LockThreadParameters(void* tpidr); | ||||||
|  |     static void UnlockThreadParameters(void* tpidr); | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     // C++ helpers backing the assembly routines; they are defined in arm_nce.cpp. | ||||||
|  |     static void* RestoreGuestContext(void* raw_context); | ||||||
|  |     static void SaveGuestContext(GuestContext* ctx, void* raw_context); | ||||||
|  |     static bool HandleGuestFault(GuestContext* ctx, void* info, void* raw_context); | ||||||
|  |     static void HandleHostFault(int sig, void* info, void* raw_context); | ||||||
|  |  | ||||||
|  | public: | ||||||
|  |     // core_index comes from the constructor; thread_id is filled in by Initialize(). | ||||||
|  |     std::size_t core_index{}; | ||||||
|  |     pid_t thread_id{-1}; | ||||||
|  |  | ||||||
|  |     // Guest CPU context read and written by the accessors and by RunJit. | ||||||
|  |     GuestContext guest_ctx; | ||||||
|  |  | ||||||
|  |     // lock guards running_thread, the thread currently inside RunJit (null otherwise). | ||||||
|  |     std::mutex lock; | ||||||
|  |     Kernel::KThread* running_thread{}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } // namespace Core | ||||||
							
								
								
									
										222
									
								
								src/core/arm/nce/arm_nce.s
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										222
									
								
								src/core/arm/nce/arm_nce.s
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,222 @@ | |||||||
|  | /* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */ | ||||||
|  | /* SPDX-License-Identifier: GPL-2.0-or-later */ | ||||||
|  |  | ||||||
|  | #include "core/arm/nce/arm_nce_asm_definitions.h" | ||||||
|  |  | ||||||
|  | #define LOAD_IMMEDIATE_32(reg, val)                     \ | ||||||
|  |     mov     reg, #(((val) >> 0x00) & 0xFFFF);           \ | ||||||
|  |     movk    reg, #(((val) >> 0x10) & 0xFFFF), lsl #16 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* static HaltReason Core::ARM_NCE::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr): x0 = tpidr, x1 = ctx, x2 = trampoline_addr. */ | ||||||
|  | .section    .text._ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits | ||||||
|  | .global     _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm | ||||||
|  | .type       _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function | ||||||
|  | _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm: | ||||||
|  |     /* x3 = host sp (backup). */ | ||||||
|  |     /* x4 = host tpidr_el0 (backup). */ | ||||||
|  |     mov     x3, sp | ||||||
|  |     mrs     x4, tpidr_el0 | ||||||
|  |  | ||||||
|  |     /* Switch sp to the guest sp loaded from the GuestContext in x1; x5 is scratch. */ | ||||||
|  |     ldr     x5, [x1, #(GuestContextSp)] | ||||||
|  |     mov     sp, x5 | ||||||
|  |  | ||||||
|  |     /* x5 = address of the host context member inside the GuestContext. */ | ||||||
|  |     add     x5, x1, #(GuestContextHostContext) | ||||||
|  |  | ||||||
|  |     /* Store the backed-up host sp (x3) and tpidr_el0 (x4) as a pair in the host context. */ | ||||||
|  |     stp     x3, x4, [x5, #(HostContextSpTpidrEl0)] | ||||||
|  |  | ||||||
|  |     /* Store the callee-saved host GPRs x19-x30 in the host context. */ | ||||||
|  |     stp     x19, x20, [x5, #(HostContextRegs+0x0)] | ||||||
|  |     stp     x21, x22, [x5, #(HostContextRegs+0x10)] | ||||||
|  |     stp     x23, x24, [x5, #(HostContextRegs+0x20)] | ||||||
|  |     stp     x25, x26, [x5, #(HostContextRegs+0x30)] | ||||||
|  |     stp     x27, x28, [x5, #(HostContextRegs+0x40)] | ||||||
|  |     stp     x29, x30, [x5, #(HostContextRegs+0x50)] | ||||||
|  |  | ||||||
|  |     /* Store the callee-saved host FPRs q8-q15 in the host context. */ | ||||||
|  |     stp     q8, q9,   [x5, #(HostContextVregs+0x0)] | ||||||
|  |     stp     q10, q11, [x5, #(HostContextVregs+0x20)] | ||||||
|  |     stp     q12, q13, [x5, #(HostContextVregs+0x40)] | ||||||
|  |     stp     q14, q15, [x5, #(HostContextVregs+0x60)] | ||||||
|  |  | ||||||
|  |     /* Install the tpidr argument (x0) as the guest tpidr_el0. */ | ||||||
|  |     msr     tpidr_el0, x0 | ||||||
|  |  | ||||||
|  |     /* Tail call (branch without link) the trampoline in x2 to restore guest state. */ | ||||||
|  |     br      x2 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* static HaltReason Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */ | ||||||
|  | /* On entry: x0 = tid, x1 = tpidr. Issues the tkill system call so that */ | ||||||
|  | /* ReturnToRunCodeByExceptionLevelChangeSignal is sent to thread tid. */ | ||||||
|  | /* NOTE(review): presumably tid is the calling thread, so the signal handler runs on top of this frame - confirm. */ | ||||||
|  | .section    .text._ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits | ||||||
|  | .global     _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv | ||||||
|  | .type       _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, %function | ||||||
|  | _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv: | ||||||
|  |     /* This jumps to the signal handler, which will restore the entire context. */ | ||||||
|  |     /* On entry, x0 = thread id, which is already in the right place for the first syscall argument. */ | ||||||
|  |  | ||||||
|  |     /* Move tpidr to x9 so it is not trampled when x1 is overwritten below. */ | ||||||
|  |     /* NOTE(review): presumably the context restorer reads x9 back from the signal frame - confirm. */ | ||||||
|  |     mov     x9, x1 | ||||||
|  |  | ||||||
|  |     /* Set up arguments: x8 = syscall number (tkill), x1 = signal number. */ | ||||||
|  |     mov     x8, #(__NR_tkill) | ||||||
|  |     mov     x1, #(ReturnToRunCodeByExceptionLevelChangeSignal) | ||||||
|  |  | ||||||
|  |     /* Perform the system call; the raised signal transfers control to the signal handler. */ | ||||||
|  |     svc     #0 | ||||||
|  |  | ||||||
|  |     /* Block execution from flowing here: trap if the system call ever falls through. */ | ||||||
|  |     brk     #1000 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* static void Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */ | ||||||
|  | /* On entry: x0 = sig, x1 = info, x2 = raw_context. */ | ||||||
|  | /* Calls RestoreGuestContext(raw_context), records the host tpidr_el0 in the guest context's host */ | ||||||
|  | /* context, installs the value returned by RestoreGuestContext as tpidr_el0, and unlocks it. */ | ||||||
|  | .section    .text._ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits | ||||||
|  | .global     _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_ | ||||||
|  | .type       _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function | ||||||
|  | _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_: | ||||||
|  |     /* Standard frame: save frame pointer and link register, since this routine makes calls. */ | ||||||
|  |     stp     x29, x30, [sp, #-0x10]! | ||||||
|  |     mov     x29, sp | ||||||
|  |  | ||||||
|  |     /* Call the context restorer with the raw context. Its return value (x0) is the new tpidr_el0. */ | ||||||
|  |     mov     x0, x2 | ||||||
|  |     bl      _ZN4Core7ARM_NCE19RestoreGuestContextEPv | ||||||
|  |  | ||||||
|  |     /* Save the old (host) value of tpidr_el0 into the host context of the GuestContext that x0 refers to. */ | ||||||
|  |     mrs     x8, tpidr_el0 | ||||||
|  |     ldr     x9, [x0, #(TpidrEl0NativeContext)] | ||||||
|  |     str     x8, [x9, #(GuestContextHostContext + HostContextTpidrEl0)] | ||||||
|  |  | ||||||
|  |     /* Set our new tpidr_el0. */ | ||||||
|  |     msr     tpidr_el0, x0 | ||||||
|  |  | ||||||
|  |     /* Unlock the context; x0 still holds the new tpidr_el0 value as the argument. */ | ||||||
|  |     bl      _ZN4Core7ARM_NCE22UnlockThreadParametersEPv | ||||||
|  |  | ||||||
|  |     /* Returning from here will enter the guest. */ | ||||||
|  |     ldp     x29, x30, [sp], #0x10 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* static void Core::ARM_NCE::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */ | ||||||
|  | /* On entry: x0 = sig, x1 = info, x2 = raw_context. */ | ||||||
|  | /* If tpidr_el0 carries the TLS magic, restores the host tpidr_el0 and tail calls */ | ||||||
|  | /* SaveGuestContext(guest_ctx, raw_context); otherwise returns without doing anything. */ | ||||||
|  | .section    .text._ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits | ||||||
|  | .global     _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_ | ||||||
|  | .type       _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, %function | ||||||
|  | _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_: | ||||||
|  |     /* Check to see if we have the correct TLS magic at the address held in tpidr_el0. */ | ||||||
|  |     mrs     x8, tpidr_el0 | ||||||
|  |     ldr     w9, [x8, #(TpidrEl0TlsMagic)] | ||||||
|  |  | ||||||
|  |     LOAD_IMMEDIATE_32(w10, TlsMagic) | ||||||
|  |  | ||||||
|  |     cmp     w9, w10 | ||||||
|  |     b.ne    1f | ||||||
|  |  | ||||||
|  |     /* Correct TLS magic, so this is a guest interrupt. */ | ||||||
|  |     /* Restore host tpidr_el0 from the host context; x0 becomes the GuestContext pointer. */ | ||||||
|  |     ldr     x0, [x8, #(TpidrEl0NativeContext)] | ||||||
|  |     ldr     x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)] | ||||||
|  |     msr     tpidr_el0, x3 | ||||||
|  |  | ||||||
|  |     /* Tail call SaveGuestContext with x0 = guest context, x1 = raw_context. */ | ||||||
|  |     mov     x1, x2 | ||||||
|  |     b       _ZN4Core7ARM_NCE16SaveGuestContextEPNS_12GuestContextEPv | ||||||
|  |  | ||||||
|  |     /* Returning from there will enter host code. */ | ||||||
|  |  | ||||||
|  | 1: | ||||||
|  |     /* Incorrect TLS magic, so this is a spurious signal. */ | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* static void Core::ARM_NCE::GuestFaultSignalHandler(int sig, void* info, void* raw_context) */ | ||||||
|  | /* On entry: x0 = sig, x1 = info, x2 = raw_context. */ | ||||||
|  | /* Without the TLS magic in tpidr_el0 the fault is forwarded unchanged to HandleHostFault. */ | ||||||
|  | /* With it, the host tpidr_el0 is restored and HandleGuestFault(guest_ctx, info, raw_context) is */ | ||||||
|  | /* called; the guest tpidr_el0 is reinstated only when that handler returns true. */ | ||||||
|  | .section    .text._ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, "ax", %progbits | ||||||
|  | .global     _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_ | ||||||
|  | .type       _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, %function | ||||||
|  | _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_: | ||||||
|  |     /* Check to see if we have the correct TLS magic. */ | ||||||
|  |     mrs     x8, tpidr_el0 | ||||||
|  |     ldr     w9, [x8, #(TpidrEl0TlsMagic)] | ||||||
|  |  | ||||||
|  |     LOAD_IMMEDIATE_32(w10, TlsMagic) | ||||||
|  |  | ||||||
|  |     cmp     w9, w10 | ||||||
|  |     b.eq    1f | ||||||
|  |  | ||||||
|  |     /* Incorrect TLS magic, so this is a host fault. */ | ||||||
|  |     /* Tail call the handler; x0-x2 still hold the original signal handler arguments. */ | ||||||
|  |     b       _ZN4Core7ARM_NCE15HandleHostFaultEiPvS1_ | ||||||
|  |  | ||||||
|  | 1: | ||||||
|  |     /* Correct TLS magic, so this is a guest fault. */ | ||||||
|  |     /* Build a frame and preserve x19, which is used to keep the guest tpidr_el0 across the call. */ | ||||||
|  |     stp     x29, x30, [sp, #-0x20]! | ||||||
|  |     str     x19, [sp, #0x10] | ||||||
|  |     mov     x29, sp | ||||||
|  |  | ||||||
|  |     /* Save the old tpidr_el0. */ | ||||||
|  |     mov     x19, x8 | ||||||
|  |  | ||||||
|  |     /* Restore host tpidr_el0; x0 becomes the GuestContext pointer (first handler argument). */ | ||||||
|  |     ldr     x0, [x8, #(TpidrEl0NativeContext)] | ||||||
|  |     ldr     x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)] | ||||||
|  |     msr     tpidr_el0, x3 | ||||||
|  |  | ||||||
|  |     /* Call the handler; x1 = info and x2 = raw_context are passed through untouched. */ | ||||||
|  |     bl       _ZN4Core7ARM_NCE16HandleGuestFaultEPNS_12GuestContextEPvS3_ | ||||||
|  |  | ||||||
|  |     /* The handler returns a bool, for which only the low 8 bits of w0 are defined by the */ | ||||||
|  |     /* procedure call standard, so mask before testing instead of comparing all of x0. */ | ||||||
|  |     /* If the handler returned false, we want to preserve the host tpidr_el0. */ | ||||||
|  |     and     w0, w0, #0xff | ||||||
|  |     cbz     w0, 2f | ||||||
|  |  | ||||||
|  |     /* Otherwise, restore guest tpidr_el0. */ | ||||||
|  |     msr     tpidr_el0, x19 | ||||||
|  |  | ||||||
|  | 2: | ||||||
|  |     ldr     x19, [sp, #0x10] | ||||||
|  |     ldp     x29, x30, [sp], #0x20 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* static void Core::ARM_NCE::LockThreadParameters(void* tpidr) */ | ||||||
|  | /* On entry: x0 = tpidr. Spins until the 32-bit lock word at tpidr + TpidrEl0Lock is observed */ | ||||||
|  | /* non-zero and has been atomically replaced with zero (SpinLockLocked). */ | ||||||
|  | .section    .text._ZN4Core7ARM_NCE20LockThreadParametersEPv, "ax", %progbits | ||||||
|  | .global     _ZN4Core7ARM_NCE20LockThreadParametersEPv | ||||||
|  | .type       _ZN4Core7ARM_NCE20LockThreadParametersEPv, %function | ||||||
|  | _ZN4Core7ARM_NCE20LockThreadParametersEPv: | ||||||
|  |     /* Offset to lock member. */ | ||||||
|  |     add     x0, x0, #(TpidrEl0Lock) | ||||||
|  |  | ||||||
|  | 1: | ||||||
|  |     /* Clear the monitor. */ | ||||||
|  |     clrex | ||||||
|  |  | ||||||
|  | 2: | ||||||
|  |     /* Load-linked with acquire ordering. */ | ||||||
|  |     ldaxr   w1, [x0] | ||||||
|  |  | ||||||
|  |     /* If the value was SpinLockLocked (zero), the lock is held: clear monitor and retry. */ | ||||||
|  |     cbz     w1, 1b | ||||||
|  |  | ||||||
|  |     /* Store-conditional SpinLockLocked (wzr) with relaxed ordering; w1 receives the status. */ | ||||||
|  |     stxr    w1, wzr, [x0] | ||||||
|  |  | ||||||
|  |     /* If we failed to store (non-zero status), retry the exclusive sequence. */ | ||||||
|  |     cbnz    w1, 2b | ||||||
|  |  | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* static void Core::ARM_NCE::UnlockThreadParameters(void* tpidr) */ | ||||||
|  | /* On entry: x0 = tpidr. Writes SpinLockUnlocked to the 32-bit lock word at tpidr + TpidrEl0Lock. */ | ||||||
|  | .section    .text._ZN4Core7ARM_NCE22UnlockThreadParametersEPv, "ax", %progbits | ||||||
|  | .global     _ZN4Core7ARM_NCE22UnlockThreadParametersEPv | ||||||
|  | .type       _ZN4Core7ARM_NCE22UnlockThreadParametersEPv, %function | ||||||
|  | _ZN4Core7ARM_NCE22UnlockThreadParametersEPv: | ||||||
|  |     /* Offset to lock member. */ | ||||||
|  |     add     x0, x0, #(TpidrEl0Lock) | ||||||
|  |  | ||||||
|  |     /* Load SpinLockUnlocked. */ | ||||||
|  |     mov     w1, #(SpinLockUnlocked) | ||||||
|  |  | ||||||
|  |     /* Store value with release ordering, pairing with the acquire load in LockThreadParameters. */ | ||||||
|  |     stlr    w1, [x0] | ||||||
|  |  | ||||||
|  |     ret | ||||||
							
								
								
									
										29
									
								
								src/core/arm/nce/arm_nce_asm_definitions.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								src/core/arm/nce/arm_nce_asm_definitions.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | |||||||
|  | /* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */ | ||||||
|  | /* SPDX-License-Identifier: GPL-2.0-or-later */ | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #define __ASSEMBLY__ | ||||||
|  |  | ||||||
|  | #include <asm-generic/signal.h> | ||||||
|  | #include <asm-generic/unistd.h> | ||||||
|  |  | ||||||
|  | /* Signals used by the native code execution backend. */ | ||||||
|  | #define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2 | ||||||
|  | #define BreakFromRunCodeSignal SIGURG | ||||||
|  | #define GuestFaultSignal SIGSEGV | ||||||
|  |  | ||||||
|  | /* Byte offsets into Core::GuestContext; checked by static_asserts in guest_context.h. */ | ||||||
|  | #define GuestContextSp 0xF8 | ||||||
|  | #define GuestContextHostContext 0x320 | ||||||
|  |  | ||||||
|  | /* Byte offsets into Core::HostContext; checked by static_asserts in guest_context.h. */ | ||||||
|  | /* HostContextSpTpidrEl0 addresses host_sp and host_tpidr_el0 as one adjacent 16-byte pair. */ | ||||||
|  | #define HostContextSpTpidrEl0 0xE0 | ||||||
|  | #define HostContextTpidrEl0 0xE8 | ||||||
|  | #define HostContextRegs 0x0 | ||||||
|  | #define HostContextVregs 0x60 | ||||||
|  |  | ||||||
|  | /* Byte offsets into the structure addressed by the guest tpidr_el0 value. */ | ||||||
|  | /* NOTE(review): no static_assert for these three offsets is visible next to the others - confirm they are checked. */ | ||||||
|  | #define TpidrEl0NativeContext 0x10 | ||||||
|  | #define TpidrEl0Lock 0x18 | ||||||
|  | #define TpidrEl0TlsMagic 0x20 | ||||||
|  | /* Marker expected at TpidrEl0TlsMagic; its little-endian bytes spell "YUZU". */ | ||||||
|  | #define TlsMagic 0x555a5559 | ||||||
|  |  | ||||||
|  | /* Values of the 32-bit lock word at TpidrEl0Lock. */ | ||||||
|  | #define SpinLockLocked 0 | ||||||
|  | #define SpinLockUnlocked 1 | ||||||
							
								
								
									
										50
									
								
								src/core/arm/nce/guest_context.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								src/core/arm/nce/guest_context.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include "common/common_funcs.h" | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "core/arm/arm_interface.h" | ||||||
|  | #include "core/arm/nce/arm_nce_asm_definitions.h" | ||||||
|  |  | ||||||
|  | namespace Core { | ||||||
|  |  | ||||||
|  | class ARM_NCE; | ||||||
|  | class System; | ||||||
|  |  | ||||||
|  | // Host register state stored by the assembly routine ReturnToRunCodeByTrampoline before it | ||||||
|  | // switches to the guest. Member order and sizes are mirrored by the HostContext* offsets in | ||||||
|  | // arm_nce_asm_definitions.h, so they must not be rearranged. | ||||||
|  | struct HostContext { | ||||||
|  |     alignas(16) std::array<u64, 12> host_saved_regs{}; // x19-x30 | ||||||
|  |     alignas(16) std::array<u128, 8> host_saved_vregs{}; // q8-q15 | ||||||
|  |     u64 host_sp{}; // stored as a pair together with host_tpidr_el0 | ||||||
|  |     void* host_tpidr_el0{}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Guest CPU state shared between C++ and the assembly/generated patch code. The offsets of sp | ||||||
|  | // and host_ctx are hard-coded as GuestContextSp and GuestContextHostContext, so the leading | ||||||
|  | // members must not be rearranged. | ||||||
|  | struct GuestContext { | ||||||
|  |     std::array<u64, 31> cpu_registers{}; // X0-X30; patch code addresses register n at 8 * n | ||||||
|  |     u64 sp{}; | ||||||
|  |     u64 pc{}; | ||||||
|  |     u32 fpcr{}; | ||||||
|  |     u32 fpsr{}; | ||||||
|  |     std::array<u128, 32> vector_registers{}; // Q0-Q31 | ||||||
|  |     u32 pstate{}; | ||||||
|  |     alignas(16) HostContext host_ctx{}; // host state saved while the guest is running | ||||||
|  |     u64 tpidrro_el0{}; | ||||||
|  |     u64 tpidr_el0{}; | ||||||
|  |     std::atomic<u64> esr_el1{}; | ||||||
|  |     u32 nzcv{}; | ||||||
|  |     u32 svc_swi{}; | ||||||
|  |     System* system{}; | ||||||
|  |     ARM_NCE* parent{}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Verify that the structure layouts match the offsets hard-coded for the assembly code. | ||||||
|  | static_assert(offsetof(GuestContext, sp) == GuestContextSp); | ||||||
|  | static_assert(offsetof(GuestContext, host_ctx) == GuestContextHostContext); | ||||||
|  | static_assert(offsetof(HostContext, host_sp) == HostContextSpTpidrEl0); | ||||||
|  | // host_tpidr_el0 must directly follow host_sp, since both are written with a single stp. | ||||||
|  | static_assert(offsetof(HostContext, host_tpidr_el0) - 8 == HostContextSpTpidrEl0); | ||||||
|  | static_assert(offsetof(HostContext, host_tpidr_el0) == HostContextTpidrEl0); | ||||||
|  | static_assert(offsetof(HostContext, host_saved_regs) == HostContextRegs); | ||||||
|  | static_assert(offsetof(HostContext, host_saved_vregs) == HostContextVregs); | ||||||
|  |  | ||||||
|  | } // namespace Core | ||||||
							
								
								
									
										147
									
								
								src/core/arm/nce/instructions.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								src/core/arm/nce/instructions.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,147 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright © 2020 Skyline Team and Contributors | ||||||
|  | // SPDX-License-Identifier: MPL-2.0 | ||||||
|  |  | ||||||
|  | #include "common/bit_field.h" | ||||||
|  | #include "common/common_types.h" | ||||||
|  |  | ||||||
|  | namespace Core::NCE { | ||||||
|  |  | ||||||
|  | // System register identifiers in the form returned by MRS::GetSystemReg() and | ||||||
|  | // MSR::GetSystemReg(), i.e. the 15-bit field at bits [19:5] of the instruction word. | ||||||
|  | enum SystemRegister : u32 { | ||||||
|  |     TpidrEl0 = 0x5E82, | ||||||
|  |     TpidrroEl0 = 0x5E83, | ||||||
|  |     CntfrqEl0 = 0x5F00, | ||||||
|  |     CntpctEl0 = 0x5F01, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SVC--Supervisor-Call- | ||||||
|  | // Decoder for a 32-bit A64 SVC instruction word. Every accessor extracts its field directly from | ||||||
|  | // `raw` and is const, so it can be used on const objects and in constant expressions. | ||||||
|  | union SVC { | ||||||
|  |     constexpr explicit SVC(u32 raw_) : raw{raw_} {} | ||||||
|  |  | ||||||
|  |     // Returns true when both fixed opcode fields match the SVC encoding. | ||||||
|  |     constexpr bool Verify() const { | ||||||
|  |         return (this->GetSig0() == 0x1 && this->GetSig1() == 0x6A0); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [4:0]; 0x1 for SVC. | ||||||
|  |     constexpr u32 GetSig0() const { | ||||||
|  |         return decltype(sig0)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [20:5]; the 16-bit immediate of the supervisor call. | ||||||
|  |     constexpr u32 GetValue() const { | ||||||
|  |         return decltype(value)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [31:21]; 0x6A0 for SVC. | ||||||
|  |     constexpr u32 GetSig1() const { | ||||||
|  |         return decltype(sig1)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     u32 raw; | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     BitField<0, 5, u32> sig0;   // 0x1 | ||||||
|  |     BitField<5, 16, u32> value; // 16-bit immediate | ||||||
|  |     BitField<21, 11, u32> sig1; // 0x6A0 | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(SVC) == sizeof(u32)); | ||||||
|  | static_assert(SVC(0xD40000C1).Verify()); | ||||||
|  | static_assert(SVC(0xD40000C1).GetValue() == 0x6); | ||||||
|  |  | ||||||
|  | // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MRS--Move-System-Register- | ||||||
|  | // Decoder for a 32-bit A64 MRS instruction word. Every accessor extracts its field directly from | ||||||
|  | // `raw` and is const, so it can be used on const objects and in constant expressions. | ||||||
|  | union MRS { | ||||||
|  |     constexpr explicit MRS(u32 raw_) : raw{raw_} {} | ||||||
|  |  | ||||||
|  |     // Returns true when the fixed opcode bits match the MRS encoding. | ||||||
|  |     constexpr bool Verify() const { | ||||||
|  |         return (this->GetSig() == 0xD53); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [4:0]; index of the destination general-purpose register. | ||||||
|  |     constexpr u32 GetRt() const { | ||||||
|  |         return decltype(rt)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [19:5]; comparable against the SystemRegister enumerators. | ||||||
|  |     constexpr u32 GetSystemReg() const { | ||||||
|  |         return decltype(system_reg)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [31:20]; 0xD53 for MRS. | ||||||
|  |     constexpr u32 GetSig() const { | ||||||
|  |         return decltype(sig)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     u32 raw; | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     BitField<0, 5, u32> rt;          // destination register | ||||||
|  |     BitField<5, 15, u32> system_reg; // source system register | ||||||
|  |     BitField<20, 12, u32> sig;       // 0xD53 | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(MRS) == sizeof(u32)); | ||||||
|  | static_assert(MRS(0xD53BE020).Verify()); | ||||||
|  | static_assert(MRS(0xD53BE020).GetSystemReg() == CntpctEl0); | ||||||
|  | static_assert(MRS(0xD53BE020).GetRt() == 0x0); | ||||||
|  |  | ||||||
|  | // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register- | ||||||
|  | // Decoder for a 32-bit A64 MSR (register) instruction word. Every accessor extracts its field | ||||||
|  | // directly from `raw` and is const, so it can be used on const objects and in constant expressions. | ||||||
|  | union MSR { | ||||||
|  |     constexpr explicit MSR(u32 raw_) : raw{raw_} {} | ||||||
|  |  | ||||||
|  |     // Returns true when the fixed opcode bits match the MSR (register) encoding. | ||||||
|  |     constexpr bool Verify() const { | ||||||
|  |         return this->GetSig() == 0xD51; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [4:0]; index of the source general-purpose register. | ||||||
|  |     constexpr u32 GetRt() const { | ||||||
|  |         return decltype(rt)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [19:5]; comparable against the SystemRegister enumerators. | ||||||
|  |     constexpr u32 GetSystemReg() const { | ||||||
|  |         return decltype(system_reg)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [31:20]; 0xD51 for MSR (register). | ||||||
|  |     constexpr u32 GetSig() const { | ||||||
|  |         return decltype(sig)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     u32 raw; | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     BitField<0, 5, u32> rt;          // source register | ||||||
|  |     BitField<5, 15, u32> system_reg; // destination system register | ||||||
|  |     BitField<20, 12, u32> sig;       // 0xD51 | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(MSR) == sizeof(u32)); | ||||||
|  | static_assert(MSR(0xD51BD040).Verify()); | ||||||
|  | static_assert(MSR(0xD51BD040).GetSystemReg() == TpidrEl0); | ||||||
|  | static_assert(MSR(0xD51BD040).GetRt() == 0x0); | ||||||
|  |  | ||||||
|  | // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXR--Load-Exclusive-Register- | ||||||
|  | // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers- | ||||||
|  | // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXR--Store-Exclusive-Register- | ||||||
|  | // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers- | ||||||
|  | // Decoder for the 32-bit A64 load/store exclusive instruction words listed above. Accessors read | ||||||
|  | // straight from `raw` and are const, so they work on const objects and in constant expressions. | ||||||
|  | union Exclusive { | ||||||
|  |     constexpr explicit Exclusive(u32 raw_) : raw{raw_} {} | ||||||
|  |  | ||||||
|  |     // Returns true when the fixed opcode bits [29:23] match the exclusive access encoding. | ||||||
|  |     constexpr bool Verify() const { | ||||||
|  |         return this->GetSig() == 0x10; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Bits [29:23]; 0x10 for the exclusive loads and stores. | ||||||
|  |     constexpr u32 GetSig() const { | ||||||
|  |         return decltype(sig)::ExtractValue(raw); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Returns the instruction word with the o0 (ordered) bit set, e.g. LDXR becomes LDAXR. | ||||||
|  |     // Words that already have the bit set are returned unchanged. | ||||||
|  |     constexpr u32 AsOrdered() const { | ||||||
|  |         return raw | decltype(o0)::FormatValue(1); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     u32 raw; | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     BitField<0, 5, u32> rt;    // Rt: register operand 1 (loaded or stored) | ||||||
|  |     BitField<5, 5, u32> rn;    // Rn: memory operand (base address register) | ||||||
|  |     BitField<10, 5, u32> rt2;  // Rt2: register operand 2 (pair forms) | ||||||
|  |     BitField<15, 1, u32> o0;   // ordered | ||||||
|  |     BitField<16, 5, u32> rs;   // Rs: status register (store forms) | ||||||
|  |     BitField<21, 2, u32> l;    // operation type | ||||||
|  |     BitField<23, 7, u32> sig;  // 0x10 | ||||||
|  |     BitField<30, 2, u32> size; // size | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(Exclusive) == sizeof(u32)); | ||||||
|  | static_assert(Exclusive(0xC85FFC00).Verify()); | ||||||
|  | static_assert(Exclusive(0xC85FFC00).AsOrdered() == 0xC85FFC00); | ||||||
|  | static_assert(Exclusive(0xC85F7C00).AsOrdered() == 0xC85FFC00); | ||||||
|  | static_assert(Exclusive(0xC8200440).AsOrdered() == 0xC8208440); | ||||||
|  |  | ||||||
|  | } // namespace Core::NCE | ||||||
							
								
								
									
										472
									
								
								src/core/arm/nce/patch.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										472
									
								
								src/core/arm/nce/patch.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,472 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later | ||||||
|  |  | ||||||
|  | #include "common/arm64/native_clock.h" | ||||||
|  | #include "common/bit_cast.h" | ||||||
|  | #include "common/literals.h" | ||||||
|  | #include "core/arm/nce/arm_nce.h" | ||||||
|  | #include "core/arm/nce/guest_context.h" | ||||||
|  | #include "core/arm/nce/instructions.h" | ||||||
|  | #include "core/arm/nce/patch.h" | ||||||
|  | #include "core/core.h" | ||||||
|  | #include "core/core_timing.h" | ||||||
|  | #include "core/hle/kernel/svc.h" | ||||||
|  |  | ||||||
|  | namespace Core::NCE { | ||||||
|  |  | ||||||
|  | using namespace Common::Literals; | ||||||
|  | using namespace oaknut::util; | ||||||
|  |  | ||||||
|  | using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; | ||||||
|  |  | ||||||
|  | constexpr size_t MaxRelativeBranch = 128_MiB; | ||||||
|  |  | ||||||
|  | // Constructs the code generator `c` from m_patch_instructions. | ||||||
|  | // NOTE(review): presumably everything emitted through `c` is appended to that buffer - confirm. | ||||||
|  | Patcher::Patcher() : c(m_patch_instructions) {} | ||||||
|  |  | ||||||
|  | // Defaulted destructor, defined out of line. | ||||||
|  | Patcher::~Patcher() = default; | ||||||
|  |  | ||||||
|  | // Scans the text segment of program_image and emits patch code for every instruction that needs | ||||||
|  | // special handling (SVC, MRS/MSR of the thread pointer registers, MRS of CNTPCT_EL0). The module | ||||||
|  | // itself is not modified here; relocations are only recorded. Also selects the patch placement | ||||||
|  | // mode used by the later relocation step. | ||||||
|  | void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, | ||||||
|  |                         const Kernel::CodeSet::Segment& code) { | ||||||
|  |     // The shared context save/load helpers come first in the patch section. | ||||||
|  |     c.l(m_save_context); | ||||||
|  |     WriteSaveContext(); | ||||||
|  |     c.l(m_load_context); | ||||||
|  |     WriteLoadContext(); | ||||||
|  |  | ||||||
|  |     // View the text segment as a sequence of 32-bit instruction words. | ||||||
|  |     const auto text_bytes = std::span{program_image}.subspan(code.offset, code.size); | ||||||
|  |     const std::span<const u32> words{reinterpret_cast<const u32*>(text_bytes.data()), | ||||||
|  |                                      text_bytes.size() / sizeof(u32)}; | ||||||
|  |     const u32 word_count = static_cast<u32>(words.size()); | ||||||
|  |  | ||||||
|  |     for (u32 index = 0; index < word_count; ++index) { | ||||||
|  |         const u32 inst = words[index]; | ||||||
|  |  | ||||||
|  |         // Records a branch from the current instruction to the patch code about to be written | ||||||
|  |         // and yields the module offset of the instruction that follows it. | ||||||
|  |         const auto RedirectToPatch = [this, index]() -> uintptr_t { | ||||||
|  |             const uintptr_t here = index * sizeof(u32); | ||||||
|  |             this->BranchToPatch(here); | ||||||
|  |             return here + sizeof(u32); | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // SVC | ||||||
|  |         if (SVC svc{inst}; svc.Verify()) { | ||||||
|  |             WriteSvcTrampoline(RedirectToPatch(), svc.GetValue()); | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // MRS Xn, <system register> | ||||||
|  |         if (MRS mrs{inst}; mrs.Verify()) { | ||||||
|  |             switch (mrs.GetSystemReg()) { | ||||||
|  |             case TpidrroEl0: | ||||||
|  |             case TpidrEl0: { | ||||||
|  |                 // MRS Xn, TPIDRRO_EL0 / MRS Xn, TPIDR_EL0 | ||||||
|  |                 const auto source = mrs.GetSystemReg() == TpidrroEl0 | ||||||
|  |                                         ? oaknut::SystemReg::TPIDRRO_EL0 | ||||||
|  |                                         : oaknut::SystemReg::TPIDR_EL0; | ||||||
|  |                 const oaknut::XReg target{static_cast<int>(mrs.GetRt())}; | ||||||
|  |                 WriteMrsHandler(RedirectToPatch(), target, source); | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             case CntpctEl0: | ||||||
|  |                 // MRS Xn, CNTPCT_EL0 | ||||||
|  |                 WriteCntpctHandler(RedirectToPatch(), | ||||||
|  |                                    oaknut::XReg{static_cast<int>(mrs.GetRt())}); | ||||||
|  |                 continue; | ||||||
|  |             case CntfrqEl0: | ||||||
|  |                 // MRS Xn, CNTFRQ_EL0 | ||||||
|  |                 UNREACHABLE(); | ||||||
|  |                 break; | ||||||
|  |             default: | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // MSR TPIDR_EL0, Xn | ||||||
|  |         if (MSR msr{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) { | ||||||
|  |             WriteMsrHandler(RedirectToPatch(), oaknut::XReg{static_cast<int>(msr.GetRt())}); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Images larger than the relative branch range get the patch section in front of the text; | ||||||
|  |     // all others get it after the data. | ||||||
|  |     const bool exceeds_branch_range = program_image.size() > MaxRelativeBranch; | ||||||
|  |     this->mode = exceeds_branch_range ? PatchMode::PreText : PatchMode::PostData; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Resolves every relocation recorded while patching, rewrites exclusive accesses in the text | ||||||
|  | // segment to their ordered forms, and copies the patch section into program_image. | ||||||
|  | // | ||||||
|  | // Two layouts exist, selected by `mode`: | ||||||
|  | //  - PreText:  the patch section starts at load_base and module offsets are shifted up by | ||||||
|  | //              patch_size. | ||||||
|  | //  - PostData: the module starts at load_base and the patch section follows at image_size. | ||||||
|  | // | ||||||
|  | // out_trampolines receives one entry per recorded trampoline, mapping the rebased module | ||||||
|  | // address to the rebased patch address. | ||||||
|  | void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, | ||||||
|  |                               const Kernel::CodeSet::Segment& code, | ||||||
|  |                               Kernel::PhysicalMemory& program_image, | ||||||
|  |                               EntryTrampolines* out_trampolines) { | ||||||
|  |     const size_t patch_size = SectionSize(); | ||||||
|  |     const size_t image_size = program_image.size(); | ||||||
|  |  | ||||||
|  |     // Retrieve text segment data. | ||||||
|  |     const auto text = std::span{program_image}.subspan(code.offset, code.size); | ||||||
|  |     const auto text_words = | ||||||
|  |         std::span<u32>{reinterpret_cast<u32*>(text.data()), text.size() / sizeof(u32)}; | ||||||
|  |  | ||||||
|  |     // Writes a branch at `target` (a module instruction) leading to its patch code. The | ||||||
|  |     // argument is the displacement from the module instruction to the patch location. | ||||||
|  |     const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) { | ||||||
|  |         oaknut::CodeGenerator rc{target}; | ||||||
|  |         if (mode == PatchMode::PreText) { | ||||||
|  |             rc.B(rel.patch_offset - patch_size - rel.module_offset); | ||||||
|  |         } else { | ||||||
|  |             rc.B(image_size - rel.module_offset + rel.patch_offset); | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Writes a branch at `target` (inside the patch section) leading back into the module; the | ||||||
|  |     // displacement is the negation of the one used above. | ||||||
|  |     const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) { | ||||||
|  |         oaknut::CodeGenerator rc{target}; | ||||||
|  |         if (mode == PatchMode::PreText) { | ||||||
|  |             rc.B(patch_size - rel.patch_offset + rel.module_offset); | ||||||
|  |         } else { | ||||||
|  |             rc.B(rel.module_offset - image_size - rel.patch_offset); | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Converts an offset within the patch section to an address relative to load_base. | ||||||
|  |     const auto RebasePatch = [&](ptrdiff_t patch_offset) { | ||||||
|  |         if (mode == PatchMode::PreText) { | ||||||
|  |             return GetInteger(load_base) + patch_offset; | ||||||
|  |         } else { | ||||||
|  |             return GetInteger(load_base) + image_size + patch_offset; | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Converts an offset within the module to an address relative to load_base. | ||||||
|  |     const auto RebasePc = [&](uintptr_t module_offset) { | ||||||
|  |         if (mode == PatchMode::PreText) { | ||||||
|  |             return GetInteger(load_base) + patch_size + module_offset; | ||||||
|  |         } else { | ||||||
|  |             return GetInteger(load_base) + module_offset; | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // We are now ready to relocate! | ||||||
|  |     for (const Relocation& rel : m_branch_to_patch_relocations) { | ||||||
|  |         ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel); | ||||||
|  |     } | ||||||
|  |     for (const Relocation& rel : m_branch_to_module_relocations) { | ||||||
|  |         ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), | ||||||
|  |                                       rel); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Rewrite PC constants and record post trampolines | ||||||
|  |     for (const Relocation& rel : m_write_module_pc_relocations) { | ||||||
|  |         oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; | ||||||
|  |         rc.dx(RebasePc(rel.module_offset)); | ||||||
|  |     } | ||||||
|  |     for (const Trampoline& rel : m_trampolines) { | ||||||
|  |         out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)}); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. | ||||||
|  |     // Convert to ordered to preserve this assumption | ||||||
|  |     for (u32 i = 0; i < static_cast<u32>(text_words.size()); i++) { | ||||||
|  |         const u32 inst = text_words[i]; | ||||||
|  |         if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { | ||||||
|  |             text_words[i] = exclusive.AsOrdered(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Copy to program image | ||||||
|  |     if (this->mode == PatchMode::PreText) { | ||||||
|  |         // The image is not resized here and the patch overwrites its first bytes. | ||||||
|  |         // NOTE(review): presumably the caller reserved patch_size bytes at the front - confirm. | ||||||
|  |         std::memcpy(program_image.data(), m_patch_instructions.data(), | ||||||
|  |                     m_patch_instructions.size() * sizeof(u32)); | ||||||
|  |     } else { | ||||||
|  |         // Grow the image by the page-aligned section size and append the patch after it. | ||||||
|  |         program_image.resize(image_size + patch_size); | ||||||
|  |         std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), | ||||||
|  |                     m_patch_instructions.size() * sizeof(u32)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Size in bytes of the patch section, rounded up to a whole number of pages. | ||||||
|  | size_t Patcher::SectionSize() const noexcept { | ||||||
|  |     const size_t patch_bytes = m_patch_instructions.size() * sizeof(u32); | ||||||
|  |     return Common::AlignUp(patch_bytes, Core::Memory::YUZU_PAGESIZE); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Emits the helper routine that reloads guest state from the GuestContext: FPSR, FPCR, NZCV, | ||||||
|  | // Q0-Q31 and X0-X29. The guest X30 and SP are not reloaded by this routine. | ||||||
|  | void Patcher::WriteLoadContext() { | ||||||
|  |     // This function was called, which modifies X30, so use that as a scratch register. | ||||||
|  |     // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes | ||||||
|  |     // of stack. | ||||||
|  |     c.STR(X30, SP, 8); | ||||||
|  |     // X30 = native_context pointer read from the structure addressed by TPIDR_EL0; it is used | ||||||
|  |     // below as the GuestContext base. | ||||||
|  |     c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); | ||||||
|  |     c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); | ||||||
|  |  | ||||||
|  |     // Load system registers, using X0 as scratch before the general-purpose registers are | ||||||
|  |     // reloaded further down. | ||||||
|  |     c.LDR(W0, X30, offsetof(GuestContext, fpsr)); | ||||||
|  |     c.MSR(oaknut::SystemReg::FPSR, X0); | ||||||
|  |     c.LDR(W0, X30, offsetof(GuestContext, fpcr)); | ||||||
|  |     c.MSR(oaknut::SystemReg::FPCR, X0); | ||||||
|  |     c.LDR(W0, X30, offsetof(GuestContext, nzcv)); | ||||||
|  |     c.MSR(oaknut::SystemReg::NZCV, X0); | ||||||
|  |  | ||||||
|  |     // Load all vector registers, two at a time (16 bytes each). | ||||||
|  |     static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); | ||||||
|  |     for (int i = 0; i <= 30; i += 2) { | ||||||
|  |         c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Load all general-purpose registers except X30, two at a time (X0/X1 ... X28/X29). | ||||||
|  |     for (int i = 0; i <= 28; i += 2) { | ||||||
|  |         c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Reload our return X30 from the stack and return. | ||||||
|  |     // The patch code will reload the guest X30 for us. | ||||||
|  |     c.LDR(X30, SP, 8); | ||||||
|  |     c.RET(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Patcher::WriteSaveContext() { | ||||||
|  |     // Emits the context-save routine. It is entered through a call, which clobbers X30, so X30 | ||||||
|  |     // doubles as the scratch register. [SP] holds the guest X30 pushed by the caller, which also | ||||||
|  |     // allocated 16 bytes of stack, so park our return address at [SP + 8]. | ||||||
|  |     c.STR(X30, SP, 8); | ||||||
|  |     c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); | ||||||
|  |     c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); | ||||||
|  |  | ||||||
|  |     // Store X0-X29 in pairs at byte offset 8 * i, i.e. written from the very start of GuestContext. | ||||||
|  |     for (int i = 0; i <= 28; i += 2) { | ||||||
|  |         c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Store Q0-Q31 in pairs; each register takes 16 bytes of GuestContext::vector_registers. | ||||||
|  |     static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); | ||||||
|  |     for (int i = 0; i <= 30; i += 2) { | ||||||
|  |         c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Push X0 as scratch, then save guest X30 (now at [SP + 16]), guest SP (SP + 32), FPSR, FPCR, NZCV. | ||||||
|  |     c.STR(X0, SP, PRE_INDEXED, -16); | ||||||
|  |     c.LDR(X0, SP, 16); | ||||||
|  |     c.STR(X0, X30, 8 * 30); | ||||||
|  |     c.ADD(X0, SP, 32); | ||||||
|  |     c.STR(X0, X30, offsetof(GuestContext, sp)); | ||||||
|  |     c.MRS(X0, oaknut::SystemReg::FPSR); | ||||||
|  |     c.STR(W0, X30, offsetof(GuestContext, fpsr)); | ||||||
|  |     c.MRS(X0, oaknut::SystemReg::FPCR); | ||||||
|  |     c.STR(W0, X30, offsetof(GuestContext, fpcr)); | ||||||
|  |     c.MRS(X0, oaknut::SystemReg::NZCV); | ||||||
|  |     c.STR(W0, X30, offsetof(GuestContext, nzcv)); | ||||||
|  |     c.LDR(X0, SP, POST_INDEXED, 16); | ||||||
|  |  | ||||||
|  |     // X0 has been popped again; reload our return address from [SP + 8] and return. | ||||||
|  |     c.LDR(X30, SP, 8); | ||||||
|  |     c.RET(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Emits the trampoline that replaces one guest SVC instruction. The emitted code has two halves: | ||||||
|  | //  1. Guest -> host: lock the context, save the guest state, record the resume PC and the SVC | ||||||
|  | //     number, atomically take and clear esr_el1, restore the host callee-saved state and return | ||||||
|  | //     to the host with HaltReason::SupervisorCall set in X0. | ||||||
|  | //  2. Host -> guest: entered by the host at the offset recorded in m_trampolines; reloads the | ||||||
|  | //     guest state, unlocks the context and branches back to module_dest. | ||||||
|  | // | ||||||
|  | // module_dest is the module location to resume at; the log line below treats module_dest - 4 as | ||||||
|  | // the address of the patched SVC. svc_id is the SVC number stored in GuestContext::svc_swi. | ||||||
|  | void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { | ||||||
|  |     // Patching an SVC is routine work done for every SVC in a module, not a failure, so it is | ||||||
|  |     // traced at debug severity rather than reported as an error. | ||||||
|  |     LOG_DEBUG(Core_ARM, "Patching SVC {:#x} at {:#x}", svc_id, module_dest - 4); | ||||||
|  |     // We are about to start saving state, so we need to lock the context. | ||||||
|  |     this->LockContext(); | ||||||
|  |  | ||||||
|  |     // Store guest X30 to the stack. Then, save the context and restore the stack. | ||||||
|  |     // This will save all registers except PC, but we know PC at patch time. | ||||||
|  |     c.STR(X30, SP, PRE_INDEXED, -16); | ||||||
|  |     c.BL(m_save_context); | ||||||
|  |     c.LDR(X30, SP, POST_INDEXED, 16); | ||||||
|  |  | ||||||
|  |     // Now that we've saved all registers, we can use any registers as scratch. | ||||||
|  |     // Store PC + 4 to arm interface, since we know the instruction offset from the entry point. | ||||||
|  |     // The value is read from the literal emitted at pc_after_svc at the end of this trampoline. | ||||||
|  |     oaknut::Label pc_after_svc; | ||||||
|  |     c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); | ||||||
|  |     c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); | ||||||
|  |     c.LDR(X2, pc_after_svc); | ||||||
|  |     c.STR(X2, X1, offsetof(GuestContext, pc)); | ||||||
|  |  | ||||||
|  |     // Store SVC number to execute when we return | ||||||
|  |     c.MOV(X2, svc_id); | ||||||
|  |     c.STR(W2, X1, offsetof(GuestContext, svc_swi)); | ||||||
|  |  | ||||||
|  |     // We are calling a SVC. Clear esr_el1 and return it. | ||||||
|  |     // The load-exclusive/store-exclusive loop swaps in zero and leaves the old value in X0. | ||||||
|  |     static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>); | ||||||
|  |     oaknut::Label retry; | ||||||
|  |     c.ADD(X2, X1, offsetof(GuestContext, esr_el1)); | ||||||
|  |     c.l(retry); | ||||||
|  |     c.LDAXR(X0, X2); | ||||||
|  |     c.STLXR(W3, XZR, X2); | ||||||
|  |     c.CBNZ(W3, retry); | ||||||
|  |  | ||||||
|  |     // Add "calling SVC" flag. Since this is X0, this is now our return value. | ||||||
|  |     c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall)); | ||||||
|  |  | ||||||
|  |     // Offset the GuestContext pointer to the HostContext member. | ||||||
|  |     // STP has limited range of [-512, 504] which we can't reach otherwise | ||||||
|  |     // NB: Due to this all offsets below are from the start of HostContext. | ||||||
|  |     c.ADD(X1, X1, offsetof(GuestContext, host_ctx)); | ||||||
|  |  | ||||||
|  |     // Reload host TPIDR_EL0 and SP. | ||||||
|  |     static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0)); | ||||||
|  |     c.LDP(X2, X3, X1, offsetof(HostContext, host_sp)); | ||||||
|  |     c.MOV(SP, X2); | ||||||
|  |     c.MSR(oaknut::SystemReg::TPIDR_EL0, X3); | ||||||
|  |  | ||||||
|  |     // Load callee-saved host registers and return to host. | ||||||
|  |     static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs); | ||||||
|  |     static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs); | ||||||
|  |     c.LDP(X19, X20, X1, HOST_REGS_OFF); | ||||||
|  |     c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64)); | ||||||
|  |     c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64)); | ||||||
|  |     c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64)); | ||||||
|  |     c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64)); | ||||||
|  |     c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64)); | ||||||
|  |     c.LDP(Q8, Q9, X1, HOST_VREGS_OFF); | ||||||
|  |     c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128)); | ||||||
|  |     c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128)); | ||||||
|  |     c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128)); | ||||||
|  |     c.RET(); | ||||||
|  |  | ||||||
|  |     // Write the post-SVC trampoline address, which will jump back to the guest after restoring its | ||||||
|  |     // state. | ||||||
|  |     m_trampolines.push_back({c.offset(), module_dest}); | ||||||
|  |  | ||||||
|  |     // Host called this location. Save the return address so we can | ||||||
|  |     // unwind the stack properly when jumping back. | ||||||
|  |     // Slot 11 of host_saved_regs is the one reloaded into X30 on the way out above. | ||||||
|  |     c.MRS(X2, oaknut::SystemReg::TPIDR_EL0); | ||||||
|  |     c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context)); | ||||||
|  |     c.ADD(X0, X2, offsetof(GuestContext, host_ctx)); | ||||||
|  |     c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64)); | ||||||
|  |  | ||||||
|  |     // Reload all guest registers except X30 and PC. | ||||||
|  |     // The function also expects 16 bytes of stack already allocated. | ||||||
|  |     c.STR(X30, SP, PRE_INDEXED, -16); | ||||||
|  |     c.BL(m_load_context); | ||||||
|  |     c.LDR(X30, SP, POST_INDEXED, 16); | ||||||
|  |  | ||||||
|  |     // Use X1 as a scratch register to restore X30. | ||||||
|  |     c.STR(X1, SP, PRE_INDEXED, -16); | ||||||
|  |     c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); | ||||||
|  |     c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); | ||||||
|  |     c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30); | ||||||
|  |     c.LDR(X1, SP, POST_INDEXED, 16); | ||||||
|  |  | ||||||
|  |     // Unlock the context. | ||||||
|  |     this->UnlockContext(); | ||||||
|  |  | ||||||
|  |     // Jump back to the instruction after the emulated SVC. | ||||||
|  |     this->BranchToModule(module_dest); | ||||||
|  |  | ||||||
|  |     // Store PC after call. | ||||||
|  |     c.l(pc_after_svc); | ||||||
|  |     this->WriteModulePc(module_dest); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, | ||||||
|  |                               oaknut::SystemReg src_reg) { | ||||||
|  |     // Read the emulated TLS register from NativeExecutionParameters (reached via the host TPIDR_EL0); any src_reg other than TPIDRRO_EL0 is served from tpidr_el0. | ||||||
|  |     c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0); | ||||||
|  |     if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) { | ||||||
|  |         c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0)); | ||||||
|  |     } else { | ||||||
|  |         c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Jump back to the instruction after the emulated MRS (dest_reg is the only register modified). | ||||||
|  |     this->BranchToModule(module_dest); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { | ||||||
|  |     const auto scratch_reg = src_reg.index() == 0 ? X1 : X0; | ||||||
|  |     c.STR(scratch_reg, SP, PRE_INDEXED, -16); | ||||||
|  |  | ||||||
|  |     // Store the guest value into NativeExecutionParameters::tpidr_el0, reached via the host TPIDR_EL0. | ||||||
|  |     c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0); | ||||||
|  |     c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0)); | ||||||
|  |  | ||||||
|  |     // Restore the scratch register pushed above (X1 when src_reg is X0, otherwise X0, so it never aliases src_reg). | ||||||
|  |     c.LDR(scratch_reg, SP, POST_INDEXED, 16); | ||||||
|  |  | ||||||
|  |     // Jump back to the instruction after the emulated MSR. | ||||||
|  |     this->BranchToModule(module_dest); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { | ||||||
|  |     static Common::Arm64::NativeClock clock{}; | ||||||
|  |     const auto factor = clock.GetGuestCNTFRQFactor(); | ||||||
|  |     const auto raw_factor = Common::BitCast<std::array<u64, 2>>(factor); | ||||||
|  |  | ||||||
|  |     const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1; | ||||||
|  |     oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0; | ||||||
|  |     oaknut::XReg scratch1 = use_x2_x3 ? X3 : X1; | ||||||
|  |  | ||||||
|  |     oaknut::Label factorlo; | ||||||
|  |     oaknut::Label factorhi; | ||||||
|  |  | ||||||
|  |     // Save scratches. X2/X3 replace X0/X1 when dest_reg is X0 or X1, so the scratches never alias dest_reg. | ||||||
|  |     c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16); | ||||||
|  |  | ||||||
|  |     // Read the host virtual counter (CNTVCT_EL0) into dest_reg. | ||||||
|  |     c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0); | ||||||
|  |  | ||||||
|  |     // Load both 64-bit words of the scaling factor from the literals emitted after the branch below. | ||||||
|  |     c.LDR(scratch0, factorlo); | ||||||
|  |     c.LDR(scratch1, factorhi); | ||||||
|  |  | ||||||
|  |     // scratch0 = upper 64 bits of the 128-bit product counter * factorlo. | ||||||
|  |     c.UMULH(scratch0, dest_reg, scratch0); | ||||||
|  |  | ||||||
|  |     // dest_reg = counter * factorhi + scratch0 (low 64 bits of the product plus the carry-in word). | ||||||
|  |     c.MADD(dest_reg, dest_reg, scratch1, scratch0); | ||||||
|  |  | ||||||
|  |     // Reload scratches. | ||||||
|  |     c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16); | ||||||
|  |  | ||||||
|  |     // Jump back to the instruction after the emulated MRS. | ||||||
|  |     this->BranchToModule(module_dest); | ||||||
|  |  | ||||||
|  |     // Scaling factor constants, placed after the branch so they are never executed: raw_factor[0] at factorlo, raw_factor[1] at factorhi. | ||||||
|  |     c.l(factorlo); | ||||||
|  |     c.dx(raw_factor[0]); | ||||||
|  |     c.l(factorhi); | ||||||
|  |     c.dx(raw_factor[1]); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Patcher::LockContext() { | ||||||
|  |     oaknut::Label retry; | ||||||
|  |  | ||||||
|  |     // Emits an inline spin-acquire of NativeExecutionParameters::lock. Save scratches X0/X1 first. | ||||||
|  |     c.STP(X0, X1, SP, PRE_INDEXED, -16); | ||||||
|  |  | ||||||
|  |     // Retry point: clear any exclusive-monitor state, then recompute the lock address from TPIDR_EL0. | ||||||
|  |     c.l(retry); | ||||||
|  |     c.CLREX(); | ||||||
|  |     c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); | ||||||
|  |     c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); | ||||||
|  |  | ||||||
|  |     static_assert(SpinLockLocked == 0); | ||||||
|  |  | ||||||
|  |     // Load-linked with acquire ordering. | ||||||
|  |     c.LDAXR(W1, X0); | ||||||
|  |  | ||||||
|  |     // If the value was SpinLockLocked (0, per the static_assert above), the lock is taken: retry. | ||||||
|  |     c.CBZ(W1, retry); | ||||||
|  |  | ||||||
|  |     // Store-conditional SpinLockLocked (WZR) with relaxed ordering; W1 receives the store status. | ||||||
|  |     c.STXR(W1, WZR, X0); | ||||||
|  |  | ||||||
|  |     // If we failed to store (non-zero status), retry. | ||||||
|  |     c.CBNZ(W1, retry); | ||||||
|  |  | ||||||
|  |     // We succeeded! Reload scratches. | ||||||
|  |     c.LDP(X0, X1, SP, POST_INDEXED, 16); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Patcher::UnlockContext() { | ||||||
|  |     // Emits an inline release of NativeExecutionParameters::lock. Save scratches X0/X1 first. | ||||||
|  |     c.STP(X0, X1, SP, PRE_INDEXED, -16); | ||||||
|  |  | ||||||
|  |     // Compute the lock address from the host TPIDR_EL0. | ||||||
|  |     c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); | ||||||
|  |     c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); | ||||||
|  |  | ||||||
|  |     // Load SpinLockUnlocked. | ||||||
|  |     c.MOV(W1, SpinLockUnlocked); | ||||||
|  |  | ||||||
|  |     // Store value with release ordering; a plain store suffices, no exclusive pair is used here. | ||||||
|  |     c.STLR(W1, X0); | ||||||
|  |  | ||||||
|  |     // Load scratches. | ||||||
|  |     c.LDP(X0, X1, SP, POST_INDEXED, 16); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // namespace Core::NCE | ||||||
							
								
								
									
										107
									
								
								src/core/arm/nce/patch.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								src/core/arm/nce/patch.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <span> | ||||||
|  | #include <unordered_map> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
|  | #pragma clang diagnostic push | ||||||
|  | #pragma clang diagnostic ignored "-Wshorten-64-to-32" | ||||||
|  | #include <oaknut/code_block.hpp> | ||||||
|  | #include <oaknut/oaknut.hpp> | ||||||
|  | #pragma clang diagnostic pop | ||||||
|  |  | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "core/hle/kernel/code_set.h" | ||||||
|  | #include "core/hle/kernel/k_typed_address.h" | ||||||
|  | #include "core/hle/kernel/physical_memory.h" | ||||||
|  |  | ||||||
|  | #include <signal.h> | ||||||
|  |  | ||||||
|  | namespace Core { | ||||||
|  | struct GuestContext; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | namespace Core::NCE { | ||||||
|  |  | ||||||
|  | /// Where the generated patch section sits relative to the module image. | ||||||
|  | enum class PatchMode : u32 { | ||||||
|  |     None = 0,     ///< Initial value of Patcher::mode. | ||||||
|  |     PreText = 1,  ///< Patch section is inserted before .text | ||||||
|  |     PostData = 2, ///< Patch section is inserted after .data | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /// 64-bit value identifying a location in module text. | ||||||
|  | using ModuleTextAddress = u64; | ||||||
|  | /// 64-bit value identifying a location in patch text. | ||||||
|  | using PatchTextAddress = u64; | ||||||
|  | /// Lookup from a module text location to a patch text location. | ||||||
|  | /// NOTE(review): presumably filled by Patcher::RelocateAndCopy from its trampoline list; confirm. | ||||||
|  | using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>; | ||||||
|  |  | ||||||
|  | /// Generates the AArch64 patch code for a module using an oaknut code generator and records | ||||||
|  | /// the relocations that tie that code to locations in the module text. | ||||||
|  | class Patcher { | ||||||
|  | public: | ||||||
|  |     explicit Patcher(); | ||||||
|  |     ~Patcher(); | ||||||
|  |  | ||||||
|  |     /// NOTE(review): presumably scans the text segment and emits a handler per patched | ||||||
|  |     /// instruction; the definition is not visible from this header. | ||||||
|  |     void PatchText(const Kernel::PhysicalMemory& program_image, | ||||||
|  |                    const Kernel::CodeSet::Segment& code); | ||||||
|  |  | ||||||
|  |     /// Copies the generated patch instructions into program_image. | ||||||
|  |     /// NOTE(review): presumably also resolves the recorded relocations against load_base and | ||||||
|  |     /// fills out_trampolines; confirm against the definition. | ||||||
|  |     void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, | ||||||
|  |                          Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines); | ||||||
|  |  | ||||||
|  |     /// Page-aligned size in bytes of the generated patch instructions. | ||||||
|  |     size_t SectionSize() const noexcept; | ||||||
|  |  | ||||||
|  |     /// Current placement of the patch section. | ||||||
|  |     [[nodiscard]] PatchMode Mode() const noexcept { | ||||||
|  |         return this->mode; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     /// Location in the module that emitted code returns to. | ||||||
|  |     using ModuleDestLabel = uintptr_t; | ||||||
|  |  | ||||||
|  |     /// Pairs an offset in the patch section with the module location it resumes at. | ||||||
|  |     struct Trampoline { | ||||||
|  |         ptrdiff_t patch_offset; | ||||||
|  |         uintptr_t module_offset; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Emitters for the shared save/restore routines and the context spin lock. | ||||||
|  |     void WriteLoadContext(); | ||||||
|  |     void WriteSaveContext(); | ||||||
|  |     void LockContext(); | ||||||
|  |     void UnlockContext(); | ||||||
|  |  | ||||||
|  |     // Emitters for the per-instruction handlers. | ||||||
|  |     void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id); | ||||||
|  |     void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, | ||||||
|  |                          oaknut::SystemReg src_reg); | ||||||
|  |     void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg); | ||||||
|  |     void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg); | ||||||
|  |  | ||||||
|  |     /// Records a module-to-patch branch relocation at the current patch offset; emits nothing. | ||||||
|  |     void BranchToPatch(uintptr_t module_dest) { | ||||||
|  |         m_branch_to_patch_relocations.push_back(Relocation{c.offset(), module_dest}); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Records a patch-to-module branch relocation and reserves one 32-bit word for it. | ||||||
|  |     void BranchToModule(uintptr_t module_dest) { | ||||||
|  |         m_branch_to_module_relocations.push_back(Relocation{c.offset(), module_dest}); | ||||||
|  |         c.dw(0); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Records a module-PC relocation and reserves one 64-bit literal for it. | ||||||
|  |     void WriteModulePc(uintptr_t module_dest) { | ||||||
|  |         m_write_module_pc_relocations.push_back(Relocation{c.offset(), module_dest}); | ||||||
|  |         c.dx(0); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Data members. The declaration order below is kept exactly as it was, because it fixes | ||||||
|  |     // the order in which the members are constructed. | ||||||
|  |  | ||||||
|  |     // List of patch instructions we have generated. | ||||||
|  |     std::vector<u32> m_patch_instructions{}; | ||||||
|  |  | ||||||
|  |     // Relocation type for relative branch from module to patch. | ||||||
|  |     struct Relocation { | ||||||
|  |         ptrdiff_t patch_offset;  ///< Offset in bytes from the start of the patch section. | ||||||
|  |         uintptr_t module_offset; ///< Offset in bytes from the start of the text section. | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     oaknut::VectorCodeGenerator c; | ||||||
|  |     std::vector<Trampoline> m_trampolines; | ||||||
|  |     std::vector<Relocation> m_branch_to_patch_relocations{}; | ||||||
|  |     std::vector<Relocation> m_branch_to_module_relocations{}; | ||||||
|  |     std::vector<Relocation> m_write_module_pc_relocations{}; | ||||||
|  |     oaknut::Label m_save_context{}; | ||||||
|  |     oaknut::Label m_load_context{}; | ||||||
|  |     PatchMode mode{PatchMode::None}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } // namespace Core::NCE | ||||||
| @@ -136,9 +136,7 @@ struct System::Impl { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     void Initialize(System& system) { |     void Initialize(System& system) { | ||||||
|         const bool direct_mapped_address = Settings::IsNceEnabled(); |         device_memory = std::make_unique<Core::DeviceMemory>(); | ||||||
|         device_memory = std::make_unique<Core::DeviceMemory>(direct_mapped_address); |  | ||||||
|  |  | ||||||
|         is_multicore = Settings::values.use_multi_core.GetValue(); |         is_multicore = Settings::values.use_multi_core.GetValue(); | ||||||
|         extended_memory_layout = |         extended_memory_layout = | ||||||
|             Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb; |             Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb; | ||||||
|   | |||||||
| @@ -211,6 +211,8 @@ void CpuManager::RunThread(std::stop_token token, std::size_t core) { | |||||||
|         system.GPU().ObtainContext(); |         system.GPU().ObtainContext(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     system.ArmInterface(core).Initialize(); | ||||||
|  |  | ||||||
|     auto& kernel = system.Kernel(); |     auto& kernel = system.Kernel(); | ||||||
|     auto& scheduler = *kernel.CurrentScheduler(); |     auto& scheduler = *kernel.CurrentScheduler(); | ||||||
|     auto* thread = scheduler.GetSchedulerCurrentThread(); |     auto* thread = scheduler.GetSchedulerCurrentThread(); | ||||||
|   | |||||||
| @@ -12,13 +12,9 @@ constexpr size_t VirtualReserveSize = 1ULL << 38; | |||||||
| constexpr size_t VirtualReserveSize = 1ULL << 39; | constexpr size_t VirtualReserveSize = 1ULL << 39; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| DeviceMemory::DeviceMemory(bool direct_mapped_address) | DeviceMemory::DeviceMemory() | ||||||
|     : buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(), |     : buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(), | ||||||
|              VirtualReserveSize} { |              VirtualReserveSize} {} | ||||||
|     if (direct_mapped_address) { |  | ||||||
|         buffer.EnableDirectMappedAddress(); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| DeviceMemory::~DeviceMemory() = default; | DeviceMemory::~DeviceMemory() = default; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -18,7 +18,7 @@ enum : u64 { | |||||||
|  |  | ||||||
| class DeviceMemory { | class DeviceMemory { | ||||||
| public: | public: | ||||||
|     explicit DeviceMemory(bool direct_mapped_address); |     explicit DeviceMemory(); | ||||||
|     ~DeviceMemory(); |     ~DeviceMemory(); | ||||||
|  |  | ||||||
|     DeviceMemory& operator=(const DeviceMemory&) = delete; |     DeviceMemory& operator=(const DeviceMemory&) = delete; | ||||||
|   | |||||||
| @@ -75,11 +75,20 @@ struct CodeSet final { | |||||||
|         return segments[2]; |         return segments[2]; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     Segment& PatchSegment() { | ||||||
|  |         return patch_segment; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     const Segment& PatchSegment() const { | ||||||
|  |         return patch_segment; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// The overall data that backs this code set. |     /// The overall data that backs this code set. | ||||||
|     Kernel::PhysicalMemory memory; |     Kernel::PhysicalMemory memory; | ||||||
|  |  | ||||||
|     /// The segments that comprise this code set. |     /// The segments that comprise this code set. | ||||||
|     std::array<Segment, 3> segments; |     std::array<Segment, 3> segments; | ||||||
|  |     Segment patch_segment; | ||||||
|  |  | ||||||
|     /// The entry point address for this code set. |     /// The entry point address for this code set. | ||||||
|     KProcessAddress entrypoint = 0; |     KProcessAddress entrypoint = 0; | ||||||
|   | |||||||
| @@ -25,8 +25,8 @@ constexpr std::array<KAddressSpaceInfo, 13> AddressSpaceInfos{{ | |||||||
|    { .bit_width = 36, .address = 2_GiB       , .size = 64_GiB  - 2_GiB  , .type = KAddressSpaceInfo::Type::MapLarge, }, |    { .bit_width = 36, .address = 2_GiB       , .size = 64_GiB  - 2_GiB  , .type = KAddressSpaceInfo::Type::MapLarge, }, | ||||||
|    { .bit_width = 36, .address = Size_Invalid, .size = 8_GiB            , .type = KAddressSpaceInfo::Type::Heap,     }, |    { .bit_width = 36, .address = Size_Invalid, .size = 8_GiB            , .type = KAddressSpaceInfo::Type::Heap,     }, | ||||||
|    { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB            , .type = KAddressSpaceInfo::Type::Alias,    }, |    { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB            , .type = KAddressSpaceInfo::Type::Alias,    }, | ||||||
| #ifdef ANDROID | #ifdef ARCHITECTURE_arm64 | ||||||
|    // With Android, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region. |    // With NCE, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region. | ||||||
|    { .bit_width = 39, .address = 128_MiB     , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, |    { .bit_width = 39, .address = 128_MiB     , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, | ||||||
| #else | #else | ||||||
|    { .bit_width = 39, .address = 128_MiB     , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, |    { .bit_width = 39, .address = 128_MiB     , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, | ||||||
|   | |||||||
| @@ -1214,6 +1214,17 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) { | |||||||
|     ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute); |     ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute); | ||||||
|     ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read); |     ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read); | ||||||
|     ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); |     ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); | ||||||
|  |  | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  |     if (Settings::IsNceEnabled()) { | ||||||
|  |         auto& buffer = m_kernel.System().DeviceMemory().buffer; | ||||||
|  |         const auto& code = code_set.CodeSegment(); | ||||||
|  |         const auto& patch = code_set.PatchSegment(); | ||||||
|  |         buffer.Protect(GetInteger(base_addr + code.addr), code.size, true, true, true); | ||||||
|  |         buffer.Protect(GetInteger(base_addr + patch.addr), patch.size, true, true, true); | ||||||
|  |         ReprotectSegment(code_set.PatchSegment(), Svc::MemoryPermission::None); | ||||||
|  |     } | ||||||
|  | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { | bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { | ||||||
|   | |||||||
| @@ -112,6 +112,7 @@ private: | |||||||
|     std::array<KThread*, Core::Hardware::NUM_CPU_CORES> m_pinned_threads{}; |     std::array<KThread*, Core::Hardware::NUM_CPU_CORES> m_pinned_threads{}; | ||||||
|     std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS> m_watchpoints{}; |     std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS> m_watchpoints{}; | ||||||
|     std::map<KProcessAddress, u64> m_debug_page_refcounts{}; |     std::map<KProcessAddress, u64> m_debug_page_refcounts{}; | ||||||
|  |     std::unordered_map<u64, u64> m_post_handlers{}; | ||||||
|     std::atomic<s64> m_cpu_time{}; |     std::atomic<s64> m_cpu_time{}; | ||||||
|     std::atomic<s64> m_num_process_switches{}; |     std::atomic<s64> m_num_process_switches{}; | ||||||
|     std::atomic<s64> m_num_thread_switches{}; |     std::atomic<s64> m_num_thread_switches{}; | ||||||
| @@ -467,6 +468,14 @@ public: | |||||||
|  |  | ||||||
|     static void Switch(KProcess* cur_process, KProcess* next_process); |     static void Switch(KProcess* cur_process, KProcess* next_process); | ||||||
|  |  | ||||||
|  |     std::unordered_map<u64, u64>& GetPostHandlers() noexcept { | ||||||
|  |         return m_post_handlers; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     KernelCore& GetKernel() noexcept { | ||||||
|  |         return m_kernel; | ||||||
|  |     } | ||||||
|  |  | ||||||
| public: | public: | ||||||
|     // Attempts to insert a watchpoint into a free slot. Returns false if none are available. |     // Attempts to insert a watchpoint into a free slot. Returns false if none are available. | ||||||
|     bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); |     bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); | ||||||
|   | |||||||
| @@ -655,6 +655,21 @@ public: | |||||||
|         return m_stack_top; |         return m_stack_top; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | public: | ||||||
|  |     // TODO: This shouldn't be defined in kernel namespace | ||||||
|  |     struct NativeExecutionParameters { | ||||||
|  |         u64 tpidr_el0{}; | ||||||
|  |         u64 tpidrro_el0{}; | ||||||
|  |         void* native_context{}; | ||||||
|  |         std::atomic<u32> lock{1}; | ||||||
|  |         bool is_running{}; | ||||||
|  |         u32 magic{Common::MakeMagic('Y', 'U', 'Z', 'U')}; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     NativeExecutionParameters& GetNativeExecutionParameters() { | ||||||
|  |         return m_native_execution_parameters; | ||||||
|  |     } | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     KThread* RemoveWaiterByKey(bool* out_has_waiters, KProcessAddress key, |     KThread* RemoveWaiterByKey(bool* out_has_waiters, KProcessAddress key, | ||||||
|                                bool is_kernel_address_key); |                                bool is_kernel_address_key); | ||||||
| @@ -914,6 +929,7 @@ private: | |||||||
|     ThreadWaitReasonForDebugging m_wait_reason_for_debugging{}; |     ThreadWaitReasonForDebugging m_wait_reason_for_debugging{}; | ||||||
|     uintptr_t m_argument{}; |     uintptr_t m_argument{}; | ||||||
|     KProcessAddress m_stack_top{}; |     KProcessAddress m_stack_top{}; | ||||||
|  |     NativeExecutionParameters m_native_execution_parameters{}; | ||||||
|  |  | ||||||
| public: | public: | ||||||
|     using ConditionVariableThreadTreeType = ConditionVariableThreadTree; |     using ConditionVariableThreadTreeType = ConditionVariableThreadTree; | ||||||
|   | |||||||
| @@ -1,8 +1,12 @@ | |||||||
| // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||||||
| // SPDX-License-Identifier: GPL-2.0-or-later | // SPDX-License-Identifier: GPL-2.0-or-later | ||||||
|  |  | ||||||
|  | #include "common/settings.h" | ||||||
| #include "core/arm/dynarmic/arm_dynarmic_32.h" | #include "core/arm/dynarmic/arm_dynarmic_32.h" | ||||||
| #include "core/arm/dynarmic/arm_dynarmic_64.h" | #include "core/arm/dynarmic/arm_dynarmic_64.h" | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  | #include "core/arm/nce/arm_nce.h" | ||||||
|  | #endif | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/hle/kernel/k_scheduler.h" | #include "core/hle/kernel/k_scheduler.h" | ||||||
| #include "core/hle/kernel/kernel.h" | #include "core/hle/kernel/kernel.h" | ||||||
| @@ -14,7 +18,8 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu | |||||||
|     : m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} { |     : m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} { | ||||||
| #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) | #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) | ||||||
|     // TODO(bunnei): Initialization relies on a core being available. We may later replace this with |     // TODO(bunnei): Initialization relies on a core being available. We may later replace this with | ||||||
|     // a 32-bit instance of Dynarmic. This should be abstracted out to a CPU manager. |     // an NCE interface or a 32-bit instance of Dynarmic. This should be abstracted out to a CPU | ||||||
|  |     // manager. | ||||||
|     auto& kernel = system.Kernel(); |     auto& kernel = system.Kernel(); | ||||||
|     m_arm_interface = std::make_unique<Core::ARM_Dynarmic_64>( |     m_arm_interface = std::make_unique<Core::ARM_Dynarmic_64>( | ||||||
|         system, kernel.IsMulticore(), |         system, kernel.IsMulticore(), | ||||||
| @@ -28,6 +33,13 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu | |||||||
| PhysicalCore::~PhysicalCore() = default; | PhysicalCore::~PhysicalCore() = default; | ||||||
|  |  | ||||||
| void PhysicalCore::Initialize(bool is_64_bit) { | void PhysicalCore::Initialize(bool is_64_bit) { | ||||||
|  | #if defined(ARCHITECTURE_arm64) | ||||||
|  |     if (Settings::IsNceEnabled()) { | ||||||
|  |         m_arm_interface = std::make_unique<Core::ARM_NCE>(m_system, m_system.Kernel().IsMulticore(), | ||||||
|  |                                                           m_core_index); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | #endif | ||||||
| #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) | #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) | ||||||
|     auto& kernel = m_system.Kernel(); |     auto& kernel = m_system.Kernel(); | ||||||
|     if (!is_64_bit) { |     if (!is_64_bit) { | ||||||
|   | |||||||
| @@ -3,6 +3,7 @@ | |||||||
|  |  | ||||||
| #include <cstring> | #include <cstring> | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
|  | #include "common/settings.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/file_sys/content_archive.h" | #include "core/file_sys/content_archive.h" | ||||||
| #include "core/file_sys/control_metadata.h" | #include "core/file_sys/control_metadata.h" | ||||||
| @@ -14,6 +15,10 @@ | |||||||
| #include "core/loader/deconstructed_rom_directory.h" | #include "core/loader/deconstructed_rom_directory.h" | ||||||
| #include "core/loader/nso.h" | #include "core/loader/nso.h" | ||||||
|  |  | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  | #include "core/arm/nce/patch.h" | ||||||
|  | #endif | ||||||
|  |  | ||||||
| namespace Loader { | namespace Loader { | ||||||
|  |  | ||||||
| AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_, | AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_, | ||||||
| @@ -124,21 +129,41 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect | |||||||
|     } |     } | ||||||
|     metadata.Print(); |     metadata.Print(); | ||||||
|  |  | ||||||
|     const auto static_modules = {"rtld",    "main",    "subsdk0", "subsdk1", "subsdk2", |     // Enable NCE only for 64-bit programs. | ||||||
|                                  "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", |     Settings::SetNceEnabled(metadata.Is64BitProgram()); | ||||||
|                                  "subsdk8", "subsdk9", "sdk"}; |  | ||||||
|  |     const std::array static_modules = {"rtld",    "main",    "subsdk0", "subsdk1", "subsdk2", | ||||||
|  |                                        "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", | ||||||
|  |                                        "subsdk8", "subsdk9", "sdk"}; | ||||||
|  |  | ||||||
|  |     std::size_t code_size{}; | ||||||
|  |  | ||||||
|  |     // Define an NCE patch context for each potential module. | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  |     std::array<Core::NCE::Patcher, 13> module_patchers; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |     const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* { | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  |         if (Settings::IsNceEnabled()) { | ||||||
|  |             return &module_patchers[i]; | ||||||
|  |         } | ||||||
|  | #endif | ||||||
|  |         return nullptr; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     // Use the NSO module loader to figure out the code layout |     // Use the NSO module loader to figure out the code layout | ||||||
|     std::size_t code_size{}; |     for (size_t i = 0; i < static_modules.size(); i++) { | ||||||
|     for (const auto& module : static_modules) { |         const auto& module = static_modules[i]; | ||||||
|         const FileSys::VirtualFile module_file{dir->GetFile(module)}; |         const FileSys::VirtualFile module_file{dir->GetFile(module)}; | ||||||
|         if (!module_file) { |         if (!module_file) { | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; |         const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; | ||||||
|         const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( |         const auto tentative_next_load_addr = | ||||||
|             process, system, *module_file, code_size, should_pass_arguments, false); |             AppLoader_NSO::LoadModule(process, system, *module_file, code_size, | ||||||
|  |                                       should_pass_arguments, false, {}, GetPatcher(i)); | ||||||
|         if (!tentative_next_load_addr) { |         if (!tentative_next_load_addr) { | ||||||
|             return {ResultStatus::ErrorLoadingNSO, {}}; |             return {ResultStatus::ErrorLoadingNSO, {}}; | ||||||
|         } |         } | ||||||
| @@ -146,8 +171,18 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect | |||||||
|         code_size = *tentative_next_load_addr; |         code_size = *tentative_next_load_addr; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // Enable direct memory mapping in case of NCE. | ||||||
|  |     const u64 fastmem_base = [&]() -> size_t { | ||||||
|  |         if (Settings::IsNceEnabled()) { | ||||||
|  |             auto& buffer = system.DeviceMemory().buffer; | ||||||
|  |             buffer.EnableDirectMappedAddress(); | ||||||
|  |             return reinterpret_cast<u64>(buffer.VirtualBasePointer()); | ||||||
|  |         } | ||||||
|  |         return 0; | ||||||
|  |     }(); | ||||||
|  |  | ||||||
|     // Setup the process code layout |     // Setup the process code layout | ||||||
|     if (process.LoadFromMetadata(metadata, code_size, 0, is_hbl).IsError()) { |     if (process.LoadFromMetadata(metadata, code_size, fastmem_base, is_hbl).IsError()) { | ||||||
|         return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; |         return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -157,7 +192,8 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect | |||||||
|     VAddr next_load_addr{base_address}; |     VAddr next_load_addr{base_address}; | ||||||
|     const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(), |     const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(), | ||||||
|                                    system.GetContentProvider()}; |                                    system.GetContentProvider()}; | ||||||
|     for (const auto& module : static_modules) { |     for (size_t i = 0; i < static_modules.size(); i++) { | ||||||
|  |         const auto& module = static_modules[i]; | ||||||
|         const FileSys::VirtualFile module_file{dir->GetFile(module)}; |         const FileSys::VirtualFile module_file{dir->GetFile(module)}; | ||||||
|         if (!module_file) { |         if (!module_file) { | ||||||
|             continue; |             continue; | ||||||
| @@ -165,15 +201,16 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect | |||||||
|  |  | ||||||
|         const VAddr load_addr{next_load_addr}; |         const VAddr load_addr{next_load_addr}; | ||||||
|         const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; |         const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; | ||||||
|         const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( |         const auto tentative_next_load_addr = | ||||||
|             process, system, *module_file, load_addr, should_pass_arguments, true, pm); |             AppLoader_NSO::LoadModule(process, system, *module_file, load_addr, | ||||||
|  |                                       should_pass_arguments, true, pm, GetPatcher(i)); | ||||||
|         if (!tentative_next_load_addr) { |         if (!tentative_next_load_addr) { | ||||||
|             return {ResultStatus::ErrorLoadingNSO, {}}; |             return {ResultStatus::ErrorLoadingNSO, {}}; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         next_load_addr = *tentative_next_load_addr; |         next_load_addr = *tentative_next_load_addr; | ||||||
|         modules.insert_or_assign(load_addr, module); |         modules.insert_or_assign(load_addr, module); | ||||||
|         LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr); |         LOG_DEBUG(Loader, "loaded module {} @ {:#X}", module, load_addr); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Find the RomFS by searching for a ".romfs" file in this directory |     // Find the RomFS by searching for a ".romfs" file in this directory | ||||||
|   | |||||||
| @@ -22,6 +22,10 @@ | |||||||
| #include "core/loader/nso.h" | #include "core/loader/nso.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
|  |  | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  | #include "core/arm/nce/patch.h" | ||||||
|  | #endif | ||||||
|  |  | ||||||
| namespace Loader { | namespace Loader { | ||||||
|  |  | ||||||
| struct NroSegmentHeader { | struct NroSegmentHeader { | ||||||
| @@ -139,7 +143,8 @@ static constexpr u32 PageAlignSize(u32 size) { | |||||||
|     return static_cast<u32>((size + Core::Memory::YUZU_PAGEMASK) & ~Core::Memory::YUZU_PAGEMASK); |     return static_cast<u32>((size + Core::Memory::YUZU_PAGEMASK) & ~Core::Memory::YUZU_PAGEMASK); | ||||||
| } | } | ||||||
|  |  | ||||||
| static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data) { | static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, | ||||||
|  |                         const std::vector<u8>& data) { | ||||||
|     if (data.size() < sizeof(NroHeader)) { |     if (data.size() < sizeof(NroHeader)) { | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
| @@ -195,14 +200,60 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data) | |||||||
|     codeset.DataSegment().size += bss_size; |     codeset.DataSegment().size += bss_size; | ||||||
|     program_image.resize(static_cast<u32>(program_image.size()) + bss_size); |     program_image.resize(static_cast<u32>(program_image.size()) + bss_size); | ||||||
|  |  | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  |     const auto& code = codeset.CodeSegment(); | ||||||
|  |  | ||||||
|  |     // NROs are always 64-bit programs. | ||||||
|  |     Settings::SetNceEnabled(true); | ||||||
|  |  | ||||||
|  |     // Create NCE patcher | ||||||
|  |     Core::NCE::Patcher patch{}; | ||||||
|  |     size_t image_size = program_image.size(); | ||||||
|  |  | ||||||
|  |     if (Settings::IsNceEnabled()) { | ||||||
|  |         // Patch SVCs and MRS calls in the guest code | ||||||
|  |         patch.PatchText(program_image, code); | ||||||
|  |  | ||||||
|  |         // We only support PostData patching for NROs. | ||||||
|  |         ASSERT(patch.Mode() == Core::NCE::PatchMode::PostData); | ||||||
|  |  | ||||||
|  |         // Update patch section. | ||||||
|  |         auto& patch_segment = codeset.PatchSegment(); | ||||||
|  |         patch_segment.addr = image_size; | ||||||
|  |         patch_segment.size = static_cast<u32>(patch.SectionSize()); | ||||||
|  |  | ||||||
|  |         // Add patch section size to the module size. | ||||||
|  |         image_size += patch_segment.size; | ||||||
|  |     } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |     // Enable direct memory mapping in case of NCE. | ||||||
|  |     const u64 fastmem_base = [&]() -> size_t { | ||||||
|  |         if (Settings::IsNceEnabled()) { | ||||||
|  |             auto& buffer = system.DeviceMemory().buffer; | ||||||
|  |             buffer.EnableDirectMappedAddress(); | ||||||
|  |             return reinterpret_cast<u64>(buffer.VirtualBasePointer()); | ||||||
|  |         } | ||||||
|  |         return 0; | ||||||
|  |     }(); | ||||||
|  |  | ||||||
|     // Setup the process code layout |     // Setup the process code layout | ||||||
|     if (process |     if (process | ||||||
|             .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), 0, |             .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), image_size, fastmem_base, | ||||||
|                               false) |                               false) | ||||||
|             .IsError()) { |             .IsError()) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // Relocate code patch and copy to the program_image if running under NCE. | ||||||
|  |     // This needs to be after LoadFromMetadata so we can use the process entry point. | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  |     if (Settings::IsNceEnabled()) { | ||||||
|  |         patch.RelocateAndCopy(process.GetEntryPoint(), code, program_image, | ||||||
|  |                               &process.GetPostHandlers()); | ||||||
|  |     } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|     // Load codeset for current process |     // Load codeset for current process | ||||||
|     codeset.memory = std::move(program_image); |     codeset.memory = std::move(program_image); | ||||||
|     process.LoadModule(std::move(codeset), process.GetEntryPoint()); |     process.LoadModule(std::move(codeset), process.GetEntryPoint()); | ||||||
| @@ -210,8 +261,9 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data) | |||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Reads all bytes of nro_file and forwards them, together with the system and the target | ||||||
|  | // process, to LoadNroImpl. The boolean returned by LoadNroImpl is passed through unchanged. | ||||||
| bool AppLoader_NRO::LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file) { | bool AppLoader_NRO::LoadNro(Core::System& system, Kernel::KProcess& process, | ||||||
|     return LoadNroImpl(process, nro_file.ReadAllBytes()); |                             const FileSys::VfsFile& nro_file) { | ||||||
|  |     return LoadNroImpl(system, process, nro_file.ReadAllBytes()); | ||||||
| } | } | ||||||
|  |  | ||||||
| AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::System& system) { | AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::System& system) { | ||||||
| @@ -219,7 +271,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::S | |||||||
|         return {ResultStatus::ErrorAlreadyLoaded, {}}; |         return {ResultStatus::ErrorAlreadyLoaded, {}}; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (!LoadNro(process, *file)) { |     if (!LoadNro(system, process, *file)) { | ||||||
|         return {ResultStatus::ErrorLoadingNRO, {}}; |         return {ResultStatus::ErrorLoadingNRO, {}}; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -54,7 +54,7 @@ public: | |||||||
|     bool IsRomFSUpdatable() const override; |     bool IsRomFSUpdatable() const override; | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     bool LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file); |     bool LoadNro(Core::System& system, Kernel::KProcess& process, const FileSys::VfsFile& nro_file); | ||||||
|  |  | ||||||
|     std::vector<u8> icon_data; |     std::vector<u8> icon_data; | ||||||
|     std::unique_ptr<FileSys::NACP> nacp; |     std::unique_ptr<FileSys::NACP> nacp; | ||||||
|   | |||||||
| @@ -20,6 +20,10 @@ | |||||||
| #include "core/loader/nso.h" | #include "core/loader/nso.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
|  |  | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  | #include "core/arm/nce/patch.h" | ||||||
|  | #endif | ||||||
|  |  | ||||||
| namespace Loader { | namespace Loader { | ||||||
| namespace { | namespace { | ||||||
| struct MODHeader { | struct MODHeader { | ||||||
| @@ -72,7 +76,8 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& in_file) { | |||||||
| std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::System& system, | std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::System& system, | ||||||
|                                                const FileSys::VfsFile& nso_file, VAddr load_base, |                                                const FileSys::VfsFile& nso_file, VAddr load_base, | ||||||
|                                                bool should_pass_arguments, bool load_into_process, |                                                bool should_pass_arguments, bool load_into_process, | ||||||
|                                                std::optional<FileSys::PatchManager> pm) { |                                                std::optional<FileSys::PatchManager> pm, | ||||||
|  |                                                Core::NCE::Patcher* patch) { | ||||||
|     if (nso_file.GetSize() < sizeof(NSOHeader)) { |     if (nso_file.GetSize() < sizeof(NSOHeader)) { | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
| @@ -86,6 +91,16 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: | |||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // Allocate some space at the beginning if we are patching in PreText mode. | ||||||
|  |     const size_t module_start = [&]() -> size_t { | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  |         if (patch && patch->Mode() == Core::NCE::PatchMode::PreText) { | ||||||
|  |             return patch->SectionSize(); | ||||||
|  |         } | ||||||
|  | #endif | ||||||
|  |         return 0; | ||||||
|  |     }(); | ||||||
|  |  | ||||||
|     // Build program image |     // Build program image | ||||||
|     Kernel::CodeSet codeset; |     Kernel::CodeSet codeset; | ||||||
|     Kernel::PhysicalMemory program_image; |     Kernel::PhysicalMemory program_image; | ||||||
| @@ -95,11 +110,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: | |||||||
|         if (nso_header.IsSegmentCompressed(i)) { |         if (nso_header.IsSegmentCompressed(i)) { | ||||||
|             data = DecompressSegment(data, nso_header.segments[i]); |             data = DecompressSegment(data, nso_header.segments[i]); | ||||||
|         } |         } | ||||||
|         program_image.resize(nso_header.segments[i].location + static_cast<u32>(data.size())); |         program_image.resize(module_start + nso_header.segments[i].location + | ||||||
|         std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(), |                              static_cast<u32>(data.size())); | ||||||
|                     data.size()); |         std::memcpy(program_image.data() + module_start + nso_header.segments[i].location, | ||||||
|         codeset.segments[i].addr = nso_header.segments[i].location; |                     data.data(), data.size()); | ||||||
|         codeset.segments[i].offset = nso_header.segments[i].location; |         codeset.segments[i].addr = module_start + nso_header.segments[i].location; | ||||||
|  |         codeset.segments[i].offset = module_start + nso_header.segments[i].location; | ||||||
|         codeset.segments[i].size = nso_header.segments[i].size; |         codeset.segments[i].size = nso_header.segments[i].size; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -118,7 +134,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     codeset.DataSegment().size += nso_header.segments[2].bss_size; |     codeset.DataSegment().size += nso_header.segments[2].bss_size; | ||||||
|     const u32 image_size{ |     u32 image_size{ | ||||||
|         PageAlignSize(static_cast<u32>(program_image.size()) + nso_header.segments[2].bss_size)}; |         PageAlignSize(static_cast<u32>(program_image.size()) + nso_header.segments[2].bss_size)}; | ||||||
|     program_image.resize(image_size); |     program_image.resize(image_size); | ||||||
|  |  | ||||||
| @@ -139,6 +155,32 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: | |||||||
|         std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); |         std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | #ifdef ARCHITECTURE_arm64 | ||||||
|  |     // If we are computing the process code layout and using the NCE backend, patch. | ||||||
|  |     const auto& code = codeset.CodeSegment(); | ||||||
|  |     if (patch && patch->Mode() == Core::NCE::PatchMode::None) { | ||||||
|  |         // Patch SVCs and MRS calls in the guest code | ||||||
|  |         patch->PatchText(program_image, code); | ||||||
|  |  | ||||||
|  |         // Add patch section size to the module size. | ||||||
|  |         image_size += patch->SectionSize(); | ||||||
|  |     } else if (patch) { | ||||||
|  |         // Relocate code patch and copy to the program_image. | ||||||
|  |         patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers()); | ||||||
|  |  | ||||||
|  |         // Update patch section. | ||||||
|  |         auto& patch_segment = codeset.PatchSegment(); | ||||||
|  |         patch_segment.addr = patch->Mode() == Core::NCE::PatchMode::PreText ? 0 : image_size; | ||||||
|  |         patch_segment.size = static_cast<u32>(patch->SectionSize()); | ||||||
|  |  | ||||||
|  |         // Add patch section size to the module size. In PreText mode image_size | ||||||
|  |         // already contains the patch segment as part of module_start. | ||||||
|  |         if (patch->Mode() == Core::NCE::PatchMode::PostData) { | ||||||
|  |             image_size += patch_segment.size; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|     // If we aren't actually loading (i.e. just computing the process code layout), we are done |     // If we aren't actually loading (i.e. just computing the process code layout), we are done | ||||||
|     if (!load_into_process) { |     if (!load_into_process) { | ||||||
|         return load_base + image_size; |         return load_base + image_size; | ||||||
|   | |||||||
| @@ -15,6 +15,10 @@ namespace Core { | |||||||
| class System; | class System; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | namespace Core::NCE { | ||||||
|  | class Patcher; | ||||||
|  | } | ||||||
|  |  | ||||||
| namespace Kernel { | namespace Kernel { | ||||||
| class KProcess; | class KProcess; | ||||||
| } | } | ||||||
| @@ -88,7 +92,8 @@ public: | |||||||
|     static std::optional<VAddr> LoadModule(Kernel::KProcess& process, Core::System& system, |     static std::optional<VAddr> LoadModule(Kernel::KProcess& process, Core::System& system, | ||||||
|                                            const FileSys::VfsFile& nso_file, VAddr load_base, |                                            const FileSys::VfsFile& nso_file, VAddr load_base, | ||||||
|                                            bool should_pass_arguments, bool load_into_process, |                                            bool should_pass_arguments, bool load_into_process, | ||||||
|                                            std::optional<FileSys::PatchManager> pm = {}); |                                            std::optional<FileSys::PatchManager> pm = {}, | ||||||
|  |                                            Core::NCE::Patcher* patch = nullptr); | ||||||
|  |  | ||||||
|     LoadResult Load(Kernel::KProcess& process, Core::System& system) override; |     LoadResult Load(Kernel::KProcess& process, Core::System& system) override; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1001,4 +1001,17 @@ void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { | |||||||
|     impl->FlushRegion(dest_addr, size); |     impl->FlushRegion(dest_addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Looks up the host pointer for `vaddr` through GetPointerImpl, supplying two callbacks: | ||||||
|  | // one that logs the failure and records the address as unmapped, and one that asks the GPU | ||||||
|  | // to invalidate the `size`-byte region starting at `vaddr`. | ||||||
|  | // Returns true only if the unmapped callback never fired and the pointer is non-null. | ||||||
|  | bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { | ||||||
|  |     const auto guest_addr = GetInteger(vaddr); | ||||||
|  |     bool is_mapped = true; | ||||||
|  |  | ||||||
|  |     // Logs the failed lookup and clears the mapped flag. | ||||||
|  |     const auto log_unmapped = [&] { | ||||||
|  |         LOG_ERROR(HW_Memory, "Unmapped InvalidateNCE for {} bytes @ {:#x}", size, guest_addr); | ||||||
|  |         is_mapped = false; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Requests a GPU-side invalidation of the same address range. | ||||||
|  |     const auto invalidate_gpu = [&] { impl->system.GPU().InvalidateRegion(guest_addr, size); }; | ||||||
|  |  | ||||||
|  |     u8* const host_ptr = impl->GetPointerImpl(guest_addr, log_unmapped, invalidate_gpu); | ||||||
|  |     if (!is_mapped) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     return host_ptr != nullptr; | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace Core::Memory | } // namespace Core::Memory | ||||||
|   | |||||||
| @@ -474,6 +474,7 @@ public: | |||||||
|  |  | ||||||
|     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); |     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); | ||||||
|     void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); |     void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); | ||||||
|  |     bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); | ||||||
|     void FlushRegion(Common::ProcessAddress dest_addr, size_t size); |     void FlushRegion(Common::ProcessAddress dest_addr, size_t size); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user