From d7c532d8894ce806c9af13b8dd3eec975642b348 Mon Sep 17 00:00:00 2001 From: comex Date: Sat, 1 Jul 2023 15:00:39 -0700 Subject: [PATCH] Fixes and workarounds to make UBSan happier on macOS There are still some other issues not addressed here, but it's a start. Workarounds for false-positive reports: - `RasterizerAccelerated`: Put a gigantic array behind a `unique_ptr`, because UBSan has a [hardcoded limit](https://stackoverflow.com/questions/64531383/c-runtime-error-using-fsanitize-undefined-object-has-a-possibly-invalid-vp) on how big it thinks objects can be, specifically when dealing with offset-to-top values used with multiple inheritance. Hopefully this doesn't have a performance impact. - `QueryCacheBase::QueryCacheBase`: Use `reinterpret_cast` instead of `static_cast` on `*this` to avoid an operation that UBSan thinks is UB even though it at least arguably isn't. See the link in the code comment for more information. Fixes for correct reports: - `PageTable`, `Memory`: Use `uintptr_t` values instead of pointers to avoid UB from pointer overflow (when pointer arithmetic wraps around the address space). - `KScheduler::Reload`: `thread->GetOwnerProcess()` can be `nullptr`; avoid calling methods on it in this case, and make `LoadWatchpointArray` take a pointer so that `nullptr` can be passed instead. (The existing code returns a garbage reference to a field, which is then passed into `LoadWatchpointArray`; apparently that reference is never used, so this is harmless in practice but still triggers UBSan.) - `KAutoObject::Close`: This function calls `this->Destroy()`, which overwrites the beginning of the object with junk (specifically a free list pointer). Then it calls `this->UnregisterWithKernel()`. UBSan complains about a type mismatch because the vtable pointer has been overwritten, and I believe this is indeed UB. `UnregisterWithKernel` also loads `m_kernel` from the 'freed' object, which appears to be technically safe (the overwrite doesn't extend as far as that field) but is still dubious. Switch to a `static` method and load `m_kernel` in advance. - `KThread::Initialize`: Skip the priority-mask check in the assertion when `prio > Svc::LowestThreadPriority`, so that `1ULL << prio` is not evaluated for such priorities (presumably to keep the shift amount in range).
--- src/common/page_table.h | 12 ++++----- src/core/arm/arm_interface.cpp | 4 +-- src/core/arm/arm_interface.h | 2 +- src/core/hle/kernel/k_auto_object.cpp | 4 +-- src/core/hle/kernel/k_auto_object.h | 5 ++-- src/core/hle/kernel/k_scheduler.cpp | 3 ++- src/core/hle/kernel/k_thread.cpp | 2 +- src/core/memory.cpp | 30 +++++++++++++---------- src/video_core/query_cache.h | 4 ++- src/video_core/rasterizer_accelerated.cpp | 5 ++-- src/video_core/rasterizer_accelerated.h | 3 ++- 11 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/common/page_table.h b/src/common/page_table.h index fec8378f3..e653d52ad 100644 --- a/src/common/page_table.h +++ b/src/common/page_table.h @@ -51,7 +51,7 @@ struct PageTable { class PageInfo { public: /// Returns the page pointer - [[nodiscard]] u8* Pointer() const noexcept { + [[nodiscard]] uintptr_t Pointer() const noexcept { return ExtractPointer(raw.load(std::memory_order_relaxed)); } @@ -61,7 +61,7 @@ struct PageTable { } /// Returns the page pointer and attribute pair, extracted from the same atomic read - [[nodiscard]] std::pair<u8*, PageType> PointerType() const noexcept { + [[nodiscard]] std::pair<uintptr_t, PageType> PointerType() const noexcept { const uintptr_t non_atomic_raw = raw.load(std::memory_order_relaxed); return {ExtractPointer(non_atomic_raw), ExtractType(non_atomic_raw)}; } @@ -73,13 +73,13 @@ struct PageTable { } /// Write a page pointer and type pair atomically - void Store(u8* pointer, PageType type) noexcept { - raw.store(reinterpret_cast<uintptr_t>(pointer) | static_cast<uintptr_t>(type)); + void Store(uintptr_t pointer, PageType type) noexcept { + raw.store(pointer | static_cast<uintptr_t>(type)); } /// Unpack a pointer from a page info raw representation - [[nodiscard]] static u8* ExtractPointer(uintptr_t raw) noexcept { - return reinterpret_cast<u8*>(raw & (~uintptr_t{0} << ATTRIBUTE_BITS)); + [[nodiscard]] static uintptr_t ExtractPointer(uintptr_t raw) noexcept { + return raw & (~uintptr_t{0} << ATTRIBUTE_BITS); } /// Unpack a page type from a page info raw 
representation diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index aa0eb9791..0c012f094 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp @@ -217,8 +217,8 @@ void ARM_Interface::Run() { } } -void ARM_Interface::LoadWatchpointArray(const WatchpointArray& wp) { - watchpoints = &wp; +void ARM_Interface::LoadWatchpointArray(const WatchpointArray* wp) { + watchpoints = wp; } const Kernel::DebugWatchpoint* ARM_Interface::MatchingWatchpoint( diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index d5f2fa09a..3d866ff6f 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -186,7 +186,7 @@ public: virtual void SaveContext(ThreadContext64& ctx) const = 0; virtual void LoadContext(const ThreadContext32& ctx) = 0; virtual void LoadContext(const ThreadContext64& ctx) = 0; - void LoadWatchpointArray(const WatchpointArray& wp); + void LoadWatchpointArray(const WatchpointArray* wp); /// Clears the exclusive monitor's state. virtual void ClearExclusiveState() = 0; diff --git a/src/core/hle/kernel/k_auto_object.cpp b/src/core/hle/kernel/k_auto_object.cpp index 0ae42c95c..9cd7a9fd5 100644 --- a/src/core/hle/kernel/k_auto_object.cpp +++ b/src/core/hle/kernel/k_auto_object.cpp @@ -15,8 +15,8 @@ void KAutoObject::RegisterWithKernel() { m_kernel.RegisterKernelObject(this); } -void KAutoObject::UnregisterWithKernel() { - m_kernel.UnregisterKernelObject(this); +void KAutoObject::UnregisterWithKernel(KernelCore& kernel, KAutoObject* self) { + kernel.UnregisterKernelObject(self); } } // namespace Kernel diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h index f384b1568..8d4e0df44 100644 --- a/src/core/hle/kernel/k_auto_object.h +++ b/src/core/hle/kernel/k_auto_object.h @@ -159,14 +159,15 @@ public: // If ref count hits zero, destroy the object. 
if (cur_ref_count - 1 == 0) { + KernelCore& kernel = m_kernel; this->Destroy(); - this->UnregisterWithKernel(); + KAutoObject::UnregisterWithKernel(kernel, this); } } private: void RegisterWithKernel(); - void UnregisterWithKernel(); + static void UnregisterWithKernel(KernelCore& kernel, KAutoObject* self); protected: KernelCore& m_kernel; diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index 75ce5a23c..d8143c650 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -510,11 +510,12 @@ void KScheduler::Unload(KThread* thread) { void KScheduler::Reload(KThread* thread) { auto& cpu_core = m_kernel.System().ArmInterface(m_core_id); + auto* process = thread->GetOwnerProcess(); cpu_core.LoadContext(thread->GetContext32()); cpu_core.LoadContext(thread->GetContext64()); cpu_core.SetTlsAddress(GetInteger(thread->GetTlsAddress())); cpu_core.SetTPIDR_EL0(thread->GetTpidrEl0()); - cpu_core.LoadWatchpointArray(thread->GetOwnerProcess()->GetWatchpoints()); + cpu_core.LoadWatchpointArray(process ? 
&process->GetWatchpoints() : nullptr); cpu_core.ClearExclusiveState(); } diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index adb6ec581..2a105a762 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -129,7 +129,7 @@ Result KThread::Initialize(KThreadFunction func, uintptr_t arg, KProcessAddress case ThreadType::User: ASSERT(((owner == nullptr) || (owner->GetCoreMask() | (1ULL << virt_core)) == owner->GetCoreMask())); - ASSERT(((owner == nullptr) || + ASSERT(((owner == nullptr) || (prio > Svc::LowestThreadPriority) || (owner->GetPriorityMask() | (1ULL << prio)) == owner->GetPriorityMask())); break; case ThreadType::Kernel: diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 805963178..7538c1d23 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -73,7 +73,7 @@ struct Memory::Impl { return {}; } - return system.DeviceMemory().GetPointer(paddr) + vaddr; + return system.DeviceMemory().GetPointer(paddr + vaddr); } [[nodiscard]] u8* GetPointerFromDebugMemory(u64 vaddr) const { @@ -84,7 +84,7 @@ struct Memory::Impl { return {}; } - return system.DeviceMemory().GetPointer(paddr) + vaddr; + return system.DeviceMemory().GetPointer(paddr + vaddr); } u8 Read8(const Common::ProcessAddress addr) { @@ -204,7 +204,8 @@ struct Memory::Impl { break; } case Common::PageType::Memory: { - u8* mem_ptr = pointer + page_offset + (page_index << YUZU_PAGEBITS); + u8* mem_ptr = + reinterpret_cast<u8*>(pointer + page_offset + (page_index << YUZU_PAGEBITS)); on_memory(copy_amount, mem_ptr); break; } @@ -448,7 +449,7 @@ struct Memory::Impl { break; case Common::PageType::Memory: current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store( - nullptr, Common::PageType::DebugMemory); + 0, Common::PageType::DebugMemory); break; default: UNREACHABLE(); @@ -466,7 +467,8 @@ struct Memory::Impl { case Common::PageType::DebugMemory: { u8* const pointer{GetPointerFromDebugMemory(vaddr & ~YUZU_PAGEMASK)}; 
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store( - pointer - (vaddr & ~YUZU_PAGEMASK), Common::PageType::Memory); + reinterpret_cast<uintptr_t>(pointer) - (vaddr & ~YUZU_PAGEMASK), + Common::PageType::Memory); break; } default: @@ -506,7 +508,7 @@ struct Memory::Impl { case Common::PageType::DebugMemory: case Common::PageType::Memory: current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store( - nullptr, Common::PageType::RasterizerCachedMemory); + 0, Common::PageType::RasterizerCachedMemory); break; case Common::PageType::RasterizerCachedMemory: // There can be more than one GPU region mapped per CPU region, so it's common @@ -534,10 +536,11 @@ struct Memory::Impl { // pagetable after unmapping a VMA. In that case the underlying VMA will no // longer exist, and we should just leave the pagetable entry blank. current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store( - nullptr, Common::PageType::Unmapped); + 0, Common::PageType::Unmapped); } else { current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store( - pointer - (vaddr & ~YUZU_PAGEMASK), Common::PageType::Memory); + reinterpret_cast<uintptr_t>(pointer) - (vaddr & ~YUZU_PAGEMASK), + Common::PageType::Memory); } break; } @@ -584,7 +587,7 @@ struct Memory::Impl { "Mapping memory page without a pointer @ {:016x}", base * YUZU_PAGESIZE); while (base != end) { - page_table.pointers[base].Store(nullptr, type); + page_table.pointers[base].Store(0, type); page_table.backing_addr[base] = 0; page_table.blocks[base] = 0; base += 1; @@ -593,7 +596,8 @@ struct Memory::Impl { auto orig_base = base; while (base != end) { auto host_ptr = - system.DeviceMemory().GetPointer(target) - (base << YUZU_PAGEBITS); + reinterpret_cast<uintptr_t>(system.DeviceMemory().GetPointer(target)) - + (base << YUZU_PAGEBITS); auto backing = GetInteger(target) - (base << YUZU_PAGEBITS); page_table.pointers[base].Store(host_ptr, type); page_table.backing_addr[base] = backing; @@ -619,8 +623,8 @@ struct Memory::Impl { // Avoid adding any extra logic to this fast-path 
block const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Raw(); - if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { - return &pointer[vaddr]; + if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { + return reinterpret_cast(pointer + vaddr); } switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { case Common::PageType::Unmapped: @@ -814,7 +818,7 @@ bool Memory::IsValidVirtualAddress(const Common::ProcessAddress vaddr) const { return false; } const auto [pointer, type] = page_table.pointers[page].PointerType(); - return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory || + return pointer != 0 || type == Common::PageType::RasterizerCachedMemory || type == Common::PageType::DebugMemory; } diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 1528cc1dd..7047e2e63 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -103,7 +103,9 @@ public: explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, Core::Memory::Memory& cpu_memory_) : rasterizer{rasterizer_}, - cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast(*this), + // Use reinterpret_cast instead of static_cast as workaround for + // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) + cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast(*this), VideoCore::QueryType::SamplesPassed}}} { (void)slot_async_jobs.insert(); // Null value } diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 4a197d65d..f200a650f 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -13,7 +13,8 @@ namespace VideoCore { using namespace Core::Memory; -RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : cpu_memory{cpu_memory_} {} +RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) + : 
cached_pages(std::make_unique()), cpu_memory{cpu_memory_} {} RasterizerAccelerated::~RasterizerAccelerated() = default; @@ -26,7 +27,7 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del std::atomic_thread_fence(std::memory_order_acquire); const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) { - std::atomic_uint16_t& count = cached_pages.at(page >> 2).Count(page); + std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); if (delta > 0) { ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!"); diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 7118b8aff..e6c0ea87a 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -41,7 +41,8 @@ private: }; static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); - std::array cached_pages; + using CachedPages = std::array; + std::unique_ptr cached_pages; Core::Memory::Memory& cpu_memory; };