From 2bb7f89c3021e3d19b446ba6e816a2769dff3574 Mon Sep 17 00:00:00 2001 From: GPUCode <47210458+GPUCode@users.noreply.github.com> Date: Thu, 28 Dec 2023 12:46:57 +0200 Subject: [PATCH] video_core: Refactor GPU interface (#7272) * video_core: Refactor GPU interface * citra_qt: Better debug widget lifetime --- CMakeModules/GenerateSCMRev.cmake | 18 +- .../src/main/jni/emu_window/emu_window.cpp | 1 - .../app/src/main/jni/emu_window/emu_window.h | 4 + .../src/main/jni/emu_window/emu_window_gl.cpp | 16 +- .../src/main/jni/emu_window/emu_window_gl.h | 7 +- .../src/main/jni/emu_window/emu_window_vk.cpp | 1 - src/android/app/src/main/jni/native.cpp | 30 +- src/citra/citra.cpp | 8 +- src/citra/emu_window/emu_window_sdl2_gl.cpp | 5 +- src/citra/emu_window/emu_window_sdl2_sw.cpp | 3 +- src/citra_qt/bootmanager.cpp | 15 +- .../configuration/configure_graphics.cpp | 1 - src/citra_qt/debugger/graphics/graphics.cpp | 40 +- src/citra_qt/debugger/graphics/graphics.h | 12 +- .../graphics/graphics_breakpoint_observer.cpp | 3 +- .../graphics/graphics_breakpoint_observer.h | 6 +- .../graphics/graphics_breakpoints.cpp | 4 +- .../debugger/graphics/graphics_breakpoints.h | 6 +- .../debugger/graphics/graphics_cmdlists.cpp | 16 +- .../debugger/graphics/graphics_surface.cpp | 30 +- .../debugger/graphics/graphics_surface.h | 10 +- .../debugger/graphics/graphics_tracing.cpp | 68 +- .../debugger/graphics/graphics_tracing.h | 12 +- .../graphics/graphics_vertex_shader.cpp | 44 +- .../graphics/graphics_vertex_shader.h | 16 +- src/citra_qt/main.cpp | 28 +- src/common/CMakeLists.txt | 18 +- src/common/memory_ref.h | 6 +- src/core/CMakeLists.txt | 8 +- src/core/cheats/cheats.cpp | 1 - src/core/core.cpp | 56 +- src/core/core.h | 10 +- src/core/core_timing.h | 4 + src/core/dumping/ffmpeg_backend.cpp | 12 +- src/core/dumping/ffmpeg_backend.h | 7 +- src/core/hle/service/gsp/gsp.cpp | 22 +- src/core/hle/service/gsp/gsp.h | 13 - src/core/hle/service/gsp/gsp_command.h | 110 +++ 
src/core/hle/service/gsp/gsp_gpu.cpp | 436 +++-------- src/core/hle/service/gsp/gsp_gpu.h | 146 +--- src/core/hle/service/gsp/gsp_interrupt.h | 42 ++ src/core/hle/service/hid/hid.cpp | 1 - src/core/hw/gpu.cpp | 572 --------------- src/core/hw/gpu.h | 344 --------- src/core/hw/hw.cpp | 102 --- src/core/hw/hw.h | 54 -- src/core/hw/lcd.cpp | 76 -- src/core/hw/y2r.cpp | 2 +- src/core/memory.cpp | 134 ++-- src/core/memory.h | 25 - src/core/movie.cpp | 5 +- src/core/perf_stats.cpp | 5 +- src/core/tracer/citrace.h | 11 +- src/core/tracer/recorder.cpp | 29 +- src/core/tracer/recorder.h | 5 +- .../video_core/shader/shader_jit_compiler.cpp | 57 +- src/video_core/CMakeLists.txt | 48 +- src/video_core/command_processor.cpp | 677 ------------------ src/video_core/command_processor.h | 37 - src/video_core/debug_utils/debug_utils.cpp | 182 +---- src/video_core/debug_utils/debug_utils.h | 61 +- src/video_core/gpu.cpp | 419 +++++++++++ src/video_core/gpu.h | 113 +++ src/video_core/gpu_debugger.h | 30 +- src/video_core/pica.cpp | 70 -- src/video_core/pica.h | 16 - .../{ => pica}/geometry_pipeline.cpp | 139 ++-- src/video_core/{ => pica}/geometry_pipeline.h | 28 +- src/video_core/pica/output_vertex.cpp | 50 ++ src/video_core/pica/output_vertex.h | 48 ++ src/video_core/pica/packed_attribute.h | 74 ++ src/video_core/pica/pica_core.cpp | 592 +++++++++++++++ src/video_core/pica/pica_core.h | 287 ++++++++ src/video_core/pica/primitive_assembly.cpp | 53 ++ .../{ => pica}/primitive_assembly.h | 40 +- src/video_core/pica/regs_external.h | 217 ++++++ src/video_core/{ => pica}/regs_framebuffer.h | 0 .../{regs.cpp => pica/regs_internal.cpp} | 20 +- .../{regs.h => pica/regs_internal.h} | 23 +- .../hw/lcd.h => video_core/pica/regs_lcd.h} | 53 +- src/video_core/{ => pica}/regs_lighting.h | 12 +- src/video_core/{ => pica}/regs_pipeline.h | 20 +- src/video_core/{ => pica}/regs_rasterizer.h | 11 + src/video_core/{ => pica}/regs_shader.h | 17 +- src/video_core/{ => pica}/regs_texturing.h | 0 
src/video_core/pica/shader_setup.cpp | 61 ++ src/video_core/pica/shader_setup.h | 103 +++ src/video_core/pica/shader_unit.cpp | 63 ++ src/video_core/pica/shader_unit.h | 120 ++++ src/video_core/pica/vertex_loader.cpp | 109 +++ src/video_core/pica/vertex_loader.h | 47 ++ src/video_core/pica_state.h | 255 ------- src/video_core/primitive_assembly.cpp | 87 --- src/video_core/rasterizer_accelerated.cpp | 18 +- src/video_core/rasterizer_accelerated.h | 20 +- .../rasterizer_cache/framebuffer_base.h | 2 +- .../rasterizer_cache/pixel_format.cpp | 13 +- .../rasterizer_cache/pixel_format.h | 11 +- .../rasterizer_cache/rasterizer_cache.h | 16 +- .../rasterizer_cache/rasterizer_cache_base.h | 17 +- .../rasterizer_cache/sampler_params.h | 2 +- .../rasterizer_cache/texture_cube.h | 2 +- src/video_core/rasterizer_interface.h | 23 +- src/video_core/renderer_base.cpp | 4 - .../renderer_opengl/gl_rasterizer.cpp | 48 +- .../renderer_opengl/gl_rasterizer.h | 18 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 2 +- .../renderer_opengl/gl_shader_disk_cache.h | 13 +- .../renderer_opengl/gl_shader_manager.cpp | 51 +- .../renderer_opengl/gl_shader_manager.h | 14 +- src/video_core/renderer_opengl/pica_to_gl.h | 6 +- .../renderer_opengl/renderer_opengl.cpp | 98 ++- .../renderer_opengl/renderer_opengl.h | 19 +- .../renderer_software/renderer_software.cpp | 36 +- .../renderer_software/renderer_software.h | 4 +- .../renderer_software/sw_blitter.cpp | 346 +++++++++ src/video_core/renderer_software/sw_blitter.h | 38 + .../renderer_software/sw_clipper.cpp | 2 +- .../renderer_software/sw_framebuffer.cpp | 8 +- .../renderer_software/sw_framebuffer.h | 2 +- .../renderer_software/sw_lighting.cpp | 4 +- .../renderer_software/sw_lighting.h | 4 +- .../renderer_software/sw_proctex.cpp | 8 +- src/video_core/renderer_software/sw_proctex.h | 4 +- .../renderer_software/sw_rasterizer.cpp | 26 +- .../renderer_software/sw_rasterizer.h | 20 +- .../renderer_software/sw_texturing.cpp | 2 +- 
.../renderer_software/sw_texturing.h | 2 +- src/video_core/renderer_vulkan/pica_to_vk.h | 14 +- .../renderer_vulkan/renderer_vulkan.cpp | 68 +- .../renderer_vulkan/renderer_vulkan.h | 26 +- .../renderer_vulkan/vk_graphics_pipeline.h | 4 +- src/video_core/renderer_vulkan/vk_instance.h | 2 +- .../renderer_vulkan/vk_master_semaphore.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 8 +- .../renderer_vulkan/vk_pipeline_cache.h | 13 +- .../renderer_vulkan/vk_rasterizer.cpp | 32 +- .../renderer_vulkan/vk_rasterizer.h | 17 +- .../shader/generator/glsl_fs_shader_gen.cpp | 4 +- .../generator/glsl_shader_decompiler.cpp | 22 +- .../shader/generator/glsl_shader_decompiler.h | 7 +- .../shader/generator/glsl_shader_gen.cpp | 5 +- .../shader/generator/glsl_shader_gen.h | 14 +- .../shader/generator/pica_fs_config.cpp | 4 +- .../shader/generator/pica_fs_config.h | 7 +- .../shader/generator/shader_gen.cpp | 16 +- src/video_core/shader/generator/shader_gen.h | 15 +- .../shader/generator/shader_uniforms.cpp | 6 +- .../shader/generator/shader_uniforms.h | 11 +- .../shader/generator/spv_fs_shader_gen.cpp | 1 + .../shader/generator/spv_fs_shader_gen.h | 8 +- src/video_core/shader/shader.cpp | 161 +---- src/video_core/shader/shader.h | 303 +------- src/video_core/shader/shader_interpreter.cpp | 41 +- src/video_core/shader/shader_interpreter.h | 9 +- src/video_core/shader/shader_jit.cpp | 15 +- src/video_core/shader/shader_jit.h | 2 +- .../shader/shader_jit_a64_compiler.cpp | 41 +- .../shader/shader_jit_a64_compiler.h | 8 +- .../shader/shader_jit_x64_compiler.cpp | 50 +- .../shader/shader_jit_x64_compiler.h | 10 +- src/video_core/texture/texture_decode.cpp | 3 +- src/video_core/texture/texture_decode.h | 6 +- src/video_core/vertex_loader.cpp | 161 ----- src/video_core/vertex_loader.h | 42 -- src/video_core/video_core.cpp | 51 +- src/video_core/video_core.h | 28 +- 167 files changed, 4172 insertions(+), 4866 deletions(-) create mode 100644 src/core/hle/service/gsp/gsp_command.h create 
mode 100644 src/core/hle/service/gsp/gsp_interrupt.h delete mode 100644 src/core/hw/gpu.cpp delete mode 100644 src/core/hw/gpu.h delete mode 100644 src/core/hw/hw.cpp delete mode 100644 src/core/hw/hw.h delete mode 100644 src/core/hw/lcd.cpp delete mode 100644 src/video_core/command_processor.cpp delete mode 100644 src/video_core/command_processor.h create mode 100644 src/video_core/gpu.cpp create mode 100644 src/video_core/gpu.h delete mode 100644 src/video_core/pica.cpp delete mode 100644 src/video_core/pica.h rename src/video_core/{ => pica}/geometry_pipeline.cpp (74%) rename src/video_core/{ => pica}/geometry_pipeline.h (70%) create mode 100644 src/video_core/pica/output_vertex.cpp create mode 100644 src/video_core/pica/output_vertex.h create mode 100644 src/video_core/pica/packed_attribute.h create mode 100644 src/video_core/pica/pica_core.cpp create mode 100644 src/video_core/pica/pica_core.h create mode 100644 src/video_core/pica/primitive_assembly.cpp rename src/video_core/{ => pica}/primitive_assembly.h (69%) create mode 100644 src/video_core/pica/regs_external.h rename src/video_core/{ => pica}/regs_framebuffer.h (100%) rename src/video_core/{regs.cpp => pica/regs_internal.cpp} (97%) rename src/video_core/{regs.h => pica/regs_internal.h} (87%) rename src/{core/hw/lcd.h => video_core/pica/regs_lcd.h} (64%) rename src/video_core/{ => pica}/regs_lighting.h (96%) rename src/video_core/{ => pica}/regs_pipeline.h (94%) rename src/video_core/{ => pica}/regs_rasterizer.h (88%) rename src/video_core/{ => pica}/regs_shader.h (89%) rename src/video_core/{ => pica}/regs_texturing.h (100%) create mode 100644 src/video_core/pica/shader_setup.cpp create mode 100644 src/video_core/pica/shader_setup.h create mode 100644 src/video_core/pica/shader_unit.cpp create mode 100644 src/video_core/pica/shader_unit.h create mode 100644 src/video_core/pica/vertex_loader.cpp create mode 100644 src/video_core/pica/vertex_loader.h delete mode 100644 src/video_core/pica_state.h delete 
mode 100644 src/video_core/primitive_assembly.cpp create mode 100644 src/video_core/renderer_software/sw_blitter.cpp create mode 100644 src/video_core/renderer_software/sw_blitter.h delete mode 100644 src/video_core/vertex_loader.cpp delete mode 100644 src/video_core/vertex_loader.h diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index baf13335e..ad85ae0b7 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -26,16 +26,14 @@ set(HASH_FILES "${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.h" "${VIDEO_CORE}/shader/shader.cpp" "${VIDEO_CORE}/shader/shader.h" - "${VIDEO_CORE}/pica.cpp" - "${VIDEO_CORE}/pica.h" - "${VIDEO_CORE}/regs_framebuffer.h" - "${VIDEO_CORE}/regs_lighting.h" - "${VIDEO_CORE}/regs_pipeline.h" - "${VIDEO_CORE}/regs_rasterizer.h" - "${VIDEO_CORE}/regs_shader.h" - "${VIDEO_CORE}/regs_texturing.h" - "${VIDEO_CORE}/regs.cpp" - "${VIDEO_CORE}/regs.h" + "${VIDEO_CORE}/pica/regs_framebuffer.h" + "${VIDEO_CORE}/pica/regs_lighting.h" + "${VIDEO_CORE}/pica/regs_pipeline.h" + "${VIDEO_CORE}/pica/regs_rasterizer.h" + "${VIDEO_CORE}/pica/regs_shader.h" + "${VIDEO_CORE}/pica/regs_texturing.h" + "${VIDEO_CORE}/pica/regs_internal.cpp" + "${VIDEO_CORE}/pica/regs_internal.h" ) set(COMBINED "") foreach (F IN LISTS HASH_FILES) diff --git a/src/android/app/src/main/jni/emu_window/emu_window.cpp b/src/android/app/src/main/jni/emu_window/emu_window.cpp index 5e53a9282..436442acd 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window.cpp +++ b/src/android/app/src/main/jni/emu_window/emu_window.cpp @@ -15,7 +15,6 @@ #include "jni/input_manager.h" #include "network/network.h" #include "video_core/renderer_base.h" -#include "video_core/video_core.h" static bool IsPortraitMode() { return JNI_FALSE != IDCache::GetEnvForThread()->CallStaticBooleanMethod( diff --git a/src/android/app/src/main/jni/emu_window/emu_window.h b/src/android/app/src/main/jni/emu_window/emu_window.h index 
3dd9f30df..64ead5c7f 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window.h +++ b/src/android/app/src/main/jni/emu_window/emu_window.h @@ -7,6 +7,10 @@ #include #include "core/frontend/emu_window.h" +namespace Core { +class System; +} + class EmuWindow_Android : public Frontend::EmuWindow { public: EmuWindow_Android(ANativeWindow* surface); diff --git a/src/android/app/src/main/jni/emu_window/emu_window_gl.cpp b/src/android/app/src/main/jni/emu_window/emu_window_gl.cpp index b910c3184..f3bb97dc3 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window_gl.cpp +++ b/src/android/app/src/main/jni/emu_window/emu_window_gl.cpp @@ -12,10 +12,11 @@ #include "common/logging/log.h" #include "common/settings.h" +#include "core/core.h" #include "input_common/main.h" #include "jni/emu_window/emu_window_gl.h" +#include "video_core/gpu.h" #include "video_core/renderer_base.h" -#include "video_core/video_core.h" static constexpr std::array egl_attribs{EGL_SURFACE_TYPE, EGL_WINDOW_BIT, @@ -71,8 +72,8 @@ private: EGLContext egl_context{}; }; -EmuWindow_Android_OpenGL::EmuWindow_Android_OpenGL(ANativeWindow* surface) - : EmuWindow_Android{surface} { +EmuWindow_Android_OpenGL::EmuWindow_Android_OpenGL(Core::System& system_, ANativeWindow* surface) + : EmuWindow_Android{surface}, system{system_} { if (egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); egl_display == EGL_NO_DISPLAY) { LOG_CRITICAL(Frontend, "eglGetDisplay() failed"); return; @@ -199,6 +200,9 @@ void EmuWindow_Android_OpenGL::StopPresenting() { } void EmuWindow_Android_OpenGL::TryPresenting() { + if (!system.IsPoweredOn()) { + return; + } if (presenting_state == PresentingState::Initial) [[unlikely]] { eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); @@ -208,8 +212,6 @@ void EmuWindow_Android_OpenGL::TryPresenting() { return; } eglSwapInterval(egl_display, Settings::values.use_vsync_new ? 
1 : 0); - if (VideoCore::g_renderer) { - VideoCore::g_renderer->TryPresent(0); - eglSwapBuffers(egl_display, egl_surface); - } + system.GPU().Renderer().TryPresent(0); + eglSwapBuffers(egl_display, egl_surface); } diff --git a/src/android/app/src/main/jni/emu_window/emu_window_gl.h b/src/android/app/src/main/jni/emu_window/emu_window_gl.h index f92950b94..a705174ac 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window_gl.h +++ b/src/android/app/src/main/jni/emu_window/emu_window_gl.h @@ -11,11 +11,15 @@ #include "jni/emu_window/emu_window.h" +namespace Core { +class System; +} + struct ANativeWindow; class EmuWindow_Android_OpenGL : public EmuWindow_Android { public: - EmuWindow_Android_OpenGL(ANativeWindow* surface); + EmuWindow_Android_OpenGL(Core::System& system, ANativeWindow* surface); ~EmuWindow_Android_OpenGL() override = default; void TryPresenting() override; @@ -30,6 +34,7 @@ private: void DestroyContext() override; private: + Core::System& system; EGLConfig egl_config; EGLSurface egl_surface{}; EGLContext egl_context{}; diff --git a/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp b/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp index 70b216197..238e1ae1a 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp +++ b/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp @@ -7,7 +7,6 @@ #include "common/logging/log.h" #include "common/settings.h" #include "jni/emu_window/emu_window_vk.h" -#include "video_core/video_core.h" class GraphicsContext_Android final : public Frontend::GraphicsContext { public: diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index 68049a95e..84fcf6337 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -51,8 +51,9 @@ #include "jni/id_cache.h" #include "jni/input_manager.h" #include "jni/ndk_motion.h" +#include "video_core/debug_utils/debug_utils.h" +#include "video_core/gpu.h" #include 
"video_core/renderer_base.h" -#include "video_core/video_core.h" #if CITRA_ARCH(arm64) #include @@ -126,7 +127,7 @@ static bool CheckMicPermission() { static Core::System::ResultStatus RunCitra(const std::string& filepath) { // Citra core only supports a single running instance - std::lock_guard lock(running_mutex); + std::scoped_lock lock(running_mutex); LOG_INFO(Frontend, "Citra starting..."); @@ -137,10 +138,12 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) { return Core::System::ResultStatus::ErrorLoader; } + Core::System& system{Core::System::GetInstance()}; + const auto graphics_api = Settings::values.graphics_api.GetValue(); switch (graphics_api) { case Settings::GraphicsAPI::OpenGL: - window = std::make_unique(s_surf); + window = std::make_unique(system, s_surf); break; case Settings::GraphicsAPI::Vulkan: window = std::make_unique(s_surf, vulkan_library); @@ -150,8 +153,6 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) { window = std::make_unique(s_surf, vulkan_library); } - Core::System& system{Core::System::GetInstance()}; - // Forces a config reload on game boot, if the user changed settings in the UI Config{}; // Replace with game-specific settings @@ -179,6 +180,7 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) { // Register microphone permission check system.RegisterMicPermissionCheck(&CheckMicPermission); + Pica::g_debug_context = Pica::DebugContext::Construct(); InputManager::Init(); window->MakeCurrent(); @@ -196,7 +198,7 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) { LoadDiskCacheProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); std::unique_ptr cpu_context; - system.Renderer().Rasterizer()->LoadDiskResources(stop_run, &LoadDiskCacheProgress); + system.GPU().Renderer().Rasterizer()->LoadDiskResources(stop_run, &LoadDiskCacheProgress); LoadDiskCacheProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); @@ -275,8 +277,10 @@ void 
Java_org_citra_citra_1emu_NativeLibrary_surfaceChanged(JNIEnv* env, if (window) { window->OnSurfaceChanged(s_surf); } - if (VideoCore::g_renderer) { - VideoCore::g_renderer->NotifySurfaceChanged(); + + auto& system = Core::System::GetInstance(); + if (system.IsPoweredOn()) { + system.GPU().Renderer().NotifySurfaceChanged(); } LOG_INFO(Frontend, "Surface changed"); @@ -311,8 +315,9 @@ void Java_org_citra_citra_1emu_NativeLibrary_notifyOrientationChange([[maybe_unu jint layout_option, jint rotation) { Settings::values.layout_option = static_cast(layout_option); - if (VideoCore::g_renderer) { - VideoCore::g_renderer->UpdateCurrentFramebufferLayout(!(rotation % 2)); + auto& system = Core::System::GetInstance(); + if (system.IsPoweredOn()) { + system.GPU().Renderer().UpdateCurrentFramebufferLayout(!(rotation % 2)); } InputManager::screen_rotation = rotation; Camera::NDK::g_rotation = rotation; @@ -322,8 +327,9 @@ void Java_org_citra_citra_1emu_NativeLibrary_swapScreens([[maybe_unused]] JNIEnv [[maybe_unused]] jobject obj, jboolean swap_screens, jint rotation) { Settings::values.swap_screen = swap_screens; - if (VideoCore::g_renderer) { - VideoCore::g_renderer->UpdateCurrentFramebufferLayout(!(rotation % 2)); + auto& system = Core::System::GetInstance(); + if (system.IsPoweredOn()) { + system.GPU().Renderer().UpdateCurrentFramebufferLayout(!(rotation % 2)); } InputManager::screen_rotation = rotation; Camera::NDK::g_rotation = rotation; diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp index f44d5d887..7288f3407 100644 --- a/src/citra/citra.cpp +++ b/src/citra/citra.cpp @@ -36,6 +36,7 @@ #include "core/telemetry_session.h" #include "input_common/main.h" #include "network/network.h" +#include "video_core/gpu.h" #include "video_core/renderer_base.h" #ifdef __unix__ @@ -438,9 +439,10 @@ int main(int argc, char** argv) { movie.StartRecording(movie_record, movie_record_author); } if (!dump_video.empty() && DynamicLibrary::FFmpeg::LoadFFmpeg()) { + auto& renderer = 
system.GPU().Renderer(); const auto layout{ - Layout::FrameLayoutFromResolutionScale(system.Renderer().GetResolutionScaleFactor())}; - auto dumper = std::make_shared(); + Layout::FrameLayoutFromResolutionScale(renderer.GetResolutionScaleFactor())}; + auto dumper = std::make_shared(renderer); if (dumper->StartDumping(dump_video, layout)) { system.RegisterVideoDumper(dumper); } @@ -458,7 +460,7 @@ int main(int argc, char** argv) { }); std::atomic_bool stop_run; - system.Renderer().Rasterizer()->LoadDiskResources( + system.GPU().Renderer().Rasterizer()->LoadDiskResources( stop_run, [](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { LOG_DEBUG(Frontend, "Loading stage {} progress {} {}", static_cast(stage), value, total); diff --git a/src/citra/emu_window/emu_window_sdl2_gl.cpp b/src/citra/emu_window/emu_window_sdl2_gl.cpp index 956dd6acf..09a7f5995 100644 --- a/src/citra/emu_window/emu_window_sdl2_gl.cpp +++ b/src/citra/emu_window/emu_window_sdl2_gl.cpp @@ -11,8 +11,9 @@ #include "citra/emu_window/emu_window_sdl2_gl.h" #include "common/scm_rev.h" #include "common/settings.h" +#include "core/core.h" +#include "video_core/gpu.h" #include "video_core/renderer_base.h" -#include "video_core/video_core.h" class SDLGLContext : public Frontend::GraphicsContext { public: @@ -159,7 +160,7 @@ void EmuWindow_SDL2_GL::Present() { SDL_GL_MakeCurrent(render_window, window_context); SDL_GL_SetSwapInterval(1); while (IsOpen()) { - VideoCore::g_renderer->TryPresent(100, is_secondary); + system.GPU().Renderer().TryPresent(100, is_secondary); SDL_GL_SwapWindow(render_window); } SDL_GL_MakeCurrent(render_window, nullptr); diff --git a/src/citra/emu_window/emu_window_sdl2_sw.cpp b/src/citra/emu_window/emu_window_sdl2_sw.cpp index f7b6b2bb8..3f2cf6db9 100644 --- a/src/citra/emu_window/emu_window_sdl2_sw.cpp +++ b/src/citra/emu_window/emu_window_sdl2_sw.cpp @@ -13,6 +13,7 @@ #include "common/settings.h" #include "core/core.h" #include "core/frontend/emu_window.h" 
+#include "video_core/gpu.h" #include "video_core/renderer_software/renderer_software.h" class DummyContext : public Frontend::GraphicsContext {}; @@ -94,7 +95,7 @@ void EmuWindow_SDL2_SW::Present() { } SDL_Surface* EmuWindow_SDL2_SW::LoadFramebuffer(VideoCore::ScreenId screen_id) { - const auto& renderer = static_cast(system.Renderer()); + const auto& renderer = static_cast(system.GPU().Renderer()); const auto& info = renderer.Screen(screen_id); const int width = static_cast(info.width); const int height = static_cast(info.height); diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index ca2289b79..17c1b30fd 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -22,9 +22,9 @@ #include "input_common/main.h" #include "input_common/motion_emu.h" #include "video_core/custom_textures/custom_tex_manager.h" +#include "video_core/gpu.h" #include "video_core/renderer_base.h" #include "video_core/renderer_software/renderer_software.h" -#include "video_core/video_core.h" #ifdef HAS_OPENGL #include @@ -73,7 +73,7 @@ void EmuThread::run() { emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); - system.Renderer().Rasterizer()->LoadDiskResources( + system.GPU().Renderer().Rasterizer()->LoadDiskResources( stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { emit LoadProgress(stage, value, total); }); @@ -284,9 +284,7 @@ public: } context->MakeCurrent(); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); - if (VideoCore::g_renderer) { - VideoCore::g_renderer->TryPresent(100, is_secondary); - } + system.GPU().Renderer().TryPresent(100, is_secondary); context->SwapBuffers(); glFinish(); } @@ -367,7 +365,7 @@ struct SoftwareRenderWidget : public RenderWidget { } QImage LoadFramebuffer(VideoCore::ScreenId screen_id) { - const auto& renderer = static_cast(system.Renderer()); + const auto& renderer = static_cast(system.GPU().Renderer()); const auto& info = renderer.Screen(screen_id); const int 
width = static_cast(info.width); const int height = static_cast(info.height); @@ -678,13 +676,14 @@ void GRenderWindow::ReleaseRenderTarget() { } void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { + auto& renderer = system.GPU().Renderer(); if (res_scale == 0) { - res_scale = system.Renderer().GetResolutionScaleFactor(); + res_scale = renderer.GetResolutionScaleFactor(); } const auto layout{Layout::FrameLayoutFromResolutionScale(res_scale, is_secondary)}; screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32); - system.Renderer().RequestScreenshot( + renderer.RequestScreenshot( screenshot_image.bits(), [this, screenshot_path](bool invert_y) { const std::string std_screenshot_path = screenshot_path.toStdString(); diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index 463b9c848..7e2fcaee1 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -20,7 +20,6 @@ ConfigureGraphics::ConfigureGraphics(std::span physical_devices, ui->physical_device_combo->addItem(name); } - ui->toggle_vsync_new->setEnabled(!is_powered_on); ui->graphics_api_combo->setEnabled(!is_powered_on); ui->physical_device_combo->setEnabled(!is_powered_on); ui->toggle_async_shaders->setEnabled(!is_powered_on); diff --git a/src/citra_qt/debugger/graphics/graphics.cpp b/src/citra_qt/debugger/graphics/graphics.cpp index 674b83100..021e1c720 100644 --- a/src/citra_qt/debugger/graphics/graphics.cpp +++ b/src/citra_qt/debugger/graphics/graphics.cpp @@ -5,8 +5,8 @@ #include #include "citra_qt/debugger/graphics/graphics.h" #include "citra_qt/util/util.h" - -extern GraphicsDebugger g_debugger; +#include "core/core.h" +#include "video_core/gpu.h" GPUCommandStreamItemModel::GPUCommandStreamItemModel(QObject* parent) : QAbstractListModel(parent), command_count(0) { @@ -19,19 +19,19 @@ int 
GPUCommandStreamItemModel::rowCount([[maybe_unused]] const QModelIndex& pare } QVariant GPUCommandStreamItemModel::data(const QModelIndex& index, int role) const { - if (!index.isValid()) + if (!index.isValid() || !GetDebugger()) return QVariant(); int command_index = index.row(); const Service::GSP::Command& command = GetDebugger()->ReadGXCommandHistory(command_index); if (role == Qt::DisplayRole) { std::map command_names = { - {Service::GSP::CommandId::REQUEST_DMA, "REQUEST_DMA"}, - {Service::GSP::CommandId::SUBMIT_GPU_CMDLIST, "SUBMIT_GPU_CMDLIST"}, - {Service::GSP::CommandId::SET_MEMORY_FILL, "SET_MEMORY_FILL"}, - {Service::GSP::CommandId::SET_DISPLAY_TRANSFER, "SET_DISPLAY_TRANSFER"}, - {Service::GSP::CommandId::SET_TEXTURE_COPY, "SET_TEXTURE_COPY"}, - {Service::GSP::CommandId::CACHE_FLUSH, "CACHE_FLUSH"}, + {Service::GSP::CommandId::RequestDma, "REQUEST_DMA"}, + {Service::GSP::CommandId::SubmitCmdList, "SUBMIT_GPU_CMDLIST"}, + {Service::GSP::CommandId::MemoryFill, "SET_MEMORY_FILL"}, + {Service::GSP::CommandId::DisplayTransfer, "SET_DISPLAY_TRANSFER"}, + {Service::GSP::CommandId::TextureCopy, "SET_TEXTURE_COPY"}, + {Service::GSP::CommandId::CacheFlush, "CACHE_FLUSH"}, }; const u32* command_data = reinterpret_cast(&command); QString str = QStringLiteral("%1 %2 %3 %4 %5 %6 %7 %8 %9") @@ -63,8 +63,8 @@ void GPUCommandStreamItemModel::OnGXCommandFinishedInternal(int total_command_co emit dataChanged(index(prev_command_count, 0), index(total_command_count - 1, 0)); } -GPUCommandStreamWidget::GPUCommandStreamWidget(QWidget* parent) - : QDockWidget(tr("Graphics Debugger"), parent), model(this) { +GPUCommandStreamWidget::GPUCommandStreamWidget(Core::System& system_, QWidget* parent) + : QDockWidget(tr("Graphics Debugger"), parent), system{system_}, model(this) { setObjectName(QStringLiteral("GraphicsDebugger")); auto* command_list = new QListView; @@ -74,12 +74,26 @@ GPUCommandStreamWidget::GPUCommandStreamWidget(QWidget* parent) setWidget(command_list); } +void 
GPUCommandStreamWidget::Register() { + auto& debugger = system.GPU().Debugger(); + debugger.RegisterObserver(&model); +} + +void GPUCommandStreamWidget::Unregister() { + auto& debugger = system.GPU().Debugger(); + debugger.UnregisterObserver(&model); +} + void GPUCommandStreamWidget::showEvent(QShowEvent* event) { - g_debugger.RegisterObserver(&model); + if (system.IsPoweredOn()) { + Register(); + } QDockWidget::showEvent(event); } void GPUCommandStreamWidget::hideEvent(QHideEvent* event) { - g_debugger.UnregisterObserver(&model); + if (system.IsPoweredOn()) { + Unregister(); + } QDockWidget::hideEvent(event); } diff --git a/src/citra_qt/debugger/graphics/graphics.h b/src/citra_qt/debugger/graphics/graphics.h index 9193c2821..67be1f77c 100644 --- a/src/citra_qt/debugger/graphics/graphics.h +++ b/src/citra_qt/debugger/graphics/graphics.h @@ -8,8 +8,12 @@ #include #include "video_core/gpu_debugger.h" +namespace Core { +class System; +} + class GPUCommandStreamItemModel : public QAbstractListModel, - public GraphicsDebugger::DebuggerObserver { + public VideoCore::GraphicsDebugger::DebuggerObserver { Q_OBJECT public: @@ -35,12 +39,16 @@ class GPUCommandStreamWidget : public QDockWidget { Q_OBJECT public: - GPUCommandStreamWidget(QWidget* parent = nullptr); + GPUCommandStreamWidget(Core::System& system, QWidget* parent = nullptr); + + void Register(); + void Unregister(); protected: void showEvent(QShowEvent* event) override; void hideEvent(QHideEvent* event) override; private: + Core::System& system; GPUCommandStreamItemModel model; }; diff --git a/src/citra_qt/debugger/graphics/graphics_breakpoint_observer.cpp b/src/citra_qt/debugger/graphics/graphics_breakpoint_observer.cpp index 3d38b6a35..5d4327610 100644 --- a/src/citra_qt/debugger/graphics/graphics_breakpoint_observer.cpp +++ b/src/citra_qt/debugger/graphics/graphics_breakpoint_observer.cpp @@ -18,7 +18,8 @@ BreakPointObserverDock::BreakPointObserverDock(std::shared_ptr debug_context, const QString& title, 
QWidget* parent = nullptr); - void OnPicaBreakPointHit(Pica::DebugContext::Event event, void* data) override; + void OnPicaBreakPointHit(Pica::DebugContext::Event event, const void* data) override; void OnPicaResume() override; signals: void Resumed(); - void BreakPointHit(Pica::DebugContext::Event event, void* data); + void BreakPointHit(Pica::DebugContext::Event event, const void* data); private: - virtual void OnBreakPointHit(Pica::DebugContext::Event event, void* data) = 0; + virtual void OnBreakPointHit(Pica::DebugContext::Event event, const void* data) = 0; virtual void OnResumed() = 0; }; diff --git a/src/citra_qt/debugger/graphics/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics/graphics_breakpoints.cpp index 081547ef1..9835b514e 100644 --- a/src/citra_qt/debugger/graphics/graphics_breakpoints.cpp +++ b/src/citra_qt/debugger/graphics/graphics_breakpoints.cpp @@ -191,12 +191,12 @@ GraphicsBreakPointsWidget::GraphicsBreakPointsWidget( setWidget(main_widget); } -void GraphicsBreakPointsWidget::OnPicaBreakPointHit(Event event, void* data) { +void GraphicsBreakPointsWidget::OnPicaBreakPointHit(Event event, const void* data) { // Process in GUI thread emit BreakPointHit(event, data); } -void GraphicsBreakPointsWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { +void GraphicsBreakPointsWidget::OnBreakPointHit(Pica::DebugContext::Event event, const void* data) { status_text->setText(tr("Emulation halted at breakpoint")); resume_button->setEnabled(true); } diff --git a/src/citra_qt/debugger/graphics/graphics_breakpoints.h b/src/citra_qt/debugger/graphics/graphics_breakpoints.h index 13fc84e74..dd2a7341e 100644 --- a/src/citra_qt/debugger/graphics/graphics_breakpoints.h +++ b/src/citra_qt/debugger/graphics/graphics_breakpoints.h @@ -23,16 +23,16 @@ public: explicit GraphicsBreakPointsWidget(std::shared_ptr debug_context, QWidget* parent = nullptr); - void OnPicaBreakPointHit(Pica::DebugContext::Event event, void* data) override; + void 
OnPicaBreakPointHit(Pica::DebugContext::Event event, const void* data) override; void OnPicaResume() override; signals: void Resumed(); - void BreakPointHit(Pica::DebugContext::Event event, void* data); + void BreakPointHit(Pica::DebugContext::Event event, const void* data); void BreakPointsChanged(const QModelIndex& topLeft, const QModelIndex& bottomRight); private: - void OnBreakPointHit(Pica::DebugContext::Event event, void* data); + void OnBreakPointHit(Pica::DebugContext::Event event, const void* data); void OnItemDoubleClicked(const QModelIndex&); void OnResumeRequested(); void OnResumed(); diff --git a/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp index aaf733a7d..94a66120f 100644 --- a/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp @@ -19,8 +19,8 @@ #include "core/core.h" #include "core/memory.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/pica_state.h" -#include "video_core/regs.h" +#include "video_core/gpu.h" +#include "video_core/pica/pica_core.h" #include "video_core/texture/texture_decode.h" namespace { @@ -73,7 +73,7 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const { if (role == Qt::DisplayRole) { switch (index.column()) { case 0: - return QString::fromLatin1(Pica::Regs::GetRegisterName(write.cmd_id)); + return QString::fromLatin1(Pica::RegsInternal::GetRegisterName(write.cmd_id)); case 1: return QStringLiteral("%1").arg(write.cmd_id, 3, 16, QLatin1Char('0')); case 2: @@ -119,8 +119,7 @@ void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace& } #define COMMAND_IN_RANGE(cmd_id, reg_name) \ - (cmd_id >= PICA_REG_INDEX(reg_name) && \ - cmd_id < PICA_REG_INDEX(reg_name) + sizeof(decltype(Pica::g_state.regs.reg_name)) / 4) + (cmd_id >= PICA_REG_INDEX(reg_name) && cmd_id <= PICA_REG_INDEX(reg_name)) void 
GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { const unsigned int command_id = @@ -147,13 +146,13 @@ void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { QWidget* new_info_widget = nullptr; - const unsigned int command_id = + const u32 command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt(); if (COMMAND_IN_RANGE(command_id, texturing.texture0) || COMMAND_IN_RANGE(command_id, texturing.texture1) || COMMAND_IN_RANGE(command_id, texturing.texture2)) { - unsigned texture_index; + u32 texture_index; if (COMMAND_IN_RANGE(command_id, texturing.texture0)) { texture_index = 0; } else if (COMMAND_IN_RANGE(command_id, texturing.texture1)) { @@ -162,7 +161,8 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { texture_index = 2; } - const auto texture = Pica::g_state.regs.texturing.GetTextures()[texture_index]; + auto& pica = system.GPU().PicaCore(); + const auto texture = pica.regs.internal.texturing.GetTextures()[texture_index]; const auto config = texture.config; const auto format = texture.format; diff --git a/src/citra_qt/debugger/graphics/graphics_surface.cpp b/src/citra_qt/debugger/graphics/graphics_surface.cpp index 13d6556cd..4983d2475 100644 --- a/src/citra_qt/debugger/graphics/graphics_surface.cpp +++ b/src/citra_qt/debugger/graphics/graphics_surface.cpp @@ -15,10 +15,10 @@ #include "citra_qt/debugger/graphics/graphics_surface.h" #include "citra_qt/util/spinbox.h" #include "common/color.h" +#include "core/core.h" #include "core/memory.h" -#include "video_core/pica_state.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_texturing.h" +#include "video_core/gpu.h" +#include "video_core/pica/pica_core.h" #include "video_core/texture/texture_decode.h" #include "video_core/utils.h" @@ -49,10 +49,10 @@ void SurfacePicture::mouseMoveEvent(QMouseEvent* event) { 
mousePressEvent(event); } -GraphicsSurfaceWidget::GraphicsSurfaceWidget(Memory::MemorySystem& memory_, +GraphicsSurfaceWidget::GraphicsSurfaceWidget(Core::System& system_, std::shared_ptr debug_context, QWidget* parent) - : BreakPointObserverDock(debug_context, tr("Pica Surface Viewer"), parent), memory{memory_}, + : BreakPointObserverDock(debug_context, tr("Pica Surface Viewer"), parent), system{system_}, surface_source(Source::ColorBuffer) { setObjectName(QStringLiteral("PicaSurface")); @@ -214,7 +214,7 @@ GraphicsSurfaceWidget::GraphicsSurfaceWidget(Memory::MemorySystem& memory_, } } -void GraphicsSurfaceWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { +void GraphicsSurfaceWidget::OnBreakPointHit(Pica::DebugContext::Event event, const void* data) { emit Update(); widget()->setEnabled(true); } @@ -289,7 +289,7 @@ void GraphicsSurfaceWidget::Pick(int x, int y) { return; } - const u8* buffer = memory.GetPhysicalPointer(surface_address); + const u8* buffer = system.Memory().GetPhysicalPointer(surface_address); if (!buffer) { surface_info_label->setText(tr("(unable to access pixel data)")); surface_info_label->setAlignment(Qt::AlignCenter); @@ -410,13 +410,13 @@ void GraphicsSurfaceWidget::Pick(int x, int y) { void GraphicsSurfaceWidget::OnUpdate() { QPixmap pixmap; + const auto& regs = system.GPU().PicaCore().regs.internal; switch (surface_source) { case Source::ColorBuffer: { // TODO: Store a reference to the registers in the debug context instead of accessing them // directly... 
- const auto& framebuffer = Pica::g_state.regs.framebuffer.framebuffer; - + const auto& framebuffer = regs.framebuffer.framebuffer; surface_address = framebuffer.GetColorBufferPhysicalAddress(); surface_width = framebuffer.GetWidth(); surface_height = framebuffer.GetHeight(); @@ -451,8 +451,7 @@ void GraphicsSurfaceWidget::OnUpdate() { } case Source::DepthBuffer: { - const auto& framebuffer = Pica::g_state.regs.framebuffer.framebuffer; - + const auto& framebuffer = regs.framebuffer.framebuffer; surface_address = framebuffer.GetDepthBufferPhysicalAddress(); surface_width = framebuffer.GetWidth(); surface_height = framebuffer.GetHeight(); @@ -479,8 +478,7 @@ void GraphicsSurfaceWidget::OnUpdate() { } case Source::StencilBuffer: { - const auto& framebuffer = Pica::g_state.regs.framebuffer.framebuffer; - + const auto& framebuffer = regs.framebuffer.framebuffer; surface_address = framebuffer.GetDepthBufferPhysicalAddress(); surface_width = framebuffer.GetWidth(); surface_height = framebuffer.GetHeight(); @@ -513,7 +511,7 @@ void GraphicsSurfaceWidget::OnUpdate() { break; } - const auto texture = Pica::g_state.regs.texturing.GetTextures()[texture_index]; + const auto texture = regs.texturing.GetTextures()[texture_index]; auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format); surface_address = info.physical_address; @@ -545,7 +543,7 @@ void GraphicsSurfaceWidget::OnUpdate() { // TODO: Implement a good way to visualize alpha components! 
QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); - const u8* buffer = memory.GetPhysicalPointer(surface_address); + const u8* buffer = system.Memory().GetPhysicalPointer(surface_address); if (!buffer) { surface_picture_label->hide(); @@ -681,7 +679,7 @@ void GraphicsSurfaceWidget::SaveSurface() { tr("Failed to save surface data to file '%1'").arg(filename)); } } else if (selected_filter == bin_filter) { - const u8* const buffer = memory.GetPhysicalPointer(surface_address); + const u8* const buffer = system.Memory().GetPhysicalPointer(surface_address); ASSERT_MSG(buffer, "Memory not accessible"); QFile file{filename}; diff --git a/src/citra_qt/debugger/graphics/graphics_surface.h b/src/citra_qt/debugger/graphics/graphics_surface.h index 7fd6b8111..e5dbc30ec 100644 --- a/src/citra_qt/debugger/graphics/graphics_surface.h +++ b/src/citra_qt/debugger/graphics/graphics_surface.h @@ -14,8 +14,8 @@ class CSpinBox; class GraphicsSurfaceWidget; -namespace Memory { -class MemorySystem; +namespace Core { +class System; } class SurfacePicture : public QLabel { @@ -76,7 +76,7 @@ class GraphicsSurfaceWidget : public BreakPointObserverDock { static unsigned int NibblesPerPixel(Format format); public: - explicit GraphicsSurfaceWidget(Memory::MemorySystem& memory, + explicit GraphicsSurfaceWidget(Core::System& system, std::shared_ptr debug_context, QWidget* parent = nullptr); void Pick(int x, int y); @@ -95,12 +95,12 @@ signals: void Update(); private: - void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; + void OnBreakPointHit(Pica::DebugContext::Event event, const void* data) override; void OnResumed() override; void SaveSurface(); - Memory::MemorySystem& memory; + Core::System& system; QComboBox* surface_source_list; CSpinBox* surface_address_control; QSpinBox* surface_width_control; diff --git a/src/citra_qt/debugger/graphics/graphics_tracing.cpp b/src/citra_qt/debugger/graphics/graphics_tracing.cpp index 1fb2e1810..e78acb096 
100644 --- a/src/citra_qt/debugger/graphics/graphics_tracing.cpp +++ b/src/citra_qt/debugger/graphics/graphics_tracing.cpp @@ -14,14 +14,15 @@ #include #include "citra_qt/debugger/graphics/graphics_tracing.h" #include "common/common_types.h" -#include "core/hw/gpu.h" -#include "core/hw/lcd.h" +#include "core/core.h" #include "core/tracer/recorder.h" -#include "video_core/pica_state.h" +#include "video_core/gpu.h" +#include "video_core/pica/pica_core.h" -GraphicsTracingWidget::GraphicsTracingWidget(std::shared_ptr debug_context, +GraphicsTracingWidget::GraphicsTracingWidget(Core::System& system_, + std::shared_ptr debug_context, QWidget* parent) - : BreakPointObserverDock(debug_context, tr("CiTrace Recorder"), parent) { + : BreakPointObserverDock(debug_context, tr("CiTrace Recorder"), parent), system{system_} { setObjectName(QStringLiteral("CiTracing")); @@ -61,45 +62,46 @@ void GraphicsTracingWidget::StartRecording() { if (!context) return; - auto shader_binary = Pica::g_state.vs.program_code; - auto swizzle_data = Pica::g_state.vs.swizzle_data; + auto& pica = system.GPU().PicaCore(); + auto shader_binary = pica.vs_setup.program_code; + auto swizzle_data = pica.vs_setup.swizzle_data; // Encode floating point numbers to 24-bit values // TODO: Drop this explicit conversion once we store float24 values bit-correctly internally. 
std::array default_attributes; - for (unsigned i = 0; i < 16; ++i) { - for (unsigned comp = 0; comp < 3; ++comp) { - default_attributes[4 * i + comp] = nihstro::to_float24( - Pica::g_state.input_default_attributes.attr[i][comp].ToFloat32()); + for (u32 i = 0; i < 16; ++i) { + for (u32 comp = 0; comp < 3; ++comp) { + default_attributes[4 * i + comp] = + nihstro::to_float24(pica.input_default_attributes[i][comp].ToFloat32()); } } std::array vs_float_uniforms; - for (unsigned i = 0; i < 96; ++i) - for (unsigned comp = 0; comp < 3; ++comp) + for (u32 i = 0; i < 96; ++i) { + for (u32 comp = 0; comp < 3; ++comp) { vs_float_uniforms[4 * i + comp] = - nihstro::to_float24(Pica::g_state.vs.uniforms.f[i][comp].ToFloat32()); + nihstro::to_float24(pica.vs_setup.uniforms.f[i][comp].ToFloat32()); + } + } CiTrace::Recorder::InitialState state; - std::copy_n((u32*)&GPU::g_regs, sizeof(GPU::g_regs) / sizeof(u32), - std::back_inserter(state.gpu_registers)); - std::copy_n((u32*)&LCD::g_regs, sizeof(LCD::g_regs) / sizeof(u32), - std::back_inserter(state.lcd_registers)); - std::copy_n((u32*)&Pica::g_state.regs, sizeof(Pica::g_state.regs) / sizeof(u32), - std::back_inserter(state.pica_registers)); - std::copy(default_attributes.begin(), default_attributes.end(), - std::back_inserter(state.default_attributes)); - std::copy(shader_binary.begin(), shader_binary.end(), - std::back_inserter(state.vs_program_binary)); - std::copy(swizzle_data.begin(), swizzle_data.end(), std::back_inserter(state.vs_swizzle_data)); - std::copy(vs_float_uniforms.begin(), vs_float_uniforms.end(), - std::back_inserter(state.vs_float_uniforms)); - // boost::copy(TODO: Not implemented, std::back_inserter(state.gs_program_binary)); - // boost::copy(TODO: Not implemented, std::back_inserter(state.gs_swizzle_data)); - // boost::copy(TODO: Not implemented, std::back_inserter(state.gs_float_uniforms)); - auto recorder = new CiTrace::Recorder(state); - context->recorder = std::shared_ptr(recorder); + const auto copy = 
[&](std::vector& dest, auto& data) { + dest.resize(sizeof(data)); + std::memcpy(dest.data(), std::addressof(data), sizeof(data)); + }; + + copy(state.pica_registers, pica.regs); + copy(state.lcd_registers, pica.regs_lcd); + copy(state.default_attributes, default_attributes); + copy(state.vs_program_binary, shader_binary); + copy(state.vs_swizzle_data, swizzle_data); + copy(state.vs_float_uniforms, vs_float_uniforms); + // copy(TODO: Not implemented, std::back_inserter(state.gs_program_binary)); + // copy(TODO: Not implemented, std::back_inserter(state.gs_swizzle_data)); + // copy(TODO: Not implemented, std::back_inserter(state.gs_float_uniforms)); + + context->recorder = std::make_shared(state); emit SetStartTracingButtonEnabled(false); emit SetStopTracingButtonEnabled(true); @@ -139,7 +141,7 @@ void GraphicsTracingWidget::AbortRecording() { emit SetStartTracingButtonEnabled(true); } -void GraphicsTracingWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { +void GraphicsTracingWidget::OnBreakPointHit(Pica::DebugContext::Event event, const void* data) { widget()->setEnabled(true); } diff --git a/src/citra_qt/debugger/graphics/graphics_tracing.h b/src/citra_qt/debugger/graphics/graphics_tracing.h index eb1292c29..658f3ad0c 100644 --- a/src/citra_qt/debugger/graphics/graphics_tracing.h +++ b/src/citra_qt/debugger/graphics/graphics_tracing.h @@ -6,13 +6,18 @@ #include "citra_qt/debugger/graphics/graphics_breakpoint_observer.h" +namespace Core { +class System; +} + class EmuThread; class GraphicsTracingWidget : public BreakPointObserverDock { Q_OBJECT public: - explicit GraphicsTracingWidget(std::shared_ptr debug_context, + explicit GraphicsTracingWidget(Core::System& system, + std::shared_ptr debug_context, QWidget* parent = nullptr); void OnEmulationStarting(EmuThread* emu_thread); @@ -23,11 +28,14 @@ private slots: void StopRecording(); void AbortRecording(); - void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; + void 
OnBreakPointHit(Pica::DebugContext::Event event, const void* data) override; void OnResumed() override; signals: void SetStartTracingButtonEnabled(bool enable); void SetStopTracingButtonEnabled(bool enable); void SetAbortTracingButtonEnabled(bool enable); + +private: + Core::System& system; }; diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index 07e25be8c..46f8ec5cf 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -16,9 +16,9 @@ #include #include "citra_qt/debugger/graphics/graphics_vertex_shader.h" #include "citra_qt/util/util.h" -#include "video_core/pica_state.h" -#include "video_core/shader/debug_data.h" -#include "video_core/shader/shader.h" +#include "core/core.h" +#include "video_core/gpu.h" +#include "video_core/pica/pica_core.h" #include "video_core/shader/shader_interpreter.h" using nihstro::Instruction; @@ -352,16 +352,14 @@ void GraphicsVertexShaderWidget::DumpShader() { return; } - auto& setup = Pica::g_state.vs; - auto& config = Pica::g_state.regs.vs; - - Pica::DebugUtils::DumpShader(filename.toStdString(), config, setup, - Pica::g_state.regs.rasterizer.vs_output_attributes); + auto& pica = system.GPU().PicaCore(); + Pica::DebugUtils::DumpShader(filename.toStdString(), pica.regs.internal.vs, pica.vs_setup, + pica.regs.internal.rasterizer.vs_output_attributes); } GraphicsVertexShaderWidget::GraphicsVertexShaderWidget( - std::shared_ptr debug_context, QWidget* parent) - : BreakPointObserverDock(debug_context, tr("Pica Vertex Shader"), parent) { + Core::System& system_, std::shared_ptr debug_context, QWidget* parent) + : BreakPointObserverDock(debug_context, tr("Pica Vertex Shader"), parent), system{system_} { setObjectName(QStringLiteral("PicaVertexShader")); // Clear input vertex data so that it contains valid float values in case a debug shader @@ -472,7 +470,8 @@ 
GraphicsVertexShaderWidget::GraphicsVertexShaderWidget( widget()->setEnabled(false); } -void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { +void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, + const void* data) { if (event == Pica::DebugContext::Event::VertexShaderInvocation) { Reload(true, data); } else { @@ -482,7 +481,7 @@ void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event widget()->setEnabled(true); } -void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_data) { +void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, const void* vertex_data) { model->beginResetModel(); if (replace_vertex_data) { @@ -491,7 +490,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d for (unsigned attr = 0; attr < 16; ++attr) { for (unsigned comp = 0; comp < 4; ++comp) { input_data[4 * attr + comp]->setText( - QStringLiteral("%1").arg(input_vertex.attr[attr][comp].ToFloat32())); + QStringLiteral("%1").arg(input_vertex[attr][comp].ToFloat32())); } } breakpoint_warning->hide(); @@ -508,28 +507,27 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d // Reload shader code info.Clear(); - auto& shader_setup = Pica::g_state.vs; - auto& shader_config = Pica::g_state.regs.vs; - for (auto instr : shader_setup.program_code) + auto& pica = system.GPU().PicaCore(); + for (auto instr : pica.vs_setup.program_code) info.code.push_back({instr}); - int num_attributes = shader_config.max_input_attribute_index + 1; + int num_attributes = pica.regs.internal.vs.max_input_attribute_index + 1; - for (auto pattern : shader_setup.swizzle_data) { + for (auto pattern : pica.vs_setup.swizzle_data) { const nihstro::SwizzleInfo swizzle_info = {.pattern = nihstro::SwizzlePattern{pattern}}; info.swizzle_info.push_back(swizzle_info); } - u32 entry_point = Pica::g_state.regs.vs.main_offset; + u32 entry_point = 
pica.regs.internal.vs.main_offset; info.labels.insert({entry_point, "main"}); // Generate debug information Pica::Shader::InterpreterEngine shader_engine; - shader_engine.SetupBatch(shader_setup, entry_point); - debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, shader_config); + shader_engine.SetupBatch(pica.vs_setup, entry_point); + debug_data = shader_engine.ProduceDebugInfo(pica.vs_setup, input_vertex, pica.regs.internal.vs); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { - unsigned source_attr = shader_config.GetRegisterForAttribute(attr); + unsigned source_attr = pica.regs.internal.vs.GetRegisterForAttribute(attr); input_data_mapping[attr]->setText(QStringLiteral("-> v%1").arg(source_attr)); input_data_container[attr]->setVisible(true); } @@ -551,7 +549,7 @@ void GraphicsVertexShaderWidget::OnResumed() { void GraphicsVertexShaderWidget::OnInputAttributeChanged(int index) { const f32 value = input_data[index]->text().toFloat(); - input_vertex.attr[index / 4][index % 4] = Pica::f24::FromFloat32(value); + input_vertex[index / 4][index % 4] = Pica::f24::FromFloat32(value); // Re-execute shader with updated value Reload(); } diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h index 166b6ba23..29577bf44 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h @@ -8,8 +8,12 @@ #include #include #include "citra_qt/debugger/graphics/graphics_breakpoint_observer.h" +#include "video_core/pica/output_vertex.h" #include "video_core/shader/debug_data.h" -#include "video_core/shader/shader.h" + +namespace Core { +class System; +} class QLabel; class QSpinBox; @@ -40,11 +44,12 @@ class GraphicsVertexShaderWidget : public BreakPointObserverDock { using Event = Pica::DebugContext::Event; public: - GraphicsVertexShaderWidget(std::shared_ptr debug_context, + 
GraphicsVertexShaderWidget(Core::System& system, + std::shared_ptr debug_context, QWidget* parent = nullptr); private slots: - void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; + void OnBreakPointHit(Pica::DebugContext::Event event, const void* data) override; void OnResumed() override; void OnInputAttributeChanged(int index); @@ -60,9 +65,10 @@ private slots: * specify that no valid vertex data can be retrieved currently. Only used if * replace_vertex_data is true. */ - void Reload(bool replace_vertex_data = false, void* vertex_data = nullptr); + void Reload(bool replace_vertex_data = false, const void* vertex_data = nullptr); private: + Core::System& system; QLabel* instruction_description; QTreeView* binary_list; GraphicsVertexShaderModel* model; @@ -83,7 +89,7 @@ private: nihstro::ShaderInfo info; Pica::Shader::DebugData debug_data; - Pica::Shader::AttributeBuffer input_vertex; + Pica::AttributeBuffer input_vertex; friend class GraphicsVertexShaderModel; }; diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index 934906160..147642826 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -74,7 +74,6 @@ #include "common/logging/backend.h" #include "common/logging/log.h" #include "common/memory_detect.h" -#include "common/microprofile.h" #include "common/scm_rev.h" #include "common/scope_exit.h" #if CITRA_ARCH(x86_64) @@ -96,8 +95,8 @@ #include "input_common/main.h" #include "network/network_settings.h" #include "ui_main.h" +#include "video_core/gpu.h" #include "video_core/renderer_base.h" -#include "video_core/video_core.h" #ifdef __APPLE__ #include "common/apple_authorization.h" @@ -458,7 +457,7 @@ void GMainWindow::InitializeDebugWidgets() { connect(this, &GMainWindow::EmulationStopping, registersWidget, &RegistersWidget::OnEmulationStopping); - graphicsWidget = new GPUCommandStreamWidget(this); + graphicsWidget = new GPUCommandStreamWidget(system, this); addDockWidget(Qt::RightDockWidgetArea, graphicsWidget); 
graphicsWidget->hide(); debug_menu->addAction(graphicsWidget->toggleViewAction()); @@ -473,12 +472,13 @@ void GMainWindow::InitializeDebugWidgets() { graphicsBreakpointsWidget->hide(); debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction()); - graphicsVertexShaderWidget = new GraphicsVertexShaderWidget(Pica::g_debug_context, this); + graphicsVertexShaderWidget = + new GraphicsVertexShaderWidget(system, Pica::g_debug_context, this); addDockWidget(Qt::RightDockWidgetArea, graphicsVertexShaderWidget); graphicsVertexShaderWidget->hide(); debug_menu->addAction(graphicsVertexShaderWidget->toggleViewAction()); - graphicsTracingWidget = new GraphicsTracingWidget(Pica::g_debug_context, this); + graphicsTracingWidget = new GraphicsTracingWidget(system, Pica::g_debug_context, this); addDockWidget(Qt::RightDockWidgetArea, graphicsTracingWidget); graphicsTracingWidget->hide(); debug_menu->addAction(graphicsTracingWidget->toggleViewAction()); @@ -1237,6 +1237,11 @@ void GMainWindow::BootGame(const QString& filename) { video_dumping_path.clear(); } + // Register debug widgets + if (graphicsWidget->isVisible()) { + graphicsWidget->Register(); + } + // Create and start the emulation thread emu_thread = std::make_unique(system, *render_window); emit EmulationStarting(emu_thread.get()); @@ -1315,6 +1320,11 @@ void GMainWindow::ShutdownGame() { // TODO(bunnei): This function is not thread safe, but it's being used as if it were Pica::g_debug_context->ClearBreakpoints(); + // Unregister debug widgets + if (graphicsWidget->isVisible()) { + graphicsWidget->Unregister(); + } + // Frame advancing must be cancelled in order to release the emu thread from waiting system.frame_limiter.SetFrameAdvancing(false); @@ -2214,7 +2224,7 @@ void GMainWindow::OnToggleFilterBar() { void GMainWindow::OnCreateGraphicsSurfaceViewer() { auto graphicsSurfaceViewerWidget = - new GraphicsSurfaceWidget(system.Memory(), Pica::g_debug_context, this); + new GraphicsSurfaceWidget(system, 
Pica::g_debug_context, this); addDockWidget(Qt::RightDockWidgetArea, graphicsSurfaceViewerWidget); // TODO: Maybe graphicsSurfaceViewerWidget->setFloating(true); graphicsSurfaceViewerWidget->show(); @@ -2434,10 +2444,10 @@ void GMainWindow::OnStartVideoDumping() { } void GMainWindow::StartVideoDumping(const QString& path) { - Layout::FramebufferLayout layout{ - Layout::FrameLayoutFromResolutionScale(VideoCore::g_renderer->GetResolutionScaleFactor())}; + auto& renderer = system.GPU().Renderer(); + const auto layout{Layout::FrameLayoutFromResolutionScale(renderer.GetResolutionScaleFactor())}; - auto dumper = std::make_shared(); + auto dumper = std::make_shared(renderer); if (dumper->StartDumping(path.toStdString(), layout)) { system.RegisterVideoDumper(dumper); } else { diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index cba0945c3..f8c953a60 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -37,16 +37,14 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.h" "${VIDEO_CORE}/shader/shader.cpp" "${VIDEO_CORE}/shader/shader.h" - "${VIDEO_CORE}/pica.cpp" - "${VIDEO_CORE}/pica.h" - "${VIDEO_CORE}/regs_framebuffer.h" - "${VIDEO_CORE}/regs_lighting.h" - "${VIDEO_CORE}/regs_pipeline.h" - "${VIDEO_CORE}/regs_rasterizer.h" - "${VIDEO_CORE}/regs_shader.h" - "${VIDEO_CORE}/regs_texturing.h" - "${VIDEO_CORE}/regs.cpp" - "${VIDEO_CORE}/regs.h" + "${VIDEO_CORE}/pica/regs_framebuffer.h" + "${VIDEO_CORE}/pica/regs_lighting.h" + "${VIDEO_CORE}/pica/regs_pipeline.h" + "${VIDEO_CORE}/pica/regs_rasterizer.h" + "${VIDEO_CORE}/pica/regs_shader.h" + "${VIDEO_CORE}/pica/regs_texturing.h" + "${VIDEO_CORE}/pica/regs_internal.cpp" + "${VIDEO_CORE}/pica/regs_internal.h" # and also check that the scm_rev files haven't changed "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" diff --git a/src/common/memory_ref.h b/src/common/memory_ref.h index 894fe6c79..389528f15 100644 --- 
a/src/common/memory_ref.h +++ b/src/common/memory_ref.h @@ -110,8 +110,10 @@ public: return std::span{cptr, std::min(size, csize)}; } - std::span GetReadBytes(std::size_t size) const { - return std::span{cptr, std::min(size, csize)}; + template + std::span GetReadBytes(std::size_t size) const { + const auto* cptr_t = reinterpret_cast(cptr); + return std::span{cptr_t, std::min(size, csize) / sizeof(T)}; } std::size_t GetSize() const { diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e2939df8a..dc45e1eb8 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -299,8 +299,10 @@ add_library(citra_core STATIC hle/service/fs/fs_user.h hle/service/gsp/gsp.cpp hle/service/gsp/gsp.h + hle/service/gsp/gsp_command.h hle/service/gsp/gsp_gpu.cpp hle/service/gsp/gsp_gpu.h + hle/service/gsp/gsp_interrupt.h hle/service/gsp/gsp_lcd.cpp hle/service/gsp/gsp_lcd.h hle/service/hid/hid.cpp @@ -433,12 +435,6 @@ add_library(citra_core STATIC hw/aes/ccm.h hw/aes/key.cpp hw/aes/key.h - hw/gpu.cpp - hw/gpu.h - hw/hw.cpp - hw/hw.h - hw/lcd.cpp - hw/lcd.h hw/rsa/rsa.cpp hw/rsa/rsa.h hw/y2r.cpp diff --git a/src/core/cheats/cheats.cpp b/src/core/cheats/cheats.cpp index 26a77edee..aff31f130 100644 --- a/src/core/cheats/cheats.cpp +++ b/src/core/cheats/cheats.cpp @@ -11,7 +11,6 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/hle/kernel/process.h" -#include "core/hw/gpu.h" namespace Cheats { diff --git a/src/core/core.cpp b/src/core/core.cpp index 22d0a1de6..b0faf08c9 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -35,14 +35,13 @@ #include "core/hle/service/cam/cam.h" #include "core/hle/service/fs/archive.h" #include "core/hle/service/gsp/gsp.h" +#include "core/hle/service/gsp/gsp_gpu.h" #include "core/hle/service/ir/ir_rst.h" #include "core/hle/service/mic/mic_u.h" #include "core/hle/service/plgldr/plgldr.h" #include "core/hle/service/service.h" #include "core/hle/service/sm/sm.h" -#include "core/hw/gpu.h" -#include "core/hw/hw.h" 
-#include "core/hw/lcd.h" +#include "core/hw/aes/key.h" #include "core/loader/loader.h" #include "core/movie.h" #ifdef ENABLE_SCRIPTING @@ -51,8 +50,8 @@ #include "core/telemetry_session.h" #include "network/network.h" #include "video_core/custom_textures/custom_tex_manager.h" +#include "video_core/gpu.h" #include "video_core/renderer_base.h" -#include "video_core/video_core.h" namespace Core { @@ -235,7 +234,6 @@ System::ResultStatus System::RunLoop(bool tight_loop) { GDBStub::SetCpuStepFlag(false); } - HW::Update(); Reschedule(); return status; @@ -433,7 +431,7 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, service_manager = std::make_unique(*this); archive_manager = std::make_unique(*this); - HW::Init(*memory); + HW::AES::InitKeys(); Service::Init(*this); GDBStub::DeferStart(); @@ -443,7 +441,10 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, custom_tex_manager = std::make_unique(*this); - VideoCore::Init(emu_window, secondary_window, *this); + auto gsp = service_manager->GetService("gsp::Gpu"); + gpu = std::make_unique(*this, emu_window, secondary_window); + gpu->SetInterruptHandler( + [gsp](Service::GSP::InterruptId interrupt_id) { gsp->SignalInterrupt(interrupt_id); }); LOG_DEBUG(Core, "Initialized OK"); @@ -452,8 +453,8 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, return ResultStatus::Success; } -VideoCore::RendererBase& System::Renderer() { - return *VideoCore::g_renderer; +VideoCore::GPU& System::GPU() { + return *gpu; } Service::SM::ServiceManager& System::ServiceManager() { @@ -555,8 +556,7 @@ void System::Shutdown(bool is_deserializing) { // Shutdown emulation session is_powered_on = false; - VideoCore::Shutdown(); - HW::Shutdown(); + gpu.reset(); if (!is_deserializing) { GDBStub::Shutdown(); perf_stats.reset(); @@ -626,18 +626,9 @@ void System::ApplySettings() { GDBStub::SetServerPort(Settings::values.gdbstub_port.GetValue()); 
GDBStub::ToggleServer(Settings::values.use_gdbstub.GetValue()); - VideoCore::g_shader_jit_enabled = Settings::values.use_shader_jit.GetValue(); - VideoCore::g_hw_shader_enabled = Settings::values.use_hw_shader.GetValue(); - VideoCore::g_hw_shader_accurate_mul = Settings::values.shaders_accurate_mul.GetValue(); - -#ifndef ANDROID - if (VideoCore::g_renderer) { - VideoCore::g_renderer->UpdateCurrentFramebufferLayout(); - } -#endif - - if (VideoCore::g_renderer) { - auto& settings = VideoCore::g_renderer->Settings(); + if (gpu) { + gpu->Renderer().UpdateCurrentFramebufferLayout(); + auto& settings = gpu->Renderer().Settings(); settings.bg_color_update_requested = true; settings.shader_update_requested = true; } @@ -699,17 +690,15 @@ void System::serialize(Archive& ar, const unsigned int file_version) { *m_emu_window, m_secondary_window, *memory_mode.first, *n3ds_hw_caps.first, num_cores); } - // flush on save, don't flush on load - bool should_flush = !Archive::is_loading::value; - Memory::RasterizerClearAll(should_flush); + // Flush on save, don't flush on load + const bool should_flush = !Archive::is_loading::value; + gpu->ClearAll(should_flush); ar&* timing.get(); for (u32 i = 0; i < num_cores; i++) { ar&* cpu_cores[i].get(); } ar&* service_manager.get(); ar&* archive_manager.get(); - ar& GPU::g_regs; - ar& LCD::g_regs; // NOTE: DSP doesn't like being destroyed and recreated. So instead we do an inline // serialization; this means that the DSP Settings need to match for loading to work. @@ -722,16 +711,21 @@ void System::serialize(Archive& ar, const unsigned int file_version) { ar&* memory.get(); ar&* kernel.get(); - VideoCore::serialize(ar, file_version); + ar&* gpu.get(); ar& movie; // This needs to be set from somewhere - might as well be here! 
if (Archive::is_loading::value) { timing->UnlockEventQueue(); - Service::GSP::SetGlobalModule(*this); memory->SetDSP(*dsp_core); cheat_engine->Connect(); - VideoCore::g_renderer->Sync(); + gpu->Sync(); + + // Re-register gpu callback, because gsp service changed after service_manager got + // serialized + auto gsp = service_manager->GetService("gsp::Gpu"); + gpu->SetInterruptHandler( + [gsp](Service::GSP::InterruptId interrupt_id) { gsp->SignalInterrupt(interrupt_id); }); } } diff --git a/src/core/core.h b/src/core/core.h index 434928aa0..87db18179 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -58,9 +58,13 @@ class Backend; namespace VideoCore { class CustomTexManager; -class RendererBase; +class GPU; } // namespace VideoCore +namespace Pica { +class DebugContext; +} + namespace Loader { class AppLoader; } @@ -217,7 +221,7 @@ public: return *dsp_core; } - [[nodiscard]] VideoCore::RendererBase& Renderer(); + [[nodiscard]] VideoCore::GPU& GPU(); /** * Gets a reference to the service manager. 
@@ -384,6 +388,8 @@ private: /// Telemetry session for this emulation session std::unique_ptr telemetry_session; + std::unique_ptr gpu; + /// Service manager std::unique_ptr service_manager; diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 9de9011a5..9f9c40027 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -37,6 +37,10 @@ constexpr u64 BASE_CLOCK_RATE_ARM11 = 268111856; constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits::max() / BASE_CLOCK_RATE_ARM11; +/// Refresh rate defined by ratio of ARM11 frequency to ARM11 ticks per frame +/// (268,111,856) / (4,481,136) = 59.83122493939037Hz +constexpr double SCREEN_REFRESH_RATE = BASE_CLOCK_RATE_ARM11 / static_cast(4481136ull); + constexpr s64 msToCycles(int ms) { // since ms is int there is no way to overflow return BASE_CLOCK_RATE_ARM11 * static_cast(ms) / 1000; diff --git a/src/core/dumping/ffmpeg_backend.cpp b/src/core/dumping/ffmpeg_backend.cpp index cafef39b7..1fa1be39c 100644 --- a/src/core/dumping/ffmpeg_backend.cpp +++ b/src/core/dumping/ffmpeg_backend.cpp @@ -11,10 +11,10 @@ #include "common/scope_exit.h" #include "common/settings.h" #include "common/string_util.h" +#include "core/core_timing.h" #include "core/dumping/ffmpeg_backend.h" -#include "core/hw/gpu.h" +#include "video_core/gpu.h" #include "video_core/renderer_base.h" -#include "video_core/video_core.h" using namespace DynamicLibrary; @@ -381,7 +381,7 @@ bool FFmpegVideoStream::InitFilters() { } // Configure buffer source - static constexpr AVRational src_time_base{static_cast(GPU::frame_ticks), + static constexpr AVRational src_time_base{static_cast(VideoCore::FRAME_TICKS), static_cast(BASE_CLOCK_RATE_ARM11)}; const std::string in_args = fmt::format("video_size={}x{}:pix_fmt={}:time_base={}/{}:pixel_aspect=1", layout.width, @@ -732,7 +732,7 @@ void FFmpegMuxer::WriteTrailer() { FFmpeg::av_write_trailer(format_context.get()); } -FFmpegBackend::FFmpegBackend() = default; 
+FFmpegBackend::FFmpegBackend(VideoCore::RendererBase& renderer_) : renderer{renderer_} {} FFmpegBackend::~FFmpegBackend() { ASSERT_MSG(!IsDumping(), "Dumping must be stopped first"); @@ -796,7 +796,7 @@ bool FFmpegBackend::StartDumping(const std::string& path, const Layout::Framebuf } }); - VideoCore::g_renderer->PrepareVideoDumping(); + renderer.PrepareVideoDumping(); is_dumping = true; return true; @@ -829,7 +829,7 @@ void FFmpegBackend::AddAudioSample(const std::array& sample) { void FFmpegBackend::StopDumping() { is_dumping = false; - VideoCore::g_renderer->CleanupVideoDumping(); + renderer.CleanupVideoDumping(); // Flush the video processing queue AddVideoFrame(VideoFrame()); diff --git a/src/core/dumping/ffmpeg_backend.h b/src/core/dumping/ffmpeg_backend.h index c8eb0ae3f..64e0f3fc2 100644 --- a/src/core/dumping/ffmpeg_backend.h +++ b/src/core/dumping/ffmpeg_backend.h @@ -18,6 +18,10 @@ #include "common/threadsafe_queue.h" #include "core/dumping/backend.h" +namespace VideoCore { +class RendererBase; +} + namespace VideoDumper { using VariableAudioFrame = std::vector; @@ -181,7 +185,7 @@ private: */ class FFmpegBackend : public Backend { public: - FFmpegBackend(); + FFmpegBackend(VideoCore::RendererBase& renderer); ~FFmpegBackend() override; bool StartDumping(const std::string& path, const Layout::FramebufferLayout& layout) override; void AddVideoFrame(VideoFrame frame) override; @@ -194,6 +198,7 @@ public: private: void EndDumping(); + VideoCore::RendererBase& renderer; std::atomic_bool is_dumping = false; ///< Whether the backend is currently dumping FFmpegMuxer ffmpeg{}; diff --git a/src/core/hle/service/gsp/gsp.cpp b/src/core/hle/service/gsp/gsp.cpp index c360d895c..5396e2a80 100644 --- a/src/core/hle/service/gsp/gsp.cpp +++ b/src/core/hle/service/gsp/gsp.cpp @@ -2,33 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include #include "core/core.h" -#include "core/hle/kernel/event.h" -#include "core/hle/kernel/shared_memory.h" #include "core/hle/service/gsp/gsp.h" +#include "core/hle/service/gsp/gsp_gpu.h" +#include "core/hle/service/gsp/gsp_lcd.h" namespace Service::GSP { -static std::weak_ptr gsp_gpu; - -void SignalInterrupt(InterruptId interrupt_id) { - auto gpu = gsp_gpu.lock(); - ASSERT(gpu != nullptr); - return gpu->SignalInterrupt(interrupt_id); -} - void InstallInterfaces(Core::System& system) { auto& service_manager = system.ServiceManager(); - auto gpu = std::make_shared(system); - gpu->InstallAsService(service_manager); - gsp_gpu = gpu; - + std::make_shared(system)->InstallAsService(service_manager); std::make_shared()->InstallAsService(service_manager); } -void SetGlobalModule(Core::System& system) { - gsp_gpu = system.ServiceManager().GetService("gsp::Gpu"); -} - } // namespace Service::GSP diff --git a/src/core/hle/service/gsp/gsp.h b/src/core/hle/service/gsp/gsp.h index a4dd84f27..505825969 100644 --- a/src/core/hle/service/gsp/gsp.h +++ b/src/core/hle/service/gsp/gsp.h @@ -4,25 +4,12 @@ #pragma once -#include -#include -#include "common/common_types.h" -#include "core/hle/result.h" -#include "core/hle/service/gsp/gsp_gpu.h" -#include "core/hle/service/gsp/gsp_lcd.h" - namespace Core { class System; } namespace Service::GSP { -/** - * Signals that the specified interrupt type has occurred to userland code - * @param interrupt_id ID of interrupt that is being signalled - */ -void SignalInterrupt(InterruptId interrupt_id); void InstallInterfaces(Core::System& system); -void SetGlobalModule(Core::System& system); } // namespace Service::GSP diff --git a/src/core/hle/service/gsp/gsp_command.h b/src/core/hle/service/gsp/gsp_command.h new file mode 100644 index 000000000..ddc203615 --- /dev/null +++ b/src/core/hle/service/gsp/gsp_command.h @@ -0,0 +1,110 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the 
license.txt file included. + +#pragma once + +#include "common/bit_field.h" + +namespace Service::GSP { + +/// GSP command ID +enum class CommandId : u32 { + RequestDma = 0x00, + SubmitCmdList = 0x01, + MemoryFill = 0x02, + DisplayTransfer = 0x03, + TextureCopy = 0x04, + CacheFlush = 0x05, +}; + +struct DmaCommand { + u32 source_address; + u32 dest_address; + u32 size; +}; + +struct SubmitCmdListCommand { + u32 address; + u32 size; + u32 flags; + u32 unused[3]; + u32 do_flush; +}; + +struct MemoryFillCommand { + u32 start1; + u32 value1; + u32 end1; + + u32 start2; + u32 value2; + u32 end2; + + u16 control1; + u16 control2; +}; + +struct DisplayTransferCommand { + u32 in_buffer_address; + u32 out_buffer_address; + u32 in_buffer_size; + u32 out_buffer_size; + u32 flags; +}; + +struct TextureCopyCommand { + u32 in_buffer_address; + u32 out_buffer_address; + u32 size; + u32 in_width_gap; + u32 out_width_gap; + u32 flags; +}; + +struct CacheFlushCommand { + struct { + u32 address; + u32 size; + } regions[3]; +}; + +/// GSP command +struct Command { + BitField<0, 8, CommandId> id; + union { + DmaCommand dma_request; + SubmitCmdListCommand submit_gpu_cmdlist; + MemoryFillCommand memory_fill; + DisplayTransferCommand display_transfer; + TextureCopyCommand texture_copy; + CacheFlushCommand cache_flush; + std::array raw_data; + }; +}; +static_assert(sizeof(Command) == 0x20, "Command struct has incorrect size"); + +/// GSP shared memory GX command buffer header +struct CommandBuffer { + union { + u32 hex; + + // Current command index. This index is updated by GSP module after loading the command + // data, right before the command is processed. When this index is updated by GSP module, + // the total commands field is decreased by one as well. + BitField<0, 8, u32> index; + + // Total commands to process, must not be value 0 when GSP module handles commands. This + // must be <=15 when writing a command to shared memory. 
This is incremented by the + // application when writing a command to shared memory, after increasing this value + // TriggerCmdReqQueue is only used if this field is value 1. + BitField<8, 8, u32> number_commands; + }; + + u32 unk[7]; + + Command commands[0xF]; +}; +static_assert(sizeof(CommandBuffer) == 0x200, "CommandBuffer struct has incorrect size"); + +} // namespace Service::GSP diff --git a/src/core/hle/service/gsp/gsp_gpu.cpp b/src/core/hle/service/gsp/gsp_gpu.cpp index d62f0d294..b12cb703c 100644 --- a/src/core/hle/service/gsp/gsp_gpu.cpp +++ b/src/core/hle/service/gsp/gsp_gpu.cpp @@ -9,30 +9,21 @@ #include #include "common/archives.h" #include "common/bit_field.h" -#include "common/microprofile.h" -#include "common/swap.h" #include "core/core.h" -#include "core/file_sys/plugin_3gx.h" -#include "core/hle/ipc.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/shared_page.h" #include "core/hle/result.h" #include "core/hle/service/gsp/gsp_gpu.h" -#include "core/hw/gpu.h" -#include "core/hw/hw.h" -#include "core/hw/lcd.h" #include "core/memory.h" -#include "video_core/debug_utils/debug_utils.h" +#include "video_core/gpu.h" #include "video_core/gpu_debugger.h" +#include "video_core/pica/regs_lcd.h" SERIALIZE_EXPORT_IMPL(Service::GSP::SessionData) SERIALIZE_EXPORT_IMPL(Service::GSP::GSP_GPU) SERVICE_CONSTRUCT_IMPL(Service::GSP::GSP_GPU) -// Main graphics debugger object - TODO: Here is probably not the best place for this -GraphicsDebugger g_debugger; - namespace Service::GSP { // Beginning address of HW regs @@ -59,60 +50,32 @@ constexpr ResultCode ERR_REGS_INVALID_SIZE(ErrorDescription::InvalidSize, ErrorM ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E02BEC -static PAddr VirtualToPhysicalAddress(VAddr addr) { - if (addr == 0) { - return 0; - } - - // Note: the region end check is inclusive because the game can pass in an address that - // represents an open right boundary - if (addr >= 
Memory::VRAM_VADDR && addr <= Memory::VRAM_VADDR_END) { - return addr - Memory::VRAM_VADDR + Memory::VRAM_PADDR; - } - if (addr >= Memory::LINEAR_HEAP_VADDR && addr <= Memory::LINEAR_HEAP_VADDR_END) { - return addr - Memory::LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR; - } - if (addr >= Memory::NEW_LINEAR_HEAP_VADDR && addr <= Memory::NEW_LINEAR_HEAP_VADDR_END) { - return addr - Memory::NEW_LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR; - } - if (addr >= Memory::PLUGIN_3GX_FB_VADDR && addr <= Memory::PLUGIN_3GX_FB_VADDR_END) { - return addr - Memory::PLUGIN_3GX_FB_VADDR + Service::PLGLDR::PLG_LDR::GetPluginFBAddr(); - } - - LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x{:08X}", addr); - // To help with debugging, set bit on address so that it's obviously invalid. - // TODO: find the correct way to handle this error - return addr | 0x80000000; -} - u32 GSP_GPU::GetUnusedThreadId() const { for (u32 id = 0; id < MaxGSPThreads; ++id) { - if (!used_thread_ids[id]) + if (!used_thread_ids[id]) { return id; + } } UNREACHABLE_MSG("All GSP threads are in use"); return 0; } -/// Gets a pointer to a thread command buffer in GSP shared memory -static inline u8* GetCommandBuffer(std::shared_ptr shared_memory, - u32 thread_id) { - return shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer))); +CommandBuffer* GSP_GPU::GetCommandBuffer(u32 thread_id) { + auto* ptr = shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer))); + return reinterpret_cast(ptr); } FrameBufferUpdate* GSP_GPU::GetFrameBufferInfo(u32 thread_id, u32 screen_index) { DEBUG_ASSERT_MSG(screen_index < 2, "Invalid screen index"); // For each thread there are two FrameBufferUpdate fields - u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate); + const u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate); u8* ptr = shared_memory->GetPointer(offset); return reinterpret_cast(ptr); } -/// Gets a pointer to the interrupt relay queue for a given 
thread index -static inline InterruptRelayQueue* GetInterruptRelayQueue( - std::shared_ptr shared_memory, u32 thread_id) { +InterruptRelayQueue* GSP_GPU::GetInterruptRelayQueue(u32 thread_id) { u8* ptr = shared_memory->GetPointer(sizeof(InterruptRelayQueue) * thread_id); return reinterpret_cast(ptr); } @@ -125,19 +88,6 @@ void GSP_GPU::ClientDisconnected(std::shared_ptr server_s SessionRequestHandler::ClientDisconnected(server_session); } -/** - * Writes a single GSP GPU hardware registers with a single u32 value - * (For internal use.) - * - * @param base_address The address of the register in question - * @param data Data to be written - */ -static void WriteSingleHWReg(u32 base_address, u32 data) { - DEBUG_ASSERT_MSG((base_address & 3) == 0 && base_address < 0x420000, - "Write address out of range or misaligned"); - HW::Write(base_address + REGS_BEGIN, data); -} - /** * Writes sequential GSP GPU hardware registers using an array of source data * @@ -146,7 +96,8 @@ static void WriteSingleHWReg(u32 base_address, u32 data) { * @param data A vector containing the source data * @return RESULT_SUCCESS if the parameters are valid, error code otherwise */ -static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span data) { +static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span data, + VideoCore::GPU& gpu) { // This magic number is verified to be done by the gsp module const u32 max_size_in_bytes = 0x80; @@ -155,28 +106,30 @@ static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span 0) { - u32 value; - std::memcpy(&value, &data[offset], sizeof(u32)); - WriteSingleHWReg(base_address, value); + } - size_in_bytes -= 4; - offset += 4; - base_address += 4; - } - return RESULT_SUCCESS; - } - - } else { + if (size_in_bytes > max_size_in_bytes) { LOG_ERROR(Service_GSP, "Out of range size 0x{:08x}", size_in_bytes); return ERR_REGS_INVALID_SIZE; } + + if (size_in_bytes & 3) { + LOG_ERROR(Service_GSP, "Misaligned size 
0x{:08x}", size_in_bytes); + return ERR_REGS_MISALIGNED; + } + + std::size_t offset = 0; + while (size_in_bytes > 0) { + u32 value; + std::memcpy(&value, &data[offset], sizeof(u32)); + gpu.WriteReg(REGS_BEGIN + base_address, value); + + size_in_bytes -= 4; + offset += 4; + base_address += 4; + } + + return RESULT_SUCCESS; } /** @@ -190,7 +143,7 @@ static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span data, - std::span masks) { + std::span masks, VideoCore::GPU& gpu) { // This magic number is verified to be done by the gsp module const u32 max_size_in_bytes = 0x80; @@ -199,60 +152,58 @@ static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, std:: "Write address was out of range or misaligned! (address=0x{:08x}, size=0x{:08x})", base_address, size_in_bytes); return ERR_REGS_OUTOFRANGE_OR_MISALIGNED; - } else if (size_in_bytes <= max_size_in_bytes) { - if (size_in_bytes & 3) { - LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes); - return ERR_REGS_MISALIGNED; - } else { - std::size_t offset = 0; - while (size_in_bytes > 0) { - const u32 reg_address = base_address + REGS_BEGIN; + } - u32 reg_value; - HW::Read(reg_value, reg_address); - - u32 value, mask; - std::memcpy(&value, &data[offset], sizeof(u32)); - std::memcpy(&mask, &masks[offset], sizeof(u32)); - - // Update the current value of the register only for set mask bits - reg_value = (reg_value & ~mask) | (value & mask); - - WriteSingleHWReg(base_address, reg_value); - - size_in_bytes -= 4; - offset += 4; - base_address += 4; - } - return RESULT_SUCCESS; - } - - } else { + if (size_in_bytes > max_size_in_bytes) { LOG_ERROR(Service_GSP, "Out of range size 0x{:08x}", size_in_bytes); return ERR_REGS_INVALID_SIZE; } + + if (size_in_bytes & 3) { + LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes); + return ERR_REGS_MISALIGNED; + } + + std::size_t offset = 0; + while (size_in_bytes > 0) { + const u32 reg_address = base_address + REGS_BEGIN; + u32 
reg_value = gpu.ReadReg(reg_address); + + u32 value, mask; + std::memcpy(&value, &data[offset], sizeof(u32)); + std::memcpy(&mask, &masks[offset], sizeof(u32)); + + // Update the current value of the register only for set mask bits + reg_value = (reg_value & ~mask) | (value & mask); + gpu.WriteReg(reg_address, reg_value); + + size_in_bytes -= 4; + offset += 4; + base_address += 4; + } + + return RESULT_SUCCESS; } void GSP_GPU::WriteHWRegs(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp(ctx); - u32 reg_addr = rp.Pop(); - u32 size = rp.Pop(); - std::vector src_data = rp.PopStaticBuffer(); + const u32 reg_addr = rp.Pop(); + const u32 size = rp.Pop(); + const auto src_data = rp.PopStaticBuffer(); IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); - rb.Push(GSP::WriteHWRegs(reg_addr, size, src_data)); + rb.Push(GSP::WriteHWRegs(reg_addr, size, src_data, system.GPU())); } void GSP_GPU::WriteHWRegsWithMask(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp(ctx); - u32 reg_addr = rp.Pop(); - u32 size = rp.Pop(); - - std::vector src_data = rp.PopStaticBuffer(); - std::vector mask_data = rp.PopStaticBuffer(); + const u32 reg_addr = rp.Pop(); + const u32 size = rp.Pop(); + const auto src_data = rp.PopStaticBuffer(); + const auto mask_data = rp.PopStaticBuffer(); IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); - rb.Push(GSP::WriteHWRegsWithMask(reg_addr, size, src_data, mask_data)); + rb.Push(GSP::WriteHWRegsWithMask(reg_addr, size, src_data, mask_data, system.GPU())); } void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) { @@ -270,7 +221,7 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) { return; } - // size should be word-aligned + // Size should be word-aligned if ((size % 4) != 0) { IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); rb.Push(ERR_REGS_MISALIGNED); @@ -279,8 +230,9 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) { } std::vector buffer(size); - for (u32 offset = 0; offset < size; ++offset) { - HW::Read(buffer[offset], 
REGS_BEGIN + reg_addr + offset); + for (u32 word = 0; word < size / sizeof(u32); ++word) { + const u32 data = system.GPU().ReadReg(REGS_BEGIN + reg_addr + word * sizeof(u32)); + std::memcpy(buffer.data() + word * sizeof(u32), &data, sizeof(u32)); } IPC::RequestBuilder rb = rp.MakeBuilder(1, 2); @@ -288,53 +240,15 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) { rb.PushStaticBuffer(std::move(buffer), 0); } -ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) { - u32 base_address = 0x400000; - PAddr phys_address_left = VirtualToPhysicalAddress(info.address_left); - PAddr phys_address_right = VirtualToPhysicalAddress(info.address_right); - if (info.active_fb == 0) { - WriteSingleHWReg(base_address + 4 * static_cast(GPU_FRAMEBUFFER_REG_INDEX( - screen_id, address_left1)), - phys_address_left); - WriteSingleHWReg(base_address + 4 * static_cast(GPU_FRAMEBUFFER_REG_INDEX( - screen_id, address_right1)), - phys_address_right); - } else { - WriteSingleHWReg(base_address + 4 * static_cast(GPU_FRAMEBUFFER_REG_INDEX( - screen_id, address_left2)), - phys_address_left); - WriteSingleHWReg(base_address + 4 * static_cast(GPU_FRAMEBUFFER_REG_INDEX( - screen_id, address_right2)), - phys_address_right); - } - WriteSingleHWReg(base_address + - 4 * static_cast(GPU_FRAMEBUFFER_REG_INDEX(screen_id, stride)), - info.stride); - WriteSingleHWReg(base_address + - 4 * static_cast(GPU_FRAMEBUFFER_REG_INDEX(screen_id, color_format)), - info.format); - WriteSingleHWReg(base_address + - 4 * static_cast(GPU_FRAMEBUFFER_REG_INDEX(screen_id, active_fb)), - info.shown_fb); - - if (Pica::g_debug_context) - Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::BufferSwapped, nullptr); - - if (screen_id == 0) { - MicroProfileFlip(); - Core::System::GetInstance().perf_stats->EndGameFrame(); - } - - return RESULT_SUCCESS; -} - void GSP_GPU::SetBufferSwap(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp(ctx); u32 screen_id = rp.Pop(); auto fb_info = rp.PopRaw(); + 
system.GPU().SetBufferSwap(screen_id, fb_info); + IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); - rb.Push(GSP::SetBufferSwap(screen_id, fb_info)); + rb.Push(RESULT_SUCCESS); } void GSP_GPU::FlushDataCache(Kernel::HLERequestContext& ctx) { @@ -382,10 +296,9 @@ void GSP_GPU::RegisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) { u32 flags = rp.Pop(); auto interrupt_event = rp.PopObject(); - // TODO(mailwl): return right error code instead assert - ASSERT_MSG((interrupt_event != nullptr), "handle is not valid!"); + ASSERT_MSG(interrupt_event, "handle is not valid!"); - interrupt_event->SetName("GSP_GSP_GPU::interrupt_event"); + interrupt_event->SetName("GSP_GPU::interrupt_event"); SessionData* session_data = GetSessionData(ctx.Session()); session_data->interrupt_event = std::move(interrupt_event); @@ -422,15 +335,17 @@ void GSP_GPU::UnregisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) { void GSP_GPU::SignalInterruptForThread(InterruptId interrupt_id, u32 thread_id) { SessionData* session_data = FindRegisteredThreadData(thread_id); - if (session_data == nullptr) + if (!session_data) { return; + } auto interrupt_event = session_data->interrupt_event; if (interrupt_event == nullptr) { LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!"); return; } - InterruptRelayQueue* interrupt_relay_queue = GetInterruptRelayQueue(shared_memory, thread_id); + + auto* interrupt_relay_queue = GetInterruptRelayQueue(thread_id); u8 next = interrupt_relay_queue->index; next += interrupt_relay_queue->number_interrupts; next = next % 0x34; // 0x34 is the number of interrupt slots @@ -441,29 +356,20 @@ void GSP_GPU::SignalInterruptForThread(InterruptId interrupt_id, u32 thread_id) interrupt_relay_queue->error_code = 0x0; // No error // Update framebuffer information if requested - // TODO(yuriks): Confirm where this code should be called. It is definitely updated without - // executing any GSP commands, only waiting on the event. 
- // TODO(Subv): The real GSP module triggers PDC0 after updating both the top and bottom - // screen, it is currently unknown what PDC1 does. - int screen_id = (interrupt_id == InterruptId::PDC0) ? 0 - : (interrupt_id == InterruptId::PDC1) ? 1 - : -1; + const s32 screen_id = (interrupt_id == InterruptId::PDC0) ? 0 + : (interrupt_id == InterruptId::PDC1) ? 1 + : -1; if (screen_id != -1) { - FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); + auto* info = GetFrameBufferInfo(thread_id, screen_id); if (info->is_dirty) { - GSP::SetBufferSwap(screen_id, info->framebuffer_info[info->index]); + system.GPU().SetBufferSwap(screen_id, info->framebuffer_info[info->index]); info->is_dirty.Assign(false); } } + interrupt_event->Signal(); } -/** - * Signals that the specified interrupt type has occurred to userland code - * @param interrupt_id ID of interrupt that is being signalled - * @todo This should probably take a thread_id parameter and only signal this thread? - * @todo This probably does not belong in the GSP module, instead move to video_core - */ void GSP_GPU::SignalInterrupt(InterruptId interrupt_id) { if (nullptr == shared_memory) { LOG_WARNING(Service_GSP, "cannot synchronize until GSP shared memory has been created!"); @@ -488,154 +394,13 @@ void GSP_GPU::SignalInterrupt(InterruptId interrupt_id) { SignalInterruptForThread(interrupt_id, active_thread_id); } -MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255)); - -/// Executes the next GSP command -static void ExecuteCommand(const Command& command, u32 thread_id) { - // Utility function to convert register ID to address - static auto WriteGPURegister = [](u32 id, u32 data) { - GPU::Write(0x1EF00000 + 4 * id, data); - }; - - switch (command.id) { - - // GX request DMA - typically used for copying memory from GSP heap to VRAM - case CommandId::REQUEST_DMA: { - MICROPROFILE_SCOPE(GPU_GSP_DMA); - Memory::MemorySystem& memory = Core::System::GetInstance().Memory(); - - // TODO: 
Consider attempting rasterizer-accelerated surface blit if that usage is ever - // possible/likely - Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address, - command.dma_request.size, Memory::FlushMode::Flush); - Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address, - command.dma_request.size, - Memory::FlushMode::Invalidate); - - // TODO(Subv): These memory accesses should not go through the application's memory mapping. - // They should go through the GSP module's memory mapping. - memory.CopyBlock(*Core::System::GetInstance().Kernel().GetCurrentProcess(), - command.dma_request.dest_address, command.dma_request.source_address, - command.dma_request.size); - SignalInterrupt(InterruptId::DMA); - break; - } - // TODO: This will need some rework in the future. (why?) - case CommandId::SUBMIT_GPU_CMDLIST: { - auto& params = command.submit_gpu_cmdlist; - - if (params.do_flush) { - // This flag flushes the command list (params.address, params.size) from the cache. - // Command lists are not processed by the hardware renderer, so we don't need to - // actually flush them in Citra. - } - - WriteGPURegister(static_cast(GPU_REG_INDEX(command_processor_config.address)), - VirtualToPhysicalAddress(params.address) >> 3); - WriteGPURegister(static_cast(GPU_REG_INDEX(command_processor_config.size)), - params.size); - - // TODO: Not sure if we are supposed to always write this .. seems to trigger processing - // though - WriteGPURegister(static_cast(GPU_REG_INDEX(command_processor_config.trigger)), 1); - - // TODO(yuriks): Figure out the meaning of the `flags` field. - - break; - } - - // It's assumed that the two "blocks" behave equivalently. - // Presumably this is done simply to allow two memory fills to run in parallel. 
- case CommandId::SET_MEMORY_FILL: { - auto& params = command.memory_fill; - - if (params.start1 != 0) { - WriteGPURegister(static_cast(GPU_REG_INDEX(memory_fill_config[0].address_start)), - VirtualToPhysicalAddress(params.start1) >> 3); - WriteGPURegister(static_cast(GPU_REG_INDEX(memory_fill_config[0].address_end)), - VirtualToPhysicalAddress(params.end1) >> 3); - WriteGPURegister(static_cast(GPU_REG_INDEX(memory_fill_config[0].value_32bit)), - params.value1); - WriteGPURegister(static_cast(GPU_REG_INDEX(memory_fill_config[0].control)), - params.control1); - } - - if (params.start2 != 0) { - WriteGPURegister(static_cast(GPU_REG_INDEX(memory_fill_config[1].address_start)), - VirtualToPhysicalAddress(params.start2) >> 3); - WriteGPURegister(static_cast(GPU_REG_INDEX(memory_fill_config[1].address_end)), - VirtualToPhysicalAddress(params.end2) >> 3); - WriteGPURegister(static_cast(GPU_REG_INDEX(memory_fill_config[1].value_32bit)), - params.value2); - WriteGPURegister(static_cast(GPU_REG_INDEX(memory_fill_config[1].control)), - params.control2); - } - break; - } - - case CommandId::SET_DISPLAY_TRANSFER: { - auto& params = command.display_transfer; - WriteGPURegister(static_cast(GPU_REG_INDEX(display_transfer_config.input_address)), - VirtualToPhysicalAddress(params.in_buffer_address) >> 3); - WriteGPURegister(static_cast(GPU_REG_INDEX(display_transfer_config.output_address)), - VirtualToPhysicalAddress(params.out_buffer_address) >> 3); - WriteGPURegister(static_cast(GPU_REG_INDEX(display_transfer_config.input_size)), - params.in_buffer_size); - WriteGPURegister(static_cast(GPU_REG_INDEX(display_transfer_config.output_size)), - params.out_buffer_size); - WriteGPURegister(static_cast(GPU_REG_INDEX(display_transfer_config.flags)), - params.flags); - WriteGPURegister(static_cast(GPU_REG_INDEX(display_transfer_config.trigger)), 1); - break; - } - - case CommandId::SET_TEXTURE_COPY: { - auto& params = command.texture_copy; - 
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address), - VirtualToPhysicalAddress(params.in_buffer_address) >> 3); - WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address), - VirtualToPhysicalAddress(params.out_buffer_address) >> 3); - WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size), - params.size); - WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size), - params.in_width_gap); - WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size), - params.out_width_gap); - WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), params.flags); - - // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to - // matter. - WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1); - break; - } - - case CommandId::CACHE_FLUSH: { - // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers - // Use command.cache_flush.regions to implement this handler - break; - } - - default: - LOG_ERROR(Service_GSP, "unknown command 0x{:08X}", (int)command.id.Value()); - } - - if (Pica::g_debug_context) - Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::GSPCommandProcessed, - (void*)&command); -} - void GSP_GPU::SetLcdForceBlack(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp(ctx); + const bool enable_black = rp.Pop(); - bool enable_black = rp.Pop(); - LCD::Regs::ColorFill data = {0}; - - // Since data is already zeroed, there is no need to explicitly set - // the color to black (all zero). 
+ Pica::ColorFill data{}; data.is_enabled.Assign(enable_black); - - LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top), data.raw); // Top LCD - LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom), data.raw); // Bottom LCD + system.GPU().SetColorFill(data); IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); rb.Push(RESULT_SUCCESS); @@ -644,20 +409,17 @@ void GSP_GPU::SetLcdForceBlack(Kernel::HLERequestContext& ctx) { void GSP_GPU::TriggerCmdReqQueue(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp(ctx); - // Iterate through each thread's command queue... - for (unsigned thread_id = 0; thread_id < 0x4; ++thread_id) { - CommandBuffer* command_buffer = (CommandBuffer*)GetCommandBuffer(shared_memory, thread_id); + // Iterate through each command. + auto* command_buffer = GetCommandBuffer(active_thread_id); + auto& gpu = system.GPU(); + for (u32 i = 0; i < command_buffer->number_commands; i++) { + gpu.Debugger().GXCommandProcessed(command_buffer->commands[i]); - // Iterate through each command... 
- for (unsigned i = 0; i < command_buffer->number_commands; ++i) { - g_debugger.GXCommandProcessed((u8*)&command_buffer->commands[i]); + // Decode and execute command + gpu.Execute(command_buffer->commands[i]); - // Decode and execute command - ExecuteCommand(command_buffer->commands[i], thread_id); - - // Indicates that command has completed - command_buffer->number_commands.Assign(command_buffer->number_commands - 1); - } + // Indicates that command has completed + command_buffer->number_commands.Assign(command_buffer->number_commands - 1); } IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); diff --git a/src/core/hle/service/gsp/gsp_gpu.h b/src/core/hle/service/gsp/gsp_gpu.h index bc3b5017d..30aeaee75 100644 --- a/src/core/hle/service/gsp/gsp_gpu.h +++ b/src/core/hle/service/gsp/gsp_gpu.h @@ -13,7 +13,8 @@ #include "common/common_types.h" #include "core/hle/kernel/event.h" #include "core/hle/kernel/hle_ipc.h" -#include "core/hle/result.h" +#include "core/hle/service/gsp/gsp_command.h" +#include "core/hle/service/gsp/gsp_interrupt.h" #include "core/hle/service/service.h" namespace Core { @@ -28,53 +29,6 @@ class SharedMemory; namespace Service::GSP { -/// GSP interrupt ID -enum class InterruptId : u8 { - PSC0 = 0x00, - PSC1 = 0x01, - PDC0 = 0x02, // Seems called every vertical screen line - PDC1 = 0x03, // Seems called every frame - PPF = 0x04, - P3D = 0x05, - DMA = 0x06, -}; - -/// GSP command ID -enum class CommandId : u32 { - REQUEST_DMA = 0x00, - /// Submits a commandlist for execution by the GPU. - SUBMIT_GPU_CMDLIST = 0x01, - - // Fills a given memory range with a particular value - SET_MEMORY_FILL = 0x02, - - // Copies an image and optionally performs color-conversion or scaling. - // This is highly similar to the GameCube's EFB copy feature - SET_DISPLAY_TRANSFER = 0x03, - - // Conceptionally similar to SET_DISPLAY_TRANSFER and presumable uses the same hardware path - SET_TEXTURE_COPY = 0x04, - /// Flushes up to 3 cache regions in a single command. 
- CACHE_FLUSH = 0x05, -}; - -/// GSP thread interrupt relay queue -struct InterruptRelayQueue { - // Index of last interrupt in the queue - u8 index; - // Number of interrupts remaining to be processed by the userland code - u8 number_interrupts; - // Error code - zero on success, otherwise an error has occurred - u8 error_code; - u8 padding1; - - u32 missed_PDC0; - u32 missed_PDC1; - - InterruptId slot[0x34]; ///< Interrupt ID slots -}; -static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size"); - struct FrameBufferInfo { u32 active_fb; // 0 = first, 1 = second u32 address_left; @@ -96,95 +50,9 @@ struct FrameBufferUpdate { u32 pad2; }; static_assert(sizeof(FrameBufferUpdate) == 0x40, "Struct has incorrect size"); -// TODO: Not sure if this padding is correct. -// Chances are the second block is stored at offset 0x24 rather than 0x20. static_assert(offsetof(FrameBufferUpdate, framebuffer_info[1]) == 0x20, "FrameBufferInfo element has incorrect alignment"); -/// GSP command -struct Command { - BitField<0, 8, CommandId> id; - - union { - struct { - u32 source_address; - u32 dest_address; - u32 size; - } dma_request; - - struct { - u32 address; - u32 size; - u32 flags; - u32 unused[3]; - u32 do_flush; - } submit_gpu_cmdlist; - - struct { - u32 start1; - u32 value1; - u32 end1; - - u32 start2; - u32 value2; - u32 end2; - - u16 control1; - u16 control2; - } memory_fill; - - struct { - u32 in_buffer_address; - u32 out_buffer_address; - u32 in_buffer_size; - u32 out_buffer_size; - u32 flags; - } display_transfer; - - struct { - u32 in_buffer_address; - u32 out_buffer_address; - u32 size; - u32 in_width_gap; - u32 out_width_gap; - u32 flags; - } texture_copy; - - struct { - struct { - u32 address; - u32 size; - } regions[3]; - } cache_flush; - - u8 raw_data[0x1C]; - }; -}; -static_assert(sizeof(Command) == 0x20, "Command struct has incorrect size"); - -/// GSP shared memory GX command buffer header -struct CommandBuffer { - union 
{ - u32 hex; - - // Current command index. This index is updated by GSP module after loading the command - // data, right before the command is processed. When this index is updated by GSP module, - // the total commands field is decreased by one as well. - BitField<0, 8, u32> index; - - // Total commands to process, must not be value 0 when GSP module handles commands. This - // must be <=15 when writing a command to shared memory. This is incremented by the - // application when writing a command to shared memory, after increasing this value - // TriggerCmdReqQueue is only used if this field is value 1. - BitField<8, 8, u32> number_commands; - }; - - u32 unk[7]; - - Command commands[0xF]; -}; -static_assert(sizeof(CommandBuffer) == 0x200, "CommandBuffer struct has incorrect size"); - constexpr u32 FRAMEBUFFER_WIDTH = 240; constexpr u32 FRAMEBUFFER_WIDTH_POW2 = 256; constexpr u32 TOP_FRAMEBUFFER_HEIGHT = 400; @@ -242,6 +110,12 @@ public: */ FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index); + /// Gets a pointer to a thread command buffer in GSP shared memory + CommandBuffer* GetCommandBuffer(u32 thread_id); + + /// Gets a pointer to the interrupt relay queue for a given thread index + InterruptRelayQueue* GetInterruptRelayQueue(u32 thread_id); + /** * Retreives the ID of the thread with GPU rights. */ @@ -513,7 +387,7 @@ private: static constexpr u32 MaxGSPThreads = 4; /// Thread ids currently in use by the sessions connected to the GSPGPU service. 
- std::array used_thread_ids = {false, false, false, false}; + std::array used_thread_ids{}; friend class SessionData; @@ -522,8 +396,6 @@ private: friend class boost::serialization::access; }; -ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info); - } // namespace Service::GSP BOOST_CLASS_EXPORT_KEY(Service::GSP::SessionData) diff --git a/src/core/hle/service/gsp/gsp_interrupt.h b/src/core/hle/service/gsp/gsp_interrupt.h new file mode 100644 index 000000000..db2b584a6 --- /dev/null +++ b/src/core/hle/service/gsp/gsp_interrupt.h @@ -0,0 +1,42 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/common_types.h" + +namespace Service::GSP { + +/// GSP interrupt ID +enum class InterruptId : u8 { + PSC0 = 0x00, + PSC1 = 0x01, + PDC0 = 0x02, + PDC1 = 0x03, + PPF = 0x04, + P3D = 0x05, + DMA = 0x06, +}; + +/// GSP thread interrupt relay queue +struct InterruptRelayQueue { + // Index of last interrupt in the queue + u8 index; + // Number of interrupts remaining to be processed by the userland code + u8 number_interrupts; + // Error code - zero on success, otherwise an error has occurred + u8 error_code; + u8 padding1; + + u32 missed_PDC0; + u32 missed_PDC1; + + InterruptId slot[0x34]; ///< Interrupt ID slots +}; +static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size"); + +using InterruptHandler = std::function; + +} // namespace Service::GSP diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index e2b0af35b..1fed2e516 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -22,7 +22,6 @@ #include "core/hle/service/hid/hid_user.h" #include "core/hle/service/service.h" #include "core/movie.h" -#include "video_core/video_core.h" SERVICE_CONSTRUCT_IMPL(Service::HID::Module) SERIALIZE_EXPORT_IMPL(Service::HID::Module) diff --git 
a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp deleted file mode 100644 index 156fb4b14..000000000 --- a/src/core/hw/gpu.cpp +++ /dev/null @@ -1,572 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include "common/alignment.h" -#include "common/color.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "common/vector_math.h" -#include "core/core.h" -#include "core/core_timing.h" -#include "core/hle/service/gsp/gsp.h" -#include "core/hw/gpu.h" -#include "core/hw/hw.h" -#include "core/memory.h" -#include "core/tracer/recorder.h" -#include "video_core/command_processor.h" -#include "video_core/debug_utils/debug_utils.h" -#include "video_core/rasterizer_interface.h" -#include "video_core/renderer_base.h" -#include "video_core/utils.h" -#include "video_core/video_core.h" - -namespace GPU { - -Regs g_regs; -Memory::MemorySystem* g_memory; - -/// Event id for CoreTiming -static Core::TimingEventType* vblank_event; - -template -inline void Read(T& var, const u32 raw_addr) { - u32 addr = raw_addr - HW::VADDR_GPU; - u32 index = addr / 4; - - // Reads other than u32 are untested, so I'd rather have them abort than silently fail - if (index >= Regs::NumIds() || !std::is_same::value) { - LOG_ERROR(HW_GPU, "unknown Read{} @ {:#010X}", sizeof(var) * 8, addr); - return; - } - - var = g_regs[addr / 4]; -} - -static Common::Vec4 DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) { - switch (input_format) { - case Regs::PixelFormat::RGBA8: - return Common::Color::DecodeRGBA8(src_pixel); - - case Regs::PixelFormat::RGB8: - return Common::Color::DecodeRGB8(src_pixel); - - case Regs::PixelFormat::RGB565: - return Common::Color::DecodeRGB565(src_pixel); - - case Regs::PixelFormat::RGB5A1: - return Common::Color::DecodeRGB5A1(src_pixel); - - case Regs::PixelFormat::RGBA4: - return 
Common::Color::DecodeRGBA4(src_pixel); - - default: - LOG_ERROR(HW_GPU, "Unknown source framebuffer format {:x}", input_format); - return {0, 0, 0, 0}; - } -} - -MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255)); -MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100)); - -static void MemoryFill(const Regs::MemoryFillConfig& config) { - const PAddr start_addr = config.GetStartAddress(); - const PAddr end_addr = config.GetEndAddress(); - - // TODO: do hwtest with these cases - if (!g_memory->IsValidPhysicalAddress(start_addr)) { - LOG_CRITICAL(HW_GPU, "invalid start address {:#010X}", start_addr); - return; - } - - if (!g_memory->IsValidPhysicalAddress(end_addr)) { - LOG_CRITICAL(HW_GPU, "invalid end address {:#010X}", end_addr); - return; - } - - if (end_addr <= start_addr) { - LOG_CRITICAL(HW_GPU, "invalid memory range from {:#010X} to {:#010X}", start_addr, - end_addr); - return; - } - - u8* start = g_memory->GetPhysicalPointer(start_addr); - u8* end = g_memory->GetPhysicalPointer(end_addr); - - if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) - return; - - Memory::RasterizerInvalidateRegion(config.GetStartAddress(), - config.GetEndAddress() - config.GetStartAddress()); - - if (config.fill_24bit) { - // fill with 24-bit values - for (u8* ptr = start; ptr < end; ptr += 3) { - ptr[0] = config.value_24bit_r; - ptr[1] = config.value_24bit_g; - ptr[2] = config.value_24bit_b; - } - } else if (config.fill_32bit) { - // fill with 32-bit values - if (end > start) { - u32 value = config.value_32bit; - std::size_t len = (end - start) / sizeof(u32); - for (std::size_t i = 0; i < len; ++i) - std::memcpy(&start[i * sizeof(u32)], &value, sizeof(u32)); - } - } else { - // fill with 16-bit values - u16 value_16bit = config.value_16bit.Value(); - for (u8* ptr = start; ptr < end; ptr += sizeof(u16)) - std::memcpy(ptr, &value_16bit, sizeof(u16)); - } -} - -static void DisplayTransfer(const 
Regs::DisplayTransferConfig& config) { - const PAddr src_addr = config.GetPhysicalInputAddress(); - PAddr dst_addr = config.GetPhysicalOutputAddress(); - - // TODO: do hwtest with these cases - if (!g_memory->IsValidPhysicalAddress(src_addr)) { - LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr); - return; - } - - if (!g_memory->IsValidPhysicalAddress(dst_addr)) { - LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr); - return; - } - - if (config.input_width == 0) { - LOG_CRITICAL(HW_GPU, "zero input width"); - return; - } - - if (config.input_height == 0) { - LOG_CRITICAL(HW_GPU, "zero input height"); - return; - } - - if (config.output_width == 0) { - LOG_CRITICAL(HW_GPU, "zero output width"); - return; - } - - if (config.output_height == 0) { - LOG_CRITICAL(HW_GPU, "zero output height"); - return; - } - - if (VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) - return; - - // Using flip_vertically alongside crop_input_lines produces skewed output on hardware. - // We have to emulate this because some games rely on this behaviour to render correctly. - if (config.flip_vertically && config.crop_input_lines && - config.input_width > config.output_width) { - dst_addr += (config.input_width - config.output_width) * (config.output_height - 1) * - GPU::Regs::BytesPerPixel(config.output_format); - } - - u8* src_pointer = g_memory->GetPhysicalPointer(src_addr); - u8* dst_pointer = g_memory->GetPhysicalPointer(dst_addr); - - if (config.scaling > config.ScaleXY) { - LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode {}", - config.scaling.Value()); - UNIMPLEMENTED(); - return; - } - - if (config.input_linear && config.scaling != config.NoScale) { - LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); - UNIMPLEMENTED(); - return; - } - - int horizontal_scale = config.scaling != config.NoScale ? 1 : 0; - int vertical_scale = config.scaling == config.ScaleXY ? 
1 : 0; - - u32 output_width = config.output_width >> horizontal_scale; - u32 output_height = config.output_height >> vertical_scale; - - u32 input_size = - config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); - u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); - - Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); - Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); - - for (u32 y = 0; y < output_height; ++y) { - for (u32 x = 0; x < output_width; ++x) { - Common::Vec4 src_color; - - // Calculate the [x,y] position of the input image - // based on the current output position and the scale - u32 input_x = x << horizontal_scale; - u32 input_y = y << vertical_scale; - - u32 output_y; - if (config.flip_vertically) { - // Flip the y value of the output data, - // we do this after calculating the [x,y] position of the input image - // to account for the scaling options. 
- output_y = output_height - y - 1; - } else { - output_y = y; - } - - u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); - u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); - u32 src_offset; - u32 dst_offset; - - if (config.input_linear) { - if (!config.dont_swizzle) { - // Interpret the input as linear and the output as tiled - u32 coarse_y = output_y & ~7; - u32 stride = output_width * dst_bytes_per_pixel; - - src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; - dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) + - coarse_y * stride; - } else { - // Both input and output are linear - src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; - dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel; - } - } else { - if (!config.dont_swizzle) { - // Interpret the input as tiled and the output as linear - u32 coarse_y = input_y & ~7; - u32 stride = config.input_width * src_bytes_per_pixel; - - src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + - coarse_y * stride; - dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel; - } else { - // Both input and output are tiled - u32 out_coarse_y = output_y & ~7; - u32 out_stride = output_width * dst_bytes_per_pixel; - - u32 in_coarse_y = input_y & ~7; - u32 in_stride = config.input_width * src_bytes_per_pixel; - - src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + - in_coarse_y * in_stride; - dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) + - out_coarse_y * out_stride; - } - } - - const u8* src_pixel = src_pointer + src_offset; - src_color = DecodePixel(config.input_format, src_pixel); - if (config.scaling == config.ScaleX) { - Common::Vec4 pixel = - DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); - src_color = ((src_color + pixel) / 2).Cast(); - } else if (config.scaling == 
config.ScaleXY) { - Common::Vec4 pixel1 = - DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); - Common::Vec4 pixel2 = - DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); - Common::Vec4 pixel3 = - DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); - src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast(); - } - - u8* dst_pixel = dst_pointer + dst_offset; - switch (config.output_format) { - case Regs::PixelFormat::RGBA8: - Common::Color::EncodeRGBA8(src_color, dst_pixel); - break; - - case Regs::PixelFormat::RGB8: - Common::Color::EncodeRGB8(src_color, dst_pixel); - break; - - case Regs::PixelFormat::RGB565: - Common::Color::EncodeRGB565(src_color, dst_pixel); - break; - - case Regs::PixelFormat::RGB5A1: - Common::Color::EncodeRGB5A1(src_color, dst_pixel); - break; - - case Regs::PixelFormat::RGBA4: - Common::Color::EncodeRGBA4(src_color, dst_pixel); - break; - - default: - LOG_ERROR(HW_GPU, "Unknown destination framebuffer format {:x}", - static_cast(config.output_format.Value())); - break; - } - } - } -} - -static void TextureCopy(const Regs::DisplayTransferConfig& config) { - const PAddr src_addr = config.GetPhysicalInputAddress(); - const PAddr dst_addr = config.GetPhysicalOutputAddress(); - - // TODO: do hwtest with invalid addresses - if (!g_memory->IsValidPhysicalAddress(src_addr)) { - LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr); - return; - } - - if (!g_memory->IsValidPhysicalAddress(dst_addr)) { - LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr); - return; - } - - if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config)) - return; - - u8* src_pointer = g_memory->GetPhysicalPointer(src_addr); - u8* dst_pointer = g_memory->GetPhysicalPointer(dst_addr); - - u32 remaining_size = Common::AlignDown(config.texture_copy.size, 16); - - if (remaining_size == 0) { - LOG_CRITICAL(HW_GPU, "zero size. 
Real hardware freezes on this."); - return; - } - - u32 input_gap = config.texture_copy.input_gap * 16; - u32 output_gap = config.texture_copy.output_gap * 16; - - // Zero gap means contiguous input/output even if width = 0. To avoid infinite loop below, width - // is assigned with the total size if gap = 0. - u32 input_width = input_gap == 0 ? remaining_size : config.texture_copy.input_width * 16; - u32 output_width = output_gap == 0 ? remaining_size : config.texture_copy.output_width * 16; - - if (input_width == 0) { - LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this."); - return; - } - - if (output_width == 0) { - LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this."); - return; - } - - std::size_t contiguous_input_size = - config.texture_copy.size / input_width * (input_width + input_gap); - Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), - static_cast(contiguous_input_size)); - - std::size_t contiguous_output_size = - config.texture_copy.size / output_width * (output_width + output_gap); - // Only need to flush output if it has a gap - const auto FlushInvalidate_fn = (output_gap != 0) ? 
Memory::RasterizerFlushAndInvalidateRegion - : Memory::RasterizerInvalidateRegion; - FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast(contiguous_output_size)); - - u32 remaining_input = input_width; - u32 remaining_output = output_width; - while (remaining_size > 0) { - u32 copy_size = std::min({remaining_input, remaining_output, remaining_size}); - - std::memcpy(dst_pointer, src_pointer, copy_size); - src_pointer += copy_size; - dst_pointer += copy_size; - - remaining_input -= copy_size; - remaining_output -= copy_size; - remaining_size -= copy_size; - - if (remaining_input == 0) { - remaining_input = input_width; - src_pointer += input_gap; - } - if (remaining_output == 0) { - remaining_output = output_width; - dst_pointer += output_gap; - } - } -} - -template -inline void Write(u32 addr, const T data) { - addr -= HW::VADDR_GPU; - u32 index = addr / 4; - - // Writes other than u32 are untested, so I'd rather have them abort than silently fail - if (index >= Regs::NumIds() || !std::is_same::value) { - LOG_ERROR(HW_GPU, "unknown Write{} {:#010X} @ {:#010X}", sizeof(data) * 8, (u32)data, addr); - return; - } - - g_regs[index] = static_cast(data); - - switch (index) { - - // Memory fills are triggered once the fill value is written. - case GPU_REG_INDEX(memory_fill_config[0].trigger): - case GPU_REG_INDEX(memory_fill_config[1].trigger): { - const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); - auto& config = g_regs.memory_fill_config[is_second_filler]; - - if (config.trigger) { - MemoryFill(config); - LOG_TRACE(HW_GPU, "MemoryFill from {:#010X} to {:#010X}", config.GetStartAddress(), - config.GetEndAddress()); - - // It seems that it won't signal interrupt if "address_start" is zero. 
- // TODO: hwtest this - if (config.GetStartAddress() != 0) { - if (!is_second_filler) { - Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC0); - } else { - Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC1); - } - } - - // Reset "trigger" flag and set the "finish" flag - // NOTE: This was confirmed to happen on hardware even if "address_start" is zero. - config.trigger.Assign(0); - config.finished.Assign(1); - } - break; - } - - case GPU_REG_INDEX(display_transfer_config.trigger): { - MICROPROFILE_SCOPE(GPU_DisplayTransfer); - - const auto& config = g_regs.display_transfer_config; - if (config.trigger & 1) { - - if (Pica::g_debug_context) - Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, - nullptr); - - if (config.is_texture_copy) { - TextureCopy(config); - LOG_TRACE(HW_GPU, - "TextureCopy: {:#X} bytes from {:#010X}({}+{})-> " - "{:#010X}({}+{}), flags {:#010X}", - config.texture_copy.size, config.GetPhysicalInputAddress(), - config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16, - config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16, - config.texture_copy.output_gap * 16, config.flags); - } else { - DisplayTransfer(config); - LOG_TRACE(HW_GPU, - "DisplayTransfer: {:#010X}({}x{})-> " - "{:#010X}({}x{}), dst format {:x}, flags {:#010X}", - config.GetPhysicalInputAddress(), config.input_width.Value(), - config.input_height.Value(), config.GetPhysicalOutputAddress(), - config.output_width.Value(), config.output_height.Value(), - static_cast(config.output_format.Value()), config.flags); - } - - g_regs.display_transfer_config.trigger = 0; - Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PPF); - } - break; - } - - // Seems like writing to this register triggers processing - case GPU_REG_INDEX(command_processor_config.trigger): { - const auto& config = g_regs.command_processor_config; - if (config.trigger & 1) { - MICROPROFILE_SCOPE(GPU_CmdlistProcessing); - - 
Pica::CommandProcessor::ProcessCommandList(config.GetPhysicalAddress(), config.size); - - g_regs.command_processor_config.trigger = 0; - } - break; - } - - default: - break; - } - - // Notify tracer about the register write - // This is happening *after* handling the write to make sure we properly catch all memory reads. - if (Pica::g_debug_context && Pica::g_debug_context->recorder) { - // addr + GPU VBase - IO VBase + IO PBase - Pica::g_debug_context->recorder->RegisterWritten( - addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); - } -} - -// Explicitly instantiate template functions because we aren't defining this in the header: - -template void Read(u64& var, const u32 addr); -template void Read(u32& var, const u32 addr); -template void Read(u16& var, const u32 addr); -template void Read(u8& var, const u32 addr); - -template void Write(u32 addr, const u64 data); -template void Write(u32 addr, const u32 data); -template void Write(u32 addr, const u16 data); -template void Write(u32 addr, const u8 data); - -/// Update hardware -static void VBlankCallback(std::uintptr_t user_data, s64 cycles_late) { - VideoCore::g_renderer->SwapBuffers(); - - // Signal to GSP that GPU interrupt has occurred - // TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub - // screen, or if both use the same interrupts and these two instead determine the - // beginning and end of the VBlank period. If needed, split the interrupt firing into - // two different intervals. 
- Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0); - Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1); - - // Reschedule recurrent event - Core::System::GetInstance().CoreTiming().ScheduleEvent(frame_ticks - cycles_late, vblank_event); -} - -/// Initialize hardware -void Init(Memory::MemorySystem& memory) { - g_memory = &memory; - std::memset(&g_regs, 0, sizeof(g_regs)); - - auto& framebuffer_top = g_regs.framebuffer_config[0]; - auto& framebuffer_sub = g_regs.framebuffer_config[1]; - - // Setup default framebuffer addresses (located in VRAM) - // .. or at least these are the ones used by system applets. - // There's probably a smarter way to come up with addresses - // like this which does not require hardcoding. - framebuffer_top.address_left1 = 0x181E6000; - framebuffer_top.address_left2 = 0x1822C800; - framebuffer_top.address_right1 = 0x18273000; - framebuffer_top.address_right2 = 0x182B9800; - framebuffer_sub.address_left1 = 0x1848F000; - framebuffer_sub.address_left2 = 0x184C7800; - - framebuffer_top.width.Assign(240); - framebuffer_top.height.Assign(400); - framebuffer_top.stride = 3 * 240; - framebuffer_top.color_format.Assign(Regs::PixelFormat::RGB8); - framebuffer_top.active_fb = 0; - - framebuffer_sub.width.Assign(240); - framebuffer_sub.height.Assign(320); - framebuffer_sub.stride = 3 * 240; - framebuffer_sub.color_format.Assign(Regs::PixelFormat::RGB8); - framebuffer_sub.active_fb = 0; - - Core::Timing& timing = Core::System::GetInstance().CoreTiming(); - vblank_event = timing.RegisterEvent("GPU::VBlankCallback", VBlankCallback); - timing.ScheduleEvent(frame_ticks, vblank_event); - - LOG_DEBUG(HW_GPU, "initialized OK"); -} - -/// Shutdown hardware -void Shutdown() { - LOG_DEBUG(HW_GPU, "shutdown OK"); -} - -} // namespace GPU diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h deleted file mode 100644 index 418ab9747..000000000 --- a/src/core/hw/gpu.h +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright 2014 Citra Emulator Project 
-// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" -#include "core/core_timing.h" - -namespace Memory { -class MemorySystem; -} - -namespace GPU { - -// Measured on hardware to be 2240568 timer cycles or 4481136 ARM11 cycles -constexpr u64 frame_ticks = 4481136ull; - -// Refresh rate defined by ratio of ARM11 frequency to ARM11 ticks per frame -// (268,111,856) / (4,481,136) = 59.83122493939037Hz -constexpr double SCREEN_REFRESH_RATE = BASE_CLOCK_RATE_ARM11 / static_cast(frame_ticks); - -// Returns index corresponding to the Regs member labeled by field_name -#define GPU_REG_INDEX(field_name) (offsetof(GPU::Regs, field_name) / sizeof(u32)) - -// Returns index corresponding to the Regs::FramebufferConfig labeled by field_name -// screen_id is a subscript for Regs::framebuffer_config -#define GPU_FRAMEBUFFER_REG_INDEX(screen_id, field_name) \ - ((offsetof(GPU::Regs, framebuffer_config) + \ - sizeof(GPU::Regs::FramebufferConfig) * (screen_id) + \ - offsetof(GPU::Regs::FramebufferConfig, field_name)) / \ - sizeof(u32)) - -// MMIO region 0x1EFxxxxx -struct Regs { - -// helper macro to make sure the defined structures are of the expected size. -#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ - static_assert(sizeof(name) == size_in_bytes, \ - "Structure size and register block length don't match") - - // Components are laid out in reverse byte order, most significant bits first. - enum class PixelFormat : u32 { - RGBA8 = 0, - RGB8 = 1, - RGB565 = 2, - RGB5A1 = 3, - RGBA4 = 4, - }; - - /** - * Returns the number of bytes per pixel. 
- */ - static int BytesPerPixel(PixelFormat format) { - switch (format) { - case PixelFormat::RGBA8: - return 4; - case PixelFormat::RGB8: - return 3; - case PixelFormat::RGB565: - case PixelFormat::RGB5A1: - case PixelFormat::RGBA4: - return 2; - default: - UNREACHABLE(); - } - - return 0; - } - - INSERT_PADDING_WORDS(0x4); - - struct MemoryFillConfig { - u32 address_start; - u32 address_end; - - union { - u32 value_32bit; - - BitField<0, 16, u32> value_16bit; - - // TODO: Verify component order - BitField<0, 8, u32> value_24bit_r; - BitField<8, 8, u32> value_24bit_g; - BitField<16, 8, u32> value_24bit_b; - }; - - union { - u32 control; - - // Setting this field to 1 triggers the memory fill. - // This field also acts as a status flag, and gets reset to 0 upon completion. - BitField<0, 1, u32> trigger; - - // Set to 1 upon completion. - BitField<1, 1, u32> finished; - - // If both of these bits are unset, then it will fill the memory with a 16 bit value - // 1: fill with 24-bit wide values - BitField<8, 1, u32> fill_24bit; - // 1: fill with 32-bit wide values - BitField<9, 1, u32> fill_32bit; - }; - - inline u32 GetStartAddress() const { - return DecodeAddressRegister(address_start); - } - - inline u32 GetEndAddress() const { - return DecodeAddressRegister(address_end); - } - - inline std::string DebugName() const { - return fmt::format("from {:#X} to {:#X} with {}-bit value {:#X}", GetStartAddress(), - GetEndAddress(), fill_32bit ? "32" : (fill_24bit ? 
"24" : "16"), - value_32bit); - } - } memory_fill_config[2]; - ASSERT_MEMBER_SIZE(memory_fill_config[0], 0x10); - - INSERT_PADDING_WORDS(0x10b); - - struct FramebufferConfig { - union { - u32 size; - - BitField<0, 16, u32> width; - BitField<16, 16, u32> height; - }; - - INSERT_PADDING_WORDS(0x2); - - u32 address_left1; - u32 address_left2; - - union { - u32 format; - - BitField<0, 3, PixelFormat> color_format; - }; - - INSERT_PADDING_WORDS(0x1); - - union { - u32 active_fb; - - // 0: Use parameters ending with "1" - // 1: Use parameters ending with "2" - BitField<0, 1, u32> second_fb_active; - }; - - INSERT_PADDING_WORDS(0x5); - - // Distance between two pixel rows, in bytes - u32 stride; - - u32 address_right1; - u32 address_right2; - - INSERT_PADDING_WORDS(0x30); - } framebuffer_config[2]; - ASSERT_MEMBER_SIZE(framebuffer_config[0], 0x100); - - INSERT_PADDING_WORDS(0x169); - - struct DisplayTransferConfig { - u32 input_address; - u32 output_address; - - inline u32 GetPhysicalInputAddress() const { - return DecodeAddressRegister(input_address); - } - - inline u32 GetPhysicalOutputAddress() const { - return DecodeAddressRegister(output_address); - } - - inline std::string DebugName() const noexcept { - return fmt::format("from {:#x} to {:#x} with {} scaling and stride {}, width {}", - GetPhysicalInputAddress(), GetPhysicalOutputAddress(), - scaling == NoScale ? "no" : (scaling == ScaleX ? 
"X" : "XY"), - input_width.Value(), output_width.Value()); - } - - union { - u32 output_size; - - BitField<0, 16, u32> output_width; - BitField<16, 16, u32> output_height; - }; - - union { - u32 input_size; - - BitField<0, 16, u32> input_width; - BitField<16, 16, u32> input_height; - }; - - enum ScalingMode : u32 { - NoScale = 0, // Doesn't scale the image - ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter - ScaleXY = - 2, // Downscales the image in half in both the X and Y axes and applies a box filter - }; - - union { - u32 flags; - - BitField<0, 1, u32> flip_vertically; // flips input data vertically - BitField<1, 1, u32> input_linear; // Converts from linear to tiled format - BitField<2, 1, u32> crop_input_lines; - BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any - // processing and respecting texture copy fields - BitField<5, 1, u32> dont_swizzle; - BitField<8, 3, PixelFormat> input_format; - BitField<12, 3, PixelFormat> output_format; - /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. 
- BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented - BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer - }; - - INSERT_PADDING_WORDS(0x1); - - // it seems that writing to this field triggers the display transfer - u32 trigger; - - INSERT_PADDING_WORDS(0x1); - - struct { - u32 size; // The lower 4 bits are ignored - - union { - u32 input_size; - - BitField<0, 16, u32> input_width; - BitField<16, 16, u32> input_gap; - }; - - union { - u32 output_size; - - BitField<0, 16, u32> output_width; - BitField<16, 16, u32> output_gap; - }; - } texture_copy; - } display_transfer_config; - ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c); - - INSERT_PADDING_WORDS(0x32D); - - struct { - // command list size (in bytes) - u32 size; - - INSERT_PADDING_WORDS(0x1); - - // command list address - u32 address; - - INSERT_PADDING_WORDS(0x1); - - // it seems that writing to this field triggers command list processing - u32 trigger; - - inline u32 GetPhysicalAddress() const { - return DecodeAddressRegister(address); - } - } command_processor_config; - ASSERT_MEMBER_SIZE(command_processor_config, 0x14); - - INSERT_PADDING_WORDS(0x9c3); - - static constexpr std::size_t NumIds() { - return sizeof(Regs) / sizeof(u32); - } - - const u32& operator[](int index) const { - const u32* content = reinterpret_cast(this); - return content[index]; - } - - u32& operator[](int index) { - u32* content = reinterpret_cast(this); - return content[index]; - } - -#undef ASSERT_MEMBER_SIZE - -private: - /* - * Most physical addresses which GPU registers refer to are 8-byte aligned. - * This function should be used to get the address from a raw register value. 
- */ - static inline u32 DecodeAddressRegister(u32 register_value) { - return register_value * 8; - } - - template - void serialize(Archive& ar, const unsigned int) { - ar& boost::serialization::make_binary_object(this, sizeof(Regs)); - } - friend class boost::serialization::access; -}; -static_assert(std::is_standard_layout::value, "Structure does not use standard layout"); - -#define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Regs, field_name) == position * 4, \ - "Field " #field_name " has invalid position") - -ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); -ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); -ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); -ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); -ASSERT_REG_POSITION(display_transfer_config, 0x00300); -ASSERT_REG_POSITION(command_processor_config, 0x00638); - -#undef ASSERT_REG_POSITION - -// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value -// anyway. -static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set"); - -extern Regs g_regs; - -template -void Read(T& var, const u32 addr); - -template -void Write(u32 addr, const T data); - -/// Initialize hardware -void Init(Memory::MemorySystem& memory); - -/// Shutdown hardware -void Shutdown(); - -} // namespace GPU diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp deleted file mode 100644 index e7d80a5c7..000000000 --- a/src/core/hw/hw.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/common_types.h" -#include "common/logging/log.h" -#include "core/hw/aes/key.h" -#include "core/hw/gpu.h" -#include "core/hw/hw.h" -#include "core/hw/lcd.h" - -namespace HW { - -template -inline void Read(T& var, const u32 addr) { - switch (addr & 0xFFFFF000) { - case VADDR_GPU: - case VADDR_GPU + 0x1000: - case VADDR_GPU + 0x2000: - case VADDR_GPU + 0x3000: - case VADDR_GPU + 0x4000: - case VADDR_GPU + 0x5000: - case VADDR_GPU + 0x6000: - case VADDR_GPU + 0x7000: - case VADDR_GPU + 0x8000: - case VADDR_GPU + 0x9000: - case VADDR_GPU + 0xA000: - case VADDR_GPU + 0xB000: - case VADDR_GPU + 0xC000: - case VADDR_GPU + 0xD000: - case VADDR_GPU + 0xE000: - case VADDR_GPU + 0xF000: - GPU::Read(var, addr); - break; - case VADDR_LCD: - LCD::Read(var, addr); - break; - default: - LOG_ERROR(HW_Memory, "unknown Read{} @ {:#010X}", sizeof(var) * 8, addr); - } -} - -template -inline void Write(u32 addr, const T data) { - switch (addr & 0xFFFFF000) { - case VADDR_GPU: - case VADDR_GPU + 0x1000: - case VADDR_GPU + 0x2000: - case VADDR_GPU + 0x3000: - case VADDR_GPU + 0x4000: - case VADDR_GPU + 0x5000: - case VADDR_GPU + 0x6000: - case VADDR_GPU + 0x7000: - case VADDR_GPU + 0x8000: - case VADDR_GPU + 0x9000: - case VADDR_GPU + 0xA000: - case VADDR_GPU + 0xB000: - case VADDR_GPU + 0xC000: - case VADDR_GPU + 0xD000: - case VADDR_GPU + 0xE000: - case VADDR_GPU + 0xF000: - GPU::Write(addr, data); - break; - case VADDR_LCD: - LCD::Write(addr, data); - break; - default: - LOG_ERROR(HW_Memory, "unknown Write{} {:#010X} @ {:#010X}", sizeof(data) * 8, (u32)data, - addr); - } -} - -// Explicitly instantiate template functions because we aren't defining this in the header: - -template void Read(u64& var, const u32 addr); -template void Read(u32& var, const u32 addr); -template void Read(u16& var, const u32 addr); -template void Read(u8& var, const u32 addr); - -template void Write(u32 addr, const u64 data); -template void Write(u32 addr, const u32 data); -template void 
Write(u32 addr, const u16 data); -template void Write(u32 addr, const u8 data); - -/// Update hardware -void Update() {} - -/// Initialize hardware -void Init(Memory::MemorySystem& memory) { - AES::InitKeys(); - GPU::Init(memory); - LCD::Init(); - LOG_DEBUG(HW, "initialized OK"); -} - -/// Shutdown hardware -void Shutdown() { - GPU::Shutdown(); - LCD::Shutdown(); - LOG_DEBUG(HW, "shutdown OK"); -} -} // namespace HW diff --git a/src/core/hw/hw.h b/src/core/hw/hw.h deleted file mode 100644 index cc7b04294..000000000 --- a/src/core/hw/hw.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -namespace Memory { -class MemorySystem; -} - -namespace HW { - -/// Beginnings of IO register regions, in the user VA space. -enum : u32 { - VADDR_HASH = 0x1EC01000, - VADDR_CSND = 0x1EC03000, - VADDR_DSP = 0x1EC40000, - VADDR_PDN = 0x1EC41000, - VADDR_CODEC = 0x1EC41000, - VADDR_SPI = 0x1EC42000, - VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM? 
- VADDR_I2C = 0x1EC44000, - VADDR_CODEC_2 = 0x1EC45000, - VADDR_HID = 0x1EC46000, - VADDR_GPIO = 0x1EC47000, - VADDR_I2C_2 = 0x1EC48000, - VADDR_SPI_3 = 0x1EC60000, - VADDR_I2C_3 = 0x1EC61000, - VADDR_MIC = 0x1EC62000, - VADDR_PXI = 0x1EC63000, - VADDR_LCD = 0x1ED02000, - VADDR_DSP_2 = 0x1ED03000, - VADDR_HASH_2 = 0x1EE01000, - VADDR_GPU = 0x1EF00000, -}; - -template -void Read(T& var, const u32 addr); - -template -void Write(u32 addr, const T data); - -/// Update hardware -void Update(); - -/// Initialize hardware -void Init(Memory::MemorySystem& memory); - -/// Shutdown hardware -void Shutdown(); - -} // namespace HW diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp deleted file mode 100644 index 0eabf9507..000000000 --- a/src/core/hw/lcd.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include "common/common_types.h" -#include "common/logging/log.h" -#include "core/hw/hw.h" -#include "core/hw/lcd.h" -#include "core/tracer/recorder.h" -#include "video_core/debug_utils/debug_utils.h" - -namespace LCD { - -Regs g_regs; - -template -inline void Read(T& var, const u32 raw_addr) { - u32 addr = raw_addr - HW::VADDR_LCD; - u32 index = addr / 4; - - // Reads other than u32 are untested, so I'd rather have them abort than silently fail - if (index >= 0x400 || !std::is_same::value) { - LOG_ERROR(HW_LCD, "unknown Read{} @ {:#010X}", sizeof(var) * 8, addr); - return; - } - - var = g_regs[index]; -} - -template -inline void Write(u32 addr, const T data) { - addr -= HW::VADDR_LCD; - u32 index = addr / 4; - - // Writes other than u32 are untested, so I'd rather have them abort than silently fail - if (index >= 0x400 || !std::is_same::value) { - LOG_ERROR(HW_LCD, "unknown Write{} {:#010X} @ {:#010X}", sizeof(data) * 8, (u32)data, addr); - return; - } - - g_regs[index] = static_cast(data); - - // Notify tracer about the register write - // This 
is happening *after* handling the write to make sure we properly catch all memory reads. - if (Pica::g_debug_context && Pica::g_debug_context->recorder) { - // addr + GPU VBase - IO VBase + IO PBase - Pica::g_debug_context->recorder->RegisterWritten( - addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data); - } -} - -// Explicitly instantiate template functions because we aren't defining this in the header: - -template void Read(u64& var, const u32 addr); -template void Read(u32& var, const u32 addr); -template void Read(u16& var, const u32 addr); -template void Read(u8& var, const u32 addr); - -template void Write(u32 addr, const u64 data); -template void Write(u32 addr, const u32 data); -template void Write(u32 addr, const u16 data); -template void Write(u32 addr, const u8 data); - -/// Initialize hardware -void Init() { - std::memset(&g_regs, 0, sizeof(g_regs)); - LOG_DEBUG(HW_LCD, "initialized OK"); -} - -/// Shutdown hardware -void Shutdown() { - LOG_DEBUG(HW_LCD, "shutdown OK"); -} - -} // namespace LCD diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp index 5ea646924..dab2bdcca 100644 --- a/src/core/hw/y2r.cpp +++ b/src/core/hw/y2r.cpp @@ -9,7 +9,7 @@ #include "common/assert.h" #include "common/color.h" #include "common/common_types.h" -#include "common/microprofileui.h" +#include "common/microprofile.h" #include "common/vector_math.h" #include "core/core.h" #include "core/hle/service/cam/y2r_u.h" diff --git a/src/core/memory.cpp b/src/core/memory.cpp index a57d0d265..0f2ef021d 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -19,10 +19,9 @@ #include "core/global.h" #include "core/hle/kernel/process.h" #include "core/hle/service/plgldr/plgldr.h" -#include "core/hw/hw.h" #include "core/memory.h" +#include "video_core/gpu.h" #include "video_core/renderer_base.h" -#include "video_core/video_core.h" SERIALIZE_EXPORT_IMPL(Memory::MemorySystem::BackingMemImpl) SERIALIZE_EXPORT_IMPL(Memory::MemorySystem::BackingMemImpl) @@ -346,13 +345,52 @@ 
std::shared_ptr MemorySystem::GetCurrentPageTable() const { return impl->current_page_table; } +void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { + const VAddr end = start + size; + + auto CheckRegion = [&](VAddr region_start, VAddr region_end, PAddr paddr_region_start) { + if (start >= region_end || end <= region_start) { + // No overlap with region + return; + } + + auto& renderer = Core::System::GetInstance().GPU().Renderer(); + VAddr overlap_start = std::max(start, region_start); + VAddr overlap_end = std::min(end, region_end); + PAddr physical_start = paddr_region_start + (overlap_start - region_start); + u32 overlap_size = overlap_end - overlap_start; + + auto* rasterizer = renderer.Rasterizer(); + switch (mode) { + case FlushMode::Flush: + rasterizer->FlushRegion(physical_start, overlap_size); + break; + case FlushMode::Invalidate: + rasterizer->InvalidateRegion(physical_start, overlap_size); + break; + case FlushMode::FlushAndInvalidate: + rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); + break; + } + }; + + CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END, FCRAM_PADDR); + CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END, FCRAM_PADDR); + CheckRegion(VRAM_VADDR, VRAM_VADDR_END, VRAM_PADDR); + if (Service::PLGLDR::PLG_LDR::GetPluginFBAddr()) + CheckRegion(PLUGIN_3GX_FB_VADDR, PLUGIN_3GX_FB_VADDR_END, + Service::PLGLDR::PLG_LDR::GetPluginFBAddr()); +} + void MemorySystem::MapPages(PageTable& page_table, u32 base, u32 size, MemoryRef memory, PageType type) { LOG_DEBUG(HW_Memory, "Mapping {} onto {:08X}-{:08X}", (void*)memory.GetPtr(), base * CITRA_PAGE_SIZE, (base + size) * CITRA_PAGE_SIZE); - RasterizerFlushVirtualRegion(base << CITRA_PAGE_BITS, size * CITRA_PAGE_SIZE, - FlushMode::FlushAndInvalidate); + if (impl->system.IsPoweredOn()) { + RasterizerFlushVirtualRegion(base << CITRA_PAGE_BITS, size * CITRA_PAGE_SIZE, + FlushMode::FlushAndInvalidate); + } u32 end = base + size; while (base != end) { @@ 
-421,9 +459,8 @@ T MemorySystem::Read(const VAddr vaddr) { return value; } else if ((paddr & 0xF0000000) == 0x10000000 && paddr >= Memory::IO_AREA_PADDR) { // Check MMIO region - T ret; - HW::Read(ret, static_cast(paddr) - Memory::IO_AREA_PADDR + 0x1EC00000); - return ret; + return impl->system.GPU().ReadReg(static_cast(paddr) - Memory::IO_AREA_PADDR + + 0x1EC00000); } } @@ -468,7 +505,10 @@ void MemorySystem::Write(const VAddr vaddr, const T data) { return; } else if ((paddr & 0xF0000000) == 0x10000000 && paddr >= Memory::IO_AREA_PADDR) { // Check MMIO region - HW::Write(static_cast(paddr) - Memory::IO_AREA_PADDR + 0x1EC00000, data); + ASSERT(sizeof(data) == sizeof(u32)); + impl->system.GPU().WriteReg(static_cast(paddr) - Memory::IO_AREA_PADDR + + 0x1EC00000, + static_cast(data)); return; } } @@ -713,84 +753,6 @@ void MemorySystem::RasterizerMarkRegionCached(PAddr start, u32 size, bool cached } } -void RasterizerFlushRegion(PAddr start, u32 size) { - if (VideoCore::g_renderer == nullptr) { - return; - } - - VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size); -} - -void RasterizerInvalidateRegion(PAddr start, u32 size) { - if (VideoCore::g_renderer == nullptr) { - return; - } - - VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size); -} - -void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) { - // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be - // null here - if (VideoCore::g_renderer == nullptr) { - return; - } - - VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size); -} - -void RasterizerClearAll(bool flush) { - // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be - // null here - if (VideoCore::g_renderer == nullptr) { - return; - } - - VideoCore::g_renderer->Rasterizer()->ClearAll(flush); -} - -void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { - // Since pages are unmapped on shutdown after video core is 
shutdown, the renderer may be - // null here - if (VideoCore::g_renderer == nullptr) { - return; - } - - VAddr end = start + size; - - auto CheckRegion = [&](VAddr region_start, VAddr region_end, PAddr paddr_region_start) { - if (start >= region_end || end <= region_start) { - // No overlap with region - return; - } - - VAddr overlap_start = std::max(start, region_start); - VAddr overlap_end = std::min(end, region_end); - PAddr physical_start = paddr_region_start + (overlap_start - region_start); - u32 overlap_size = overlap_end - overlap_start; - - auto* rasterizer = VideoCore::g_renderer->Rasterizer(); - switch (mode) { - case FlushMode::Flush: - rasterizer->FlushRegion(physical_start, overlap_size); - break; - case FlushMode::Invalidate: - rasterizer->InvalidateRegion(physical_start, overlap_size); - break; - case FlushMode::FlushAndInvalidate: - rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); - break; - } - }; - - CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END, FCRAM_PADDR); - CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END, FCRAM_PADDR); - CheckRegion(VRAM_VADDR, VRAM_VADDR_END, VRAM_PADDR); - if (Service::PLGLDR::PLG_LDR::GetPluginFBAddr()) - CheckRegion(PLUGIN_3GX_FB_VADDR, PLUGIN_3GX_FB_VADDR_END, - Service::PLGLDR::PLG_LDR::GetPluginFBAddr()); -} - u8 MemorySystem::Read8(const VAddr addr) { return Read(addr); } diff --git a/src/core/memory.h b/src/core/memory.h index 4f74039e5..24ece022a 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -226,21 +226,6 @@ enum : VAddr { PLUGIN_3GX_FB_VADDR_END = PLUGIN_3GX_FB_VADDR + PLUGIN_3GX_FB_SIZE }; -/** - * Flushes any externally cached rasterizer resources touching the given region. - */ -void RasterizerFlushRegion(PAddr start, u32 size); - -/** - * Invalidates any externally cached rasterizer resources touching the given region. 
- */ -void RasterizerInvalidateRegion(PAddr start, u32 size); - -/** - * Flushes and invalidates any externally cached rasterizer resources touching the given region. - */ -void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size); - enum class FlushMode { /// Write back modified surfaces to RAM Flush, @@ -250,16 +235,6 @@ enum class FlushMode { FlushAndInvalidate, }; -/** - * Flushes and invalidates all memory in the rasterizer cache and removes any leftover state - * If flush is true, the rasterizer should flush any cached resources to RAM before clearing - */ -void RasterizerClearAll(bool flush); - -/** - * Flushes and invalidates any externally cached rasterizer resources touching the given virtual - * address region. - */ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode); class MemorySystem { diff --git a/src/core/movie.cpp b/src/core/movie.cpp index 3602ae9b6..1dba19ef9 100644 --- a/src/core/movie.cpp +++ b/src/core/movie.cpp @@ -21,7 +21,6 @@ #include "core/hle/service/hid/hid.h" #include "core/hle/service/ir/extra_hid.h" #include "core/hle/service/ir/ir_rst.h" -#include "core/hw/gpu.h" #include "core/loader/loader.h" #include "core/movie.h" @@ -218,10 +217,10 @@ Movie::PlayMode Movie::GetPlayMode() const { } u64 Movie::GetCurrentInputIndex() const { - return static_cast(std::nearbyint(current_input / 234.0 * GPU::SCREEN_REFRESH_RATE)); + return static_cast(std::nearbyint(current_input / 234.0 * SCREEN_REFRESH_RATE)); } u64 Movie::GetTotalInputCount() const { - return static_cast(std::nearbyint(total_input / 234.0 * GPU::SCREEN_REFRESH_RATE)); + return static_cast(std::nearbyint(total_input / 234.0 * SCREEN_REFRESH_RATE)); } void Movie::CheckInputEnd() { diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp index 038dc82ef..0d30a9187 100644 --- a/src/core/perf_stats.cpp +++ b/src/core/perf_stats.cpp @@ -13,8 +13,9 @@ #include #include "common/file_util.h" #include "common/settings.h" -#include "core/hw/gpu.h" +#include 
"core/core_timing.h" #include "core/perf_stats.h" +#include "video_core/gpu.h" using namespace std::chrono_literals; using DoubleSecs = std::chrono::duration; @@ -120,7 +121,7 @@ PerfStats::Results PerfStats::GetLastStats() { double PerfStats::GetLastFrameTimeScale() const { std::scoped_lock lock{object_mutex}; - constexpr double FRAME_LENGTH = 1.0 / GPU::SCREEN_REFRESH_RATE; + constexpr double FRAME_LENGTH = 1.0 / SCREEN_REFRESH_RATE; return duration_cast(previous_frame_length).count() / FRAME_LENGTH; } diff --git a/src/core/tracer/citrace.h b/src/core/tracer/citrace.h index 21fdc127a..629baba4a 100644 --- a/src/core/tracer/citrace.h +++ b/src/core/tracer/citrace.h @@ -75,16 +75,7 @@ struct CTMemoryLoad { struct CTRegisterWrite { u32 physical_address; - - enum : u32 { - SIZE_8 = 0xD1, - SIZE_16 = 0xD2, - SIZE_32 = 0xD3, - SIZE_64 = 0xD4, - } size; - - // TODO: Make it clearer which bits of this member are used for sizes other than 32 bits - u64 value; + u32 value; }; struct CTStreamElement { diff --git a/src/core/tracer/recorder.cpp b/src/core/tracer/recorder.cpp index a47632d5a..a6cf02954 100644 --- a/src/core/tracer/recorder.cpp +++ b/src/core/tracer/recorder.cpp @@ -22,9 +22,8 @@ void Recorder::Finish(const std::string& filename) { // Calculate file offsets auto& initial = header.initial_state_offsets; - initial.gpu_registers_size = static_cast(initial_state.gpu_registers.size()); - initial.lcd_registers_size = static_cast(initial_state.lcd_registers.size()); initial.pica_registers_size = static_cast(initial_state.pica_registers.size()); + initial.lcd_registers_size = static_cast(initial_state.lcd_registers.size()); initial.default_attributes_size = static_cast(initial_state.default_attributes.size()); initial.vs_program_binary_size = static_cast(initial_state.vs_program_binary.size()); initial.vs_swizzle_data_size = static_cast(initial_state.vs_swizzle_data.size()); @@ -81,22 +80,17 @@ void Recorder::Finish(const std::string& filename) { throw "Failed to write 
header"; // Write initial state - written = - file.WriteArray(initial_state.gpu_registers.data(), initial_state.gpu_registers.size()); - if (written != initial_state.gpu_registers.size() || file.Tell() != initial.lcd_registers) - throw "Failed to write GPU registers"; - - written = - file.WriteArray(initial_state.lcd_registers.data(), initial_state.lcd_registers.size()); - if (written != initial_state.lcd_registers.size() || file.Tell() != initial.pica_registers) - throw "Failed to write LCD registers"; - written = file.WriteArray(initial_state.pica_registers.data(), initial_state.pica_registers.size()); if (written != initial_state.pica_registers.size() || file.Tell() != initial.default_attributes) throw "Failed to write Pica registers"; + written = + file.WriteArray(initial_state.lcd_registers.data(), initial_state.lcd_registers.size()); + if (written != initial_state.lcd_registers.size() || file.Tell() != initial.pica_registers) + throw "Failed to write LCD registers"; + written = file.WriteArray(initial_state.default_attributes.data(), initial_state.default_attributes.size()); if (written != initial_state.default_attributes.size() || @@ -187,21 +181,12 @@ void Recorder::MemoryAccessed(const u8* data, u32 size, u32 physical_address) { stream.push_back(element); } -template -void Recorder::RegisterWritten(u32 physical_address, T value) { +void Recorder::RegisterWritten(u32 physical_address, u32 value) { StreamElement element = {{RegisterWrite}}; - element.data.register_write.size = (sizeof(T) == 1) ? CTRegisterWrite::SIZE_8 - : (sizeof(T) == 2) ? CTRegisterWrite::SIZE_16 - : (sizeof(T) == 4) ? 
CTRegisterWrite::SIZE_32 - : CTRegisterWrite::SIZE_64; element.data.register_write.physical_address = physical_address; element.data.register_write.value = value; stream.push_back(element); } -template void Recorder::RegisterWritten(u32, u8); -template void Recorder::RegisterWritten(u32, u16); -template void Recorder::RegisterWritten(u32, u32); -template void Recorder::RegisterWritten(u32, u64); } // namespace CiTrace diff --git a/src/core/tracer/recorder.h b/src/core/tracer/recorder.h index 8b7c7d999..114f22245 100644 --- a/src/core/tracer/recorder.h +++ b/src/core/tracer/recorder.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -17,7 +16,6 @@ namespace CiTrace { class Recorder { public: struct InitialState { - std::vector gpu_registers; std::vector lcd_registers; std::vector pica_registers; std::vector default_attributes; @@ -52,8 +50,7 @@ public: * Record a register write. * @note Use this whenever a GPU-related MMIO register has been written to. */ - template - void RegisterWritten(u32 physical_address, T value); + void RegisterWritten(u32 physical_address, u32 value); private: // Initial state of recording start diff --git a/src/tests/video_core/shader/shader_jit_compiler.cpp b/src/tests/video_core/shader/shader_jit_compiler.cpp index c76a2d573..01698a2e1 100644 --- a/src/tests/video_core/shader/shader_jit_compiler.cpp +++ b/src/tests/video_core/shader/shader_jit_compiler.cpp @@ -13,6 +13,8 @@ #include #include #include +#include "video_core/pica/shader_setup.h" +#include "video_core/pica/shader_unit.h" #include "video_core/shader/shader_interpreter.h" #if CITRA_ARCH(x86_64) #include "video_core/shader/shader_jit_x64_compiler.h" @@ -54,11 +56,11 @@ struct StringMaker { }; } // namespace Catch -static std::unique_ptr CompileShaderSetup( +static std::unique_ptr CompileShaderSetup( std::initializer_list code) { const auto shbin = nihstro::InlineAsm::CompileToRawBinary(code); - auto shader = std::make_unique(); + auto shader = 
std::make_unique(); std::transform(shbin.program.begin(), shbin.program.end(), shader->program_code.begin(), [](const auto& x) { return x.hex; }); @@ -75,18 +77,16 @@ public: shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data); } - explicit ShaderTest(std::unique_ptr input_shader_setup) + explicit ShaderTest(std::unique_ptr input_shader_setup) : shader_setup(std::move(input_shader_setup)) { shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data); } Common::Vec4f Run(std::span inputs) { - Pica::Shader::UnitState shader_unit; + Pica::ShaderUnit shader_unit; RunJit(shader_unit, inputs); - return {shader_unit.registers.output[0].x.ToFloat32(), - shader_unit.registers.output[0].y.ToFloat32(), - shader_unit.registers.output[0].z.ToFloat32(), - shader_unit.registers.output[0].w.ToFloat32()}; + return {shader_unit.output[0].x.ToFloat32(), shader_unit.output[0].y.ToFloat32(), + shader_unit.output[0].z.ToFloat32(), shader_unit.output[0].w.ToFloat32()}; } Common::Vec4f Run(std::initializer_list inputs) { @@ -105,39 +105,36 @@ public: return Run(std::vector{inputs}); } - void RunJit(Pica::Shader::UnitState& shader_unit, std::span inputs) { + void RunJit(Pica::ShaderUnit& shader_unit, std::span inputs) { for (std::size_t i = 0; i < inputs.size(); ++i) { const Common::Vec4f& input = inputs[i]; - shader_unit.registers.input[i].x = Pica::f24::FromFloat32(input.x); - shader_unit.registers.input[i].y = Pica::f24::FromFloat32(input.y); - shader_unit.registers.input[i].z = Pica::f24::FromFloat32(input.z); - shader_unit.registers.input[i].w = Pica::f24::FromFloat32(input.w); + shader_unit.input[i].x = Pica::f24::FromFloat32(input.x); + shader_unit.input[i].y = Pica::f24::FromFloat32(input.y); + shader_unit.input[i].z = Pica::f24::FromFloat32(input.z); + shader_unit.input[i].w = Pica::f24::FromFloat32(input.w); } - shader_unit.registers.temporary.fill( - Common::Vec4::AssignToAll(Pica::f24::Zero())); + 
shader_unit.temporary.fill(Common::Vec4::AssignToAll(Pica::f24::Zero())); shader_jit.Run(*shader_setup, shader_unit, 0); } - void RunJit(Pica::Shader::UnitState& shader_unit, float input) { + void RunJit(Pica::ShaderUnit& shader_unit, float input) { const Common::Vec4f input_vec(input, 0, 0, 0); RunJit(shader_unit, {&input_vec, 1}); } - void RunInterpreter(Pica::Shader::UnitState& shader_unit, - std::span inputs) { + void RunInterpreter(Pica::ShaderUnit& shader_unit, std::span inputs) { for (std::size_t i = 0; i < inputs.size(); ++i) { const Common::Vec4f& input = inputs[i]; - shader_unit.registers.input[i].x = Pica::f24::FromFloat32(input.x); - shader_unit.registers.input[i].y = Pica::f24::FromFloat32(input.y); - shader_unit.registers.input[i].z = Pica::f24::FromFloat32(input.z); - shader_unit.registers.input[i].w = Pica::f24::FromFloat32(input.w); + shader_unit.input[i].x = Pica::f24::FromFloat32(input.x); + shader_unit.input[i].y = Pica::f24::FromFloat32(input.y); + shader_unit.input[i].z = Pica::f24::FromFloat32(input.z); + shader_unit.input[i].w = Pica::f24::FromFloat32(input.w); } - shader_unit.registers.temporary.fill( - Common::Vec4::AssignToAll(Pica::f24::Zero())); + shader_unit.temporary.fill(Common::Vec4::AssignToAll(Pica::f24::Zero())); shader_interpreter.Run(*shader_setup, shader_unit); } - void RunInterpreter(Pica::Shader::UnitState& shader_unit, float input) { + void RunInterpreter(Pica::ShaderUnit& shader_unit, float input) { const Common::Vec4f input_vec(input, 0, 0, 0); RunInterpreter(shader_unit, {&input_vec, 1}); } @@ -145,7 +142,7 @@ public: public: JitShader shader_jit; ShaderInterpreter shader_interpreter; - std::unique_ptr shader_setup; + std::unique_ptr shader_setup; }; TEST_CASE("ADD", "[video_core][shader][shader_jit]") { @@ -642,11 +639,11 @@ TEST_CASE("Nested Loop", "[video_core][shader][shader_jit]") { input) + input; - Pica::Shader::UnitState shader_unit_jit; + Pica::ShaderUnit shader_unit_jit; shader_test.RunJit(shader_unit_jit, 
input); REQUIRE(shader_unit_jit.address_registers[2] == expected_aL); - REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == Catch::Approx(expected_out)); + REQUIRE(shader_unit_jit.output[0].x.ToFloat32() == Catch::Approx(expected_out)); } { shader_test.shader_setup->uniforms.i[0] = {9, 0, 2, 0}; @@ -659,11 +656,11 @@ TEST_CASE("Nested Loop", "[video_core][shader][shader_jit]") { (shader_test.shader_setup->uniforms.i[1][0] + 1)) * input) + input; - Pica::Shader::UnitState shader_unit_jit; + Pica::ShaderUnit shader_unit_jit; shader_test.RunJit(shader_unit_jit, input); REQUIRE(shader_unit_jit.address_registers[2] == expected_aL); - REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == Catch::Approx(expected_out)); + REQUIRE(shader_unit_jit.output[0].x.ToFloat32() == Catch::Approx(expected_out)); } } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 50c262bec..de0fdcf4a 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,8 +1,6 @@ add_subdirectory(host_shaders) add_library(video_core STATIC - command_processor.cpp - command_processor.h custom_textures/custom_format.cpp custom_textures/custom_format.h custom_textures/custom_tex_manager.cpp @@ -11,29 +9,41 @@ add_library(video_core STATIC custom_textures/material.h debug_utils/debug_utils.cpp debug_utils/debug_utils.h - geometry_pipeline.cpp - geometry_pipeline.h + gpu.cpp + gpu.h gpu_debugger.h - pica.cpp - pica.h - pica_state.h pica_types.h precompiled_headers.h - primitive_assembly.cpp - primitive_assembly.h rasterizer_accelerated.cpp rasterizer_accelerated.h rasterizer_interface.h - regs.cpp - regs.h - regs_framebuffer.h - regs_lighting.h - regs_pipeline.h - regs_rasterizer.h - regs_shader.h - regs_texturing.h renderer_base.cpp renderer_base.h + pica/geometry_pipeline.cpp + pica/geometry_pipeline.h + pica/pica_core.cpp + pica/pica_core.h + pica/output_vertex.cpp + pica/output_vertex.h + pica/primitive_assembly.cpp + 
pica/primitive_assembly.h + pica/regs_external.h + pica/regs_framebuffer.h + pica/regs_internal.cpp + pica/regs_internal.h + pica/regs_lcd.h + pica/regs_lighting.h + pica/regs_pipeline.h + pica/regs_rasterizer.h + pica/regs_shader.h + pica/regs_texturing.h + pica/shader_setup.cpp + pica/shader_setup.h + pica/shader_unit.cpp + pica/shader_unit.h + pica/packed_attribute.h + pica/vertex_loader.cpp + pica/vertex_loader.h rasterizer_cache/framebuffer_base.h rasterizer_cache/pixel_format.cpp rasterizer_cache/pixel_format.h @@ -84,6 +94,8 @@ add_library(video_core STATIC renderer_opengl/renderer_opengl.h renderer_software/renderer_software.cpp renderer_software/renderer_software.h + renderer_software/sw_blitter.cpp + renderer_software/sw_blitter.h renderer_software/sw_clipper.cpp renderer_software/sw_clipper.h renderer_software/sw_framebuffer.cpp @@ -167,8 +179,6 @@ add_library(video_core STATIC texture/texture_decode.cpp texture/texture_decode.h utils.h - vertex_loader.cpp - vertex_loader.h video_core.cpp video_core.h ) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp deleted file mode 100644 index a6f2eb73f..000000000 --- a/src/video_core/command_processor.cpp +++ /dev/null @@ -1,677 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include -#include -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "common/vector_math.h" -#include "core/hle/service/gsp/gsp.h" -#include "core/hw/gpu.h" -#include "core/memory.h" -#include "core/tracer/recorder.h" -#include "video_core/command_processor.h" -#include "video_core/debug_utils/debug_utils.h" -#include "video_core/pica_state.h" -#include "video_core/pica_types.h" -#include "video_core/primitive_assembly.h" -#include "video_core/rasterizer_interface.h" -#include "video_core/regs.h" -#include "video_core/regs_pipeline.h" -#include "video_core/regs_texturing.h" -#include "video_core/renderer_base.h" -#include "video_core/shader/shader.h" -#include "video_core/vertex_loader.h" -#include "video_core/video_core.h" - -namespace Pica::CommandProcessor { - -// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF -constexpr std::array expand_bits_to_bytes{ - 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, - 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, -}; - -MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); - -static const char* GetShaderSetupTypeName(Shader::ShaderSetup& setup) { - if (&setup == &g_state.vs) { - return "vertex shader"; - } - if (&setup == &g_state.gs) { - return "geometry shader"; - } - return "unknown shader"; -} - -static void WriteUniformBoolReg(Shader::ShaderSetup& setup, u32 value) { - for (unsigned i = 0; i < setup.uniforms.b.size(); ++i) - setup.uniforms.b[i] = (value & (1 << i)) != 0; -} - -static void WriteUniformIntReg(Shader::ShaderSetup& setup, unsigned index, - const Common::Vec4& values) { - ASSERT(index < setup.uniforms.i.size()); - setup.uniforms.i[index] = values; - LOG_TRACE(HW_GPU, "Set {} integer uniform {} to {:02x} {:02x} {:02x} {:02x}", - GetShaderSetupTypeName(setup), index, values.x, values.y, 
values.z, values.w); -} - -static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, - int& float_regs_counter, std::array& uniform_write_buffer, - u32 value) { - auto& uniform_setup = config.uniform_setup; - - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? - uniform_write_buffer[float_regs_counter++] = value; - - // Uniforms are written in a packed format such that four float24 values are encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || - (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { - float_regs_counter = 0; - - if (uniform_setup.index >= setup.uniforms.f.size()) { - LOG_ERROR(HW_GPU, "Invalid {} float uniform index {}", GetShaderSetupTypeName(setup), - (int)uniform_setup.index); - } else { - auto& uniform = setup.uniforms.f[uniform_setup.index]; - - // NOTE: The destination component order indeed is "backwards" - if (uniform_setup.IsFloat32()) { - for (auto i : {0, 1, 2, 3}) { - float buffer_value; - std::memcpy(&buffer_value, &uniform_write_buffer[i], sizeof(float)); - uniform[3 - i] = f24::FromFloat32(buffer_value); - } - } else { - // TODO: Untested - uniform.w = f24::FromRaw(uniform_write_buffer[0] >> 8); - uniform.z = f24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | - ((uniform_write_buffer[1] >> 16) & 0xFFFF)); - uniform.y = f24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | - ((uniform_write_buffer[2] >> 24) & 0xFF)); - uniform.x = f24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); - } - - LOG_TRACE(HW_GPU, "Set {} float uniform {:x} to ({} {} {} {})", - GetShaderSetupTypeName(setup), (int)uniform_setup.index, - uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), - uniform.w.ToFloat32()); - - // TODO: Verify that this actually modifies the register! 
- uniform_setup.index.Assign(uniform_setup.index + 1); - } - } -} - -static void WritePicaReg(u32 id, u32 value, u32 mask) { - auto& regs = g_state.regs; - - if (id >= Regs::NUM_REGS) { - LOG_ERROR( - HW_GPU, - "Commandlist tried to write to invalid register 0x{:03X} (value: {:08X}, mask: {:X})", - id, value, mask); - return; - } - - // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value - u32 old_value = regs.reg_array[id]; - - const u32 write_mask = expand_bits_to_bytes[mask]; - - regs.reg_array[id] = (old_value & ~write_mask) | (value & write_mask); - - // Double check for is_pica_tracing to avoid call overhead - if (DebugUtils::IsPicaTracing()) { - DebugUtils::OnPicaRegWrite({(u16)id, (u16)mask, regs.reg_array[id]}); - } - - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, - reinterpret_cast(&id)); - - switch (id) { - // Trigger IRQ - case PICA_REG_INDEX(trigger_irq): - Service::GSP::SignalInterrupt(Service::GSP::InterruptId::P3D); - break; - - case PICA_REG_INDEX(pipeline.triangle_topology): - g_state.primitive_assembler.Reconfigure(regs.pipeline.triangle_topology); - break; - - case PICA_REG_INDEX(pipeline.restart_primitive): - g_state.primitive_assembler.Reset(); - break; - - case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): - g_state.immediate.current_attribute = 0; - g_state.immediate.reset_geometry_pipeline = true; - g_state.default_attr_counter = 0; - break; - - // Load default vertex input attributes - case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.set_value[0]): - case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.set_value[1]): - case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.set_value[2]): { - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? 
- g_state.default_attr_write_buffer[g_state.default_attr_counter++] = value; - - // Default attributes are written in a packed format such that four float24 values are - // encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if (g_state.default_attr_counter >= 3) { - g_state.default_attr_counter = 0; - - auto& setup = regs.pipeline.vs_default_attributes_setup; - - if (setup.index >= 16) { - LOG_ERROR(HW_GPU, "Invalid VS default attribute index {}", (int)setup.index); - break; - } - - Common::Vec4 attribute; - - // NOTE: The destination component order indeed is "backwards" - attribute.w = f24::FromRaw(g_state.default_attr_write_buffer[0] >> 8); - attribute.z = f24::FromRaw(((g_state.default_attr_write_buffer[0] & 0xFF) << 16) | - ((g_state.default_attr_write_buffer[1] >> 16) & 0xFFFF)); - attribute.y = f24::FromRaw(((g_state.default_attr_write_buffer[1] & 0xFFFF) << 8) | - ((g_state.default_attr_write_buffer[2] >> 24) & 0xFF)); - attribute.x = f24::FromRaw(g_state.default_attr_write_buffer[2] & 0xFFFFFF); - - LOG_TRACE(HW_GPU, "Set default VS attribute {:x} to ({} {} {} {})", (int)setup.index, - attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), - attribute.w.ToFloat32()); - - // TODO: Verify that this actually modifies the register! - if (setup.index < 15) { - g_state.input_default_attributes.attr[setup.index] = attribute; - setup.index++; - } else { - // Put each attribute into an immediate input buffer. When all specified immediate - // attributes are present, the Vertex Shader is invoked and everything is sent to - // the primitive assembler. 
- - auto& immediate_input = g_state.immediate.input_vertex; - auto& immediate_attribute_id = g_state.immediate.current_attribute; - - immediate_input.attr[immediate_attribute_id] = attribute; - - if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) { - immediate_attribute_id += 1; - } else { - MICROPROFILE_SCOPE(GPU_Drawing); - immediate_attribute_id = 0; - - Shader::OutputVertex::ValidateSemantics(regs.rasterizer); - - auto* shader_engine = Shader::GetEngine(); - shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); - - // Send to vertex shader - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, - static_cast(&immediate_input)); - Shader::UnitState shader_unit; - Shader::AttributeBuffer output{}; - - shader_unit.LoadInput(regs.vs, immediate_input); - shader_engine->Run(g_state.vs, shader_unit); - shader_unit.WriteOutput(regs.vs, output); - - // Send to geometry pipeline - if (g_state.immediate.reset_geometry_pipeline) { - g_state.geometry_pipeline.Reconfigure(); - g_state.immediate.reset_geometry_pipeline = false; - } - ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); - g_state.geometry_pipeline.Setup(shader_engine); - g_state.geometry_pipeline.SubmitVertex(output); - - // TODO: If drawing after every immediate mode triangle kills performance, - // change it to flush triangles whenever a drawing config register changes - // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550 - VideoCore::g_renderer->Rasterizer()->DrawTriangles(); - if (g_debug_context) { - g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, - nullptr); - } - } - } - } - break; - } - - case PICA_REG_INDEX(pipeline.gpu_mode): - // This register likely just enables vertex processing and doesn't need any special handling - break; - - case PICA_REG_INDEX(pipeline.command_buffer.trigger[0]): - case PICA_REG_INDEX(pipeline.command_buffer.trigger[1]): { - unsigned index = - static_cast(id - 
PICA_REG_INDEX(pipeline.command_buffer.trigger[0])); - u32* head_ptr = (u32*)VideoCore::g_memory->GetPhysicalPointer( - regs.pipeline.command_buffer.GetPhysicalAddress(index)); - g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; - g_state.cmd_list.length = regs.pipeline.command_buffer.GetSize(index) / sizeof(u32); - break; - } - - // It seems like these trigger vertex rendering - case PICA_REG_INDEX(pipeline.trigger_draw): - case PICA_REG_INDEX(pipeline.trigger_draw_indexed): { - MICROPROFILE_SCOPE(GPU_Drawing); - -#if PICA_LOG_TEV - DebugUtils::DumpTevStageConfig(regs.GetTevStages()); -#endif - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); - - PrimitiveAssembler& primitive_assembler = g_state.primitive_assembler; - - bool accelerate_draw = VideoCore::g_hw_shader_enabled && primitive_assembler.IsEmpty(); - - if (regs.pipeline.use_gs == PipelineRegs::UseGS::No) { - auto topology = primitive_assembler.GetTopology(); - if (topology == PipelineRegs::TriangleTopology::Shader || - topology == PipelineRegs::TriangleTopology::List) { - accelerate_draw = accelerate_draw && (regs.pipeline.num_vertices % 3) == 0; - } - // TODO (wwylele): for Strip/Fan topology, if the primitive assember is not restarted - // after this draw call, the buffered vertex from this draw should "leak" to the next - // draw, in which case we should buffer the vertex into the software primitive assember, - // or disable accelerate draw completely. However, there is not game found yet that does - // this, so this is left unimplemented for now. Revisit this when an issue is found in - // games. 
- } else { - accelerate_draw = false; - } - - bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); - - if (accelerate_draw && - VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(is_indexed)) { - if (g_debug_context) { - g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); - } - break; - } - - // Processes information about internal vertex attributes to figure out how a vertex is - // loaded. - // Later, these can be compiled and cached. - const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress(); - VertexLoader loader(regs.pipeline); - Shader::OutputVertex::ValidateSemantics(regs.rasterizer); - - // Load vertices - const auto& index_info = regs.pipeline.index_array; - const u8* index_address_8 = - VideoCore::g_memory->GetPhysicalPointer(base_address + index_info.offset); - const u16* index_address_16 = reinterpret_cast(index_address_8); - bool index_u16 = index_info.format != 0; - - if (g_debug_context && g_debug_context->recorder) { - for (int i = 0; i < 3; ++i) { - const auto texture = regs.texturing.GetTextures()[i]; - if (!texture.enabled) - continue; - - u8* texture_data = - VideoCore::g_memory->GetPhysicalPointer(texture.config.GetPhysicalAddress()); - g_debug_context->recorder->MemoryAccessed( - texture_data, - Pica::TexturingRegs::NibblesPerPixel(texture.format) * texture.config.width / - 2 * texture.config.height, - texture.config.GetPhysicalAddress()); - } - } - - DebugUtils::MemoryAccessTracker memory_accesses; - - // Simple circular-replacement vertex cache - // The size has been tuned for optimal balance between hit-rate and the cost of lookup - const std::size_t VERTEX_CACHE_SIZE = 32; - std::array vertex_cache_valid{}; - std::array vertex_cache_ids; - std::array vertex_cache; - Shader::AttributeBuffer vs_output; - - unsigned int vertex_cache_pos = 0; - - auto* shader_engine = Shader::GetEngine(); - Shader::UnitState shader_unit; - - shader_engine->SetupBatch(g_state.vs, 
regs.vs.main_offset); - - g_state.geometry_pipeline.Reconfigure(); - g_state.geometry_pipeline.Setup(shader_engine); - if (g_state.geometry_pipeline.NeedIndexInput()) - ASSERT(is_indexed); - - for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { - // Indexed rendering doesn't use the start offset - unsigned int vertex = - is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) - : (index + regs.pipeline.vertex_offset); - - bool vertex_cache_hit = false; - - if (is_indexed) { - if (g_state.geometry_pipeline.NeedIndexInput()) { - g_state.geometry_pipeline.SubmitIndex(vertex); - continue; - } - - if (g_debug_context && Pica::g_debug_context->recorder) { - int size = index_u16 ? 2 : 1; - memory_accesses.AddAccess(base_address + index_info.offset + size * index, - size); - } - - for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { - if (vertex_cache_valid[i] && vertex == vertex_cache_ids[i]) { - vs_output = vertex_cache[i]; - vertex_cache_hit = true; - break; - } - } - } - - if (!vertex_cache_hit) { - // Initialize data for the current vertex - Shader::AttributeBuffer input; - loader.LoadVertex(base_address, index, vertex, input, memory_accesses); - - // Send to vertex shader - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, - (void*)&input); - shader_unit.LoadInput(regs.vs, input); - shader_engine->Run(g_state.vs, shader_unit); - shader_unit.WriteOutput(regs.vs, vs_output); - - if (is_indexed) { - vertex_cache[vertex_cache_pos] = vs_output; - vertex_cache_valid[vertex_cache_pos] = true; - vertex_cache_ids[vertex_cache_pos] = vertex; - vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; - } - } - - // Send to geometry pipeline - g_state.geometry_pipeline.SubmitVertex(vs_output); - } - - for (auto& range : memory_accesses.ranges) { - g_debug_context->recorder->MemoryAccessed( - VideoCore::g_memory->GetPhysicalPointer(range.first), range.second, range.first); - } - - 
VideoCore::g_renderer->Rasterizer()->DrawTriangles(); - if (g_debug_context) { - g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); - } - - break; - } - - case PICA_REG_INDEX(gs.bool_uniforms): - WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value()); - break; - - case PICA_REG_INDEX(gs.int_uniforms[0]): - case PICA_REG_INDEX(gs.int_uniforms[1]): - case PICA_REG_INDEX(gs.int_uniforms[2]): - case PICA_REG_INDEX(gs.int_uniforms[3]): { - unsigned index = (id - PICA_REG_INDEX(gs.int_uniforms[0])); - auto values = regs.gs.int_uniforms[index]; - WriteUniformIntReg(g_state.gs, index, - Common::Vec4(values.x, values.y, values.z, values.w)); - break; - } - - case PICA_REG_INDEX(gs.uniform_setup.set_value[0]): - case PICA_REG_INDEX(gs.uniform_setup.set_value[1]): - case PICA_REG_INDEX(gs.uniform_setup.set_value[2]): - case PICA_REG_INDEX(gs.uniform_setup.set_value[3]): - case PICA_REG_INDEX(gs.uniform_setup.set_value[4]): - case PICA_REG_INDEX(gs.uniform_setup.set_value[5]): - case PICA_REG_INDEX(gs.uniform_setup.set_value[6]): - case PICA_REG_INDEX(gs.uniform_setup.set_value[7]): { - WriteUniformFloatReg(g_state.regs.gs, g_state.gs, g_state.gs_float_regs_counter, - g_state.gs_uniform_write_buffer, value); - break; - } - - case PICA_REG_INDEX(gs.program.set_word[0]): - case PICA_REG_INDEX(gs.program.set_word[1]): - case PICA_REG_INDEX(gs.program.set_word[2]): - case PICA_REG_INDEX(gs.program.set_word[3]): - case PICA_REG_INDEX(gs.program.set_word[4]): - case PICA_REG_INDEX(gs.program.set_word[5]): - case PICA_REG_INDEX(gs.program.set_word[6]): - case PICA_REG_INDEX(gs.program.set_word[7]): { - u32& offset = g_state.regs.gs.program.offset; - if (offset >= 4096) { - LOG_ERROR(HW_GPU, "Invalid GS program offset {}", offset); - } else { - g_state.gs.program_code[offset] = value; - g_state.gs.MarkProgramCodeDirty(); - offset++; - } - break; - } - - case PICA_REG_INDEX(gs.swizzle_patterns.set_word[0]): - case 
PICA_REG_INDEX(gs.swizzle_patterns.set_word[1]): - case PICA_REG_INDEX(gs.swizzle_patterns.set_word[2]): - case PICA_REG_INDEX(gs.swizzle_patterns.set_word[3]): - case PICA_REG_INDEX(gs.swizzle_patterns.set_word[4]): - case PICA_REG_INDEX(gs.swizzle_patterns.set_word[5]): - case PICA_REG_INDEX(gs.swizzle_patterns.set_word[6]): - case PICA_REG_INDEX(gs.swizzle_patterns.set_word[7]): { - u32& offset = g_state.regs.gs.swizzle_patterns.offset; - if (offset >= g_state.gs.swizzle_data.size()) { - LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset {}", offset); - } else { - g_state.gs.swizzle_data[offset] = value; - g_state.gs.MarkSwizzleDataDirty(); - offset++; - } - break; - } - - case PICA_REG_INDEX(vs.bool_uniforms): - // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? - WriteUniformBoolReg(g_state.vs, g_state.regs.vs.bool_uniforms.Value()); - break; - - case PICA_REG_INDEX(vs.int_uniforms[0]): - case PICA_REG_INDEX(vs.int_uniforms[1]): - case PICA_REG_INDEX(vs.int_uniforms[2]): - case PICA_REG_INDEX(vs.int_uniforms[3]): { - // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? - unsigned index = (id - PICA_REG_INDEX(vs.int_uniforms[0])); - auto values = regs.vs.int_uniforms[index]; - WriteUniformIntReg(g_state.vs, index, - Common::Vec4(values.x, values.y, values.z, values.w)); - break; - } - - case PICA_REG_INDEX(vs.uniform_setup.set_value[0]): - case PICA_REG_INDEX(vs.uniform_setup.set_value[1]): - case PICA_REG_INDEX(vs.uniform_setup.set_value[2]): - case PICA_REG_INDEX(vs.uniform_setup.set_value[3]): - case PICA_REG_INDEX(vs.uniform_setup.set_value[4]): - case PICA_REG_INDEX(vs.uniform_setup.set_value[5]): - case PICA_REG_INDEX(vs.uniform_setup.set_value[6]): - case PICA_REG_INDEX(vs.uniform_setup.set_value[7]): { - // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? 
- WriteUniformFloatReg(g_state.regs.vs, g_state.vs, g_state.vs_float_regs_counter, - g_state.vs_uniform_write_buffer, value); - break; - } - - case PICA_REG_INDEX(vs.program.set_word[0]): - case PICA_REG_INDEX(vs.program.set_word[1]): - case PICA_REG_INDEX(vs.program.set_word[2]): - case PICA_REG_INDEX(vs.program.set_word[3]): - case PICA_REG_INDEX(vs.program.set_word[4]): - case PICA_REG_INDEX(vs.program.set_word[5]): - case PICA_REG_INDEX(vs.program.set_word[6]): - case PICA_REG_INDEX(vs.program.set_word[7]): { - u32& offset = g_state.regs.vs.program.offset; - if (offset >= 512) { - LOG_ERROR(HW_GPU, "Invalid VS program offset {}", offset); - } else { - g_state.vs.program_code[offset] = value; - g_state.vs.MarkProgramCodeDirty(); - if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { - g_state.gs.program_code[offset] = value; - g_state.gs.MarkProgramCodeDirty(); - } - offset++; - } - break; - } - - case PICA_REG_INDEX(vs.swizzle_patterns.set_word[0]): - case PICA_REG_INDEX(vs.swizzle_patterns.set_word[1]): - case PICA_REG_INDEX(vs.swizzle_patterns.set_word[2]): - case PICA_REG_INDEX(vs.swizzle_patterns.set_word[3]): - case PICA_REG_INDEX(vs.swizzle_patterns.set_word[4]): - case PICA_REG_INDEX(vs.swizzle_patterns.set_word[5]): - case PICA_REG_INDEX(vs.swizzle_patterns.set_word[6]): - case PICA_REG_INDEX(vs.swizzle_patterns.set_word[7]): { - u32& offset = g_state.regs.vs.swizzle_patterns.offset; - if (offset >= g_state.vs.swizzle_data.size()) { - LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset {}", offset); - } else { - g_state.vs.swizzle_data[offset] = value; - g_state.vs.MarkSwizzleDataDirty(); - if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { - g_state.gs.swizzle_data[offset] = value; - g_state.gs.MarkSwizzleDataDirty(); - } - offset++; - } - break; - } - - case PICA_REG_INDEX(lighting.lut_data[0]): - case PICA_REG_INDEX(lighting.lut_data[1]): - case PICA_REG_INDEX(lighting.lut_data[2]): - case PICA_REG_INDEX(lighting.lut_data[3]): - 
case PICA_REG_INDEX(lighting.lut_data[4]): - case PICA_REG_INDEX(lighting.lut_data[5]): - case PICA_REG_INDEX(lighting.lut_data[6]): - case PICA_REG_INDEX(lighting.lut_data[7]): { - auto& lut_config = regs.lighting.lut_config; - - ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); - - g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; - lut_config.index.Assign(lut_config.index + 1); - break; - } - - case PICA_REG_INDEX(texturing.fog_lut_data[0]): - case PICA_REG_INDEX(texturing.fog_lut_data[1]): - case PICA_REG_INDEX(texturing.fog_lut_data[2]): - case PICA_REG_INDEX(texturing.fog_lut_data[3]): - case PICA_REG_INDEX(texturing.fog_lut_data[4]): - case PICA_REG_INDEX(texturing.fog_lut_data[5]): - case PICA_REG_INDEX(texturing.fog_lut_data[6]): - case PICA_REG_INDEX(texturing.fog_lut_data[7]): { - g_state.fog.lut[regs.texturing.fog_lut_offset % 128].raw = value; - regs.texturing.fog_lut_offset.Assign(regs.texturing.fog_lut_offset + 1); - break; - } - - case PICA_REG_INDEX(texturing.proctex_lut_data[0]): - case PICA_REG_INDEX(texturing.proctex_lut_data[1]): - case PICA_REG_INDEX(texturing.proctex_lut_data[2]): - case PICA_REG_INDEX(texturing.proctex_lut_data[3]): - case PICA_REG_INDEX(texturing.proctex_lut_data[4]): - case PICA_REG_INDEX(texturing.proctex_lut_data[5]): - case PICA_REG_INDEX(texturing.proctex_lut_data[6]): - case PICA_REG_INDEX(texturing.proctex_lut_data[7]): { - auto& index = regs.texturing.proctex_lut_config.index; - auto& pt = g_state.proctex; - - switch (regs.texturing.proctex_lut_config.ref_table.Value()) { - case TexturingRegs::ProcTexLutTable::Noise: - pt.noise_table[index % pt.noise_table.size()].raw = value; - break; - case TexturingRegs::ProcTexLutTable::ColorMap: - pt.color_map_table[index % pt.color_map_table.size()].raw = value; - break; - case TexturingRegs::ProcTexLutTable::AlphaMap: - pt.alpha_map_table[index % pt.alpha_map_table.size()].raw = value; - break; - case 
TexturingRegs::ProcTexLutTable::Color: - pt.color_table[index % pt.color_table.size()].raw = value; - break; - case TexturingRegs::ProcTexLutTable::ColorDiff: - pt.color_diff_table[index % pt.color_diff_table.size()].raw = value; - break; - } - index.Assign(index + 1); - break; - } - default: - break; - } - - VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id); - - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, - reinterpret_cast(&id)); -} - -void ProcessCommandList(PAddr list, u32 size) { - - u32* buffer = (u32*)VideoCore::g_memory->GetPhysicalPointer(list); - - if (Pica::g_debug_context && Pica::g_debug_context->recorder) { - Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, size, list); - } - - g_state.cmd_list.addr = list; - g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = buffer; - g_state.cmd_list.length = size / sizeof(u32); - - while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) { - - // Align read pointer to 8 bytes - if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0) - ++g_state.cmd_list.current_ptr; - - u32 value = *g_state.cmd_list.current_ptr++; - const CommandHeader header = {*g_state.cmd_list.current_ptr++}; - - WritePicaReg(header.cmd_id, value, header.parameter_mask); - - for (unsigned i = 0; i < header.extra_data_length; ++i) { - u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); - WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask); - } - } -} - -} // namespace Pica::CommandProcessor diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h deleted file mode 100644 index 3b4e05519..000000000 --- a/src/video_core/command_processor.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Pica::CommandProcessor { - -union CommandHeader { - u32 hex; - - BitField<0, 16, u32> cmd_id; - - // parameter_mask: - // Mask applied to the input value to make it possible to update - // parts of a register without overwriting its other fields. - // first bit: 0x000000FF - // second bit: 0x0000FF00 - // third bit: 0x00FF0000 - // fourth bit: 0xFF000000 - BitField<16, 4, u32> parameter_mask; - - BitField<20, 11, u32> extra_data_length; - - BitField<31, 1, u32> group_commands; -}; -static_assert(std::is_standard_layout::value == true, - "CommandHeader does not use standard layout"); -static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); - -void ProcessCommandList(PAddr list, u32 size); - -} // namespace Pica::CommandProcessor diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 8284afc30..7b593194d 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -2,38 +2,22 @@ // Licensed under GPLv2 // Refer to the license.txt file included. 
-#include -#include #include #include #include -#include -#include #include -#include - #include #include #include #include "common/assert.h" #include "common/bit_field.h" -#include "common/color.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "common/math_util.h" #include "common/vector_math.h" +#include "core/core.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/pica_state.h" -#include "video_core/pica_types.h" -#include "video_core/rasterizer_interface.h" -#include "video_core/regs_rasterizer.h" -#include "video_core/regs_shader.h" -#include "video_core/regs_texturing.h" +#include "video_core/gpu.h" +#include "video_core/pica/regs_shader.h" +#include "video_core/pica/shader_setup.h" #include "video_core/renderer_base.h" -#include "video_core/shader/shader.h" -#include "video_core/texture/texture_decode.h" -#include "video_core/utils.h" -#include "video_core/video_core.h" using nihstro::DVLBHeader; using nihstro::DVLEHeader; @@ -41,13 +25,13 @@ using nihstro::DVLPHeader; namespace Pica { -void DebugContext::DoOnEvent(Event event, void* data) { +void DebugContext::DoOnEvent(Event event, const void* data) { { std::unique_lock lock{breakpoint_mutex}; // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug // widgets - VideoCore::g_renderer->Rasterizer()->FlushAll(); + Core::System::GetInstance().GPU().Renderer().Rasterizer()->FlushAll(); // TODO: Should stop the CPU thread here once we multithread emulation. 
@@ -84,8 +68,7 @@ std::shared_ptr g_debug_context; // TODO: Get rid of this global namespace DebugUtils { -void DumpShader(const std::string& filename, const ShaderRegs& config, - const Shader::ShaderSetup& setup, +void DumpShader(const std::string& filename, const ShaderRegs& config, const ShaderSetup& setup, const RasterizerRegs::VSOutputAttributes* output_attributes) { struct StuffToWrite { const u8* pointer; @@ -288,13 +271,13 @@ void StartPicaTracing() { g_is_pica_tracing = true; } -void OnPicaRegWrite(PicaTrace::Write write) { +void OnPicaRegWrite(u16 cmd_id, u16 mask, u32 value) { std::lock_guard lock(pica_trace_mutex); if (!g_is_pica_tracing) return; - pica_trace->writes.push_back(write); + pica_trace->writes.push_back(PicaTrace::Write{cmd_id, mask, value}); } std::unique_ptr FinishPicaTracing() { @@ -313,153 +296,6 @@ std::unique_ptr FinishPicaTracing() { return ret; } -static std::string ReplacePattern(const std::string& input, const std::string& pattern, - const std::string& replacement) { - std::size_t start = input.find(pattern); - if (start == std::string::npos) - return input; - - std::string ret = input; - ret.replace(start, pattern.length(), replacement); - return ret; -} - -static std::string GetTevStageConfigSourceString( - const TexturingRegs::TevStageConfig::Source& source) { - - using Source = TexturingRegs::TevStageConfig::Source; - static const std::map source_map = { - {Source::PrimaryColor, "PrimaryColor"}, - {Source::PrimaryFragmentColor, "PrimaryFragmentColor"}, - {Source::SecondaryFragmentColor, "SecondaryFragmentColor"}, - {Source::Texture0, "Texture0"}, - {Source::Texture1, "Texture1"}, - {Source::Texture2, "Texture2"}, - {Source::Texture3, "Texture3"}, - {Source::PreviousBuffer, "PreviousBuffer"}, - {Source::Constant, "Constant"}, - {Source::Previous, "Previous"}, - }; - - const auto src_it = source_map.find(source); - if (src_it == source_map.end()) - return "Unknown"; - - return src_it->second; -} - -static std::string 
GetTevStageConfigColorSourceString( - const TexturingRegs::TevStageConfig::Source& source, - const TexturingRegs::TevStageConfig::ColorModifier modifier) { - - using ColorModifier = TexturingRegs::TevStageConfig::ColorModifier; - static const std::map color_modifier_map = { - {ColorModifier::SourceColor, "%source.rgb"}, - {ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)"}, - {ColorModifier::SourceAlpha, "%source.aaa"}, - {ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)"}, - {ColorModifier::SourceRed, "%source.rrr"}, - {ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)"}, - {ColorModifier::SourceGreen, "%source.ggg"}, - {ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)"}, - {ColorModifier::SourceBlue, "%source.bbb"}, - {ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)"}, - }; - - auto src_str = GetTevStageConfigSourceString(source); - auto modifier_it = color_modifier_map.find(modifier); - std::string modifier_str = "%source.????"; - if (modifier_it != color_modifier_map.end()) - modifier_str = modifier_it->second; - - return ReplacePattern(modifier_str, "%source", src_str); -} - -static std::string GetTevStageConfigAlphaSourceString( - const TexturingRegs::TevStageConfig::Source& source, - const TexturingRegs::TevStageConfig::AlphaModifier modifier) { - - using AlphaModifier = TexturingRegs::TevStageConfig::AlphaModifier; - static const std::map alpha_modifier_map = { - {AlphaModifier::SourceAlpha, "%source.a"}, - {AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)"}, - {AlphaModifier::SourceRed, "%source.r"}, - {AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)"}, - {AlphaModifier::SourceGreen, "%source.g"}, - {AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)"}, - {AlphaModifier::SourceBlue, "%source.b"}, - {AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)"}, - }; - - auto src_str = GetTevStageConfigSourceString(source); - auto modifier_it = alpha_modifier_map.find(modifier); - std::string modifier_str = 
"%source.????"; - if (modifier_it != alpha_modifier_map.end()) - modifier_str = modifier_it->second; - - return ReplacePattern(modifier_str, "%source", src_str); -} - -static std::string GetTevStageConfigOperationString( - const TexturingRegs::TevStageConfig::Operation& operation) { - - using Operation = TexturingRegs::TevStageConfig::Operation; - static const std::map combiner_map = { - {Operation::Replace, "%source1"}, - {Operation::Modulate, "(%source1 * %source2)"}, - {Operation::Add, "(%source1 + %source2)"}, - {Operation::AddSigned, "(%source1 + %source2) - 0.5"}, - {Operation::Lerp, "lerp(%source1, %source2, %source3)"}, - {Operation::Subtract, "(%source1 - %source2)"}, - {Operation::Dot3_RGB, "dot(%source1, %source2)"}, - {Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)"}, - {Operation::AddThenMultiply, "((%source1 + %source2) * %source3)"}, - }; - - const auto op_it = combiner_map.find(operation); - if (op_it == combiner_map.end()) - return "Unknown op (%source1, %source2, %source3)"; - - return op_it->second; -} - -std::string GetTevStageConfigColorCombinerString(const TexturingRegs::TevStageConfig& tev_stage) { - auto op_str = GetTevStageConfigOperationString(tev_stage.color_op); - op_str = ReplacePattern( - op_str, "%source1", - GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1)); - op_str = ReplacePattern( - op_str, "%source2", - GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2)); - return ReplacePattern( - op_str, "%source3", - GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3)); -} - -std::string GetTevStageConfigAlphaCombinerString(const TexturingRegs::TevStageConfig& tev_stage) { - auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op); - op_str = ReplacePattern( - op_str, "%source1", - GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); - op_str = ReplacePattern( - op_str, 
"%source2", - GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); - return ReplacePattern( - op_str, "%source3", - GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); -} - -void DumpTevStageConfig(const std::array& stages) { - std::string stage_info = "Tev setup:\n"; - for (std::size_t index = 0; index < stages.size(); ++index) { - const auto& tev_stage = stages[index]; - stage_info += "Stage " + std::to_string(index) + ": " + - GetTevStageConfigColorCombinerString(tev_stage) + " " + - GetTevStageConfigAlphaCombinerString(tev_stage) + "\n"; - } - LOG_TRACE(HW_GPU, "{}", stage_info); -} - } // namespace DebugUtils } // namespace Pica diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 11c22e333..4f3cc8fb9 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -4,22 +4,16 @@ #pragma once -#include -#include #include #include #include -#include #include #include #include #include #include #include "common/common_types.h" -#include "common/vector_math.h" -#include "video_core/regs_rasterizer.h" -#include "video_core/regs_shader.h" -#include "video_core/regs_texturing.h" +#include "video_core/pica/regs_rasterizer.h" namespace CiTrace { class Recorder; @@ -27,9 +21,8 @@ class Recorder; namespace Pica { -namespace Shader { +struct ShaderRegs; struct ShaderSetup; -} class DebugContext { public: @@ -87,7 +80,7 @@ public: * @param data Optional data pointer (if unused, this is a nullptr) * @note This function will perform nothing unless it is overridden in the child class. */ - virtual void OnPicaBreakPointHit(Event event, void* data) {} + virtual void OnPicaBreakPointHit(Event event, const void* data) {} /** * Action to perform when emulation is resumed from a breakpoint. @@ -126,7 +119,7 @@ public: * @param data Optional data pointer (pass nullptr if unused). 
Needs to remain valid until * Resume() is called. */ - void OnEvent(Event event, void* data) { + void OnEvent(Event event, const void* data) { // This check is left in the header to allow the compiler to inline it. if (!breakpoints[(int)event].enabled) return; @@ -134,7 +127,7 @@ public: DoOnEvent(event, data); } - void DoOnEvent(Event event, void* data); + void DoOnEvent(Event event, const void* data); /** * Resume from the current breakpoint. @@ -181,10 +174,7 @@ extern std::shared_ptr g_debug_context; // TODO: Get rid of this g namespace DebugUtils { -#define PICA_LOG_TEV 0 - -void DumpShader(const std::string& filename, const ShaderRegs& config, - const Shader::ShaderSetup& setup, +void DumpShader(const std::string& filename, const ShaderRegs& config, const ShaderSetup& setup, const RasterizerRegs::VSOutputAttributes* output_attributes); // Utility class to log Pica commands. @@ -203,46 +193,9 @@ void StartPicaTracing(); inline bool IsPicaTracing() { return g_is_pica_tracing; } -void OnPicaRegWrite(PicaTrace::Write write); +void OnPicaRegWrite(u16 cmd_id, u16 mask, u32 value); std::unique_ptr FinishPicaTracing(); -std::string GetTevStageConfigColorCombinerString(const TexturingRegs::TevStageConfig& tev_stage); -std::string GetTevStageConfigAlphaCombinerString(const TexturingRegs::TevStageConfig& tev_stage); - -/// Dumps the Tev stage config to log at trace level -void DumpTevStageConfig(const std::array& stages); - -/** - * Used in the vertex loader to merge access records. TODO: Investigate if actually useful. 
- */ -class MemoryAccessTracker { - /// Combine overlapping and close ranges - void SimplifyRanges() { - for (auto it = ranges.begin(); it != ranges.end(); ++it) { - // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, - // too - auto it2 = std::next(it); - while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { - it->second = std::max(it->second, it2->first + it2->second - it->first); - it2 = ranges.erase(it2); - } - } - } - -public: - /// Record a particular memory access in the list - void AddAccess(u32 paddr, u32 size) { - // Create new range or extend existing one - ranges[paddr] = std::max(ranges[paddr], size); - - // Simplify ranges... - SimplifyRanges(); - } - - /// Map of accessed ranges (mapping start address to range size) - std::map ranges; -}; - } // namespace DebugUtils } // namespace Pica diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp new file mode 100644 index 000000000..a77e8b1dd --- /dev/null +++ b/src/video_core/gpu.cpp @@ -0,0 +1,419 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/archives.h" +#include "common/microprofile.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/hle/service/gsp/gsp_gpu.h" +#include "core/hle/service/plgldr/plgldr.h" +#include "video_core/debug_utils/debug_utils.h" +#include "video_core/gpu.h" +#include "video_core/gpu_debugger.h" +#include "video_core/pica/pica_core.h" +#include "video_core/pica/regs_lcd.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_software/sw_blitter.h" +#include "video_core/video_core.h" + +namespace VideoCore { + +constexpr VAddr VADDR_LCD = 0x1ED02000; +constexpr VAddr VADDR_GPU = 0x1EF00000; + +static PAddr VirtualToPhysicalAddress(VAddr addr) { + if (addr == 0) { + return 0; + } + + if (addr >= Memory::VRAM_VADDR && addr <= Memory::VRAM_VADDR_END) { + return addr - Memory::VRAM_VADDR + Memory::VRAM_PADDR; + } + if (addr >= Memory::LINEAR_HEAP_VADDR && addr <= Memory::LINEAR_HEAP_VADDR_END) { + return addr - Memory::LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR; + } + if (addr >= Memory::NEW_LINEAR_HEAP_VADDR && addr <= Memory::NEW_LINEAR_HEAP_VADDR_END) { + return addr - Memory::NEW_LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR; + } + if (addr >= Memory::PLUGIN_3GX_FB_VADDR && addr <= Memory::PLUGIN_3GX_FB_VADDR_END) { + return addr - Memory::PLUGIN_3GX_FB_VADDR + Service::PLGLDR::PLG_LDR::GetPluginFBAddr(); + } + + LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x{:08X}", addr); + return addr; +} + +MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255)); +MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100)); + +struct GPU::Impl { + Core::Timing& timing; + Core::System& system; + Memory::MemorySystem& memory; + Pica::DebugContext& debug_context; + Pica::PicaCore pica; + GraphicsDebugger gpu_debugger; + std::unique_ptr renderer; + RasterizerInterface* rasterizer; + std::unique_ptr sw_blitter; + Core::TimingEventType* vblank_event; + 
Service::GSP::InterruptHandler signal_interrupt; + + explicit Impl(Core::System& system, Frontend::EmuWindow& emu_window, + Frontend::EmuWindow* secondary_window) + : timing{system.CoreTiming()}, system{system}, memory{system.Memory()}, + debug_context{*Pica::g_debug_context}, pica{memory, debug_context}, + renderer{VideoCore::CreateRenderer(emu_window, secondary_window, pica, system)}, + rasterizer{renderer->Rasterizer()}, sw_blitter{std::make_unique( + memory, rasterizer)} {} + ~Impl() = default; +}; + +GPU::GPU(Core::System& system, Frontend::EmuWindow& emu_window, + Frontend::EmuWindow* secondary_window) + : impl{std::make_unique(system, emu_window, secondary_window)} { + impl->vblank_event = impl->timing.RegisterEvent( + "GPU::VBlankCallback", + [this](uintptr_t user_data, s64 cycles_late) { VBlankCallback(user_data, cycles_late); }); + impl->timing.ScheduleEvent(FRAME_TICKS, impl->vblank_event); + + // Bind the rasterizer to the PICA GPU + impl->pica.BindRasterizer(impl->rasterizer); +} + +GPU::~GPU() = default; + +void GPU::SetInterruptHandler(Service::GSP::InterruptHandler handler) { + impl->signal_interrupt = handler; + impl->pica.SetInterruptHandler(handler); +} + +void GPU::FlushRegion(PAddr addr, u32 size) { + impl->rasterizer->FlushRegion(addr, size); +} + +void GPU::InvalidateRegion(PAddr addr, u32 size) { + impl->rasterizer->InvalidateRegion(addr, size); +} + +void GPU::ClearAll(bool flush) { + impl->rasterizer->ClearAll(flush); +} + +void GPU::Execute(const Service::GSP::Command& command) { + using Service::GSP::CommandId; + auto& regs = impl->pica.regs; + + switch (command.id) { + case CommandId::RequestDma: { + Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address, + command.dma_request.size, Memory::FlushMode::Flush); + Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address, + command.dma_request.size, + Memory::FlushMode::Invalidate); + + // TODO(Subv): These memory accesses should not go through the 
application's memory mapping. + // They should go through the GSP module's memory mapping. + const auto process = impl->system.Kernel().GetCurrentProcess(); + impl->memory.CopyBlock(*process, command.dma_request.dest_address, + command.dma_request.source_address, command.dma_request.size); + impl->signal_interrupt(Service::GSP::InterruptId::DMA); + break; + } + case CommandId::SubmitCmdList: { + auto& params = command.submit_gpu_cmdlist; + auto& cmdbuffer = regs.internal.pipeline.command_buffer; + + // Write to the command buffer GPU registers + cmdbuffer.addr[0].Assign(VirtualToPhysicalAddress(params.address) >> 3); + cmdbuffer.size[0].Assign(params.size >> 3); + cmdbuffer.trigger[0] = 1; + + // Trigger processing of the command list + SubmitCmdList(0); + break; + } + case CommandId::MemoryFill: { + auto& params = command.memory_fill; + auto& memfill = regs.memory_fill_config; + + // Write to the memory fill GPU registers. + if (params.start1 != 0) { + memfill[0].address_start = VirtualToPhysicalAddress(params.start1) >> 3; + memfill[0].address_end = VirtualToPhysicalAddress(params.end1) >> 3; + memfill[0].value_32bit = params.value1; + memfill[0].control = params.control1; + MemoryFill(0); + } + if (params.start2 != 0) { + memfill[1].address_start = VirtualToPhysicalAddress(params.start2) >> 3; + memfill[1].address_end = VirtualToPhysicalAddress(params.end2) >> 3; + memfill[1].value_32bit = params.value2; + memfill[1].control = params.control2; + MemoryFill(1); + } + break; + } + case CommandId::DisplayTransfer: { + auto& params = command.display_transfer; + auto& display_transfer = regs.display_transfer_config; + + // Write to the transfer engine GPU registers. 
+ display_transfer.input_address = VirtualToPhysicalAddress(params.in_buffer_address) >> 3; + display_transfer.output_address = VirtualToPhysicalAddress(params.out_buffer_address) >> 3; + display_transfer.input_size = params.in_buffer_size; + display_transfer.output_size = params.out_buffer_size; + display_transfer.flags = params.flags; + display_transfer.trigger.Assign(1); + + // Trigger the display transfer. + MemoryTransfer(); + break; + } + case CommandId::TextureCopy: { + auto& params = command.texture_copy; + auto& texture_copy = regs.display_transfer_config; + + // Write to the transfer engine GPU registers. + texture_copy.input_address = VirtualToPhysicalAddress(params.in_buffer_address) >> 3; + texture_copy.output_address = VirtualToPhysicalAddress(params.out_buffer_address) >> 3; + texture_copy.texture_copy.size = params.size; + texture_copy.texture_copy.input_size = params.in_width_gap; + texture_copy.texture_copy.output_size = params.out_width_gap; + texture_copy.flags = params.flags; + texture_copy.trigger.Assign(1); + + // Trigger the texture copy. + MemoryTransfer(); + break; + } + case CommandId::CacheFlush: { + // Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers + // Use command.cache_flush.regions to implement this handler + break; + } + default: + LOG_ERROR(HW_GPU, "Unknown command {:#08X}", command.id.Value()); + } + + // Notify debugger that a GSP command was processed. + impl->debug_context.OnEvent(Pica::DebugContext::Event::GSPCommandProcessed, &command); +} + +void GPU::SetBufferSwap(u32 screen_id, const Service::GSP::FrameBufferInfo& info) { + const PAddr phys_address_left = VirtualToPhysicalAddress(info.address_left); + const PAddr phys_address_right = VirtualToPhysicalAddress(info.address_right); + + // Update framebuffer properties. 
+ auto& framebuffer = impl->pica.regs.framebuffer_config[screen_id]; + if (info.active_fb == 0) { + framebuffer.address_left1 = phys_address_left; + framebuffer.address_right1 = phys_address_right; + } else { + framebuffer.address_left2 = phys_address_left; + framebuffer.address_right2 = phys_address_right; + } + + framebuffer.stride = info.stride; + framebuffer.format = info.format; + framebuffer.active_fb = info.shown_fb; + + // Notify debugger about the buffer swap. + impl->debug_context.OnEvent(Pica::DebugContext::Event::BufferSwapped, nullptr); + + if (screen_id == 0) { + MicroProfileFlip(); + impl->system.perf_stats->EndGameFrame(); + } +} + +void GPU::SetColorFill(const Pica::ColorFill& fill) { + impl->pica.regs_lcd.color_fill_top = fill; + impl->pica.regs_lcd.color_fill_bottom = fill; +} + +u32 GPU::ReadReg(VAddr addr) { + switch (addr & 0xFFFFF000) { + case VADDR_LCD: { + const u32 offset = addr - VADDR_LCD; + const u32 index = offset / sizeof(u32); + ASSERT(addr % sizeof(u32) == 0); + ASSERT(index < Pica::RegsLcd::NumIds()); + return impl->pica.regs_lcd[index]; + } + case VADDR_GPU: + case VADDR_GPU + 0x1000: { + const u32 offset = addr - VADDR_GPU; + const u32 index = offset / sizeof(u32); + ASSERT(addr % sizeof(u32) == 0); + ASSERT(index < Pica::PicaCore::Regs::NUM_REGS); + return impl->pica.regs.reg_array[index]; + } + default: + UNREACHABLE_MSG("Read from unknown GPU address {:#08X}", addr); + } +} + +void GPU::WriteReg(VAddr addr, u32 data) { + switch (addr & 0xFFFFF000) { + case VADDR_LCD: { + const u32 offset = addr - VADDR_LCD; + const u32 index = offset / sizeof(u32); + ASSERT(addr % sizeof(u32) == 0); + ASSERT(index < Pica::RegsLcd::NumIds()); + impl->pica.regs_lcd[index] = data; + break; + } + case VADDR_GPU: + case VADDR_GPU + 0x1000: { + const u32 offset = addr - VADDR_GPU; + const u32 index = offset / sizeof(u32); + + ASSERT(addr % sizeof(u32) == 0); + ASSERT(index < Pica::PicaCore::Regs::NUM_REGS); + impl->pica.regs.reg_array[index] = data; 
+ + // Handle registers that trigger GPU actions + switch (index) { + case GPU_REG_INDEX(memory_fill_config[0].trigger): + MemoryFill(0); + break; + case GPU_REG_INDEX(memory_fill_config[1].trigger): + MemoryFill(1); + break; + case GPU_REG_INDEX(display_transfer_config.trigger): + MemoryTransfer(); + break; + case GPU_REG_INDEX(internal.pipeline.command_buffer.trigger[0]): + SubmitCmdList(0); + break; + case GPU_REG_INDEX(internal.pipeline.command_buffer.trigger[1]): + SubmitCmdList(1); + break; + default: + break; + } + break; + } + default: + UNREACHABLE_MSG("Write to unknown GPU address {:#08X}", addr); + } +} + +void GPU::Sync() { + impl->renderer->Sync(); +} + +VideoCore::RendererBase& GPU::Renderer() { + return *impl->renderer; +} + +Pica::PicaCore& GPU::PicaCore() { + return impl->pica; +} + +const Pica::PicaCore& GPU::PicaCore() const { + return impl->pica; +} + +Pica::DebugContext& GPU::DebugContext() { + return *Pica::g_debug_context; +} + +GraphicsDebugger& GPU::Debugger() { + return impl->gpu_debugger; +} + +void GPU::SubmitCmdList(u32 index) { + // Check if a command list was triggered. + auto& config = impl->pica.regs.internal.pipeline.command_buffer; + if (!config.trigger[index]) { + return; + } + + MICROPROFILE_SCOPE(GPU_CmdlistProcessing); + + // Forward command list processing to the PICA core. + const PAddr addr = config.GetPhysicalAddress(index); + const u32 size = config.GetSize(index); + impl->pica.ProcessCmdList(addr, size); + config.trigger[index] = 0; +} + +void GPU::MemoryFill(u32 index) { + // Check if a memory fill was triggered. + auto& config = impl->pica.regs.memory_fill_config[index]; + if (!config.trigger) { + return; + } + + // Perform memory fill. + if (!impl->rasterizer->AccelerateFill(config)) { + impl->sw_blitter->MemoryFill(config); + } + + // It seems that it won't signal interrupt if "address_start" is zero. 
+ // TODO: hwtest this + if (config.GetStartAddress() != 0) { + if (!index) { + impl->signal_interrupt(Service::GSP::InterruptId::PSC0); + } else { + impl->signal_interrupt(Service::GSP::InterruptId::PSC1); + } + } + + // Reset "trigger" flag and set the "finish" flag + // This was confirmed to happen on hardware even if "address_start" is zero. + config.trigger.Assign(0); + config.finished.Assign(1); +} + +void GPU::MemoryTransfer() { + // Check if a transfer was triggered. + auto& config = impl->pica.regs.display_transfer_config; + if (!config.trigger.Value()) { + return; + } + + MICROPROFILE_SCOPE(GPU_DisplayTransfer); + + // Notify debugger about the display transfer. + impl->debug_context.OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); + + // Perform memory transfer + if (config.is_texture_copy) { + if (!impl->rasterizer->AccelerateTextureCopy(config)) { + impl->sw_blitter->TextureCopy(config); + } + } else { + if (!impl->rasterizer->AccelerateDisplayTransfer(config)) { + impl->sw_blitter->DisplayTransfer(config); + } + } + + // Complete transfer. + config.trigger.Assign(0); + impl->signal_interrupt(Service::GSP::InterruptId::PPF); +} + +void GPU::VBlankCallback(std::uintptr_t user_data, s64 cycles_late) { + // Present renderered frame. 
+ impl->renderer->SwapBuffers(); + + // Signal to GSP that GPU interrupt has occurred + impl->signal_interrupt(Service::GSP::InterruptId::PDC0); + impl->signal_interrupt(Service::GSP::InterruptId::PDC1); + + // Reschedule recurrent event + impl->timing.ScheduleEvent(FRAME_TICKS - cycles_late, impl->vblank_event); +} + +template +void GPU::serialize(Archive& ar, const u32 file_version) { + ar & impl->pica; +} + +SERIALIZE_IMPL(GPU) + +} // namespace VideoCore diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h new file mode 100644 index 000000000..4b29bb431 --- /dev/null +++ b/src/video_core/gpu.h @@ -0,0 +1,113 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "core/hle/service/gsp/gsp_interrupt.h" + +namespace Service::GSP { +struct Command; +struct FrameBufferInfo; +} // namespace Service::GSP + +namespace Core { +class System; +} + +namespace Pica { +class DebugContext; +class PicaCore; +struct RegsLcd; +union ColorFill; +} // namespace Pica + +namespace Frontend { +class EmuWindow; +} + +namespace VideoCore { + +/// Measured on hardware to be 2240568 timer cycles or 4481136 ARM11 cycles +constexpr u64 FRAME_TICKS = 4481136ull; + +class GraphicsDebugger; +class RendererBase; + +/** + * The GPU class is the high level interface to the video_core for core services. + */ +class GPU { +public: + explicit GPU(Core::System& system, Frontend::EmuWindow& emu_window, + Frontend::EmuWindow* secondary_window); + ~GPU(); + + /// Sets the function to call for signalling GSP interrupts. 
+ void SetInterruptHandler(Service::GSP::InterruptHandler handler); + + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + void FlushRegion(PAddr addr, u32 size); + + /// Notify rasterizer that any caches of the specified region should be invalidated + void InvalidateRegion(PAddr addr, u32 size); + + /// Flushes and invalidates all memory in the rasterizer cache and removes any leftover state. + void ClearAll(bool flush); + + /// Executes the provided GSP command. + void Execute(const Service::GSP::Command& command); + + /// Updates GPU display framebuffer configuration using the specified parameters. + void SetBufferSwap(u32 screen_id, const Service::GSP::FrameBufferInfo& info); + + /// Sets the LCD color fill configuration for the top and bottom screens. + void SetColorFill(const Pica::ColorFill& fill); + + /// Reads a word from the GPU virtual address. + u32 ReadReg(VAddr addr); + + /// Writes the provided value to the GPU virtual address. + void WriteReg(VAddr addr, u32 data); + + /// Synchronizes fixed function renderer state with PICA registers. + void Sync(); + + /// Returns a mutable reference to the renderer. + [[nodiscard]] VideoCore::RendererBase& Renderer(); + + /// Returns a mutable reference to the PICA GPU. + [[nodiscard]] Pica::PicaCore& PicaCore(); + + /// Returns an immutable reference to the PICA GPU. + [[nodiscard]] const Pica::PicaCore& PicaCore() const; + + /// Returns a mutable reference to the pica debugging context. + [[nodiscard]] Pica::DebugContext& DebugContext(); + + /// Returns a mutable reference to the GSP command debugger. 
+ [[nodiscard]] GraphicsDebugger& Debugger(); + +private: + void SubmitCmdList(u32 index); + + void MemoryFill(u32 index); + + void MemoryTransfer(); + + void VBlankCallback(uintptr_t user_data, s64 cycles_late); + + friend class boost::serialization::access; + template + void serialize(Archive& ar, const u32 file_version); + +private: + struct Impl; + std::unique_ptr impl; +}; + +} // namespace VideoCore diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h index 33abab3d0..c5fb2de47 100644 --- a/src/video_core/gpu_debugger.h +++ b/src/video_core/gpu_debugger.h @@ -7,18 +7,22 @@ #include #include #include -#include "core/hle/service/gsp/gsp.h" +#include "core/hle/service/gsp/gsp_command.h" + +namespace VideoCore { class GraphicsDebugger { public: // Base class for all objects which need to be notified about GPU events class DebuggerObserver { - public: - DebuggerObserver() : observed(nullptr) {} + friend class GraphicsDebugger; + public: + DebuggerObserver() = default; virtual ~DebuggerObserver() { - if (observed) + if (observed) { observed->UnregisterObserver(this); + } } /** @@ -39,20 +43,15 @@ public: } private: - GraphicsDebugger* observed; - - friend class GraphicsDebugger; + GraphicsDebugger* observed{}; }; - void GXCommandProcessed(u8* command_data) { - if (observers.empty()) + void GXCommandProcessed(Service::GSP::Command& command_data) { + if (observers.empty()) { return; + } - gx_command_history.emplace_back(); - Service::GSP::Command& cmd = gx_command_history.back(); - - std::memcpy(&cmd, command_data, sizeof(Service::GSP::Command)); - + gx_command_history.emplace_back(command_data); ForEachObserver([this](DebuggerObserver* observer) { observer->GXCommandProcessed(static_cast(this->gx_command_history.size())); }); @@ -80,6 +79,7 @@ private: } std::vector observers; - std::vector gx_command_history; }; + +} // namespace VideoCore diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp deleted file mode 100644 index 
2f7ae0d6f..000000000 --- a/src/video_core/pica.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include "core/global.h" -#include "video_core/geometry_pipeline.h" -#include "video_core/pica.h" -#include "video_core/pica_state.h" -#include "video_core/renderer_base.h" -#include "video_core/video_core.h" - -namespace Core { -template <> -Pica::State& Global() { - return Pica::g_state; -} -} // namespace Core - -namespace Pica { - -State g_state; - -void Init() { - g_state.Reset(); -} - -void Shutdown() { - Shader::Shutdown(); -} - -template -void Zero(T& o) { - static_assert(std::is_trivial_v, "It's undefined behavior to memset a non-trivial type"); - std::memset(&o, 0, sizeof(o)); -} - -State::State() : geometry_pipeline(*this) { - auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { - using Pica::Shader::OutputVertex; - auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, - const OutputVertex& v2) { - VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); - }; - primitive_assembler.SubmitVertex( - Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle); - }; - - auto SetWinding = [this]() { primitive_assembler.SetWinding(); }; - - g_state.gs_unit.SetVertexHandler(SubmitVertex, SetWinding); - g_state.geometry_pipeline.SetVertexHandler(SubmitVertex); -} - -void State::Reset() { - Zero(regs); - vs = {}; - gs = {}; - Zero(cmd_list); - immediate = {}; - primitive_assembler.Reconfigure(PipelineRegs::TriangleTopology::List); - vs_float_regs_counter = 0; - vs_uniform_write_buffer.fill(0); - gs_float_regs_counter = 0; - gs_uniform_write_buffer.fill(0); - default_attr_counter = 0; - default_attr_write_buffer.fill(0); -} -} // namespace Pica diff --git a/src/video_core/pica.h b/src/video_core/pica.h deleted file mode 100644 index 6d6ff0f63..000000000 --- 
a/src/video_core/pica.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "video_core/regs_texturing.h" -namespace Pica { - -/// Initialize Pica state -void Init(); - -/// Shutdown Pica state -void Shutdown(); - -} // namespace Pica diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/pica/geometry_pipeline.cpp similarity index 74% rename from src/video_core/geometry_pipeline.cpp rename to src/video_core/pica/geometry_pipeline.cpp index 957d24bfb..fa7d5d8a7 100644 --- a/src/video_core/geometry_pipeline.cpp +++ b/src/video_core/pica/geometry_pipeline.cpp @@ -6,11 +6,13 @@ #include #include #include "common/archives.h" -#include "video_core/geometry_pipeline.h" -#include "video_core/pica_state.h" -#include "video_core/regs.h" -#include "video_core/renderer_base.h" -#include "video_core/video_core.h" +#include "core/core.h" +#include "video_core/gpu.h" +#include "video_core/pica/geometry_pipeline.h" +#include "video_core/pica/pica_core.h" +#include "video_core/pica/shader_setup.h" +#include "video_core/pica/shader_unit.h" +#include "video_core/shader/shader.h" namespace Pica { @@ -33,7 +35,7 @@ public: * @param input attributes of a vertex output from vertex shader * @return if the buffer is full and the geometry shader should be invoked */ - virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0; + virtual bool SubmitVertex(const AttributeBuffer& input) = 0; private: template @@ -49,32 +51,33 @@ private: // TODO: what happens when the input size is not divisible by the output size? 
class GeometryPipeline_Point : public GeometryPipelineBackend { public: - GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) { + GeometryPipeline_Point(const RegsInternal& regs, GeometryShaderUnit& unit) + : regs(regs), unit(unit) { ASSERT(regs.pipeline.variable_primitive == 0); ASSERT(regs.gs.input_to_uniform == 0); vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; std::size_t gs_input_num = regs.gs.max_input_attribute_index + 1; ASSERT(gs_input_num % vs_output_num == 0); - buffer_cur = attribute_buffer.attr; - buffer_end = attribute_buffer.attr + gs_input_num; + buffer_cur = attribute_buffer.data(); + buffer_end = attribute_buffer.data() + gs_input_num; } bool IsEmpty() const override { - return buffer_cur == attribute_buffer.attr; + return buffer_cur == attribute_buffer.data(); } bool NeedIndexInput() const override { return false; } - void SubmitIndex(unsigned int val) override { + void SubmitIndex(u32 val) override { UNREACHABLE(); } - bool SubmitVertex(const Shader::AttributeBuffer& input) override { - buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + bool SubmitVertex(const AttributeBuffer& input) override { + buffer_cur = std::copy(input.data(), input.data() + vs_output_num, buffer_cur); if (buffer_cur == buffer_end) { - buffer_cur = attribute_buffer.attr; + buffer_cur = attribute_buffer.data(); unit.LoadInput(regs.gs, attribute_buffer); return true; } @@ -82,14 +85,17 @@ public: } private: - const Regs& regs; - Shader::GSUnitState& unit; - Shader::AttributeBuffer attribute_buffer; + const RegsInternal& regs; + GeometryShaderUnit& unit; + AttributeBuffer attribute_buffer; Common::Vec4* buffer_cur; Common::Vec4* buffer_end; - unsigned int vs_output_num; + u32 vs_output_num; - GeometryPipeline_Point() : regs(g_state.regs), unit(g_state.gs_unit) {} + // TODO: REMOVE THIS + GeometryPipeline_Point() + : regs(Core::System::GetInstance().GPU().PicaCore().regs.internal), + 
unit(Core::System::GetInstance().GPU().PicaCore().gs_unit) {} template static void serialize_common(Class* self, Archive& ar, const unsigned int version) { @@ -101,8 +107,8 @@ private: template void save(Archive& ar, const unsigned int version) const { serialize_common(this, ar, version); - auto buffer_idx = static_cast(buffer_cur - attribute_buffer.attr); - auto buffer_size = static_cast(buffer_end - attribute_buffer.attr); + auto buffer_idx = static_cast(buffer_cur - attribute_buffer.data()); + auto buffer_size = static_cast(buffer_end - attribute_buffer.data()); ar << buffer_idx; ar << buffer_size; } @@ -113,8 +119,8 @@ private: u32 buffer_idx, buffer_size; ar >> buffer_idx; ar >> buffer_size; - buffer_cur = attribute_buffer.attr + buffer_idx; - buffer_end = attribute_buffer.attr + buffer_size; + buffer_cur = attribute_buffer.data() + buffer_idx; + buffer_end = attribute_buffer.data() + buffer_size; } BOOST_SERIALIZATION_SPLIT_MEMBER() @@ -127,7 +133,7 @@ private: // value in the batch. This mode is usually used for subdivision. 
class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend { public: - GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup) + GeometryPipeline_VariablePrimitive(const RegsInternal& regs, ShaderSetup& setup) : regs(regs), setup(setup) { ASSERT(regs.pipeline.variable_primitive == 1); ASSERT(regs.gs.input_to_uniform == 1); @@ -142,7 +148,7 @@ public: return need_index; } - void SubmitIndex(unsigned int val) override { + void SubmitIndex(u32 val) override { DEBUG_ASSERT(need_index); // The number of vertex input is put to the uniform register @@ -157,15 +163,15 @@ public: need_index = false; } - bool SubmitVertex(const Shader::AttributeBuffer& input) override { + bool SubmitVertex(const AttributeBuffer& input) override { DEBUG_ASSERT(!need_index); if (main_vertex_num != 0) { // For main vertices, receive all attributes - buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + buffer_cur = std::copy(input.data(), input.data() + vs_output_num, buffer_cur); --main_vertex_num; } else { // For other vertices, only receive the first attribute (usually the position) - *(buffer_cur++) = input.attr[0]; + *(buffer_cur++) = input[0]; } --total_vertex_num; @@ -179,14 +185,17 @@ public: private: bool need_index = true; - const Regs& regs; - Shader::ShaderSetup& setup; - unsigned int main_vertex_num; - unsigned int total_vertex_num; + const RegsInternal& regs; + ShaderSetup& setup; + u32 main_vertex_num; + u32 total_vertex_num; Common::Vec4* buffer_cur; - unsigned int vs_output_num; + u32 vs_output_num; - GeometryPipeline_VariablePrimitive() : regs(g_state.regs), setup(g_state.gs) {} + // TODO: REMOVE THIS + GeometryPipeline_VariablePrimitive() + : regs(Core::System::GetInstance().GPU().PicaCore().regs.internal), + setup(Core::System::GetInstance().GPU().PicaCore().gs_setup) {} template static void serialize_common(Class* self, Archive& ar, const unsigned int version) { @@ -222,8 +231,7 @@ private: // particle system. 
class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend { public: - GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup) - : regs(regs), setup(setup) { + GeometryPipeline_FixedPrimitive(const RegsInternal& regs, ShaderSetup& setup) : setup(setup) { ASSERT(regs.pipeline.variable_primitive == 0); ASSERT(regs.gs.input_to_uniform == 1); vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; @@ -241,12 +249,12 @@ public: return false; } - void SubmitIndex(unsigned int val) override { + void SubmitIndex(u32 val) override { UNREACHABLE(); } - bool SubmitVertex(const Shader::AttributeBuffer& input) override { - buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + bool SubmitVertex(const AttributeBuffer& input) override { + buffer_cur = std::copy(input.data(), input.data() + vs_output_num, buffer_cur); if (buffer_cur == buffer_end) { buffer_cur = buffer_begin; return true; @@ -255,14 +263,15 @@ public: } private: - [[maybe_unused]] const Regs& regs; - Shader::ShaderSetup& setup; + ShaderSetup& setup; Common::Vec4* buffer_begin; Common::Vec4* buffer_cur; Common::Vec4* buffer_end; - unsigned int vs_output_num; + u32 vs_output_num; - GeometryPipeline_FixedPrimitive() : regs(g_state.regs), setup(g_state.gs) {} + // TODO: REMOVE THIS + GeometryPipeline_FixedPrimitive() + : setup(Core::System::GetInstance().GPU().PicaCore().gs_setup) {} template static void serialize_common(Class* self, Archive& ar, const unsigned int version) { @@ -298,52 +307,53 @@ private: friend class boost::serialization::access; }; -GeometryPipeline::GeometryPipeline(State& state) : state(state) {} +GeometryPipeline::GeometryPipeline(RegsInternal& regs_, GeometryShaderUnit& gs_unit_, + ShaderSetup& gs_) + : regs(regs_), gs_unit(gs_unit_), gs(gs_) {} GeometryPipeline::~GeometryPipeline() = default; -void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) { +void GeometryPipeline::SetVertexHandler(VertexHandler 
vertex_handler) { this->vertex_handler = std::move(vertex_handler); } -void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) { - if (!backend) +void GeometryPipeline::Setup(ShaderEngine* shader_engine) { + if (!backend) { return; + } this->shader_engine = shader_engine; - shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset); + shader_engine->SetupBatch(gs, regs.gs.main_offset); } void GeometryPipeline::Reconfigure() { ASSERT(!backend || backend->IsEmpty()); - if (state.regs.pipeline.use_gs == PipelineRegs::UseGS::No) { + if (regs.pipeline.use_gs == PipelineRegs::UseGS::No) { backend = nullptr; return; } - ASSERT(state.regs.pipeline.use_gs == PipelineRegs::UseGS::Yes); - // The following assumes that when geometry shader is in use, the shader unit 3 is configured as // a geometry shader unit. // TODO: what happens if this is not true? - ASSERT(state.regs.pipeline.gs_unit_exclusive_configuration == 1); - ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS); + ASSERT(regs.pipeline.gs_unit_exclusive_configuration == 1); + ASSERT(regs.gs.shader_mode == ShaderRegs::ShaderMode::GS); + ASSERT(regs.pipeline.use_gs == PipelineRegs::UseGS::Yes); - state.gs_unit.ConfigOutput(state.regs.gs); + gs_unit.ConfigOutput(regs.gs); - ASSERT(state.regs.pipeline.vs_outmap_total_minus_1_a == - state.regs.pipeline.vs_outmap_total_minus_1_b); + ASSERT(regs.pipeline.vs_outmap_total_minus_1_a == regs.pipeline.vs_outmap_total_minus_1_b); - switch (state.regs.pipeline.gs_config.mode) { + switch (regs.pipeline.gs_config.mode) { case PipelineRegs::GSMode::Point: - backend = std::make_unique(state.regs, state.gs_unit); + backend = std::make_unique(regs, gs_unit); break; case PipelineRegs::GSMode::VariablePrimitive: - backend = std::make_unique(state.regs, state.gs); + backend = std::make_unique(regs, gs); break; case PipelineRegs::GSMode::FixedPrimitive: - backend = std::make_unique(state.regs, state.gs); + backend = std::make_unique(regs, gs); break; default: 
UNREACHABLE(); @@ -351,8 +361,9 @@ void GeometryPipeline::Reconfigure() { } bool GeometryPipeline::NeedIndexInput() const { - if (!backend) + if (!backend) { return false; + } return backend->NeedIndexInput(); } @@ -360,19 +371,19 @@ void GeometryPipeline::SubmitIndex(unsigned int val) { backend->SubmitIndex(val); } -void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) { +void GeometryPipeline::SubmitVertex(const AttributeBuffer& input) { if (!backend) { // No backend means the geometry shader is disabled, so we send the vertex shader output // directly to the primitive assembler. vertex_handler(input); } else { if (backend->SubmitVertex(input)) { - shader_engine->Run(state.gs, state.gs_unit); + shader_engine->Run(gs, gs_unit); // The uniform b15 is set to true after every geometry shader invocation. This is useful // for the shader to know if this is the first invocation in a batch, if the program set // b15 to false first. - state.gs.uniforms.b[15] = true; + gs.uniforms.b[15] = true; } } } diff --git a/src/video_core/geometry_pipeline.h b/src/video_core/pica/geometry_pipeline.h similarity index 70% rename from src/video_core/geometry_pipeline.h rename to src/video_core/pica/geometry_pipeline.h index 1a903b1e0..048a4bb7e 100644 --- a/src/video_core/geometry_pipeline.h +++ b/src/video_core/pica/geometry_pipeline.h @@ -6,11 +6,14 @@ #include #include -#include "video_core/shader/shader.h" +#include "video_core/pica/shader_unit.h" namespace Pica { -struct State; +struct RegsInternal; +struct GeometryShaderUnit; +struct ShaderSetup; +class ShaderEngine; class GeometryPipelineBackend; class GeometryPipeline_Point; @@ -20,17 +23,14 @@ class GeometryPipeline_FixedPrimitive; /// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler class GeometryPipeline { public: - explicit GeometryPipeline(State& state); + explicit GeometryPipeline(RegsInternal& regs, GeometryShaderUnit& gs_unit, ShaderSetup& gs); 
~GeometryPipeline(); /// Sets the handler for receiving vertex outputs from vertex shader - void SetVertexHandler(Shader::VertexHandler vertex_handler); + void SetVertexHandler(VertexHandler vertex_handler); - /** - * Setup the geometry shader unit if it is in use - * @param shader_engine the shader engine for the geometry shader to run - */ - void Setup(Shader::ShaderEngine* shader_engine); + /// Setup the geometry shader unit if it is in use + void Setup(ShaderEngine* shader_engine); /// Reconfigures the pipeline according to current register settings void Reconfigure(); @@ -42,13 +42,15 @@ public: void SubmitIndex(unsigned int val); /// Submits vertex attributes output from vertex shader - void SubmitVertex(const Shader::AttributeBuffer& input); + void SubmitVertex(const AttributeBuffer& input); private: - Shader::VertexHandler vertex_handler; - Shader::ShaderEngine* shader_engine; + VertexHandler vertex_handler; + ShaderEngine* shader_engine; std::unique_ptr backend; - State& state; + RegsInternal& regs; + GeometryShaderUnit& gs_unit; + ShaderSetup& gs; template void serialize(Archive& ar, const unsigned int version); diff --git a/src/video_core/pica/output_vertex.cpp b/src/video_core/pica/output_vertex.cpp new file mode 100644 index 000000000..e0cb2cdd0 --- /dev/null +++ b/src/video_core/pica/output_vertex.cpp @@ -0,0 +1,50 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/pica/output_vertex.h" +#include "video_core/pica/regs_rasterizer.h" + +namespace Pica { + +OutputVertex::OutputVertex(const RasterizerRegs& regs, const AttributeBuffer& output) { + // Attributes can be used without being set in GPUREG_SH_OUTMAP_Oi + // Hardware tests have shown that they are initialized to 1 in this case. 
+ std::array vertex_slots_overflow; + vertex_slots_overflow.fill(f24::One()); + + const u32 num_attributes = regs.vs_output_total & 7; + for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) { + const auto output_register_map = regs.vs_output_attributes[attrib]; + vertex_slots_overflow[output_register_map.map_x] = output[attrib][0]; + vertex_slots_overflow[output_register_map.map_y] = output[attrib][1]; + vertex_slots_overflow[output_register_map.map_z] = output[attrib][2]; + vertex_slots_overflow[output_register_map.map_w] = output[attrib][3]; + } + + // Copy to result + std::memcpy(this, vertex_slots_overflow.data(), sizeof(OutputVertex)); + + // The hardware takes the absolute and saturates vertex colors, *before* doing interpolation + for (u32 i = 0; i < 4; ++i) { + const f32 c = std::fabs(color[i].ToFloat32()); + color[i] = f24::FromFloat32(c < 1.0f ? c : 1.0f); + } +} + +#define ASSERT_POS(var, pos) \ + static_assert(offsetof(OutputVertex, var) == pos * sizeof(f24), "Semantic at wrong " \ + "offset.") + +ASSERT_POS(pos, RasterizerRegs::VSOutputAttributes::POSITION_X); +ASSERT_POS(quat, RasterizerRegs::VSOutputAttributes::QUATERNION_X); +ASSERT_POS(color, RasterizerRegs::VSOutputAttributes::COLOR_R); +ASSERT_POS(tc0, RasterizerRegs::VSOutputAttributes::TEXCOORD0_U); +ASSERT_POS(tc1, RasterizerRegs::VSOutputAttributes::TEXCOORD1_U); +ASSERT_POS(tc0_w, RasterizerRegs::VSOutputAttributes::TEXCOORD0_W); +ASSERT_POS(view, RasterizerRegs::VSOutputAttributes::VIEW_X); +ASSERT_POS(tc2, RasterizerRegs::VSOutputAttributes::TEXCOORD2_U); + +#undef ASSERT_POS + +} // namespace Pica diff --git a/src/video_core/pica/output_vertex.h b/src/video_core/pica/output_vertex.h new file mode 100644 index 000000000..7213dcf67 --- /dev/null +++ b/src/video_core/pica/output_vertex.h @@ -0,0 +1,48 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/vector_math.h" +#include "video_core/pica_types.h" + +namespace Pica { + +struct RasterizerRegs; + +using AttributeBuffer = std::array, 16>; + +struct OutputVertex { + OutputVertex() = default; + explicit OutputVertex(const RasterizerRegs& regs, const AttributeBuffer& output); + + Common::Vec4 pos; + Common::Vec4 quat; + Common::Vec4 color; + Common::Vec2 tc0; + Common::Vec2 tc1; + f24 tc0_w; + INSERT_PADDING_WORDS(1); + Common::Vec3 view; + INSERT_PADDING_WORDS(1); + Common::Vec2 tc2; + +private: + template + void serialize(Archive& ar, const u32) { + ar& pos; + ar& quat; + ar& color; + ar& tc0; + ar& tc1; + ar& tc0_w; + ar& view; + ar& tc2; + } + friend class boost::serialization::access; +}; +static_assert(std::is_trivial_v, "Structure is not POD"); +static_assert(sizeof(OutputVertex) == 24 * sizeof(f32), "OutputVertex has invalid size"); + +} // namespace Pica diff --git a/src/video_core/pica/packed_attribute.h b/src/video_core/pica/packed_attribute.h new file mode 100644 index 000000000..0e6e716e0 --- /dev/null +++ b/src/video_core/pica/packed_attribute.h @@ -0,0 +1,74 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/vector_math.h" +#include "video_core/pica_types.h" + +namespace Pica { + +/** + * Uniforms and fixed attributes are written in a packed format such that four float24 values are + * encoded in three 32-bit numbers. Uniforms can also encode four float32 values in four 32-bit + * numbers. We write to internal memory once a full vector is written. + */ +struct PackedAttribute { + std::array buffer{}; + u32 index{}; + + /// Places a word to the queue and returns true if the queue becomes full. 
+ constexpr bool Push(u32 word, bool is_float32 = false) { + buffer[index++] = word; + return (index >= 4 && is_float32) || (index >= 3 && !is_float32); + } + + /// Resets the queue discarding previous entries. + constexpr void Reset() { + index = 0; + } + + /// Returns the queue contents with either float24 or float32 interpretation. + constexpr Common::Vec4 Get(bool is_float32 = false) { + Reset(); + if (is_float32) { + return AsFloat32(); + } else { + return AsFloat24(); + } + } + +private: + /// Decodes the queue contents with float24 transfer mode. + constexpr Common::Vec4 AsFloat24() const { + const u32 x = buffer[2] & 0xFFFFFF; + const u32 y = ((buffer[1] & 0xFFFF) << 8) | ((buffer[2] >> 24) & 0xFF); + const u32 z = ((buffer[0] & 0xFF) << 16) | ((buffer[1] >> 16) & 0xFFFF); + const u32 w = buffer[0] >> 8; + return Common::Vec4{f24::FromRaw(x), f24::FromRaw(y), f24::FromRaw(z), + f24::FromRaw(w)}; + } + + /// Decodes the queue contents with float32 transfer mode. + constexpr Common::Vec4 AsFloat32() const { + Common::Vec4 uniform; + for (u32 i = 0; i < 4; i++) { + const f32 buffer_value = std::bit_cast(buffer[i]); + uniform[3 - i] = f24::FromFloat32(buffer_value); + } + return uniform; + } + +private: + template + void serialize(Archive& ar, const u32) { + ar& buffer; + ar& index; + } + friend class boost::serialization::access; +}; + +} // namespace Pica diff --git a/src/video_core/pica/pica_core.cpp b/src/video_core/pica/pica_core.cpp new file mode 100644 index 000000000..1d7594a2a --- /dev/null +++ b/src/video_core/pica/pica_core.cpp @@ -0,0 +1,592 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/arch.h" +#include "common/archives.h" +#include "common/microprofile.h" +#include "common/scope_exit.h" +#include "common/settings.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/debug_utils/debug_utils.h" +#include "video_core/pica/pica_core.h" +#include "video_core/pica/vertex_loader.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/shader/shader.h" + +namespace Pica { + +MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); + +using namespace DebugUtils; + +union CommandHeader { + u32 hex; + BitField<0, 16, u32> cmd_id; + BitField<16, 4, u32> parameter_mask; + BitField<20, 8, u32> extra_data_length; + BitField<31, 1, u32> group_commands; +}; +static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); + +PicaCore::PicaCore(Memory::MemorySystem& memory_, DebugContext& debug_context_) + : memory{memory_}, debug_context{debug_context_}, geometry_pipeline{regs.internal, gs_unit, + gs_setup}, + shader_engine{CreateEngine(Settings::values.use_shader_jit.GetValue())} { + SetFramebufferDefaults(); + + const auto submit_vertex = [this](const AttributeBuffer& buffer) { + const auto add_triangle = [this](const OutputVertex& v0, const OutputVertex& v1, + const OutputVertex& v2) { + rasterizer->AddTriangle(v0, v1, v2); + }; + const auto vertex = OutputVertex(regs.internal.rasterizer, buffer); + primitive_assembler.SubmitVertex(vertex, add_triangle); + }; + + gs_unit.SetVertexHandlers(submit_vertex, [this]() { primitive_assembler.SetWinding(); }); + geometry_pipeline.SetVertexHandler(submit_vertex); + + primitive_assembler.Reconfigure(PipelineRegs::TriangleTopology::List); +} + +PicaCore::~PicaCore() = default; + +void PicaCore::SetFramebufferDefaults() { + auto& framebuffer_top = regs.framebuffer_config[0]; + auto& framebuffer_sub = regs.framebuffer_config[1]; + + // Set framebuffer defaults from nn::gx::Initialize + framebuffer_top.address_left1 = 0x181E6000; 
+ framebuffer_top.address_left2 = 0x1822C800; + framebuffer_top.address_right1 = 0x18273000; + framebuffer_top.address_right2 = 0x182B9800; + framebuffer_sub.address_left1 = 0x1848F000; + framebuffer_sub.address_left2 = 0x184C7800; + + framebuffer_top.width.Assign(240); + framebuffer_top.height.Assign(400); + framebuffer_top.stride = 3 * 240; + framebuffer_top.color_format.Assign(PixelFormat::RGB8); + framebuffer_top.active_fb = 0; + + framebuffer_sub.width.Assign(240); + framebuffer_sub.height.Assign(320); + framebuffer_sub.stride = 3 * 240; + framebuffer_sub.color_format.Assign(PixelFormat::RGB8); + framebuffer_sub.active_fb = 0; +} + +void PicaCore::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) { + this->rasterizer = rasterizer; +} + +void PicaCore::SetInterruptHandler(Service::GSP::InterruptHandler& signal_interrupt) { + this->signal_interrupt = signal_interrupt; +} + +void PicaCore::ProcessCmdList(PAddr list, u32 size) { + // Initialize command list tracking. + const u8* head = memory.GetPhysicalPointer(list); + cmd_list.Reset(list, head, size); + + while (cmd_list.current_index < cmd_list.length) { + // Align read pointer to 8 bytes + if (cmd_list.current_index % 2 != 0) { + cmd_list.current_index++; + } + + // Read the header and the value to write. + const u32 value = cmd_list.head[cmd_list.current_index++]; + const CommandHeader header{cmd_list.head[cmd_list.current_index++]}; + + // Write to the requested PICA register. + WriteInternalReg(header.cmd_id, value, header.parameter_mask); + + // Write any extra parameters as well. + for (u32 i = 0; i < header.extra_data_length; ++i) { + const u32 cmd = header.cmd_id + (header.group_commands ?
i + 1 : 0); + const u32 extra_value = cmd_list.head[cmd_list.current_index++]; + WriteInternalReg(cmd, extra_value, header.parameter_mask); + } + } +} + +void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask) { + if (id >= RegsInternal::NUM_REGS) { + LOG_ERROR( + HW_GPU, + "Commandlist tried to write to invalid register 0x{:03X} (value: {:08X}, mask: {:X})", + id, value, mask); + return; + } + + // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF + constexpr std::array ExpandBitsToBytes = { + 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, + 0x00ffff00, 0x00ffffff, 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, + 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, + }; + + // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value + const u32 old_value = regs.internal.reg_array[id]; + const u32 write_mask = ExpandBitsToBytes[mask]; + regs.internal.reg_array[id] = (old_value & ~write_mask) | (value & write_mask); + + // Track register write. + DebugUtils::OnPicaRegWrite(id, mask, regs.internal.reg_array[id]); + + // Track events. 
+ debug_context.OnEvent(DebugContext::Event::PicaCommandLoaded, &id); + SCOPE_EXIT({ debug_context.OnEvent(DebugContext::Event::PicaCommandProcessed, &id); }); + + switch (id) { + // Trigger IRQ + case PICA_REG_INDEX(trigger_irq): + signal_interrupt(Service::GSP::InterruptId::P3D); + break; + + case PICA_REG_INDEX(pipeline.triangle_topology): + primitive_assembler.Reconfigure(regs.internal.pipeline.triangle_topology); + break; + + case PICA_REG_INDEX(pipeline.restart_primitive): + primitive_assembler.Reset(); + break; + + case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): + immediate.Reset(); + break; + + // Load default vertex input attributes + case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.set_value[0]): + case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.set_value[1]): + case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.set_value[2]): + SubmitImmediate(value); + break; + + case PICA_REG_INDEX(pipeline.gpu_mode): + // This register likely just enables vertex processing and doesn't need any special handling + break; + + case PICA_REG_INDEX(pipeline.command_buffer.trigger[0]): + case PICA_REG_INDEX(pipeline.command_buffer.trigger[1]): { + const u32 index = static_cast(id - PICA_REG_INDEX(pipeline.command_buffer.trigger[0])); + const PAddr addr = regs.internal.pipeline.command_buffer.GetPhysicalAddress(index); + const u32 size = regs.internal.pipeline.command_buffer.GetSize(index); + const u8* head = memory.GetPhysicalPointer(addr); + cmd_list.Reset(addr, head, size); + break; + } + + // It seems like these trigger vertex rendering + case PICA_REG_INDEX(pipeline.trigger_draw): + case PICA_REG_INDEX(pipeline.trigger_draw_indexed): { + const bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); + DrawArrays(is_indexed); + break; + } + + case PICA_REG_INDEX(gs.bool_uniforms): + gs_setup.WriteUniformBoolReg(regs.internal.gs.bool_uniforms.Value()); + break; + + case PICA_REG_INDEX(gs.int_uniforms[0]): + case 
PICA_REG_INDEX(gs.int_uniforms[1]): + case PICA_REG_INDEX(gs.int_uniforms[2]): + case PICA_REG_INDEX(gs.int_uniforms[3]): { + const u32 index = (id - PICA_REG_INDEX(gs.int_uniforms[0])); + gs_setup.WriteUniformIntReg(index, regs.internal.gs.GetIntUniform(index)); + break; + } + + case PICA_REG_INDEX(gs.uniform_setup.set_value[0]): + case PICA_REG_INDEX(gs.uniform_setup.set_value[1]): + case PICA_REG_INDEX(gs.uniform_setup.set_value[2]): + case PICA_REG_INDEX(gs.uniform_setup.set_value[3]): + case PICA_REG_INDEX(gs.uniform_setup.set_value[4]): + case PICA_REG_INDEX(gs.uniform_setup.set_value[5]): + case PICA_REG_INDEX(gs.uniform_setup.set_value[6]): + case PICA_REG_INDEX(gs.uniform_setup.set_value[7]): { + gs_setup.WriteUniformFloatReg(regs.internal.gs, value); + break; + } + + case PICA_REG_INDEX(gs.program.set_word[0]): + case PICA_REG_INDEX(gs.program.set_word[1]): + case PICA_REG_INDEX(gs.program.set_word[2]): + case PICA_REG_INDEX(gs.program.set_word[3]): + case PICA_REG_INDEX(gs.program.set_word[4]): + case PICA_REG_INDEX(gs.program.set_word[5]): + case PICA_REG_INDEX(gs.program.set_word[6]): + case PICA_REG_INDEX(gs.program.set_word[7]): { + u32& offset = regs.internal.gs.program.offset; + if (offset >= 4096) { + LOG_ERROR(HW_GPU, "Invalid GS program offset {}", offset); + } else { + gs_setup.program_code[offset] = value; + gs_setup.MarkProgramCodeDirty(); + offset++; + } + break; + } + + case PICA_REG_INDEX(gs.swizzle_patterns.set_word[0]): + case PICA_REG_INDEX(gs.swizzle_patterns.set_word[1]): + case PICA_REG_INDEX(gs.swizzle_patterns.set_word[2]): + case PICA_REG_INDEX(gs.swizzle_patterns.set_word[3]): + case PICA_REG_INDEX(gs.swizzle_patterns.set_word[4]): + case PICA_REG_INDEX(gs.swizzle_patterns.set_word[5]): + case PICA_REG_INDEX(gs.swizzle_patterns.set_word[6]): + case PICA_REG_INDEX(gs.swizzle_patterns.set_word[7]): { + u32& offset = regs.internal.gs.swizzle_patterns.offset; + if (offset >= gs_setup.swizzle_data.size()) { + LOG_ERROR(HW_GPU, 
"Invalid GS swizzle pattern offset {}", offset); + } else { + gs_setup.swizzle_data[offset] = value; + gs_setup.MarkSwizzleDataDirty(); + offset++; + } + break; + } + + case PICA_REG_INDEX(vs.bool_uniforms): + vs_setup.WriteUniformBoolReg(regs.internal.vs.bool_uniforms.Value()); + break; + + case PICA_REG_INDEX(vs.int_uniforms[0]): + case PICA_REG_INDEX(vs.int_uniforms[1]): + case PICA_REG_INDEX(vs.int_uniforms[2]): + case PICA_REG_INDEX(vs.int_uniforms[3]): { + const u32 index = (id - PICA_REG_INDEX(vs.int_uniforms[0])); + vs_setup.WriteUniformIntReg(index, regs.internal.vs.GetIntUniform(index)); + break; + } + + case PICA_REG_INDEX(vs.uniform_setup.set_value[0]): + case PICA_REG_INDEX(vs.uniform_setup.set_value[1]): + case PICA_REG_INDEX(vs.uniform_setup.set_value[2]): + case PICA_REG_INDEX(vs.uniform_setup.set_value[3]): + case PICA_REG_INDEX(vs.uniform_setup.set_value[4]): + case PICA_REG_INDEX(vs.uniform_setup.set_value[5]): + case PICA_REG_INDEX(vs.uniform_setup.set_value[6]): + case PICA_REG_INDEX(vs.uniform_setup.set_value[7]): { + vs_setup.WriteUniformFloatReg(regs.internal.vs, value); + break; + } + + case PICA_REG_INDEX(vs.program.set_word[0]): + case PICA_REG_INDEX(vs.program.set_word[1]): + case PICA_REG_INDEX(vs.program.set_word[2]): + case PICA_REG_INDEX(vs.program.set_word[3]): + case PICA_REG_INDEX(vs.program.set_word[4]): + case PICA_REG_INDEX(vs.program.set_word[5]): + case PICA_REG_INDEX(vs.program.set_word[6]): + case PICA_REG_INDEX(vs.program.set_word[7]): { + u32& offset = regs.internal.vs.program.offset; + if (offset >= 512) { + LOG_ERROR(HW_GPU, "Invalid VS program offset {}", offset); + } else { + vs_setup.program_code[offset] = value; + vs_setup.MarkProgramCodeDirty(); + if (!regs.internal.pipeline.gs_unit_exclusive_configuration) { + gs_setup.program_code[offset] = value; + gs_setup.MarkProgramCodeDirty(); + } + offset++; + } + break; + } + + case PICA_REG_INDEX(vs.swizzle_patterns.set_word[0]): + case 
PICA_REG_INDEX(vs.swizzle_patterns.set_word[1]): + case PICA_REG_INDEX(vs.swizzle_patterns.set_word[2]): + case PICA_REG_INDEX(vs.swizzle_patterns.set_word[3]): + case PICA_REG_INDEX(vs.swizzle_patterns.set_word[4]): + case PICA_REG_INDEX(vs.swizzle_patterns.set_word[5]): + case PICA_REG_INDEX(vs.swizzle_patterns.set_word[6]): + case PICA_REG_INDEX(vs.swizzle_patterns.set_word[7]): { + u32& offset = regs.internal.vs.swizzle_patterns.offset; + if (offset >= vs_setup.swizzle_data.size()) { + LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset {}", offset); + } else { + vs_setup.swizzle_data[offset] = value; + vs_setup.MarkSwizzleDataDirty(); + if (!regs.internal.pipeline.gs_unit_exclusive_configuration) { + gs_setup.swizzle_data[offset] = value; + gs_setup.MarkSwizzleDataDirty(); + } + offset++; + } + break; + } + + case PICA_REG_INDEX(lighting.lut_data[0]): + case PICA_REG_INDEX(lighting.lut_data[1]): + case PICA_REG_INDEX(lighting.lut_data[2]): + case PICA_REG_INDEX(lighting.lut_data[3]): + case PICA_REG_INDEX(lighting.lut_data[4]): + case PICA_REG_INDEX(lighting.lut_data[5]): + case PICA_REG_INDEX(lighting.lut_data[6]): + case PICA_REG_INDEX(lighting.lut_data[7]): { + auto& lut_config = regs.internal.lighting.lut_config; + ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); + + lighting.luts[lut_config.type][lut_config.index].raw = value; + lut_config.index.Assign(lut_config.index + 1); + break; + } + + case PICA_REG_INDEX(texturing.fog_lut_data[0]): + case PICA_REG_INDEX(texturing.fog_lut_data[1]): + case PICA_REG_INDEX(texturing.fog_lut_data[2]): + case PICA_REG_INDEX(texturing.fog_lut_data[3]): + case PICA_REG_INDEX(texturing.fog_lut_data[4]): + case PICA_REG_INDEX(texturing.fog_lut_data[5]): + case PICA_REG_INDEX(texturing.fog_lut_data[6]): + case PICA_REG_INDEX(texturing.fog_lut_data[7]): { + fog.lut[regs.internal.texturing.fog_lut_offset % 128].raw = value; + 
+ regs.internal.texturing.fog_lut_offset.Assign(regs.internal.texturing.fog_lut_offset + 1); + break; + } + + case PICA_REG_INDEX(texturing.proctex_lut_data[0]): + case PICA_REG_INDEX(texturing.proctex_lut_data[1]): + case PICA_REG_INDEX(texturing.proctex_lut_data[2]): + case PICA_REG_INDEX(texturing.proctex_lut_data[3]): + case PICA_REG_INDEX(texturing.proctex_lut_data[4]): + case PICA_REG_INDEX(texturing.proctex_lut_data[5]): + case PICA_REG_INDEX(texturing.proctex_lut_data[6]): + case PICA_REG_INDEX(texturing.proctex_lut_data[7]): { + auto& index = regs.internal.texturing.proctex_lut_config.index; + + switch (regs.internal.texturing.proctex_lut_config.ref_table.Value()) { + case TexturingRegs::ProcTexLutTable::Noise: + proctex.noise_table[index % proctex.noise_table.size()].raw = value; + break; + case TexturingRegs::ProcTexLutTable::ColorMap: + proctex.color_map_table[index % proctex.color_map_table.size()].raw = value; + break; + case TexturingRegs::ProcTexLutTable::AlphaMap: + proctex.alpha_map_table[index % proctex.alpha_map_table.size()].raw = value; + break; + case TexturingRegs::ProcTexLutTable::Color: + proctex.color_table[index % proctex.color_table.size()].raw = value; + break; + case TexturingRegs::ProcTexLutTable::ColorDiff: + proctex.color_diff_table[index % proctex.color_diff_table.size()].raw = value; + break; + } + index.Assign(index + 1); + break; + } + default: + break; + } + + // Notify the rasterizer an internal register was updated. + rasterizer->NotifyPicaRegisterChanged(id); +} + +void PicaCore::SubmitImmediate(u32 value) { + // Push the word to the queue. This returns true when a full attribute is formed.
+ if (!immediate.queue.Push(value)) { + return; + } + + constexpr size_t IMMEDIATE_MODE_INDEX = 0xF; + + auto& setup = regs.internal.pipeline.vs_default_attributes_setup; + if (setup.index > IMMEDIATE_MODE_INDEX) { + LOG_ERROR(HW_GPU, "Invalid VS default attribute index {}", setup.index); + return; + } + + // Retrieve the attribute and place it in the default attribute buffer. + const auto attribute = immediate.queue.Get(); + if (setup.index < IMMEDIATE_MODE_INDEX) { + input_default_attributes[setup.index] = attribute; + setup.index++; + return; + } + + // When index is 0xF the attribute is used for immediate mode drawing. + immediate.input_vertex[immediate.current_attribute] = attribute; + if (immediate.current_attribute < regs.internal.pipeline.max_input_attrib_index) { + immediate.current_attribute++; + return; + } + + // We formed a vertex, flush. + DrawImmediate(); +} + +void PicaCore::DrawImmediate() { + // Compile the vertex shader. + shader_engine->SetupBatch(vs_setup, regs.internal.vs.main_offset); + + // Track vertex in the debug recorder. + debug_context.OnEvent(DebugContext::Event::VertexShaderInvocation, + std::addressof(immediate.input_vertex)); + SCOPE_EXIT({ debug_context.OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); }); + + ShaderUnit shader_unit; + AttributeBuffer output{}; + + // Invoke the vertex shader for the vertex. + shader_unit.LoadInput(regs.internal.vs, immediate.input_vertex); + shader_engine->Run(vs_setup, shader_unit); + shader_unit.WriteOutput(regs.internal.vs, output); + + // Reconfigure geometry pipeline if needed. + if (immediate.reset_geometry_pipeline) { + geometry_pipeline.Reconfigure(); + immediate.reset_geometry_pipeline = false; + } + + // Send to geometry pipeline. + ASSERT(!geometry_pipeline.NeedIndexInput()); + geometry_pipeline.Setup(shader_engine.get()); + geometry_pipeline.SubmitVertex(output); + + // Flush the immediate triangle. 
+ rasterizer->DrawTriangles(); + immediate.current_attribute = 0; +} + +void PicaCore::DrawArrays(bool is_indexed) { + MICROPROFILE_SCOPE(GPU_Drawing); + + // Track vertex in the debug recorder. + debug_context.OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); + SCOPE_EXIT({ debug_context.OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); }); + + const bool accelerate_draw = [this] { + // Geometry shaders cannot be accelerated due to register preservation. + if (regs.internal.pipeline.use_gs == PipelineRegs::UseGS::Yes) { + return false; + } + + // TODO (wwylele): for Strip/Fan topology, if the primitive assembler is not restarted + // after this draw call, the buffered vertex from this draw should "leak" to the next + // draw, in which case we should buffer the vertex into the software primitive assembler, + // or disable accelerate draw completely. However, there is no game found yet that does + // this, so this is left unimplemented for now. Revisit this when an issue is found in + // games. + + bool accelerate_draw = Settings::values.use_hw_shader && primitive_assembler.IsEmpty(); + const auto topology = primitive_assembler.GetTopology(); + if (topology == PipelineRegs::TriangleTopology::Shader || + topology == PipelineRegs::TriangleTopology::List) { + accelerate_draw = accelerate_draw && (regs.internal.pipeline.num_vertices % 3) == 0; + } + return accelerate_draw; + }(); + + // Attempt to use hardware vertex shaders if possible. + if (accelerate_draw && rasterizer->AccelerateDrawBatch(is_indexed)) { + return; + } + + // We cannot accelerate the draw, so load and execute the vertex shader for each vertex. + LoadVertices(is_indexed); + + // Draw emitted triangles.
+ rasterizer->DrawTriangles(); +} + +void PicaCore::LoadVertices(bool is_indexed) { + // Read and validate vertex information from the loaders + const auto& pipeline = regs.internal.pipeline; + const PAddr base_address = pipeline.vertex_attributes.GetPhysicalBaseAddress(); + const auto loader = VertexLoader(memory, pipeline); + regs.internal.rasterizer.ValidateSemantics(); + + // Locate index buffer. + const auto& index_info = pipeline.index_array; + const u8* index_address_8 = memory.GetPhysicalPointer(base_address + index_info.offset); + const u16* index_address_16 = reinterpret_cast(index_address_8); + const bool index_u16 = index_info.format != 0; + + // Simple circular-replacement vertex cache + const std::size_t VERTEX_CACHE_SIZE = 64; + std::array vertex_cache_valid{}; + std::array vertex_cache_ids; + std::array vertex_cache; + u32 vertex_cache_pos = 0; + + // Compile the vertex shader for this batch. + ShaderUnit shader_unit; + AttributeBuffer vs_output; + shader_engine->SetupBatch(vs_setup, regs.internal.vs.main_offset); + + // Setup geometry pipeline in case we are using a geometry shader. + geometry_pipeline.Reconfigure(); + geometry_pipeline.Setup(shader_engine.get()); + ASSERT(!geometry_pipeline.NeedIndexInput() || is_indexed); + + for (u32 index = 0; index < pipeline.num_vertices; ++index) { + // Indexed rendering doesn't use the start offset + const u32 vertex = is_indexed + ? (index_u16 ? 
index_address_16[index] : index_address_8[index]) + : (index + pipeline.vertex_offset); + + bool vertex_cache_hit = false; + if (is_indexed) { + if (geometry_pipeline.NeedIndexInput()) { + geometry_pipeline.SubmitIndex(vertex); + continue; + } + + for (u32 i = 0; i < VERTEX_CACHE_SIZE; ++i) { + if (vertex_cache_valid[i] && vertex == vertex_cache_ids[i]) { + vs_output = vertex_cache[i]; + vertex_cache_hit = true; + break; + } + } + } + + if (!vertex_cache_hit) { + // Initialize data for the current vertex + AttributeBuffer input; + loader.LoadVertex(base_address, index, vertex, input, input_default_attributes); + + // Record vertex processing to the debugger. + debug_context.OnEvent(DebugContext::Event::VertexShaderInvocation, + std::addressof(input)); + + // Invoke the vertex shader for this vertex. + shader_unit.LoadInput(regs.internal.vs, input); + shader_engine->Run(vs_setup, shader_unit); + shader_unit.WriteOutput(regs.internal.vs, vs_output); + + // Cache the vertex when doing indexed rendering. 
+ if (is_indexed) { + vertex_cache[vertex_cache_pos] = vs_output; + vertex_cache_valid[vertex_cache_pos] = true; + vertex_cache_ids[vertex_cache_pos] = vertex; + vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; + } + } + + // Send to geometry pipeline + geometry_pipeline.SubmitVertex(vs_output); + } +} + +template +void PicaCore::CommandList::serialize(Archive& ar, const u32 file_version) { + ar& addr; + ar& length; + ar& current_index; + if (Archive::is_loading::value) { + const u8* ptr = Core::System::GetInstance().Memory().GetPhysicalPointer(addr); + head = reinterpret_cast(ptr); + } +} + +SERIALIZE_IMPL(PicaCore::CommandList) + +} // namespace Pica diff --git a/src/video_core/pica/pica_core.h b/src/video_core/pica/pica_core.h new file mode 100644 index 000000000..bb19a4f25 --- /dev/null +++ b/src/video_core/pica/pica_core.h @@ -0,0 +1,287 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "core/hle/service/gsp/gsp_interrupt.h" +#include "video_core/pica/geometry_pipeline.h" +#include "video_core/pica/packed_attribute.h" +#include "video_core/pica/primitive_assembly.h" +#include "video_core/pica/regs_external.h" +#include "video_core/pica/regs_internal.h" +#include "video_core/pica/regs_lcd.h" +#include "video_core/pica/shader_setup.h" +#include "video_core/pica/shader_unit.h" + +namespace Memory { +class MemorySystem; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Pica { + +class DebugContext; +class ShaderEngine; + +class PicaCore { +public: + explicit PicaCore(Memory::MemorySystem& memory, DebugContext& debug_context_); + ~PicaCore(); + + void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); + + void SetInterruptHandler(Service::GSP::InterruptHandler& signal_interrupt); + + void ProcessCmdList(PAddr list, u32 size); + +private: + void SetFramebufferDefaults(); + + void WriteInternalReg(u32 id, u32 value, u32 mask); + + void SubmitImmediate(u32 data); + + void DrawImmediate(); + + void DrawArrays(bool is_indexed); + + void LoadVertices(bool is_indexed); + +public: + union Regs { + static constexpr size_t NUM_REGS = 0x732; + + struct { + u32 hardware_id; + INSERT_PADDING_WORDS(0x3); + MemoryFillConfig memory_fill_config[2]; + u32 vram_bank_control; + u32 gpu_busy; + INSERT_PADDING_WORDS(0x22); + u32 backlight_control; + INSERT_PADDING_WORDS(0xCF); + FramebufferConfig framebuffer_config[2]; + INSERT_PADDING_WORDS(0x180); + DisplayTransferConfig display_transfer_config; + INSERT_PADDING_WORDS(0xF5); + RegsInternal internal; + }; + std::array<u32, NUM_REGS> reg_array; + }; + static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32)); + + struct CommandList { + PAddr addr{}; // Zero-initialized so serialize() never reads indeterminate values. + const u32* head{}; + u32 current_index{}; + u32 length{}; + + void Reset(PAddr addr, const u8* head, u32 size) { + this->addr = addr; + this->head = reinterpret_cast<const u32*>(head); + this->length = size / sizeof(u32); + current_index = 0; + } + + 
private: + friend class boost::serialization::access; + template + void serialize(Archive& ar, const u32 file_version); + }; + + struct ImmediateModeState { + AttributeBuffer input_vertex{}; + u32 current_attribute{}; + bool reset_geometry_pipeline{true}; + PackedAttribute queue; + + void Reset() { + current_attribute = 0; + reset_geometry_pipeline = true; + queue.Reset(); + } + + private: + friend class boost::serialization::access; + template + void serialize(Archive& ar, const u32 file_version) { + ar& input_vertex; + ar& current_attribute; + ar& reset_geometry_pipeline; + ar& queue; + } + }; + + struct ProcTex { + union ValueEntry { + u32 raw; + + // LUT value, encoded as 12-bit fixed point, with 12 fraction bits + BitField<0, 12, u32> value; // 0.0.12 fixed point + + // Difference between two entry values. Used for efficient interpolation. + // 0.0.12 fixed point with two's complement. The range is [-0.5, 0.5). + // Note: the type of this is different from the one of lighting LUT + BitField<12, 12, s32> difference; + + f32 ToFloat() const { + return static_cast(value) / 4095.f; + } + + f32 DiffToFloat() const { + return static_cast(difference) / 4095.f; + } + }; + + union ColorEntry { + u32 raw; + BitField<0, 8, u32> r; + BitField<8, 8, u32> g; + BitField<16, 8, u32> b; + BitField<24, 8, u32> a; + + Common::Vec4 ToVector() const { + return {static_cast(r), static_cast(g), static_cast(b), + static_cast(a)}; + } + }; + + union ColorDifferenceEntry { + u32 raw; + BitField<0, 8, s32> r; // half of the difference between two ColorEntry + BitField<8, 8, s32> g; + BitField<16, 8, s32> b; + BitField<24, 8, s32> a; + + Common::Vec4 ToVector() const { + return Common::Vec4{r, g, b, a} * 2; + } + }; + + std::array noise_table; + std::array color_map_table; + std::array alpha_map_table; + std::array color_table; + std::array color_diff_table; + + private: + friend class boost::serialization::access; + template + void serialize(Archive& ar, const u32 file_version) { + ar& 
boost::serialization::make_binary_object(this, sizeof(ProcTex)); + } + }; + + struct Lighting { + union LutEntry { + // Used for raw access + u32 raw; + + // LUT value, encoded as 12-bit fixed point, with 12 fraction bits + BitField<0, 12, u32> value; // 0.0.12 fixed point + + // Used for efficient interpolation. + BitField<12, 11, u32> difference; // 0.0.11 fixed point + BitField<23, 1, u32> neg_difference; + + f32 ToFloat() const { + return static_cast(value) / 4095.f; + } + + f32 DiffToFloat() const { + const f32 diff = static_cast(difference) / 2047.f; + return neg_difference ? -diff : diff; + } + + template + void serialize(Archive& ar, const u32 file_version) { + ar& raw; + } + }; + + std::array, 24> luts; + + private: + friend class boost::serialization::access; + template + void serialize(Archive& ar, const u32 file_version) { + ar& boost::serialization::make_binary_object(this, sizeof(Lighting)); + } + }; + + struct Fog { + union LutEntry { + // Used for raw access + u32 raw; + + BitField<0, 13, s32> difference; // 1.1.11 fixed point + BitField<13, 11, u32> value; // 0.0.11 fixed point + + f32 ToFloat() const { + return static_cast(value) / 2047.0f; + } + + f32 DiffToFloat() const { + return static_cast(difference) / 2047.0f; + } + }; + + std::array lut; + + private: + friend class boost::serialization::access; + template + void serialize(Archive& ar, const u32 file_version) { + ar& boost::serialization::make_binary_object(this, sizeof(Fog)); + } + }; + + RegsLcd regs_lcd{}; + Regs regs{}; + // TODO: Move these to a separate shader scheduler class + GeometryShaderUnit gs_unit; + ShaderSetup vs_setup; + ShaderSetup gs_setup; + ProcTex proctex{}; + Lighting lighting{}; + Fog fog{}; + AttributeBuffer input_default_attributes{}; + ImmediateModeState immediate{}; + +private: + friend class boost::serialization::access; + template + void serialize(Archive& ar, const u32 file_version) { + ar& regs_lcd; + ar& regs.reg_array; + ar& gs_unit; + ar& vs_setup; + ar& 
gs_setup; + ar& proctex; + ar& lighting; + ar& fog; + ar& input_default_attributes; + ar& immediate; + ar& geometry_pipeline; + ar& primitive_assembler; + ar& cmd_list; + } + +private: + Memory::MemorySystem& memory; + VideoCore::RasterizerInterface* rasterizer; + DebugContext& debug_context; + Service::GSP::InterruptHandler signal_interrupt; + GeometryPipeline geometry_pipeline; + PrimitiveAssembler primitive_assembler; + CommandList cmd_list; + std::unique_ptr shader_engine; +}; + +#define GPU_REG_INDEX(field_name) (offsetof(Pica::PicaCore::Regs, field_name) / sizeof(u32)) + +} // namespace Pica diff --git a/src/video_core/pica/primitive_assembly.cpp b/src/video_core/pica/primitive_assembly.cpp new file mode 100644 index 000000000..3162b7ddf --- /dev/null +++ b/src/video_core/pica/primitive_assembly.cpp @@ -0,0 +1,53 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "video_core/pica/primitive_assembly.h" + +namespace Pica { + +PrimitiveAssembler::PrimitiveAssembler(PipelineRegs::TriangleTopology topology) + : topology(topology) {} + +void PrimitiveAssembler::SubmitVertex(const OutputVertex& vtx, + const TriangleHandler& triangle_handler) { + switch (topology) { + case PipelineRegs::TriangleTopology::List: + case PipelineRegs::TriangleTopology::Shader: + if (buffer_index < 2) { + buffer[buffer_index++] = vtx; + } else { + buffer_index = 0; + if (topology == PipelineRegs::TriangleTopology::Shader && winding) { + triangle_handler(buffer[1], buffer[0], vtx); + winding = false; + } else { + triangle_handler(buffer[0], buffer[1], vtx); + } + } + break; + + case PipelineRegs::TriangleTopology::Strip: + case PipelineRegs::TriangleTopology::Fan: + if (strip_ready) { + triangle_handler(buffer[0], buffer[1], vtx); + } + + buffer[buffer_index] = vtx; + strip_ready |= (buffer_index == 1); + + if (topology == PipelineRegs::TriangleTopology::Strip) { 
+ buffer_index = !buffer_index; + } else if (topology == PipelineRegs::TriangleTopology::Fan) { + buffer_index = 1; + } + break; + + default: + LOG_ERROR(HW_GPU, "Unknown triangle topology {:x}:", (int)topology); + break; + } +} + +} // namespace Pica diff --git a/src/video_core/primitive_assembly.h b/src/video_core/pica/primitive_assembly.h similarity index 69% rename from src/video_core/primitive_assembly.h rename to src/video_core/pica/primitive_assembly.h index c98976a7d..dc900749e 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/pica/primitive_assembly.h @@ -8,61 +8,73 @@ #include #include #include -#include "video_core/regs_pipeline.h" +#include "video_core/pica/output_vertex.h" +#include "video_core/pica/regs_pipeline.h" namespace Pica { -/* +/** * Utility class to build triangles from a series of vertices, * according to a given triangle topology. */ -template struct PrimitiveAssembler { using TriangleHandler = - std::function; + std::function; explicit PrimitiveAssembler( PipelineRegs::TriangleTopology topology = PipelineRegs::TriangleTopology::List); - /* + /** * Queues a vertex, builds primitives from the vertex queue according to the given * triangle topology, and calls triangle_handler for each generated primitive. * NOTE: We could specify the triangle handler in the constructor, but this way we can * keep event and handler code next to each other. */ - void SubmitVertex(const VertexType& vtx, const TriangleHandler& triangle_handler); + void SubmitVertex(const OutputVertex& vtx, const TriangleHandler& triangle_handler); /** * Invert the vertex order of the next triangle. Called by geometry shader emitter. * This only takes effect for TriangleTopology::Shader. */ - void SetWinding(); + void SetWinding() noexcept { + winding = true; + } /** * Resets the internal state of the PrimitiveAssembler. 
*/ - void Reset(); + void Reset() { + buffer_index = 0; + strip_ready = false; + winding = false; + } /** * Reconfigures the PrimitiveAssembler to use a different triangle topology. */ - void Reconfigure(PipelineRegs::TriangleTopology topology); + void Reconfigure(PipelineRegs::TriangleTopology topology) { + Reset(); + this->topology = topology; + } /** * Returns whether the PrimitiveAssembler has an empty internal buffer. */ - bool IsEmpty() const; + bool IsEmpty() const { + return buffer_index == 0 && !strip_ready; + } /** * Returns the current topology. */ - PipelineRegs::TriangleTopology GetTopology() const; + PipelineRegs::TriangleTopology GetTopology() const { + return topology; + } private: PipelineRegs::TriangleTopology topology; - int buffer_index = 0; - std::array buffer; + std::array buffer; bool strip_ready = false; bool winding = false; @@ -70,7 +82,7 @@ private: void serialize(Archive& ar, const unsigned int version) { ar& topology; ar& buffer_index; - ar& boost::serialization::make_array(buffer.data(), buffer.size()); + ar& buffer; ar& strip_ready; ar& winding; } diff --git a/src/video_core/pica/regs_external.h b/src/video_core/pica/regs_external.h new file mode 100644 index 000000000..1a433a696 --- /dev/null +++ b/src/video_core/pica/regs_external.h @@ -0,0 +1,217 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" + +namespace Pica { + +/** + * Most physical addresses which GPU registers refer to are 8-byte aligned. + * This function should be used to get the address from a raw register value. + */ +constexpr u32 DecodeAddressRegister(u32 register_value) { + return register_value * 8; +} + +/// Components are laid out in reverse byte order, most significant bits first. 
+enum class PixelFormat : u32 { + RGBA8 = 0, + RGB8 = 1, + RGB565 = 2, + RGB5A1 = 3, + RGBA4 = 4, +}; + +constexpr u32 BytesPerPixel(Pica::PixelFormat format) { + switch (format) { + case Pica::PixelFormat::RGBA8: + return 4; + case Pica::PixelFormat::RGB8: + return 3; + case Pica::PixelFormat::RGB565: + case Pica::PixelFormat::RGB5A1: + case Pica::PixelFormat::RGBA4: + return 2; + default: + UNREACHABLE(); + } + + return 0; +} + +struct MemoryFillConfig { + u32 address_start; + u32 address_end; + + union { + u32 value_32bit; + + BitField<0, 16, u32> value_16bit; + + // TODO: Verify component order + BitField<0, 8, u32> value_24bit_r; + BitField<8, 8, u32> value_24bit_g; + BitField<16, 8, u32> value_24bit_b; + }; + + union { + u32 control; + + // Setting this field to 1 triggers the memory fill. + // This field also acts as a status flag, and gets reset to 0 upon completion. + BitField<0, 1, u32> trigger; + // Set to 1 upon completion. + BitField<1, 1, u32> finished; + // If both of these bits are unset, then it will fill the memory with a 16 bit value + // 1: fill with 24-bit wide values + BitField<8, 1, u32> fill_24bit; + // 1: fill with 32-bit wide values + BitField<9, 1, u32> fill_32bit; + }; + + inline u32 GetStartAddress() const { + return DecodeAddressRegister(address_start); + } + + inline u32 GetEndAddress() const { + return DecodeAddressRegister(address_end); + } + + inline std::string DebugName() const { + return fmt::format("from {:#X} to {:#X} with {}-bit value {:#X}", GetStartAddress(), + GetEndAddress(), fill_32bit ? "32" : (fill_24bit ? 
"24" : "16"), + value_32bit); + } +}; +static_assert(sizeof(MemoryFillConfig) == 0x10); + +struct FramebufferConfig { + INSERT_PADDING_WORDS(0x17); + + union { + u32 size; + + BitField<0, 16, u32> width; + BitField<16, 16, u32> height; + }; + + INSERT_PADDING_WORDS(0x2); + + u32 address_left1; + u32 address_left2; + + union { + u32 format; + + BitField<0, 3, PixelFormat> color_format; + }; + + INSERT_PADDING_WORDS(0x1); + + union { + u32 active_fb; + + // 0: Use parameters ending with "1" + // 1: Use parameters ending with "2" + BitField<0, 1, u32> second_fb_active; + }; + + INSERT_PADDING_WORDS(0x5); + + // Distance between two pixel rows, in bytes + u32 stride; + + u32 address_right1; + u32 address_right2; + + INSERT_PADDING_WORDS(0x19); +}; +static_assert(sizeof(FramebufferConfig) == 0x100); + +struct DisplayTransferConfig { + u32 input_address; + u32 output_address; + + inline u32 GetPhysicalInputAddress() const { + return DecodeAddressRegister(input_address); + } + + inline u32 GetPhysicalOutputAddress() const { + return DecodeAddressRegister(output_address); + } + + inline std::string DebugName() const noexcept { + return fmt::format("from {:#x} to {:#x} with {} scaling and stride {}, width {}", + GetPhysicalInputAddress(), GetPhysicalOutputAddress(), + scaling == NoScale ? "no" : (scaling == ScaleX ? 
"X" : "XY"), + input_width.Value(), output_width.Value()); + } + + union { + u32 output_size; + + BitField<0, 16, u32> output_width; + BitField<16, 16, u32> output_height; + }; + + union { + u32 input_size; + + BitField<0, 16, u32> input_width; + BitField<16, 16, u32> input_height; + }; + + enum ScalingMode : u32 { + NoScale = 0, // Doesn't scale the image + ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter + ScaleXY = + 2, // Downscales the image in half in both the X and Y axes and applies a box filter + }; + + union { + u32 flags; + + BitField<0, 1, u32> flip_vertically; // flips input data vertically + BitField<1, 1, u32> input_linear; // Converts from linear to tiled format + BitField<2, 1, u32> crop_input_lines; + BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any + // processing and respecting texture copy fields + BitField<5, 1, u32> dont_swizzle; + BitField<8, 3, PixelFormat> input_format; + BitField<12, 3, PixelFormat> output_format; + /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. 
+ BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented + BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer + }; + + INSERT_PADDING_WORDS(0x1); + + // it seems that writing to this field triggers the display transfer + BitField<0, 1, u32> trigger; + + INSERT_PADDING_WORDS(0x1); + + struct { + u32 size; // The lower 4 bits are ignored + + union { + u32 input_size; + + BitField<0, 16, u32> input_width; + BitField<16, 16, u32> input_gap; + }; + + union { + u32 output_size; + + BitField<0, 16, u32> output_width; + BitField<16, 16, u32> output_gap; + }; + } texture_copy; +}; +static_assert(sizeof(DisplayTransferConfig) == 0x2c); + +} // namespace Pica diff --git a/src/video_core/regs_framebuffer.h b/src/video_core/pica/regs_framebuffer.h similarity index 100% rename from src/video_core/regs_framebuffer.h rename to src/video_core/pica/regs_framebuffer.h diff --git a/src/video_core/regs.cpp b/src/video_core/pica/regs_internal.cpp similarity index 97% rename from src/video_core/regs.cpp rename to src/video_core/pica/regs_internal.cpp index 2699e710a..9a528c654 100644 --- a/src/video_core/regs.cpp +++ b/src/video_core/pica/regs_internal.cpp @@ -7,11 +7,11 @@ #include #include "common/common_types.h" -#include "video_core/regs.h" +#include "video_core/pica/regs_internal.h" namespace Pica { -static const std::pair register_names[] = { +static constexpr std::pair register_names[] = { {0x010, "GPUREG_FINALIZE"}, {0x040, "GPUREG_FACECULLING_CONFIG"}, @@ -474,15 +474,15 @@ static const std::pair register_names[] = { {0x2DD, "GPUREG_VSH_OPDESCS_DATA7"}, }; -const char* Regs::GetRegisterName(u16 index) { - auto found = std::lower_bound(std::begin(register_names), std::end(register_names), index, - [](auto p, auto i) { return p.first < i; }); - if (found->first == index) { - return found->second; - } else { - // Return empty string if no match is found - return ""; +const char* RegsInternal::GetRegisterName(u16 index) { + const auto it = 
std::lower_bound(std::begin(register_names), std::end(register_names), index, + [](auto p, auto i) { return p.first < i; }); + if (it != std::end(register_names) && it->first == index) { + return it->second; } + + // Return empty string if no match is found + return ""; } } // namespace Pica diff --git a/src/video_core/regs.h b/src/video_core/pica/regs_internal.h similarity index 87% rename from src/video_core/regs.h rename to src/video_core/pica/regs_internal.h index c81a4ee23..1634ec92c 100644 --- a/src/video_core/regs.h +++ b/src/video_core/pica/regs_internal.h @@ -3,18 +3,19 @@ // Refer to the license.txt file included. #pragma once -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_lighting.h" -#include "video_core/regs_pipeline.h" -#include "video_core/regs_rasterizer.h" -#include "video_core/regs_shader.h" -#include "video_core/regs_texturing.h" + +#include "video_core/pica/regs_framebuffer.h" +#include "video_core/pica/regs_lighting.h" +#include "video_core/pica/regs_pipeline.h" +#include "video_core/pica/regs_rasterizer.h" +#include "video_core/pica/regs_shader.h" +#include "video_core/pica/regs_texturing.h" namespace Pica { -#define PICA_REG_INDEX(field_name) (offsetof(Pica::Regs, field_name) / sizeof(u32)) +#define PICA_REG_INDEX(field_name) (offsetof(Pica::RegsInternal, field_name) / sizeof(u32)) -struct Regs { +struct RegsInternal { static constexpr std::size_t NUM_REGS = 0x300; union { @@ -38,10 +39,11 @@ struct Regs { static const char* GetRegisterName(u16 index); }; -static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Regs struct has wrong size"); +static_assert(sizeof(RegsInternal) == RegsInternal::NUM_REGS * sizeof(u32), + "Regs struct has wrong size"); #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Regs, field_name) == position * 4, \ + static_assert(offsetof(RegsInternal, field_name) == position * 4, \ "Field " #field_name " has invalid position") ASSERT_REG_POSITION(trigger_irq, 0x10); @@ -110,4 +112,5 @@ 
ASSERT_REG_POSITION(gs, 0x280); ASSERT_REG_POSITION(vs, 0x2b0); #undef ASSERT_REG_POSITION + } // namespace Pica diff --git a/src/core/hw/lcd.h b/src/video_core/pica/regs_lcd.h similarity index 64% rename from src/core/hw/lcd.h rename to src/video_core/pica/regs_lcd.h index 8e681609b..7848b0aad 100644 --- a/src/core/hw/lcd.h +++ b/src/video_core/pica/regs_lcd.h @@ -1,45 +1,46 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2023 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once -#include -#include #include + #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/vector_math.h" -#define LCD_REG_INDEX(field_name) (offsetof(LCD::Regs, field_name) / sizeof(u32)) +#define LCD_REG_INDEX(field_name) (offsetof(Pica::RegsLcd, field_name) / sizeof(u32)) -namespace LCD { +namespace Pica { -struct Regs { +union ColorFill { + u32 raw; - union ColorFill { - u32 raw; + BitField<0, 8, u32> color_r; + BitField<8, 8, u32> color_g; + BitField<16, 8, u32> color_b; + BitField<24, 1, u32> is_enabled; - BitField<0, 8, u32> color_r; - BitField<8, 8, u32> color_g; - BitField<16, 8, u32> color_b; - BitField<24, 1, u32> is_enabled; - }; + Common::Vec3 AsVector() const noexcept { + return Common::MakeVec(color_r, color_g, color_b); + } +}; +struct RegsLcd { INSERT_PADDING_WORDS(0x81); ColorFill color_fill_top; INSERT_PADDING_WORDS(0xE); u32 backlight_top; - INSERT_PADDING_WORDS(0x1F0); - ColorFill color_fill_bottom; INSERT_PADDING_WORDS(0xE); u32 backlight_bottom; INSERT_PADDING_WORDS(0x16F); static constexpr std::size_t NumIds() { - return sizeof(Regs) / sizeof(u32); + return sizeof(RegsLcd) / sizeof(u32); } const u32& operator[](int index) const { @@ -62,28 +63,16 @@ private: } friend class boost::serialization::access; }; -static_assert(std::is_standard_layout::value, "Structure does not use standard layout"); 
+static_assert(std::is_standard_layout_v, "Structure does not use standard layout"); #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Regs, field_name) == position * 4, \ + static_assert(offsetof(RegsLcd, field_name) == position * 4, \ "Field " #field_name " has invalid position") ASSERT_REG_POSITION(color_fill_top, 0x81); ASSERT_REG_POSITION(backlight_top, 0x90); ASSERT_REG_POSITION(color_fill_bottom, 0x281); ASSERT_REG_POSITION(backlight_bottom, 0x290); -extern Regs g_regs; +#undef ASSERT_REG_POSITION -template -void Read(T& var, const u32 addr); - -template -void Write(u32 addr, const T data); - -/// Initialize hardware -void Init(); - -/// Shutdown hardware -void Shutdown(); - -} // namespace LCD +} // namespace Pica diff --git a/src/video_core/regs_lighting.h b/src/video_core/pica/regs_lighting.h similarity index 96% rename from src/video_core/regs_lighting.h rename to src/video_core/pica/regs_lighting.h index 7520c48b9..baad5cb4a 100644 --- a/src/video_core/regs_lighting.h +++ b/src/video_core/pica/regs_lighting.h @@ -26,16 +26,16 @@ struct LightingRegs { DistanceAttenuation = 16, }; - static constexpr unsigned NumLightingSampler = 24; + static constexpr u32 NumLightingSampler = 24; - static LightingSampler SpotlightAttenuationSampler(unsigned index) { + static LightingSampler SpotlightAttenuationSampler(u32 index) { return static_cast( - static_cast(LightingSampler::SpotlightAttenuation) + index); + static_cast(LightingSampler::SpotlightAttenuation) + index); } - static LightingSampler DistanceAttenuationSampler(unsigned index) { - return static_cast( - static_cast(LightingSampler::DistanceAttenuation) + index); + static LightingSampler DistanceAttenuationSampler(u32 index) { + return static_cast(static_cast(LightingSampler::DistanceAttenuation) + + index); } /** diff --git a/src/video_core/regs_pipeline.h b/src/video_core/pica/regs_pipeline.h similarity index 94% rename from src/video_core/regs_pipeline.h rename to 
src/video_core/pica/regs_pipeline.h index 8ffce9dee..6bfd96c89 100644 --- a/src/video_core/regs_pipeline.h +++ b/src/video_core/pica/regs_pipeline.h @@ -20,6 +20,20 @@ struct PipelineRegs { FLOAT = 3, }; + static u32 GetFormatBytes(VertexAttributeFormat format) { + switch (format) { + case VertexAttributeFormat::FLOAT: + return 4; + case VertexAttributeFormat::SHORT: + return 2; + case VertexAttributeFormat::BYTE: + case VertexAttributeFormat::UBYTE: + return 1; + default: + UNREACHABLE(); + } + } + struct { BitField<1, 28, u32> base_address; @@ -194,14 +208,14 @@ struct PipelineRegs { BitField<0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to - unsigned GetSize(unsigned index) const { + u32 GetSize(u32 index) const { ASSERT(index < 2); return 8 * size[index]; } - PAddr GetPhysicalAddress(unsigned index) const { + PAddr GetPhysicalAddress(u32 index) const { ASSERT(index < 2); - return (PAddr)(8 * addr[index]); + return 8 * addr[index]; } } command_buffer; diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/pica/regs_rasterizer.h similarity index 88% rename from src/video_core/regs_rasterizer.h rename to src/video_core/pica/regs_rasterizer.h index f605d2cd8..7b88231f9 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/pica/regs_rasterizer.h @@ -104,6 +104,17 @@ struct RasterizerRegs { u32 raw; } vs_output_attributes[7]; + void ValidateSemantics() { + for (std::size_t attrib = 0; attrib < vs_output_total; ++attrib) { + const u32 output_register_map = vs_output_attributes[attrib].raw; + for (std::size_t comp = 0; comp < 4; ++comp) { + const u32 semantic = (output_register_map >> (8 * comp)) & 0x1F; + ASSERT_MSG(semantic < 24 || semantic == VSOutputAttributes::INVALID, + "Invalid/unknown semantic id: {}", semantic); + } + } + } + INSERT_PADDING_WORDS(0xe); enum class ScissorMode : u32 { diff --git 
a/src/video_core/regs_shader.h b/src/video_core/pica/regs_shader.h similarity index 89% rename from src/video_core/regs_shader.h rename to src/video_core/pica/regs_shader.h index 71aa9d11a..f9e47d6d1 100644 --- a/src/video_core/regs_shader.h +++ b/src/video_core/pica/regs_shader.h @@ -4,11 +4,10 @@ #pragma once -#include - #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/vector_math.h" namespace Pica { @@ -22,6 +21,11 @@ struct ShaderRegs { BitField<24, 8, u32> w; } int_uniforms[4]; + Common::Vec4 GetIntUniform(u32 index) const { + const auto& values = int_uniforms[index]; + return Common::MakeVec(values.x, values.y, values.z, values.w); + } + INSERT_PADDING_WORDS(0x4); enum ShaderMode { @@ -55,13 +59,13 @@ struct ShaderRegs { INSERT_PADDING_WORDS(0x2); struct { - enum Format : u32 { - FLOAT24 = 0, - FLOAT32 = 1, + enum class Format : u32 { + Float24 = 0, + Float32 = 1, }; bool IsFloat32() const { - return format == FLOAT32; + return format == Format::Float32; } union { @@ -70,7 +74,6 @@ struct ShaderRegs { // indices // TODO: Maybe the uppermost index is for the geometry shader? Investigate! BitField<0, 7, u32> index; - BitField<31, 1, Format> format; }; diff --git a/src/video_core/regs_texturing.h b/src/video_core/pica/regs_texturing.h similarity index 100% rename from src/video_core/regs_texturing.h rename to src/video_core/pica/regs_texturing.h diff --git a/src/video_core/pica/shader_setup.cpp b/src/video_core/pica/shader_setup.cpp new file mode 100644 index 000000000..967292c9f --- /dev/null +++ b/src/video_core/pica/shader_setup.cpp @@ -0,0 +1,61 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_set.h" +#include "common/hash.h" +#include "video_core/pica/regs_shader.h" +#include "video_core/pica/shader_setup.h" + +namespace Pica { + +ShaderSetup::ShaderSetup() = default; + +ShaderSetup::~ShaderSetup() = default; + +void ShaderSetup::WriteUniformBoolReg(u32 value) { + const auto bits = BitSet32(value); + for (u32 i = 0; i < uniforms.b.size(); ++i) { + uniforms.b[i] = bits[i]; + } +} + +void ShaderSetup::WriteUniformIntReg(u32 index, const Common::Vec4 values) { + ASSERT(index < uniforms.i.size()); + uniforms.i[index] = values; +} + +void ShaderSetup::WriteUniformFloatReg(ShaderRegs& config, u32 value) { + auto& uniform_setup = config.uniform_setup; + const bool is_float32 = uniform_setup.IsFloat32(); + if (!uniform_queue.Push(value, is_float32)) { + return; + } + + const auto uniform = uniform_queue.Get(is_float32); + if (uniform_setup.index >= uniforms.f.size()) { + LOG_ERROR(HW_GPU, "Invalid float uniform index {}", uniform_setup.index.Value()); + return; + } + + uniforms.f[uniform_setup.index] = uniform; + uniform_setup.index.Assign(uniform_setup.index + 1); +} + +u64 ShaderSetup::GetProgramCodeHash() { + if (program_code_hash_dirty) { + program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash_dirty = false; + } + return program_code_hash; +} + +u64 ShaderSetup::GetSwizzleDataHash() { + if (swizzle_data_hash_dirty) { + swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); + swizzle_data_hash_dirty = false; + } + return swizzle_data_hash; +} + +} // namespace Pica diff --git a/src/video_core/pica/shader_setup.h b/src/video_core/pica/shader_setup.h new file mode 100644 index 000000000..f26843838 --- /dev/null +++ b/src/video_core/pica/shader_setup.h @@ -0,0 +1,103 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include "common/vector_math.h"
+#include "video_core/pica/packed_attribute.h"
+#include "video_core/pica_types.h"
+
+namespace Pica {
+
+constexpr u32 MAX_PROGRAM_CODE_LENGTH = 4096;
+constexpr u32 MAX_SWIZZLE_DATA_LENGTH = 4096;
+
+using ProgramCode = std::array;
+using SwizzleData = std::array;
+
+/// Uniform values of a shader setup: float vectors (f), booleans (b) and integer vectors (i).
+struct Uniforms {
+    alignas(16) std::array, 96> f;
+    std::array b;
+    std::array, 4> i;
+
+    /// Byte offset of float uniform `index` from the start of Uniforms.
+    static size_t GetFloatUniformOffset(u32 index) {
+        return offsetof(Uniforms, f) + index * sizeof(Common::Vec4);
+    }
+
+    /// Byte offset of boolean uniform `index` from the start of Uniforms.
+    static size_t GetBoolUniformOffset(u32 index) {
+        return offsetof(Uniforms, b) + index * sizeof(bool);
+    }
+
+    /// Byte offset of integer uniform `index` from the start of Uniforms.
+    static size_t GetIntUniformOffset(u32 index) {
+        return offsetof(Uniforms, i) + index * sizeof(Common::Vec4);
+    }
+
+private:
+    friend class boost::serialization::access;
+    template
+    void serialize(Archive& ar, const u32 file_version) {
+        ar& f;
+        ar& b;
+        ar& i;
+    }
+};
+
+struct ShaderRegs;
+
+/**
+ * This structure contains the state information common for all shader units such as uniforms.
+ * The geometry shader has a unique configuration so when enabled it has its own setup.
+ */
+struct ShaderSetup {
+public:
+    explicit ShaderSetup();
+    ~ShaderSetup();
+
+    /// Sets every boolean uniform from the corresponding bit of `value`.
+    void WriteUniformBoolReg(u32 value);
+
+    /// Stores `values` as integer uniform `index`.
+    void WriteUniformIntReg(u32 index, const Common::Vec4 values);
+
+    /// Queues one word of float uniform data; may write a float uniform and advance the index
+    /// held in config.uniform_setup.
+    void WriteUniformFloatReg(ShaderRegs& config, u32 value);
+
+    /// Hash of program_code, cached until MarkProgramCodeDirty() is called.
+    u64 GetProgramCodeHash();
+
+    /// Hash of swizzle_data, cached until MarkSwizzleDataDirty() is called.
+    u64 GetSwizzleDataHash();
+
+    /// Forces the next GetProgramCodeHash() call to recompute the hash.
+    void MarkProgramCodeDirty() {
+        program_code_hash_dirty = true;
+    }
+
+    /// Forces the next GetSwizzleDataHash() call to recompute the hash.
+    void MarkSwizzleDataDirty() {
+        swizzle_data_hash_dirty = true;
+    }
+
+public:
+    Uniforms uniforms;
+    PackedAttribute uniform_queue;
+    ProgramCode program_code;
+    SwizzleData swizzle_data;
+    // NOTE(review): entry_point has no initializer and, like cached_shader, is not part of
+    // serialize() below - confirm that this is intended.
+    u32 entry_point;
+    const void* cached_shader{};
+
+private:
+    // Both hashes start dirty, so the 0xDEADC0DE placeholders are replaced on first use.
+    bool program_code_hash_dirty{true};
+    bool swizzle_data_hash_dirty{true};
+    u64 program_code_hash{0xDEADC0DE};
+    u64 swizzle_data_hash{0xDEADC0DE};
+
+    friend class boost::serialization::access;
+    template
+    void serialize(Archive& ar, const u32 file_version) {
+        ar& uniforms;
+        ar& uniform_queue;
+        ar& program_code;
+        ar& swizzle_data;
+        ar& program_code_hash_dirty;
+        ar& swizzle_data_hash_dirty;
+        ar& program_code_hash;
+        ar& swizzle_data_hash;
+    }
+};
+
+} // namespace Pica
diff --git a/src/video_core/pica/shader_unit.cpp b/src/video_core/pica/shader_unit.cpp
new file mode 100644
index 000000000..7cee1131d
--- /dev/null
+++ b/src/video_core/pica/shader_unit.cpp
@@ -0,0 +1,63 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_set.h"
+#include "video_core/pica/regs_shader.h"
+#include "video_core/pica/shader_unit.h"
+
+namespace Pica {
+
+ShaderUnit::ShaderUnit(GeometryEmitter* emitter) : emitter_ptr{emitter} {}
+
+ShaderUnit::~ShaderUnit() = default;
+
+void ShaderUnit::LoadInput(const ShaderRegs& config, const AttributeBuffer& buffer) {
+    const u32 max_attribute = config.max_input_attribute_index;
+    for (u32 attr = 0; attr <= max_attribute; ++attr) {
+        const u32 reg = config.GetRegisterForAttribute(attr);
+        input[reg] = buffer[attr];
+    }
+}
+
+void ShaderUnit::WriteOutput(const ShaderRegs& config, AttributeBuffer& buffer) {
+    u32 output_index{};
+    for (u32 reg : Common::BitSet(config.output_mask)) {
+        buffer[output_index++] = output[reg];
+    }
+}
+
+void GeometryEmitter::Emit(std::span, 16> output_regs) {
+    ASSERT(vertex_id < 3);
+
+    u32 output_index{};
+    for (u32 reg : Common::BitSet(output_mask)) {
+        buffer[vertex_id][output_index++] = output_regs[reg];
+    }
+
+    if (prim_emit) {
+        if (winding) {
+            handlers->winding_setter();
+        }
+        for (std::size_t i = 0; i < buffer.size(); ++i) {
+            handlers->vertex_handler(buffer[i]);
+        }
+    }
+}
+
+GeometryShaderUnit::GeometryShaderUnit() : ShaderUnit{&emitter} {}
+
+GeometryShaderUnit::~GeometryShaderUnit() = default;
+
+void GeometryShaderUnit::SetVertexHandlers(VertexHandler vertex_handler,
+                                           WindingSetter winding_setter) {
+    handler_storage.vertex_handler = vertex_handler;
+    handler_storage.winding_setter = winding_setter;
+    emitter.handlers = &handler_storage; // unit-owned storage: no per-call allocation to leak
+}
+
+void GeometryShaderUnit::ConfigOutput(const ShaderRegs& config) {
+    emitter.output_mask = config.output_mask;
+}
+
+} // namespace Pica
diff --git a/src/video_core/pica/shader_unit.h b/src/video_core/pica/shader_unit.h
new file mode 100644
index 000000000..968b00b90
--- /dev/null
+++ b/src/video_core/pica/shader_unit.h
@@ -0,0 +1,138 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include
+
+#include "video_core/pica/output_vertex.h"
+
+namespace Pica {
+
+/// Handler type for receiving vertex outputs from vertex shader or geometry shader
+using VertexHandler = std::function;
+
+/// Handler type for signaling to invert the vertex order of the next triangle
+using WindingSetter = std::function;
+
+struct ShaderRegs;
+struct GeometryEmitter;
+
+/**
+ * This structure contains the state information that needs to be unique for a shader unit. The 3DS
+ * has four shader units that process shaders in parallel.
+ */
+struct ShaderUnit {
+    explicit ShaderUnit(GeometryEmitter* emitter = nullptr);
+    ~ShaderUnit();
+
+    /// Copies each attribute of `input` into the input register that `config` maps it to.
+    void LoadInput(const ShaderRegs& config, const AttributeBuffer& input);
+
+    /// Packs the output registers selected by config.output_mask contiguously into `output`.
+    void WriteOutput(const ShaderRegs& config, AttributeBuffer& output);
+
+    /// Byte offset of input register `register_index` inside ShaderUnit.
+    static constexpr size_t InputOffset(s32 register_index) {
+        return offsetof(ShaderUnit, input) + register_index * sizeof(Common::Vec4);
+    }
+
+    /// Byte offset of output register `register_index` inside ShaderUnit.
+    static constexpr size_t OutputOffset(s32 register_index) {
+        return offsetof(ShaderUnit, output) + register_index * sizeof(Common::Vec4);
+    }
+
+    /// Byte offset of temporary register `register_index` inside ShaderUnit.
+    static constexpr size_t TemporaryOffset(s32 register_index) {
+        return offsetof(ShaderUnit, temporary) + register_index * sizeof(Common::Vec4);
+    }
+
+public:
+    s32 address_registers[3];
+    bool conditional_code[2];
+    alignas(16) std::array, 16> input;
+    alignas(16) std::array, 16> temporary;
+    alignas(16) std::array, 16> output;
+    /// Emitter used by this unit; null unless one was passed to the constructor.
+    GeometryEmitter* emitter_ptr;
+
+private:
+    friend class boost::serialization::access;
+    template
+    void serialize(Archive& ar, const u32 file_version) {
+        ar& input;
+        ar& temporary;
+        ar& output;
+        ar& conditional_code;
+        ar& address_registers;
+    }
+};
+
+/// Callbacks that GeometryEmitter::Emit invokes when a primitive is emitted.
+struct Handlers {
+    VertexHandler vertex_handler;
+    WindingSetter winding_setter;
+};
+
+/// This structure contains state information for primitive emitting in geometry shader.
+struct GeometryEmitter {
+    /// Stores the registers selected by output_mask as vertex `vertex_id`; when prim_emit is
+    /// set, also calls the winding setter (if winding) and the vertex handler for each vertex.
+    void Emit(std::span, 16> output_regs);
+
+public:
+    std::array buffer;
+    u8 vertex_id;
+    bool prim_emit;
+    bool winding;
+    u32 output_mask;
+    /// Non-owning; must point at live Handlers before Emit runs with prim_emit set.
+    Handlers* handlers{};
+
+private:
+    friend class boost::serialization::access;
+    template
+    void serialize(Archive& ar, const u32 file_version) {
+        ar& buffer;
+        ar& vertex_id;
+        ar& prim_emit;
+        ar& winding;
+        ar& output_mask;
+    }
+};
+
+/**
+ * This is an extended shader unit state that represents the special unit that can run both vertex
+ * shader and geometry shader. It contains an additional primitive emitter and utilities for
+ * geometry shader.
+ */
+struct GeometryShaderUnit : public ShaderUnit {
+    GeometryShaderUnit();
+    ~GeometryShaderUnit();
+
+    /// Installs the callbacks used by the emitter. They live in storage owned by this unit,
+    /// so repeated calls replace the previous callbacks without allocating.
+    void SetVertexHandlers(VertexHandler vertex_handler, WindingSetter winding_setter);
+
+    /// Copies config.output_mask into the emitter.
+    void ConfigOutput(const ShaderRegs& config);
+
+    GeometryEmitter emitter;
+
+private:
+    /// Backing storage for emitter.handlers. Like emitter_ptr, that pointer refers into this
+    /// object, so the unit must stay at a fixed address once the handlers are set.
+    Handlers handler_storage;
+
+    friend class boost::serialization::access;
+    template
+    void serialize(Archive& ar, const u32 file_version) {
+        ar& boost::serialization::base_object(*this);
+        ar& emitter;
+    }
+};
+
+} // namespace Pica
diff --git a/src/video_core/pica/vertex_loader.cpp b/src/video_core/pica/vertex_loader.cpp
new file mode 100644
index 000000000..948ac2817
--- /dev/null
+++ b/src/video_core/pica/vertex_loader.cpp
@@ -0,0 +1,109 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/alignment.h"
+#include "common/logging/log.h"
+#include "video_core/pica/vertex_loader.h"
+
+namespace Pica {
+
+/**
+ * Precomputes the per-attribute fetch parameters (source offset, stride, format, element count
+ * and default flag) from regs.vertex_attributes so that LoadVertex can use them directly.
+ */
+VertexLoader::VertexLoader(Memory::MemorySystem& memory_, const PipelineRegs& regs)
+    : memory{memory_} {
+    const auto& attribute_config = regs.vertex_attributes;
+    num_total_attributes = attribute_config.GetNumTotalAttributes();
+
+    // Placeholder for every source offset that no loader assigns below
+    vertex_attribute_sources.fill(0xdeadbeef);
+
+    for (u32 i = 0; i < 16; i++) {
+        vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
+    }
+
+    // Setup attribute data from loaders
+    for (u32 loader = 0; loader < 12; ++loader) {
+        const auto& loader_config = attribute_config.attribute_loaders[loader];
+
+        // Running byte offset of the next component, relative to loader_config.data_offset
+        u32 offset = 0;
+
+        // TODO: What happens if a loader overwrites a previous one's data?
+        for (u32 component = 0; component < loader_config.component_count; ++component) {
+            if (component >= 12) {
+                LOG_ERROR(HW_GPU,
+                          "Overflow in the vertex attribute loader {} trying to load component {}",
+                          loader, component);
+                continue;
+            }
+
+            u32 attribute_index = loader_config.GetComponent(component);
+            if (attribute_index < 12) {
+                // Real attribute: align to its element size, record where and how to fetch it
+                offset = Common::AlignUp(offset,
+                                         attribute_config.GetElementSizeInBytes(attribute_index));
+                vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
+                vertex_attribute_strides[attribute_index] =
+                    static_cast(loader_config.byte_count);
+                vertex_attribute_formats[attribute_index] =
+                    attribute_config.GetFormat(attribute_index);
+                vertex_attribute_elements[attribute_index] =
+                    attribute_config.GetNumElements(attribute_index);
+                offset += attribute_config.GetStride(attribute_index);
+            } else if (attribute_index < 16) {
+                // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings,
+                // respectively
+                offset = Common::AlignUp(offset, 4);
+                offset += (attribute_index - 11) * 4;
+            } else {
+                UNREACHABLE(); // This is truly unreachable due to the number of bits for each
+                               // component
+            }
+        }
+    }
+}
+
+VertexLoader::~VertexLoader() = default;
+
+/**
+ * Fills `input` with the attributes of vertex number `vertex`.
+ *
+ * Attributes flagged as default are copied from input_default_attributes; all others are read
+ * from memory at base_address + per-attribute source offset + stride * vertex.
+ * `index` is not used by this implementation.
+ */
+void VertexLoader::LoadVertex(PAddr base_address, u32 index, u32 vertex, AttributeBuffer& input,
+                              AttributeBuffer& input_default_attributes) const {
+    for (s32 i = 0; i < num_total_attributes; ++i) {
+        // Load the default attribute if we're configured to do so
+        if (vertex_attribute_is_default[i]) {
+            input[i] = input_default_attributes[i];
+            continue;
+        }
+
+        // TODO(yuriks): In this case, no data gets loaded and the vertex
+        // remains with the last value it had. This isn't currently maintained
+        // as global state, however, and so won't work in Citra yet.
+        // NOTE(review): "retension" in the message below is a typo for "retention".
+        if (vertex_attribute_elements[i] == 0) {
+            LOG_ERROR(HW_GPU, "Vertex retension unimplemented");
+            continue;
+        }
+
+        // Load per-vertex data from the loader arrays
+        const PAddr source_addr =
+            base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
+
+        switch (vertex_attribute_formats[i]) {
+        case PipelineRegs::VertexAttributeFormat::BYTE:
+            LoadAttribute(source_addr, i, input);
+            break;
+        case PipelineRegs::VertexAttributeFormat::UBYTE:
+            LoadAttribute(source_addr, i, input);
+            break;
+        case PipelineRegs::VertexAttributeFormat::SHORT:
+            LoadAttribute(source_addr, i, input);
+            break;
+        case PipelineRegs::VertexAttributeFormat::FLOAT:
+            LoadAttribute(source_addr, i, input);
+            break;
+        }
+
+        // Default attribute values set if array elements have < 4 components. This
+        // is *not* carried over from the default attribute settings even if they're
+        // enabled for this attribute.
+        for (u32 comp = vertex_attribute_elements[i]; comp < 4; comp++) {
+            input[i][comp] = comp == 3 ? f24::One() : f24::Zero();
+        }
+    }
+}
+
+} // namespace Pica
diff --git a/src/video_core/pica/vertex_loader.h b/src/video_core/pica/vertex_loader.h
new file mode 100644
index 000000000..7960600ec
--- /dev/null
+++ b/src/video_core/pica/vertex_loader.h
@@ -0,0 +1,47 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/memory.h"
+#include "video_core/pica/output_vertex.h"
+#include "video_core/pica/regs_pipeline.h"
+
+namespace Memory {
+class MemorySystem;
+}
+
+namespace Pica {
+
+/// Reads vertex attributes from emulated memory using parameters derived from PipelineRegs.
+class VertexLoader {
+public:
+    /// Captures `memory_` by reference and precomputes the fetch parameters from `regs`.
+    explicit VertexLoader(Memory::MemorySystem& memory_, const PipelineRegs& regs);
+    ~VertexLoader();
+
+    /// Fills `input` with the attributes of vertex `vertex`, using `input_default_attributes`
+    /// for the attributes that are configured as default.
+    void LoadVertex(PAddr base_address, u32 index, u32 vertex, AttributeBuffer& input,
+                    AttributeBuffer& input_default_attributes) const;
+
+    /// Converts the stored elements of attribute `attrib` found at `source_addr` to f24 and
+    /// writes them to out[attrib].
+    /// NOTE(review): the pointer from GetPhysicalPointer is dereferenced without a null check -
+    /// confirm it cannot be null for the addresses passed here.
+    template
+    void LoadAttribute(PAddr source_addr, u32 attrib, AttributeBuffer& out) const {
+        const T* data = reinterpret_cast(memory.GetPhysicalPointer(source_addr));
+        for (u32 comp = 0; comp < vertex_attribute_elements[attrib]; ++comp) {
+            out[attrib][comp] = f24::FromFloat32(data[comp]);
+        }
+    }
+
+    /// Number of attributes per vertex, as reported by the register state at construction.
+    int GetNumTotalAttributes() const {
+        return num_total_attributes;
+    }
+
+private:
+    Memory::MemorySystem& memory;
+    // Per-attribute fetch parameters, all indexed by attribute index
+    std::array vertex_attribute_sources;
+    std::array vertex_attribute_strides{};
+    std::array vertex_attribute_formats;
+    std::array vertex_attribute_elements{};
+    std::array vertex_attribute_is_default;
+    int num_total_attributes = 0;
+};
+
+} // namespace Pica
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
deleted file mode 100644
index 0cb78ea8e..000000000
--- a/src/video_core/pica_state.h
+++ /dev/null
@@ -1,255 +0,0 @@
-// Copyright 2016 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
- -#pragma once - -#include -#include -#include -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/vector_math.h" -#include "core/memory.h" -#include "video_core/geometry_pipeline.h" -#include "video_core/primitive_assembly.h" -#include "video_core/regs.h" -#include "video_core/shader/shader.h" -#include "video_core/video_core.h" - -// Boost::serialization doesn't like union types for some reason, -// so we need to mark arrays of union values with a special serialization method -template -struct UnionArray : public std::array { -private: - template - void serialize(Archive& ar, const unsigned int) { - static_assert(sizeof(Value) == sizeof(u32)); - ar&* static_cast(static_cast(this->data())); - } - friend class boost::serialization::access; -}; - -namespace Pica { - -/// Struct used to describe current Pica state -struct State { - State(); - void Reset(); - - /// Pica registers - Regs regs; - - Shader::ShaderSetup vs; - Shader::ShaderSetup gs; - - Shader::AttributeBuffer input_default_attributes; - - struct ProcTex { - union ValueEntry { - u32 raw; - - // LUT value, encoded as 12-bit fixed point, with 12 fraction bits - BitField<0, 12, u32> value; // 0.0.12 fixed point - - // Difference between two entry values. Used for efficient interpolation. - // 0.0.12 fixed point with two's complement. The range is [-0.5, 0.5). 
- // Note: the type of this is different from the one of lighting LUT - BitField<12, 12, s32> difference; - - float ToFloat() const { - return static_cast(value) / 4095.f; - } - - float DiffToFloat() const { - return static_cast(difference) / 4095.f; - } - }; - - union ColorEntry { - u32 raw; - BitField<0, 8, u32> r; - BitField<8, 8, u32> g; - BitField<16, 8, u32> b; - BitField<24, 8, u32> a; - - Common::Vec4 ToVector() const { - return {static_cast(r), static_cast(g), static_cast(b), - static_cast(a)}; - } - }; - - union ColorDifferenceEntry { - u32 raw; - BitField<0, 8, s32> r; // half of the difference between two ColorEntry - BitField<8, 8, s32> g; - BitField<16, 8, s32> b; - BitField<24, 8, s32> a; - - Common::Vec4 ToVector() const { - return Common::Vec4{r, g, b, a} * 2; - } - }; - - UnionArray noise_table; - UnionArray color_map_table; - UnionArray alpha_map_table; - UnionArray color_table; - UnionArray color_diff_table; - - private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const unsigned int file_version) { - ar& noise_table; - ar& color_map_table; - ar& alpha_map_table; - ar& color_table; - ar& color_diff_table; - } - } proctex; - - struct Lighting { - union LutEntry { - // Used for raw access - u32 raw; - - // LUT value, encoded as 12-bit fixed point, with 12 fraction bits - BitField<0, 12, u32> value; // 0.0.12 fixed point - - // Used for efficient interpolation. - BitField<12, 11, u32> difference; // 0.0.11 fixed point - BitField<23, 1, u32> neg_difference; - - float ToFloat() const { - return static_cast(value) / 4095.f; - } - - float DiffToFloat() const { - float diff = static_cast(difference) / 2047.f; - return neg_difference ? 
-diff : diff; - } - - template - void serialize(Archive& ar, const unsigned int file_version) { - ar& raw; - } - }; - - std::array, 24> luts; - } lighting; - - struct { - union LutEntry { - // Used for raw access - u32 raw; - - BitField<0, 13, s32> difference; // 1.1.11 fixed point - BitField<13, 11, u32> value; // 0.0.11 fixed point - - float ToFloat() const { - return static_cast(value) / 2047.0f; - } - - float DiffToFloat() const { - return static_cast(difference) / 2047.0f; - } - }; - - UnionArray lut; - } fog; - - /// Current Pica command list - struct { - PAddr addr; // This exists only for serialization - const u32* head_ptr; - const u32* current_ptr; - u32 length; - } cmd_list; - - /// Struct used to describe immediate mode rendering state - struct ImmediateModeState { - // Used to buffer partial vertices for immediate-mode rendering. - Shader::AttributeBuffer input_vertex; - // Index of the next attribute to be loaded into `input_vertex`. - u32 current_attribute = 0; - // Indicates the immediate mode just started and the geometry pipeline needs to reconfigure - bool reset_geometry_pipeline = true; - - private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const unsigned int file_version) { - ar& input_vertex; - ar& current_attribute; - ar& reset_geometry_pipeline; - } - - } immediate; - - // the geometry shader needs to be kept in the global state because some shaders relie on - // preserved register value across shader invocation. - // TODO: also bring the three vertex shader units here and implement the shader scheduler. 
- Shader::GSUnitState gs_unit; - - GeometryPipeline geometry_pipeline; - - // This is constructed with a dummy triangle topology - PrimitiveAssembler primitive_assembler; - - int vs_float_regs_counter = 0; - std::array vs_uniform_write_buffer{}; - - int gs_float_regs_counter = 0; - std::array gs_uniform_write_buffer{}; - - int default_attr_counter = 0; - std::array default_attr_write_buffer{}; - -private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const unsigned int file_version) { - ar& regs.reg_array; - ar& vs; - ar& gs; - ar& input_default_attributes; - ar& proctex; - ar& lighting.luts; - ar& fog.lut; - ar& cmd_list.addr; - ar& cmd_list.length; - ar& immediate; - ar& gs_unit; - ar& geometry_pipeline; - ar& primitive_assembler; - ar& vs_float_regs_counter; - ar& boost::serialization::make_array(vs_uniform_write_buffer.data(), - vs_uniform_write_buffer.size()); - ar& gs_float_regs_counter; - ar& boost::serialization::make_array(gs_uniform_write_buffer.data(), - gs_uniform_write_buffer.size()); - ar& default_attr_counter; - ar& boost::serialization::make_array(default_attr_write_buffer.data(), - default_attr_write_buffer.size()); - boost::serialization::split_member(ar, *this, file_version); - } - - template - void save(Archive& ar, const unsigned int file_version) const { - ar << static_cast(cmd_list.current_ptr - cmd_list.head_ptr); - } - - template - void load(Archive& ar, const unsigned int file_version) { - u32 offset{}; - ar >> offset; - cmd_list.head_ptr = - reinterpret_cast(VideoCore::g_memory->GetPhysicalPointer(cmd_list.addr)); - cmd_list.current_ptr = cmd_list.head_ptr + offset; - } -}; - -extern State g_state; ///< Current Pica state - -} // namespace Pica diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp deleted file mode 100644 index 60b0af059..000000000 --- a/src/video_core/primitive_assembly.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2014 Citra Emulator Project 
-// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/logging/log.h" -#include "video_core/primitive_assembly.h" -#include "video_core/regs_pipeline.h" -#include "video_core/shader/shader.h" - -namespace Pica { - -template -PrimitiveAssembler::PrimitiveAssembler(PipelineRegs::TriangleTopology topology) - : topology(topology) {} - -template -void PrimitiveAssembler::SubmitVertex(const VertexType& vtx, - const TriangleHandler& triangle_handler) { - switch (topology) { - case PipelineRegs::TriangleTopology::List: - case PipelineRegs::TriangleTopology::Shader: - if (buffer_index < 2) { - buffer[buffer_index++] = vtx; - } else { - buffer_index = 0; - if (topology == PipelineRegs::TriangleTopology::Shader && winding) { - triangle_handler(buffer[1], buffer[0], vtx); - winding = false; - } else { - triangle_handler(buffer[0], buffer[1], vtx); - } - } - break; - - case PipelineRegs::TriangleTopology::Strip: - case PipelineRegs::TriangleTopology::Fan: - if (strip_ready) - triangle_handler(buffer[0], buffer[1], vtx); - - buffer[buffer_index] = vtx; - - strip_ready |= (buffer_index == 1); - - if (topology == PipelineRegs::TriangleTopology::Strip) - buffer_index = !buffer_index; - else if (topology == PipelineRegs::TriangleTopology::Fan) - buffer_index = 1; - break; - - default: - LOG_ERROR(HW_GPU, "Unknown triangle topology {:x}:", (int)topology); - break; - } -} - -template -void PrimitiveAssembler::SetWinding() { - winding = true; -} - -template -void PrimitiveAssembler::Reset() { - buffer_index = 0; - strip_ready = false; - winding = false; -} - -template -void PrimitiveAssembler::Reconfigure(PipelineRegs::TriangleTopology topology) { - Reset(); - this->topology = topology; -} - -template -bool PrimitiveAssembler::IsEmpty() const { - return buffer_index == 0 && strip_ready == false; -} - -template -PipelineRegs::TriangleTopology PrimitiveAssembler::GetTopology() const { - return topology; -} - -// explicitly 
instantiate use cases -template struct PrimitiveAssembler; - -} // namespace Pica diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index b6e8bb4fa..62047deba 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -2,10 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include "common/alignment.h" #include "core/memory.h" -#include "video_core/pica_state.h" +#include "video_core/pica/pica_core.h" #include "video_core/rasterizer_accelerated.h" namespace VideoCore { @@ -22,7 +21,7 @@ static Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) { return Common::Vec3u{color.r, color.g, color.b} / 255.0f; } -RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v, +RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::OutputVertex& v, bool flip_quaternion) { position[0] = v.pos.x.ToFloat32(); position[1] = v.pos.y.ToFloat32(); @@ -52,8 +51,8 @@ RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::Shader::Output } } -RasterizerAccelerated::RasterizerAccelerated(Memory::MemorySystem& memory_) - : memory{memory_}, regs{Pica::g_state.regs} { +RasterizerAccelerated::RasterizerAccelerated(Memory::MemorySystem& memory_, Pica::PicaCore& pica_) + : memory{memory_}, pica{pica_}, regs{pica.regs.internal} { fs_uniform_block_data.lighting_lut_dirty.fill(true); } @@ -82,9 +81,8 @@ static bool AreQuaternionsOpposite(Common::Vec4 qa, Common::Vec4 qb) { return (Common::Dot(a, b) < 0.f); } -void RasterizerAccelerated::AddTriangle(const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) { +void RasterizerAccelerated::AddTriangle(const Pica::OutputVertex& v0, const Pica::OutputVertex& v1, + const Pica::OutputVertex& v2) { vertex_batch.emplace_back(v0, false); vertex_batch.emplace_back(v1, 
AreQuaternionsOpposite(v0.quat, v1.quat)); vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); @@ -146,7 +144,7 @@ void RasterizerAccelerated::SyncEntireState() { } SyncGlobalAmbient(); - for (unsigned light_index = 0; light_index < 8; light_index++) { + for (u32 light_index = 0; light_index < 8; light_index++) { SyncLightSpecular0(light_index); SyncLightSpecular1(light_index); SyncLightDiffuse(light_index); @@ -162,7 +160,7 @@ void RasterizerAccelerated::SyncEntireState() { SyncShadowBias(); SyncShadowTextureBias(); - for (unsigned tex_index = 0; tex_index < 3; tex_index++) { + for (u32 tex_index = 0; tex_index < 3; tex_index++) { SyncTextureLodBias(tex_index); } } diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index bf14c9728..b82961bbf 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -6,7 +6,6 @@ #include "common/vector_math.h" #include "video_core/rasterizer_interface.h" -#include "video_core/regs_texturing.h" #include "video_core/shader/generator/pica_fs_config.h" #include "video_core/shader/generator/shader_uniforms.h" @@ -15,19 +14,21 @@ class MemorySystem; } namespace Pica { -struct Regs; +class PicaCore; } namespace VideoCore { class RasterizerAccelerated : public RasterizerInterface { public: - RasterizerAccelerated(Memory::MemorySystem& memory); + explicit RasterizerAccelerated(Memory::MemorySystem& memory, Pica::PicaCore& pica); virtual ~RasterizerAccelerated() = default; - void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) override; + void AddTriangle(const Pica::OutputVertex& v0, const Pica::OutputVertex& v1, + const Pica::OutputVertex& v2) override; + void NotifyPicaRegisterChanged(u32 id) override; + void SyncEntireState() override; protected: @@ -128,7 +129,7 @@ protected: /// Structure that the hardware rendered vertices are composed of struct 
HardwareVertex { HardwareVertex() = default; - HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion); + HardwareVertex(const Pica::OutputVertex& v, bool flip_quaternion); Common::Vec4f position; Common::Vec4f color; @@ -151,7 +152,8 @@ protected: protected: Memory::MemorySystem& memory; - Pica::Regs& regs; + Pica::PicaCore& pica; + Pica::RegsInternal& regs; std::vector vertex_batch; Pica::Shader::UserConfig user_config{}; @@ -159,8 +161,8 @@ protected: VSUniformBlockData vs_uniform_block_data{}; FSUniformBlockData fs_uniform_block_data{}; - std::array, Pica::LightingRegs::NumLightingSampler> - lighting_lut_data{}; + using LightLUT = std::array; + std::array lighting_lut_data{}; std::array fog_lut_data{}; std::array proctex_noise_lut_data{}; std::array proctex_color_map_data{}; diff --git a/src/video_core/rasterizer_cache/framebuffer_base.h b/src/video_core/rasterizer_cache/framebuffer_base.h index afd25cc21..9a1123270 100644 --- a/src/video_core/rasterizer_cache/framebuffer_base.h +++ b/src/video_core/rasterizer_cache/framebuffer_base.h @@ -6,9 +6,9 @@ #include "common/hash.h" #include "common/math_util.h" +#include "video_core/pica/regs_rasterizer.h" #include "video_core/rasterizer_cache/slot_id.h" #include "video_core/rasterizer_cache/surface_params.h" -#include "video_core/regs_rasterizer.h" namespace VideoCore { diff --git a/src/video_core/rasterizer_cache/pixel_format.cpp b/src/video_core/rasterizer_cache/pixel_format.cpp index 0dde49120..f0afb54c6 100644 --- a/src/video_core/rasterizer_cache/pixel_format.cpp +++ b/src/video_core/rasterizer_cache/pixel_format.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "video_core/pica/regs_external.h" #include "video_core/rasterizer_cache/pixel_format.h" namespace VideoCore { @@ -134,17 +135,17 @@ PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format } } -PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { +PixelFormat PixelFormatFromGPUPixelFormat(Pica::PixelFormat format) { switch (format) { - case GPU::Regs::PixelFormat::RGBA8: + case Pica::PixelFormat::RGBA8: return PixelFormat::RGBA8; - case GPU::Regs::PixelFormat::RGB8: + case Pica::PixelFormat::RGB8: return PixelFormat::RGB8; - case GPU::Regs::PixelFormat::RGB565: + case Pica::PixelFormat::RGB565: return PixelFormat::RGB565; - case GPU::Regs::PixelFormat::RGB5A1: + case Pica::PixelFormat::RGB5A1: return PixelFormat::RGB5A1; - case GPU::Regs::PixelFormat::RGBA4: + case Pica::PixelFormat::RGBA4: return PixelFormat::RGBA4; default: return PixelFormat::Invalid; diff --git a/src/video_core/rasterizer_cache/pixel_format.h b/src/video_core/rasterizer_cache/pixel_format.h index 449bde069..06e31ca46 100644 --- a/src/video_core/rasterizer_cache/pixel_format.h +++ b/src/video_core/rasterizer_cache/pixel_format.h @@ -6,9 +6,12 @@ #include #include -#include "core/hw/gpu.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_texturing.h" +#include "video_core/pica/regs_framebuffer.h" +#include "video_core/pica/regs_texturing.h" + +namespace Pica { +enum class PixelFormat : u32; +} namespace VideoCore { @@ -109,6 +112,6 @@ PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format); -PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format); +PixelFormat PixelFormatFromGPUPixelFormat(Pica::PixelFormat format); } // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 92f1613a1..85eda2c56 100644 --- 
a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -14,9 +14,10 @@ #include "common/settings.h" #include "core/memory.h" #include "video_core/custom_textures/custom_tex_manager.h" +#include "video_core/pica/regs_external.h" +#include "video_core/pica/regs_internal.h" #include "video_core/rasterizer_cache/rasterizer_cache_base.h" #include "video_core/rasterizer_cache/surface_base.h" -#include "video_core/regs.h" #include "video_core/renderer_base.h" #include "video_core/texture/texture_decode.h" @@ -34,7 +35,7 @@ constexpr auto RangeFromInterval(const auto& map, const auto& interval) { template RasterizerCache::RasterizerCache(Memory::MemorySystem& memory_, CustomTexManager& custom_tex_manager_, Runtime& runtime_, - Pica::Regs& regs_, RendererBase& renderer_) + Pica::RegsInternal& regs_, RendererBase& renderer_) : memory{memory_}, custom_tex_manager{custom_tex_manager_}, runtime{runtime_}, regs{regs_}, renderer{renderer_}, resolution_scale_factor{renderer.GetResolutionScaleFactor()}, filter{Settings::values.texture_filter.GetValue()}, @@ -151,7 +152,7 @@ void RasterizerCache::RemoveTextureCubeFace(SurfaceId surface_id) { } template -bool RasterizerCache::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { +bool RasterizerCache::AccelerateTextureCopy(const Pica::DisplayTransferConfig& config) { const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 1.f, 1.f}, "RasterizerCache::AccelerateTextureCopy ({})", config.DebugName()}; @@ -249,7 +250,7 @@ bool RasterizerCache::AccelerateTextureCopy(const GPU::Regs::DisplayTransferC } template -bool RasterizerCache::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { +bool RasterizerCache::AccelerateDisplayTransfer(const Pica::DisplayTransferConfig& config) { const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 1.f, 1.f}, "RasterizerCache::AccelerateDisplayTransfer ({})", config.DebugName()}; @@ -274,10 +275,9 @@ bool 
RasterizerCache::AccelerateDisplayTransfer(const GPU::Regs::DisplayTrans // Using flip_vertically alongside crop_input_lines produces skewed output on hardware. // We have to emulate this because some games rely on this behaviour to render correctly. - if (config.flip_vertically && config.crop_input_lines && - config.input_width > config.output_width) { + if (config.flip_vertically && config.crop_input_lines) { dst_params.addr += (config.input_width - config.output_width) * (config.output_height - 1) * - GPU::Regs::BytesPerPixel(config.output_format); + Pica::BytesPerPixel(config.output_format); } auto [src_surface_id, src_rect] = GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); @@ -320,7 +320,7 @@ bool RasterizerCache::AccelerateDisplayTransfer(const GPU::Regs::DisplayTrans } template -bool RasterizerCache::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { +bool RasterizerCache::AccelerateFill(const Pica::MemoryFillConfig& config) { const DebugScope scope{runtime, Common::Vec4f{1.f, 0.f, 1.f, 1.f}, "RasterizerCache::AccelerateFill ({})", config.DebugName()}; diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h index c8553e270..8d401370b 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache_base.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h @@ -12,6 +12,7 @@ #include #include #include + #include "video_core/rasterizer_cache/framebuffer_base.h" #include "video_core/rasterizer_cache/sampler_params.h" #include "video_core/rasterizer_cache/surface_params.h" @@ -22,8 +23,10 @@ class MemorySystem; } namespace Pica { -struct Regs; -} +struct RegsInternal; +struct DisplayTransferConfig; +struct MemoryFillConfig; +} // namespace Pica namespace Pica::Texture { struct TextureInfo; @@ -74,20 +77,20 @@ class RasterizerCache { public: explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager, - Runtime& runtime, Pica::Regs& regs, 
RendererBase& renderer); + Runtime& runtime, Pica::RegsInternal& regs, RendererBase& renderer); ~RasterizerCache(); /// Notify the cache that a new frame has been queued void TickFrame(); /// Perform hardware accelerated texture copy according to the provided configuration - bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config); + bool AccelerateTextureCopy(const Pica::DisplayTransferConfig& config); /// Perform hardware accelerated display transfer according to the provided configuration - bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config); + bool AccelerateDisplayTransfer(const Pica::DisplayTransferConfig& config); /// Perform hardware accelerated memory fill according to the provided configuration - bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config); + bool AccelerateFill(const Pica::MemoryFillConfig& config); /// Returns a reference to the surface object assigned to surface_id Surface& GetSurface(SurfaceId surface_id); @@ -212,7 +215,7 @@ private: Memory::MemorySystem& memory; CustomTexManager& custom_tex_manager; Runtime& runtime; - Pica::Regs& regs; + Pica::RegsInternal& regs; RendererBase& renderer; std::unordered_map texture_cube_cache; tsl::robin_pg_map, Common::IdentityHash> page_table; diff --git a/src/video_core/rasterizer_cache/sampler_params.h b/src/video_core/rasterizer_cache/sampler_params.h index fcb47394e..4356ff2cc 100644 --- a/src/video_core/rasterizer_cache/sampler_params.h +++ b/src/video_core/rasterizer_cache/sampler_params.h @@ -6,7 +6,7 @@ #include #include "common/hash.h" -#include "video_core/regs_texturing.h" +#include "video_core/pica/regs_texturing.h" namespace VideoCore { diff --git a/src/video_core/rasterizer_cache/texture_cube.h b/src/video_core/rasterizer_cache/texture_cube.h index 204dcb7f8..73501afb2 100644 --- a/src/video_core/rasterizer_cache/texture_cube.h +++ b/src/video_core/rasterizer_cache/texture_cube.h @@ -5,8 +5,8 @@ #pragma once #include "common/hash.h" 
+#include "video_core/pica/regs_texturing.h" #include "video_core/rasterizer_cache/slot_id.h" -#include "video_core/regs_texturing.h" namespace VideoCore { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5bb55b7bb..a75f10e6f 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -7,11 +7,15 @@ #include #include #include "common/common_types.h" -#include "core/hw/gpu.h" -namespace Pica::Shader { +namespace Pica { struct OutputVertex; -} // namespace Pica::Shader +} + +namespace Pica { +struct DisplayTransferConfig; +struct MemoryFillConfig; +} // namespace Pica namespace VideoCore { @@ -29,9 +33,8 @@ public: virtual ~RasterizerInterface() = default; /// Queues the primitive formed by the given vertices for rendering - virtual void AddTriangle(const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) = 0; + virtual void AddTriangle(const Pica::OutputVertex& v0, const Pica::OutputVertex& v1, + const Pica::OutputVertex& v2) = 0; /// Draw the current batch of triangles virtual void DrawTriangles() = 0; @@ -56,19 +59,17 @@ public: virtual void ClearAll(bool flush) = 0; /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 - virtual bool AccelerateDisplayTransfer( - [[maybe_unused]] const GPU::Regs::DisplayTransferConfig& config) { + virtual bool AccelerateDisplayTransfer(const Pica::DisplayTransferConfig&) { return false; } /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 1 - virtual bool AccelerateTextureCopy( - [[maybe_unused]] const GPU::Regs::DisplayTransferConfig& config) { + virtual bool AccelerateTextureCopy(const Pica::DisplayTransferConfig&) { return false; } /// Attempt to use a faster method to fill a region - virtual bool AccelerateFill([[maybe_unused]] const GPU::Regs::MemoryFillConfig& config) { + virtual bool AccelerateFill(const 
Pica::MemoryFillConfig&) { return false; } diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 112d95436..16b59107a 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -49,10 +49,6 @@ void RendererBase::EndFrame() { system.frame_limiter.DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); system.perf_stats->BeginSystemFrame(); - - if (Pica::g_debug_context && Pica::g_debug_context->recorder) { - Pica::g_debug_context->recorder->FrameFinished(); - } } bool RendererBase::IsScreenshotPending() const { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 280045522..0487905de 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -8,15 +8,12 @@ #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" -#include "video_core/pica_state.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_rasterizer.h" +#include "video_core/pica/pica_core.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/shader/generator/shader_gen.h" #include "video_core/texture/texture_decode.h" -#include "video_core/video_core.h" namespace OpenGL { @@ -76,10 +73,10 @@ GLenum MakeAttributeType(Pica::PipelineRegs::VertexAttributeFormat format) { } // Anonymous namespace -RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory, +RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory, Pica::PicaCore& pica, VideoCore::CustomTexManager& custom_tex_manager, VideoCore::RendererBase& renderer, Driver& driver_) - : VideoCore::RasterizerAccelerated{memory}, driver{driver_}, + : VideoCore::RasterizerAccelerated{memory, pica}, driver{driver_}, shader_manager{renderer.GetRenderWindow(), driver, 
!driver.IsOpenGLES()}, runtime{driver, renderer}, res_cache{memory, custom_tex_manager, runtime, regs, renderer}, texture_buffer_size{TextureBufferSize()}, vertex_buffer{driver, GL_ARRAY_BUFFER, @@ -259,7 +256,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, if (vertex_attributes.IsDefaultAttribute(i)) { const u32 reg = regs.vs.GetRegisterForAttribute(i); if (!enable_attributes[reg]) { - const auto& attr = Pica::g_state.input_default_attributes.attr[i]; + const auto& attr = pica.input_default_attributes[i]; glVertexAttrib4f(reg, attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(), attr.w.ToFloat32()); } @@ -269,7 +266,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, bool RasterizerOpenGL::SetupVertexShader() { MICROPROFILE_SCOPE(OpenGL_VS); - return shader_manager.UseProgrammableVertexShader(regs, Pica::g_state.vs); + return shader_manager.UseProgrammableVertexShader(regs, pica.vs_setup); } bool RasterizerOpenGL::SetupGeometryShader() { @@ -710,19 +707,19 @@ void RasterizerOpenGL::ClearAll(bool flush) { res_cache.ClearAll(flush); } -bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { +bool RasterizerOpenGL::AccelerateDisplayTransfer(const Pica::DisplayTransferConfig& config) { return res_cache.AccelerateDisplayTransfer(config); } -bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { +bool RasterizerOpenGL::AccelerateTextureCopy(const Pica::DisplayTransferConfig& config) { return res_cache.AccelerateTextureCopy(config); } -bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { +bool RasterizerOpenGL::AccelerateFill(const Pica::MemoryFillConfig& config) { return res_cache.AccelerateFill(config); } -bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, +bool RasterizerOpenGL::AccelerateDisplay(const Pica::FramebufferConfig& config, PAddr 
framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { if (framebuffer_addr == 0) { @@ -943,7 +940,7 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() { for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) { if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) { std::array new_data; - const auto& source_lut = Pica::g_state.lighting.luts[index]; + const auto& source_lut = pica.lighting.luts[index]; std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), [](const auto& entry) { return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; @@ -968,7 +965,7 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() { if (fs_uniform_block_data.fog_lut_dirty || invalidate) { std::array new_data; - std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), + std::transform(pica.fog.lut.begin(), pica.fog.lut.end(), new_data.begin(), [](const auto& entry) { return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; }); @@ -1007,9 +1004,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap const auto sync_proc_tex_value_lut = - [this, buffer = buffer, offset = offset, invalidate = invalidate, - &bytes_used](const std::array& lut, - std::array& lut_data, GLint& lut_offset) { + [this, buffer = buffer, offset = offset, invalidate = invalidate, &bytes_used]( + const auto& lut, std::array& lut_data, GLint& lut_offset) { std::array new_data; std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; @@ -1027,21 +1023,21 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { // Sync the proctex noise lut if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) { - sync_proc_tex_value_lut(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, + sync_proc_tex_value_lut(pica.proctex.noise_table, proctex_noise_lut_data, 
fs_uniform_block_data.data.proctex_noise_lut_offset); fs_uniform_block_data.proctex_noise_lut_dirty = false; } // Sync the proctex color map if (fs_uniform_block_data.proctex_color_map_dirty || invalidate) { - sync_proc_tex_value_lut(Pica::g_state.proctex.color_map_table, proctex_color_map_data, + sync_proc_tex_value_lut(pica.proctex.color_map_table, proctex_color_map_data, fs_uniform_block_data.data.proctex_color_map_offset); fs_uniform_block_data.proctex_color_map_dirty = false; } // Sync the proctex alpha map if (fs_uniform_block_data.proctex_alpha_map_dirty || invalidate) { - sync_proc_tex_value_lut(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, + sync_proc_tex_value_lut(pica.proctex.alpha_map_table, proctex_alpha_map_data, fs_uniform_block_data.data.proctex_alpha_map_offset); fs_uniform_block_data.proctex_alpha_map_dirty = false; } @@ -1050,9 +1046,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { if (fs_uniform_block_data.proctex_lut_dirty || invalidate) { std::array new_data; - std::transform(Pica::g_state.proctex.color_table.begin(), - Pica::g_state.proctex.color_table.end(), new_data.begin(), - [](const auto& entry) { + std::transform(pica.proctex.color_table.begin(), pica.proctex.color_table.end(), + new_data.begin(), [](const auto& entry) { auto rgba = entry.ToVector() / 255.0f; return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; }); @@ -1073,9 +1068,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) { std::array new_data; - std::transform(Pica::g_state.proctex.color_diff_table.begin(), - Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), - [](const auto& entry) { + std::transform(pica.proctex.color_diff_table.begin(), pica.proctex.color_diff_table.end(), + new_data.begin(), [](const auto& entry) { auto rgba = entry.ToVector() / 255.0f; return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; }); @@ -1134,7 +1128,7 @@ void 
RasterizerOpenGL::UploadUniforms(bool accelerate_draw) { if (sync_vs_pica) { VSPicaUniformData vs_uniforms; - vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs); + vs_uniforms.uniforms.SetFromRegs(regs.vs, pica.vs_setup); std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::VSPicaData, uniform_buffer.GetHandle(), offset + used_bytes, sizeof(vs_uniforms)); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b102d8089..3fe5e8dde 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -4,10 +4,8 @@ #pragma once -#include "core/hw/gpu.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" -#include "video_core/regs_texturing.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -21,6 +19,12 @@ namespace VideoCore { class CustomTexManager; } +namespace Pica { +struct DisplayTransferConfig; +struct MemoryFillConfig; +struct FramebufferConfig; +} // namespace Pica + namespace OpenGL { struct ScreenInfo; @@ -30,7 +34,7 @@ class ShaderProgramManager; class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerOpenGL(Memory::MemorySystem& memory, + explicit RasterizerOpenGL(Memory::MemorySystem& memory, Pica::PicaCore& pica, VideoCore::CustomTexManager& custom_tex_manager, VideoCore::RendererBase& renderer, Driver& driver); ~RasterizerOpenGL() override; @@ -45,10 +49,10 @@ public: void InvalidateRegion(PAddr addr, u32 size) override; void FlushAndInvalidateRegion(PAddr addr, u32 size) override; void ClearAll(bool flush) override; - bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; - bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& 
config) override; - bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; - bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, + bool AccelerateDisplayTransfer(const Pica::DisplayTransferConfig& config) override; + bool AccelerateTextureCopy(const Pica::DisplayTransferConfig& config) override; + bool AccelerateFill(const Pica::MemoryFillConfig& config) override; + bool AccelerateDisplay(const Pica::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info); bool AccelerateDrawBatch(bool is_indexed) override; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 9f2ed2bab..065eeedbe 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -83,7 +83,7 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { } // Just for future proofing, save the sizes of the array to the file - const std::size_t reg_array_len = Pica::Regs::NUM_REGS; + const std::size_t reg_array_len = Pica::RegsInternal::NUM_REGS; if (file.WriteObject(static_cast(reg_array_len)) != 1) { return false; } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 8c285b032..4720b6ab7 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -4,23 +4,16 @@ #pragma once -#include -#include -#include #include #include -#include #include -#include #include - #include -#include "common/assert.h" #include "common/common_types.h" #include "common/file_util.h" -#include "video_core/regs.h" -#include "video_core/shader/generator/glsl_shader_gen.h" +#include "video_core/pica/regs_internal.h" +#include "video_core/shader/generator/shader_gen.h" namespace Core { class System; @@ -35,7 +28,7 @@ namespace OpenGL { struct 
ShaderDiskCacheDecompiled; struct ShaderDiskCacheDump; -using RawShaderConfig = Pica::Regs; +using RawShaderConfig = Pica::RegsInternal; using ProgramCode = std::vector; using ProgramType = Pica::Shader::Generator::ProgramType; using ShaderDecompiledMap = std::unordered_map; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index b2361731e..d9747ebfd 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -3,29 +3,33 @@ // Refer to the license.txt file included. #include +#include #include #include #include #include #include +#include "common/settings.h" #include "core/frontend/emu_window.h" +#include "video_core/pica/shader_setup.h" #include "video_core/renderer_opengl/gl_driver.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/shader/generator/glsl_fs_shader_gen.h" +#include "video_core/shader/generator/glsl_shader_gen.h" #include "video_core/shader/generator/profile.h" -#include "video_core/video_core.h" using namespace Pica::Shader::Generator; using Pica::Shader::FSConfig; namespace OpenGL { -static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code) { +static u64 GetUniqueIdentifier(const Pica::RegsInternal& regs, const ProgramCode& code) { std::size_t hash = 0; - u64 regs_uid = Common::ComputeHash64(regs.reg_array.data(), Pica::Regs::NUM_REGS * sizeof(u32)); + u64 regs_uid = + Common::ComputeHash64(regs.reg_array.data(), Pica::RegsInternal::NUM_REGS * sizeof(u32)); hash = Common::HashCombine(hash, regs_uid); if (code.size() > 0) { @@ -77,15 +81,14 @@ static std::set GetSupportedFormats() { return supported_formats; } -static std::tuple BuildVSConfigFromRaw( +static std::tuple BuildVSConfigFromRaw( 
const ShaderDiskCacheRaw& raw, const Driver& driver) { - Pica::Shader::ProgramCode program_code{}; - Pica::Shader::SwizzleData swizzle_data{}; - std::copy_n(raw.GetProgramCode().begin(), Pica::Shader::MAX_PROGRAM_CODE_LENGTH, - program_code.begin()); - std::copy_n(raw.GetProgramCode().begin() + Pica::Shader::MAX_PROGRAM_CODE_LENGTH, - Pica::Shader::MAX_SWIZZLE_DATA_LENGTH, swizzle_data.begin()); - Pica::Shader::ShaderSetup setup; + Pica::ProgramCode program_code{}; + Pica::SwizzleData swizzle_data{}; + std::copy_n(raw.GetProgramCode().begin(), Pica::MAX_PROGRAM_CODE_LENGTH, program_code.begin()); + std::copy_n(raw.GetProgramCode().begin() + Pica::MAX_PROGRAM_CODE_LENGTH, + Pica::MAX_SWIZZLE_DATA_LENGTH, swizzle_data.begin()); + Pica::ShaderSetup setup; setup.program_code = program_code; setup.swizzle_data = swizzle_data; @@ -193,14 +196,13 @@ private: // program buffer from the previous shader, which is hashed into the config, resulting several // different config values from the same shader program. template class ShaderDoubleCache { public: explicit ShaderDoubleCache(bool separable) : separable(separable) {} std::tuple> Get(const KeyConfigType& key, - const Pica::Shader::ShaderSetup& setup) { + const Pica::ShaderSetup& setup) { std::optional result{}; auto map_it = shader_map.find(key); if (map_it == shader_map.end()) { @@ -334,8 +336,8 @@ ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, con ShaderProgramManager::~ShaderProgramManager() = default; -bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, - Pica::Shader::ShaderSetup& setup) { +bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::RegsInternal& regs, + Pica::ShaderSetup& setup) { // Enable the geometry-shader only if we are actually doing per-fragment lighting // and care about proper quaternions. 
Otherwise just use standard vertex+fragment shaders const bool use_geometry_shader = !regs.lighting.disable; @@ -356,8 +358,9 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, const u64 unique_identifier = GetUniqueIdentifier(regs, program_code); const ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs, std::move(program_code)}; + const bool sanitize_mul = Settings::values.shaders_accurate_mul.GetValue(); disk_cache.SaveRaw(raw); - disk_cache.SaveDecompiled(unique_identifier, *result, VideoCore::g_hw_shader_accurate_mul); + disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul); } return true; } @@ -367,7 +370,7 @@ void ShaderProgramManager::UseTrivialVertexShader() { impl->current.vs_hash = 0; } -void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) { +void ShaderProgramManager::UseFixedGeometryShader(const Pica::RegsInternal& regs) { PicaFixedGSConfig gs_config(regs, driver.HasClipCullDistance()); auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config, impl->separable); impl->current.gs = handle; @@ -379,7 +382,7 @@ void ShaderProgramManager::UseTrivialGeometryShader() { impl->current.gs_hash = 0; } -void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs, +void ShaderProgramManager::UseFragmentShader(const Pica::RegsInternal& regs, const Pica::Shader::UserConfig& user) { const FSConfig fs_config{regs, user, impl->profile}; auto [handle, result] = impl->fragment_shaders.Get(fs_config, impl->profile); @@ -415,8 +418,8 @@ void ShaderProgramManager::ApplyTo(OpenGLState& state) { cached_program.Create(false, std::array{impl->current.vs, impl->current.gs, impl->current.fs}); auto& disk_cache = impl->disk_cache; - disk_cache.SaveDumpToFile(unique_identifier, cached_program.handle, - VideoCore::g_hw_shader_accurate_mul); + const bool sanitize_mul = Settings::values.shaders_accurate_mul.GetValue(); + disk_cache.SaveDumpToFile(unique_identifier, cached_program.handle, 
sanitize_mul); } state.draw.shader_program = cached_program.handle; } @@ -481,8 +484,9 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, if (dump != dump_map.end() && decomp != decompiled_map.end()) { // Only load the vertex shader if its sanitize_mul setting matches + const bool sanitize_mul = Settings::values.shaders_accurate_mul.GetValue(); if (raw.GetProgramType() == ProgramType::VS && - decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) { + decomp->second.sanitize_mul != sanitize_mul) { continue; } @@ -537,7 +541,8 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, const auto decomp{decompiled_map.find(unique_identifier)}; // Only load the program if its sanitize_mul setting matches - if (decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) { + const bool sanitize_mul = Settings::values.shaders_accurate_mul.GetValue(); + if (decomp->second.sanitize_mul != sanitize_mul) { continue; } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 82dee27aa..705772b17 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -12,13 +12,13 @@ class EmuWindow; } namespace Pica { -struct Regs; -} +struct RegsInternal; +struct ShaderSetup; +} // namespace Pica namespace Pica::Shader { -struct ShaderSetup; union UserConfig; -} // namespace Pica::Shader +} namespace OpenGL { @@ -40,15 +40,15 @@ public: void LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback); - bool UseProgrammableVertexShader(const Pica::Regs& config, Pica::Shader::ShaderSetup& setup); + bool UseProgrammableVertexShader(const Pica::RegsInternal& config, Pica::ShaderSetup& setup); void UseTrivialVertexShader(); - void UseFixedGeometryShader(const Pica::Regs& regs); + void UseFixedGeometryShader(const Pica::RegsInternal& regs); void 
UseTrivialGeometryShader(); - void UseFragmentShader(const Pica::Regs& config, const Pica::Shader::UserConfig& user); + void UseFragmentShader(const Pica::RegsInternal& config, const Pica::Shader::UserConfig& user); void ApplyTo(OpenGLState& state); diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 94ed61a6e..15a53b771 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -10,9 +10,9 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/telemetry_session.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_lighting.h" -#include "video_core/regs_texturing.h" +#include "video_core/pica/regs_framebuffer.h" +#include "video_core/pica/regs_lighting.h" +#include "video_core/pica/regs_texturing.h" namespace PicaToGL { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c58f07a2c..341bfdc72 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -8,15 +8,13 @@ #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/frontend/framebuffer_layout.h" -#include "core/hw/hw.h" -#include "core/hw/lcd.h" #include "core/memory.h" +#include "video_core/pica/pica_core.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_mailbox.h" #include "video_core/renderer_opengl/post_processing_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/shader/generator/glsl_shader_gen.h" -#include "video_core/video_core.h" #include "video_core/host_shaders/opengl_present_anaglyph_frag.h" #include "video_core/host_shaders/opengl_present_frag.h" @@ -74,11 +72,12 @@ static std::array MakeOrthographicMatrix(const float width, cons return matrix; } -RendererOpenGL::RendererOpenGL(Core::System& system, Frontend::EmuWindow& 
window, - Frontend::EmuWindow* secondary_window) - : VideoCore::RendererBase{system, window, secondary_window}, driver{system.TelemetrySession()}, - rasterizer{system.Memory(), system.CustomTexManager(), *this, driver}, frame_dumper{system, - window} { +RendererOpenGL::RendererOpenGL(Core::System& system, Pica::PicaCore& pica_, + Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window) + : VideoCore::RendererBase{system, window, secondary_window}, pica{pica_}, + driver{system.TelemetrySession()}, rasterizer{system.Memory(), pica, + system.CustomTexManager(), *this, driver}, + frame_dumper{system, window} { const bool has_debug_tool = driver.HasDebugTool(); window.mailbox = std::make_unique(has_debug_tool); if (secondary_window) { @@ -156,39 +155,24 @@ void RendererOpenGL::RenderScreenshot() { } void RendererOpenGL::PrepareRendertarget() { - for (int i : {0, 1, 2}) { - int fb_id = i == 2 ? 1 : 0; - const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; - - // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 - u32 lcd_color_addr = - (fb_id == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); - lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; - LCD::Regs::ColorFill color_fill = {0}; - LCD::Read(color_fill.raw, lcd_color_addr); + const auto& framebuffer_config = pica.regs.framebuffer_config; + const auto& regs_lcd = pica.regs_lcd; + for (u32 i = 0; i < 3; i++) { + const u32 fb_id = i == 2 ? 1 : 0; + const auto& framebuffer = framebuffer_config[fb_id]; + auto& texture = screen_infos[i].texture; + const auto color_fill = fb_id == 0 ? 
regs_lcd.color_fill_top : regs_lcd.color_fill_bottom; if (color_fill.is_enabled) { - LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, - screen_infos[i].texture); - - // Resize the texture in case the framebuffer size has changed - screen_infos[i].texture.width = 1; - screen_infos[i].texture.height = 1; - } else { - if (screen_infos[i].texture.width != (GLsizei)framebuffer.width || - screen_infos[i].texture.height != (GLsizei)framebuffer.height || - screen_infos[i].texture.format != framebuffer.color_format) { - // Reallocate texture if the framebuffer size has changed. - // This is expected to not happen very often and hence should not be a - // performance problem. - ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer); - } - LoadFBToScreenInfo(framebuffer, screen_infos[i], i == 1); - - // Resize the texture in case the framebuffer size has changed - screen_infos[i].texture.width = framebuffer.width; - screen_infos[i].texture.height = framebuffer.height; + FillScreen(color_fill.AsVector(), texture); + continue; } + + if (texture.width != framebuffer.width || texture.height != framebuffer.height || + texture.format != framebuffer.color_format) { + ConfigureFramebufferTexture(texture, framebuffer); + } + LoadFBToScreenInfo(framebuffer, screen_infos[i], i == 1); } } @@ -245,7 +229,7 @@ void RendererOpenGL::RenderToMailbox(const Layout::FramebufferLayout& layout, /** * Loads framebuffer from emulated memory into the active OpenGL texture. 
*/ -void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, +void RendererOpenGL::LoadFBToScreenInfo(const Pica::FramebufferConfig& framebuffer, ScreenInfo& screen_info, bool right_eye) { if (framebuffer.address_right1 == 0 || framebuffer.address_right2 == 0) @@ -260,7 +244,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram framebuffer.stride * framebuffer.height, framebuffer_addr, framebuffer.width.Value(), framebuffer.height.Value(), framebuffer.format); - int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); + int bpp = Pica::BytesPerPixel(framebuffer.color_format); std::size_t pixel_stride = framebuffer.stride / bpp; // OpenGL only supports specifying a stride in units of pixels, not bytes, unfortunately @@ -274,11 +258,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram screen_info)) { // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; - screen_info.display_texcoords = Common::Rectangle(0.f, 0.f, 1.f, 1.f); + screen_info.display_texcoords = Common::Rectangle(0.f, 0.f, 1.f, 1.f); - Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); + rasterizer.FlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); - const u8* framebuffer_data = VideoCore::g_memory->GetPhysicalPointer(framebuffer_addr); + const u8* framebuffer_data = system.Memory().GetPhysicalPointer(framebuffer_addr); state.texture_units[0].texture_2d = screen_info.texture.resource.handle; state.Apply(); @@ -302,23 +286,21 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram } } -/** - * Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can - * be 1x1 but will stretch across whatever it's rendered on. 
- */ -void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, - const TextureInfo& texture) { +void RendererOpenGL::FillScreen(Common::Vec3 color, TextureInfo& texture) { state.texture_units[0].texture_2d = texture.resource.handle; state.Apply(); glActiveTexture(GL_TEXTURE0); - u8 framebuffer_data[3] = {color_r, color_g, color_b}; // Update existing texture - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, color.AsArray()); state.texture_units[0].texture_2d = 0; state.Apply(); + + // Resize the texture in case the framebuffer size has changed + texture.width = 1; + texture.height = 1; } /** @@ -446,8 +428,8 @@ void RendererOpenGL::ReloadShader() { } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, - const GPU::Regs::FramebufferConfig& framebuffer) { - GPU::Regs::PixelFormat format = framebuffer.color_format; + const Pica::FramebufferConfig& framebuffer) { + Pica::PixelFormat format = framebuffer.color_format; GLint internal_format{}; texture.format = format; @@ -455,13 +437,13 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, texture.height = framebuffer.height; switch (format) { - case GPU::Regs::PixelFormat::RGBA8: + case Pica::PixelFormat::RGBA8: internal_format = GL_RGBA; texture.gl_format = GL_RGBA; texture.gl_type = driver.IsOpenGLES() ? GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_8_8_8_8; break; - case GPU::Regs::PixelFormat::RGB8: + case Pica::PixelFormat::RGB8: // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every // specific OpenGL type used in this function using native-endian (that is, little-endian // mostly everywhere) for words or half-words. 
@@ -473,19 +455,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, texture.gl_type = GL_UNSIGNED_BYTE; break; - case GPU::Regs::PixelFormat::RGB565: + case Pica::PixelFormat::RGB565: internal_format = GL_RGB; texture.gl_format = GL_RGB; texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; break; - case GPU::Regs::PixelFormat::RGB5A1: + case Pica::PixelFormat::RGB5A1: internal_format = GL_RGBA; texture.gl_format = GL_RGBA; texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1; break; - case GPU::Regs::PixelFormat::RGBA4: + case Pica::PixelFormat::RGBA4: internal_format = GL_RGBA; texture.gl_format = GL_RGBA; texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 5e005e8dc..a335f516e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -5,7 +5,6 @@ #pragma once #include -#include "core/hw/gpu.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/frame_dumper_opengl.h" #include "video_core/renderer_opengl/gl_driver.h" @@ -26,9 +25,9 @@ namespace OpenGL { /// Structure used for storing information about the textures for each 3DS screen struct TextureInfo { OGLTexture resource; - GLsizei width; - GLsizei height; - GPU::Regs::PixelFormat format; + u32 width; + u32 height; + Pica::PixelFormat format; GLenum gl_format; GLenum gl_type; }; @@ -42,7 +41,7 @@ struct ScreenInfo { class RendererOpenGL : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::System& system, Frontend::EmuWindow& window, + explicit RendererOpenGL(Core::System& system, Pica::PicaCore& pica, Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window); ~RendererOpenGL() override; @@ -64,7 +63,7 @@ private: void RenderToMailbox(const Layout::FramebufferLayout& layout, std::unique_ptr& mailbox, bool flipped); void ConfigureFramebufferTexture(TextureInfo& texture, - const 
GPU::Regs::FramebufferConfig& framebuffer); + const Pica::FramebufferConfig& framebuffer); void DrawScreens(const Layout::FramebufferLayout& layout, bool flipped); void ApplySecondLayerOpacity(); void ResetSecondLayerOpacity(); @@ -79,12 +78,12 @@ private: Layout::DisplayOrientation orientation); // Loads framebuffer from emulated memory into the display information structure - void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, - ScreenInfo& screen_info, bool right_eye); - // Fills active OpenGL texture with the given RGB color. - void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); + void LoadFBToScreenInfo(const Pica::FramebufferConfig& framebuffer, ScreenInfo& screen_info, + bool right_eye); + void FillScreen(Common::Vec3 color, TextureInfo& texture); private: + Pica::PicaCore& pica; Driver driver; RasterizerOpenGL rasterizer; OpenGLState state; diff --git a/src/video_core/renderer_software/renderer_software.cpp b/src/video_core/renderer_software/renderer_software.cpp index 0a8f88950..226de33d5 100644 --- a/src/video_core/renderer_software/renderer_software.cpp +++ b/src/video_core/renderer_software/renderer_software.cpp @@ -4,16 +4,16 @@ #include "common/color.h" #include "core/core.h" -#include "core/hw/gpu.h" -#include "core/hw/hw.h" -#include "core/hw/lcd.h" +#include "video_core/gpu.h" +#include "video_core/pica/pica_core.h" #include "video_core/renderer_software/renderer_software.h" namespace SwRenderer { -RendererSoftware::RendererSoftware(Core::System& system, Frontend::EmuWindow& window) - : VideoCore::RendererBase{system, window, nullptr}, memory{system.Memory()}, - rasterizer{system.Memory()} {} +RendererSoftware::RendererSoftware(Core::System& system, Pica::PicaCore& pica_, + Frontend::EmuWindow& window) + : VideoCore::RendererBase{system, window, nullptr}, memory{system.Memory()}, pica{pica_}, + rasterizer{memory, pica} {} RendererSoftware::~RendererSoftware() = default; @@ -23,15 
+23,11 @@ void RendererSoftware::SwapBuffers() { } void RendererSoftware::PrepareRenderTarget() { + const auto& regs_lcd = pica.regs_lcd; for (u32 i = 0; i < 3; i++) { - const int fb_id = i == 2 ? 1 : 0; - - u32 lcd_color_addr = - (fb_id == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); - lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; - LCD::Regs::ColorFill color_fill = {0}; - LCD::Read(color_fill.raw, lcd_color_addr); + const u32 fb_id = i == 2 ? 1 : 0; + const auto color_fill = fb_id == 0 ? regs_lcd.color_fill_top : regs_lcd.color_fill_bottom; if (!color_fill.is_enabled) { LoadFBToScreenInfo(i); } @@ -40,12 +36,12 @@ void RendererSoftware::PrepareRenderTarget() { void RendererSoftware::LoadFBToScreenInfo(int i) { const u32 fb_id = i == 2 ? 1 : 0; - const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; + const auto& framebuffer = pica.regs.framebuffer_config[fb_id]; auto& info = screen_infos[i]; const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? 
framebuffer.address_left1 : framebuffer.address_left2; - const s32 bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); + const s32 bpp = Pica::BytesPerPixel(framebuffer.color_format); const u8* framebuffer_data = memory.GetPhysicalPointer(framebuffer_addr); const s32 pixel_stride = framebuffer.stride / bpp; @@ -58,15 +54,15 @@ void RendererSoftware::LoadFBToScreenInfo(int i) { const u8* pixel = framebuffer_data + (y * pixel_stride + pixel_stride - x) * bpp; const Common::Vec4 color = [&] { switch (framebuffer.color_format) { - case GPU::Regs::PixelFormat::RGBA8: + case Pica::PixelFormat::RGBA8: return Common::Color::DecodeRGBA8(pixel); - case GPU::Regs::PixelFormat::RGB8: + case Pica::PixelFormat::RGB8: return Common::Color::DecodeRGB8(pixel); - case GPU::Regs::PixelFormat::RGB565: + case Pica::PixelFormat::RGB565: return Common::Color::DecodeRGB565(pixel); - case GPU::Regs::PixelFormat::RGB5A1: + case Pica::PixelFormat::RGB5A1: return Common::Color::DecodeRGB5A1(pixel); - case GPU::Regs::PixelFormat::RGBA4: + case Pica::PixelFormat::RGBA4: return Common::Color::DecodeRGBA4(pixel); } UNREACHABLE(); diff --git a/src/video_core/renderer_software/renderer_software.h b/src/video_core/renderer_software/renderer_software.h index 2df5550c4..4f1d9d370 100644 --- a/src/video_core/renderer_software/renderer_software.h +++ b/src/video_core/renderer_software/renderer_software.h @@ -21,7 +21,8 @@ struct ScreenInfo { class RendererSoftware : public VideoCore::RendererBase { public: - explicit RendererSoftware(Core::System& system, Frontend::EmuWindow& window); + explicit RendererSoftware(Core::System& system, Pica::PicaCore& pica, + Frontend::EmuWindow& window); ~RendererSoftware() override; [[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() override { @@ -42,6 +43,7 @@ private: private: Memory::MemorySystem& memory; + Pica::PicaCore& pica; RasterizerSoftware rasterizer; std::array screen_infos{}; }; diff --git a/src/video_core/renderer_software/sw_blitter.cpp 
b/src/video_core/renderer_software/sw_blitter.cpp new file mode 100644 index 000000000..9a8361a34 --- /dev/null +++ b/src/video_core/renderer_software/sw_blitter.cpp @@ -0,0 +1,346 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/alignment.h" +#include "common/color.h" +#include "common/vector_math.h" +#include "core/memory.h" +#include "video_core/pica/regs_external.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_software/sw_blitter.h" +#include "video_core/utils.h" + +namespace SwRenderer { + +static Common::Vec4 DecodePixel(Pica::PixelFormat input_format, const u8* src_pixel) { + switch (input_format) { + case Pica::PixelFormat::RGBA8: + return Common::Color::DecodeRGBA8(src_pixel); + case Pica::PixelFormat::RGB8: + return Common::Color::DecodeRGB8(src_pixel); + case Pica::PixelFormat::RGB565: + return Common::Color::DecodeRGB565(src_pixel); + case Pica::PixelFormat::RGB5A1: + return Common::Color::DecodeRGB5A1(src_pixel); + case Pica::PixelFormat::RGBA4: + return Common::Color::DecodeRGBA4(src_pixel); + default: + LOG_ERROR(HW_GPU, "Unknown source framebuffer format {:x}", input_format); + return {0, 0, 0, 0}; + } +} + +SwBlitter::SwBlitter(Memory::MemorySystem& memory_, VideoCore::RasterizerInterface* rasterizer_) + : memory{memory_}, rasterizer{rasterizer_} {} + +SwBlitter::~SwBlitter() = default; + +void SwBlitter::TextureCopy(const Pica::DisplayTransferConfig& config) { + const PAddr src_addr = config.GetPhysicalInputAddress(); + const PAddr dst_addr = config.GetPhysicalOutputAddress(); + + // TODO: do hwtest with invalid addresses + if (!memory.IsValidPhysicalAddress(src_addr)) { + LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr); + return; + } + + if (!memory.IsValidPhysicalAddress(dst_addr)) { + LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr); + return; + } + + u8* src_pointer = 
memory.GetPhysicalPointer(src_addr); + u8* dst_pointer = memory.GetPhysicalPointer(dst_addr); + + u32 remaining_size = Common::AlignDown(config.texture_copy.size, 16); + if (remaining_size == 0) { + LOG_CRITICAL(HW_GPU, "zero size. Real hardware freezes on this."); + return; + } + + u32 input_gap = config.texture_copy.input_gap * 16; + u32 output_gap = config.texture_copy.output_gap * 16; + + // Zero gap means contiguous input/output even if width = 0. To avoid infinite loop below, width + // is assigned with the total size if gap = 0. + u32 input_width = input_gap == 0 ? remaining_size : config.texture_copy.input_width * 16; + u32 output_width = output_gap == 0 ? remaining_size : config.texture_copy.output_width * 16; + + if (input_width == 0) { + LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this."); + return; + } + + if (output_width == 0) { + LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this."); + return; + } + + const size_t contiguous_input_size = + config.texture_copy.size / input_width * (input_width + input_gap); + rasterizer->FlushRegion(config.GetPhysicalInputAddress(), + static_cast(contiguous_input_size)); + + const size_t contiguous_output_size = + config.texture_copy.size / output_width * (output_width + output_gap); + + // Only need to flush output if it has a gap + if (output_gap != 0) { + rasterizer->FlushAndInvalidateRegion(dst_addr, static_cast(contiguous_output_size)); + } else { + rasterizer->InvalidateRegion(dst_addr, static_cast(contiguous_output_size)); + } + + u32 remaining_input = input_width; + u32 remaining_output = output_width; + while (remaining_size > 0) { + u32 copy_size = std::min({remaining_input, remaining_output, remaining_size}); + + std::memcpy(dst_pointer, src_pointer, copy_size); + src_pointer += copy_size; + dst_pointer += copy_size; + + remaining_input -= copy_size; + remaining_output -= copy_size; + remaining_size -= copy_size; + + if (remaining_input == 0) { + remaining_input = 
input_width; + src_pointer += input_gap; + } + if (remaining_output == 0) { + remaining_output = output_width; + dst_pointer += output_gap; + } + } +} + +void SwBlitter::DisplayTransfer(const Pica::DisplayTransferConfig& config) { + const PAddr src_addr = config.GetPhysicalInputAddress(); + PAddr dst_addr = config.GetPhysicalOutputAddress(); + + // TODO: do hwtest with these cases + if (!memory.IsValidPhysicalAddress(src_addr)) { + LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr); + return; + } + + if (!memory.IsValidPhysicalAddress(dst_addr)) { + LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr); + return; + } + + if (config.input_width == 0) { + LOG_CRITICAL(HW_GPU, "zero input width"); + return; + } + + if (config.input_height == 0) { + LOG_CRITICAL(HW_GPU, "zero input height"); + return; + } + + if (config.output_width == 0) { + LOG_CRITICAL(HW_GPU, "zero output width"); + return; + } + + if (config.output_height == 0) { + LOG_CRITICAL(HW_GPU, "zero output height"); + return; + } + + // Using flip_vertically alongside crop_input_lines produces skewed output on hardware. + // We have to emulate this because some games rely on this behaviour to render correctly. + if (config.flip_vertically && config.crop_input_lines) { + dst_addr += (config.input_width - config.output_width) * (config.output_height - 1) * + BytesPerPixel(config.output_format); + } + + u8* src_pointer = memory.GetPhysicalPointer(src_addr); + u8* dst_pointer = memory.GetPhysicalPointer(dst_addr); + + if (config.scaling > config.ScaleXY) { + LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode {}", + config.scaling.Value()); + UNIMPLEMENTED(); + return; + } + + if (config.input_linear && config.scaling != config.NoScale) { + LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); + UNIMPLEMENTED(); + return; + } + + const u32 horizontal_scale = config.scaling != config.NoScale ? 
1 : 0; + const u32 vertical_scale = config.scaling == config.ScaleXY ? 1 : 0; + + const u32 output_width = config.output_width >> horizontal_scale; + const u32 output_height = config.output_height >> vertical_scale; + + const u32 input_size = + config.input_width * config.input_height * BytesPerPixel(config.input_format); + const u32 output_size = output_width * output_height * BytesPerPixel(config.output_format); + + rasterizer->FlushRegion(config.GetPhysicalInputAddress(), input_size); + rasterizer->InvalidateRegion(config.GetPhysicalOutputAddress(), output_size); + + for (u32 y = 0; y < output_height; ++y) { + for (u32 x = 0; x < output_width; ++x) { + Common::Vec4 src_color; + + // Calculate the [x,y] position of the input image + // based on the current output position and the scale + const u32 input_x = x << horizontal_scale; + const u32 input_y = y << vertical_scale; + + u32 output_y; + if (config.flip_vertically) { + // Flip the y value of the output data, + // we do this after calculating the [x,y] position of the input image + // to account for the scaling options. 
+ output_y = output_height - y - 1; + } else { + output_y = y; + } + + const u32 dst_bytes_per_pixel = BytesPerPixel(config.output_format); + const u32 src_bytes_per_pixel = BytesPerPixel(config.input_format); + u32 src_offset; + u32 dst_offset; + + if (config.input_linear) { + if (!config.dont_swizzle) { + // Interpret the input as linear and the output as tiled + u32 coarse_y = output_y & ~7; + u32 stride = output_width * dst_bytes_per_pixel; + + src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; + dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) + + coarse_y * stride; + } else { + // Both input and output are linear + src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; + dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel; + } + } else { + if (!config.dont_swizzle) { + // Interpret the input as tiled and the output as linear + const u32 coarse_y = input_y & ~7; + const u32 stride = config.input_width * src_bytes_per_pixel; + + src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + + coarse_y * stride; + dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel; + } else { + // Both input and output are tiled + const u32 out_coarse_y = output_y & ~7; + const u32 out_stride = output_width * dst_bytes_per_pixel; + + const u32 in_coarse_y = input_y & ~7; + const u32 in_stride = config.input_width * src_bytes_per_pixel; + + src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + + in_coarse_y * in_stride; + dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) + + out_coarse_y * out_stride; + } + } + + const u8* src_pixel = src_pointer + src_offset; + src_color = DecodePixel(config.input_format, src_pixel); + if (config.scaling == config.ScaleX) { + const auto pixel = + DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); + src_color = ((src_color + pixel) / 2).Cast(); + } else if 
(config.scaling == config.ScaleXY) { + const auto pixel1 = + DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); + const auto pixel2 = + DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); + const auto pixel3 = + DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); + src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast(); + } + + u8* dst_pixel = dst_pointer + dst_offset; + switch (config.output_format) { + case Pica::PixelFormat::RGBA8: + Common::Color::EncodeRGBA8(src_color, dst_pixel); + break; + case Pica::PixelFormat::RGB8: + Common::Color::EncodeRGB8(src_color, dst_pixel); + break; + case Pica::PixelFormat::RGB565: + Common::Color::EncodeRGB565(src_color, dst_pixel); + break; + case Pica::PixelFormat::RGB5A1: + Common::Color::EncodeRGB5A1(src_color, dst_pixel); + break; + case Pica::PixelFormat::RGBA4: + Common::Color::EncodeRGBA4(src_color, dst_pixel); + break; + default: + LOG_ERROR(HW_GPU, "Unknown destination framebuffer format {:x}", + static_cast(config.output_format.Value())); + break; + } + } + } +} + +void SwBlitter::MemoryFill(const Pica::MemoryFillConfig& config) { + const PAddr start_addr = config.GetStartAddress(); + const PAddr end_addr = config.GetEndAddress(); + + // TODO: do hwtest with these cases + if (!memory.IsValidPhysicalAddress(start_addr)) { + LOG_CRITICAL(HW_GPU, "invalid start address {:#010X}", start_addr); + return; + } + + if (!memory.IsValidPhysicalAddress(end_addr)) { + LOG_CRITICAL(HW_GPU, "invalid end address {:#010X}", end_addr); + return; + } + + if (end_addr <= start_addr) { + LOG_CRITICAL(HW_GPU, "invalid memory range from {:#010X} to {:#010X}", start_addr, + end_addr); + return; + } + + u8* start = memory.GetPhysicalPointer(start_addr); + u8* end = memory.GetPhysicalPointer(end_addr); + + rasterizer->InvalidateRegion(start_addr, end_addr - start_addr); + + if (config.fill_24bit) { + // Fill with 24-bit values + for (u8* ptr = start; ptr < end; ptr += 3) 
{ + ptr[0] = config.value_24bit_r; + ptr[1] = config.value_24bit_g; + ptr[2] = config.value_24bit_b; + } + } else if (config.fill_32bit) { + // Fill with 32-bit values + if (end > start) { + const u32 value = config.value_32bit; + const size_t len = (end - start) / sizeof(u32); + for (std::size_t i = 0; i < len; ++i) { + std::memcpy(&start[i * sizeof(u32)], &value, sizeof(u32)); + } + } + } else { + // Fill with 16-bit values + const u16 value_16bit = config.value_16bit.Value(); + for (u8* ptr = start; ptr < end; ptr += sizeof(u16)) { + std::memcpy(ptr, &value_16bit, sizeof(u16)); + } + } +} + +} // namespace SwRenderer diff --git a/src/video_core/renderer_software/sw_blitter.h b/src/video_core/renderer_software/sw_blitter.h new file mode 100644 index 000000000..ca855c64b --- /dev/null +++ b/src/video_core/renderer_software/sw_blitter.h @@ -0,0 +1,38 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Pica { +struct DisplayTransferConfig; +struct MemoryFillConfig; +} // namespace Pica + +namespace Memory { +class MemorySystem; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace SwRenderer { + +class SwBlitter { +public: + explicit SwBlitter(Memory::MemorySystem& memory, VideoCore::RasterizerInterface* rasterizer); + ~SwBlitter(); + + void TextureCopy(const Pica::DisplayTransferConfig& config); + + void DisplayTransfer(const Pica::DisplayTransferConfig& config); + + void MemoryFill(const Pica::MemoryFillConfig& config); + +private: + Memory::MemorySystem& memory; + VideoCore::RasterizerInterface* rasterizer; +}; + +} // namespace SwRenderer diff --git a/src/video_core/renderer_software/sw_clipper.cpp b/src/video_core/renderer_software/sw_clipper.cpp index f8d1192d6..03a287a37 100644 --- a/src/video_core/renderer_software/sw_clipper.cpp +++ b/src/video_core/renderer_software/sw_clipper.cpp @@ -4,7 +4,7 @@ #include #include -#include 
"video_core/regs_texturing.h" +#include "video_core/pica/regs_texturing.h" #include "video_core/renderer_software/sw_clipper.h" namespace SwRenderer { diff --git a/src/video_core/renderer_software/sw_framebuffer.cpp b/src/video_core/renderer_software/sw_framebuffer.cpp index 2fbcc6844..cc8ea49fb 100644 --- a/src/video_core/renderer_software/sw_framebuffer.cpp +++ b/src/video_core/renderer_software/sw_framebuffer.cpp @@ -5,10 +5,10 @@ #include #include "common/color.h" #include "common/logging/log.h" -#include "core/hw/gpu.h" #include "core/memory.h" +#include "video_core/pica/regs_external.h" +#include "video_core/pica/regs_framebuffer.h" #include "video_core/pica_types.h" -#include "video_core/regs_framebuffer.h" #include "video_core/renderer_software/sw_framebuffer.h" #include "video_core/utils.h" @@ -63,7 +63,7 @@ void Framebuffer::DrawPixel(u32 x, u32 y, const Common::Vec4& color) const { const u32 coarse_y = y & ~7; const u32 bytes_per_pixel = - GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); + Pica::BytesPerPixel(Pica::PixelFormat(framebuffer.color_format.Value())); const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; u8* dst_pixel = color_buffer + dst_offset; @@ -97,7 +97,7 @@ const Common::Vec4 Framebuffer::GetPixel(u32 x, u32 y) const { const u32 coarse_y = y & ~7; const u32 bytes_per_pixel = - GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); + Pica::BytesPerPixel(Pica::PixelFormat(framebuffer.color_format.Value())); const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; const u8* src_pixel = color_buffer + src_offset; diff --git a/src/video_core/renderer_software/sw_framebuffer.h b/src/video_core/renderer_software/sw_framebuffer.h index db5c8d259..55d7276cc 100644 --- a/src/video_core/renderer_software/sw_framebuffer.h +++ 
b/src/video_core/renderer_software/sw_framebuffer.h @@ -6,7 +6,7 @@ #include "common/common_types.h" #include "common/vector_math.h" -#include "video_core/regs_framebuffer.h" +#include "video_core/pica/regs_framebuffer.h" namespace Memory { class MemorySystem; diff --git a/src/video_core/renderer_software/sw_lighting.cpp b/src/video_core/renderer_software/sw_lighting.cpp index 5dd8334eb..5ddf4d617 100644 --- a/src/video_core/renderer_software/sw_lighting.cpp +++ b/src/video_core/renderer_software/sw_lighting.cpp @@ -10,7 +10,7 @@ namespace SwRenderer { using Pica::f16; using Pica::LightingRegs; -static float LookupLightingLut(const Pica::State::Lighting& lighting, std::size_t lut_index, +static float LookupLightingLut(const Pica::PicaCore::Lighting& lighting, std::size_t lut_index, u8 index, float delta) { ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index"); @@ -24,7 +24,7 @@ static float LookupLightingLut(const Pica::State::Lighting& lighting, std::size_ } std::pair, Common::Vec4> ComputeFragmentsColors( - const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, + const Pica::LightingRegs& lighting, const Pica::PicaCore::Lighting& lighting_state, const Common::Quaternion& normquat, const Common::Vec3f& view, std::span, 4> texture_color) { diff --git a/src/video_core/renderer_software/sw_lighting.h b/src/video_core/renderer_software/sw_lighting.h index 98a8d3235..90b75c21e 100644 --- a/src/video_core/renderer_software/sw_lighting.h +++ b/src/video_core/renderer_software/sw_lighting.h @@ -9,12 +9,12 @@ #include "common/quaternion.h" #include "common/vector_math.h" -#include "video_core/pica_state.h" +#include "video_core/pica/pica_core.h" namespace SwRenderer { std::pair, Common::Vec4> ComputeFragmentsColors( - const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, + const Pica::LightingRegs& lighting, const 
Pica::PicaCore::Lighting& lighting_state, const Common::Quaternion& normquat, const Common::Vec3f& view, std::span, 4> texture_color); diff --git a/src/video_core/renderer_software/sw_proctex.cpp b/src/video_core/renderer_software/sw_proctex.cpp index 855208e57..862fc3036 100644 --- a/src/video_core/renderer_software/sw_proctex.cpp +++ b/src/video_core/renderer_software/sw_proctex.cpp @@ -15,7 +15,7 @@ using ProcTexCombiner = Pica::TexturingRegs::ProcTexCombiner; using ProcTexFilter = Pica::TexturingRegs::ProcTexFilter; using Pica::f16; -float LookupLUT(const std::array& lut, float coord) { +float LookupLUT(const std::array& lut, float coord) { // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using // value entries and difference entries. @@ -47,7 +47,7 @@ float NoiseRand2D(unsigned int x, unsigned int y) { } float NoiseCoef(float u, float v, const Pica::TexturingRegs& regs, - const Pica::State::ProcTex& state) { + const Pica::PicaCore::ProcTex& state) { const float freq_u = f16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(); const float freq_v = f16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(); const float phase_u = f16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(); @@ -115,7 +115,7 @@ void ClampCoord(float& coord, ProcTexClamp mode) { } float CombineAndMap(float u, float v, ProcTexCombiner combiner, - const std::array& map_table) { + const std::array& map_table) { float f; switch (combiner) { case ProcTexCombiner::U: @@ -158,7 +158,7 @@ float CombineAndMap(float u, float v, ProcTexCombiner combiner, } // Anonymous namespace Common::Vec4 ProcTex(float u, float v, const Pica::TexturingRegs& regs, - const Pica::State::ProcTex& state) { + const Pica::PicaCore::ProcTex& state) { u = std::abs(u); v = std::abs(v); diff --git a/src/video_core/renderer_software/sw_proctex.h b/src/video_core/renderer_software/sw_proctex.h index 
69836397f..49a97ea5d 100644 --- a/src/video_core/renderer_software/sw_proctex.h +++ b/src/video_core/renderer_software/sw_proctex.h @@ -6,12 +6,12 @@ #include "common/common_types.h" #include "common/vector_math.h" -#include "video_core/pica_state.h" +#include "video_core/pica/pica_core.h" namespace SwRenderer { /// Generates procedural texture color for the given coordinates Common::Vec4 ProcTex(float u, float v, const Pica::TexturingRegs& regs, - const Pica::State::ProcTex& state); + const Pica::PicaCore::ProcTex& state); } // namespace SwRenderer diff --git a/src/video_core/renderer_software/sw_rasterizer.cpp b/src/video_core/renderer_software/sw_rasterizer.cpp index 21a562aee..f0f290ebe 100644 --- a/src/video_core/renderer_software/sw_rasterizer.cpp +++ b/src/video_core/renderer_software/sw_rasterizer.cpp @@ -8,14 +8,13 @@ #include "common/quaternion.h" #include "common/vector_math.h" #include "core/memory.h" -#include "video_core/pica_state.h" -#include "video_core/pica_types.h" +#include "video_core/pica/output_vertex.h" +#include "video_core/pica/pica_core.h" #include "video_core/renderer_software/sw_framebuffer.h" #include "video_core/renderer_software/sw_lighting.h" #include "video_core/renderer_software/sw_proctex.h" #include "video_core/renderer_software/sw_rasterizer.h" #include "video_core/renderer_software/sw_texturing.h" -#include "video_core/shader/shader.h" #include "video_core/texture/texture_decode.h" namespace SwRenderer { @@ -33,7 +32,7 @@ using Pica::Texture::TextureInfo; // we can use a very small epsilon value for clip plane comparison. constexpr f32 EPSILON_Z = 0.00000001f; -struct Vertex : Pica::Shader::OutputVertex { +struct Vertex : Pica::OutputVertex { Vertex(const OutputVertex& v) : OutputVertex(v) {} /// Attributes used to store intermediate results position after perspective divide. 
@@ -101,14 +100,13 @@ private: } // Anonymous namespace -RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_) - : memory{memory_}, state{Pica::g_state}, regs{state.regs}, +RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_, Pica::PicaCore& pica_) + : memory{memory_}, pica{pica_}, regs{pica.regs.internal}, num_sw_threads{std::max(std::thread::hardware_concurrency(), 2U)}, sw_workers{num_sw_threads, "SwRenderer workers"}, fb{memory, regs.framebuffer} {} -void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) { +void RasterizerSoftware::AddTriangle(const Pica::OutputVertex& v0, const Pica::OutputVertex& v1, + const Pica::OutputVertex& v2) { /** * Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at * the new edge (or less in degenerate cases). As such, we can say that each clipping plane @@ -170,8 +168,8 @@ void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0, } } - if (state.regs.rasterizer.clip_enable) { - const ClippingEdge custom_edge{state.regs.rasterizer.GetClipCoef()}; + if (regs.rasterizer.clip_enable) { + const ClippingEdge custom_edge{regs.rasterizer.GetClipCoef()}; clip(custom_edge); if (output_list->size() < 3) { return; @@ -434,7 +432,7 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), }; std::tie(primary_fragment_color, secondary_fragment_color) = - ComputeFragmentsColors(regs.lighting, state.lighting, normquat, view, + ComputeFragmentsColors(regs.lighting, pica.lighting, normquat, view, texture_color); } @@ -587,7 +585,7 @@ std::array, 4> RasterizerSoftware::TextureColor( if (regs.texturing.main_config.texture3_enable) { const auto& proctex_uv = uv[regs.texturing.main_config.texture3_coordinates]; texture_color[3] = ProcTex(proctex_uv.u().ToFloat32(), 
proctex_uv.v().ToFloat32(), - regs.texturing, state.proctex); + regs.texturing, pica.proctex); } return texture_color; @@ -813,7 +811,7 @@ void RasterizerSoftware::WriteFog(float depth, Common::Vec4& combiner_output // Generate clamped fog factor from LUT for given fog index const f32 fog_i = std::clamp(floorf(fog_index), 0.0f, 127.0f); const f32 fog_f = fog_index - fog_i; - const auto& fog_lut_entry = state.fog.lut[static_cast(fog_i)]; + const auto& fog_lut_entry = pica.fog.lut[static_cast(fog_i)]; f32 fog_factor = fog_lut_entry.ToFloat() + fog_lut_entry.DiffToFloat() * fog_f; fog_factor = std::clamp(fog_factor, 0.0f, 1.0f); for (u32 i = 0; i < 3; i++) { diff --git a/src/video_core/renderer_software/sw_rasterizer.h b/src/video_core/renderer_software/sw_rasterizer.h index c2cec9e9d..25ca224d2 100644 --- a/src/video_core/renderer_software/sw_rasterizer.h +++ b/src/video_core/renderer_software/sw_rasterizer.h @@ -6,18 +6,14 @@ #include #include "common/thread_worker.h" +#include "video_core/pica/regs_texturing.h" #include "video_core/rasterizer_interface.h" -#include "video_core/regs_texturing.h" #include "video_core/renderer_software/sw_clipper.h" #include "video_core/renderer_software/sw_framebuffer.h" -namespace Pica::Shader { -struct OutputVertex; -} - namespace Pica { -struct State; -struct Regs; +struct RegsInternal; +class PicaCore; } // namespace Pica namespace SwRenderer { @@ -26,10 +22,10 @@ struct Vertex; class RasterizerSoftware : public VideoCore::RasterizerInterface { public: - explicit RasterizerSoftware(Memory::MemorySystem& memory); + explicit RasterizerSoftware(Memory::MemorySystem& memory, Pica::PicaCore& pica); - void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) override; + void AddTriangle(const Pica::OutputVertex& v0, const Pica::OutputVertex& v1, + const Pica::OutputVertex& v2) override; void DrawTriangles() override {} void NotifyPicaRegisterChanged(u32 id) 
override {} void FlushAll() override {} @@ -72,8 +68,8 @@ private: private: Memory::MemorySystem& memory; - Pica::State& state; - const Pica::Regs& regs; + Pica::PicaCore& pica; + Pica::RegsInternal& regs; size_t num_sw_threads; Common::ThreadWorker sw_workers; Framebuffer fb; diff --git a/src/video_core/renderer_software/sw_texturing.cpp b/src/video_core/renderer_software/sw_texturing.cpp index a41e733d2..c0f300093 100644 --- a/src/video_core/renderer_software/sw_texturing.cpp +++ b/src/video_core/renderer_software/sw_texturing.cpp @@ -6,7 +6,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/vector_math.h" -#include "video_core/regs_texturing.h" +#include "video_core/pica/regs_texturing.h" #include "video_core/renderer_software/sw_texturing.h" namespace SwRenderer { diff --git a/src/video_core/renderer_software/sw_texturing.h b/src/video_core/renderer_software/sw_texturing.h index cf81736aa..cb60b3ddb 100644 --- a/src/video_core/renderer_software/sw_texturing.h +++ b/src/video_core/renderer_software/sw_texturing.h @@ -8,7 +8,7 @@ #include "common/common_types.h" #include "common/vector_math.h" -#include "video_core/regs_texturing.h" +#include "video_core/pica/regs_texturing.h" namespace SwRenderer { diff --git a/src/video_core/renderer_vulkan/pica_to_vk.h b/src/video_core/renderer_vulkan/pica_to_vk.h index c591df08e..6067c07ae 100644 --- a/src/video_core/renderer_vulkan/pica_to_vk.h +++ b/src/video_core/renderer_vulkan/pica_to_vk.h @@ -4,10 +4,8 @@ #pragma once -#include "common/logging/log.h" -#include "core/core.h" -#include "core/telemetry_session.h" -#include "video_core/regs.h" +#include "common/assert.h" +#include "video_core/pica/regs_internal.h" #include "video_core/renderer_vulkan/vk_common.h" namespace PicaToVK { @@ -56,14 +54,6 @@ inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapM const auto index = static_cast(mode); ASSERT_MSG(index < wrap_mode_table.size(), "Unknown texture wrap mode {}", 
index); - - if (index > 3) { - Core::System::GetInstance().TelemetrySession().AddField( - Common::Telemetry::FieldType::Session, "VideoCore_Pica_UnsupportedTextureWrapMode", - static_cast(index)); - LOG_WARNING(Render_Vulkan, "Using texture wrap mode {}", index); - } - return wrap_mode_table[index]; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index c3104524c..74a231bff 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -9,9 +9,8 @@ #include "common/settings.h" #include "core/core.h" #include "core/frontend/emu_window.h" -#include "core/hw/gpu.h" -#include "core/hw/hw.h" -#include "core/hw/lcd.h" +#include "video_core/gpu.h" +#include "video_core/pica/pica_core.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_memory_util.h" #include "video_core/renderer_vulkan/vk_shader_util.h" @@ -51,15 +50,16 @@ constexpr static std::array PRESENT_BINDINGS {0, vk::DescriptorType::eCombinedImageSampler, 3, vk::ShaderStageFlagBits::eFragment}, }}; -RendererVulkan::RendererVulkan(Core::System& system, Frontend::EmuWindow& window, - Frontend::EmuWindow* secondary_window) - : RendererBase{system, window, secondary_window}, memory{system.Memory()}, +RendererVulkan::RendererVulkan(Core::System& system, Pica::PicaCore& pica_, + Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window) + : RendererBase{system, window, secondary_window}, memory{system.Memory()}, pica{pica_}, instance{system.TelemetrySession(), window, Settings::values.physical_device.GetValue()}, scheduler{instance}, renderpass_cache{instance, scheduler}, pool{instance}, main_window{window, instance, scheduler}, vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer, VERTEX_BUFFER_SIZE}, rasterizer{memory, + pica, system.CustomTexManager(), *this, render_window, @@ -103,37 +103,25 @@ void 
RendererVulkan::Sync() { } void RendererVulkan::PrepareRendertarget() { + const auto& framebuffer_config = pica.regs.framebuffer_config; + const auto& regs_lcd = pica.regs_lcd; for (u32 i = 0; i < 3; i++) { const u32 fb_id = i == 2 ? 1 : 0; - const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; - - // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 - u32 lcd_color_addr = - (fb_id == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); - lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; - LCD::Regs::ColorFill color_fill{0}; - LCD::Read(color_fill.raw, lcd_color_addr); + const auto& framebuffer = framebuffer_config[fb_id]; + auto& texture = screen_infos[i].texture; + const auto color_fill = fb_id == 0 ? regs_lcd.color_fill_top : regs_lcd.color_fill_bottom; if (color_fill.is_enabled) { - LoadColorToActiveVkTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, - screen_infos[i].texture); - } else { - TextureInfo& texture = screen_infos[i].texture; - if (texture.width != framebuffer.width || texture.height != framebuffer.height || - texture.format != framebuffer.color_format) { - - // Reallocate texture if the framebuffer size has changed. - // This is expected to not happen very often and hence should not be a - // performance problem. 
- ConfigureFramebufferTexture(texture, framebuffer); - } - - LoadFBToScreenInfo(framebuffer, screen_infos[i], i == 1); - - // Resize the texture in case the framebuffer size has changed - texture.width = framebuffer.width; - texture.height = framebuffer.height; + FillScreen(color_fill.AsVector(), texture); + continue; } + + if (texture.width != framebuffer.width || texture.height != framebuffer.height || + texture.format != framebuffer.color_format) { + ConfigureFramebufferTexture(texture, framebuffer); + } + + LoadFBToScreenInfo(framebuffer, screen_infos[i], i == 1); } } @@ -203,7 +191,7 @@ void RendererVulkan::RenderToWindow(PresentWindow& window, const Layout::Framebu window.Present(frame); } -void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, +void RendererVulkan::LoadFBToScreenInfo(const Pica::FramebufferConfig& framebuffer, ScreenInfo& screen_info, bool right_eye) { if (framebuffer.address_right1 == 0 || framebuffer.address_right2 == 0) { @@ -219,7 +207,7 @@ void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram framebuffer.stride * framebuffer.height, framebuffer_addr, framebuffer.width.Value(), framebuffer.height.Value(), framebuffer.format); - const int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); + const u32 bpp = Pica::BytesPerPixel(framebuffer.color_format); const std::size_t pixel_stride = framebuffer.stride / bpp; ASSERT(pixel_stride * bpp == framebuffer.stride); @@ -405,7 +393,7 @@ void RendererVulkan::BuildPipelines() { } void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, - const GPU::Regs::FramebufferConfig& framebuffer) { + const Pica::FramebufferConfig& framebuffer) { vk::Device device = instance.GetDevice(); if (texture.image_view) { device.destroyImageView(texture.image_view); @@ -466,14 +454,14 @@ void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, texture.format = framebuffer.color_format; } -void 
RendererVulkan::LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, - const TextureInfo& texture) { +void RendererVulkan::FillScreen(Common::Vec3 color, const TextureInfo& texture) { + return; const vk::ClearColorValue clear_color = { .float32 = std::array{ - color_r / 255.0f, - color_g / 255.0f, - color_b / 255.0f, + color.r() / 255.0f, + color.g() / 255.0f, + color.b() / 255.0f, 1.0f, }, }; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index f180a2644..f50db92bb 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -4,12 +4,8 @@ #pragma once -#include -#include -#include #include "common/common_types.h" #include "common/math_util.h" -#include "core/hw/gpu.h" #include "video_core/renderer_base.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -17,7 +13,6 @@ #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_swapchain.h" namespace Core { class System; @@ -27,16 +22,24 @@ namespace Memory { class MemorySystem; } +namespace Pica { +class PicaCore; +} + namespace Layout { struct FramebufferLayout; } +namespace VideoCore { +class GPU; +} + namespace Vulkan { struct TextureInfo { u32 width; u32 height; - GPU::Regs::PixelFormat format; + Pica::PixelFormat format; vk::Image image; vk::ImageView image_view; VmaAllocation allocation; @@ -64,7 +67,7 @@ class RendererVulkan : public VideoCore::RendererBase { static constexpr std::size_t PRESENT_PIPELINES = 3; public: - explicit RendererVulkan(Core::System& system, Frontend::EmuWindow& window, + explicit RendererVulkan(Core::System& system, Pica::PicaCore& pica, Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window); ~RendererVulkan() override; @@ -86,7 
+89,7 @@ private: void BuildLayouts(); void BuildPipelines(); void ConfigureFramebufferTexture(TextureInfo& texture, - const GPU::Regs::FramebufferConfig& framebuffer); + const Pica::FramebufferConfig& framebuffer); void ConfigureRenderPipeline(); void PrepareRendertarget(); void RenderScreenshot(); @@ -105,12 +108,13 @@ private: Layout::DisplayOrientation orientation); void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h, Layout::DisplayOrientation orientation); - void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, - ScreenInfo& screen_info, bool right_eye); - void LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); + void LoadFBToScreenInfo(const Pica::FramebufferConfig& framebuffer, ScreenInfo& screen_info, + bool right_eye); + void FillScreen(Common::Vec3 color, const TextureInfo& texture); private: Memory::MemorySystem& memory; + Pica::PicaCore& pica; Instance instance; Scheduler scheduler; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 5d20f0b13..93a4ebd48 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -3,9 +3,9 @@ // Refer to the license.txt file included. 
#include "common/thread_worker.h" +#include "video_core/pica/regs_pipeline.h" +#include "video_core/pica/regs_rasterizer.h" #include "video_core/rasterizer_cache/pixel_format.h" -#include "video_core/regs_pipeline.h" -#include "video_core/regs_rasterizer.h" #include "video_core/renderer_vulkan/vk_common.h" namespace Common { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 88f0a3fca..7b2cca1e6 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -6,8 +6,8 @@ #include +#include "video_core/pica/regs_pipeline.h" #include "video_core/rasterizer_cache/pixel_format.h" -#include "video_core/regs_pipeline.h" #include "video_core/renderer_vulkan/vk_platform.h" namespace Core { diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 4ab4c9cd9..1251536f0 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_scheduler.h" diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 62eac33a8..f689925d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -377,8 +377,8 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { return true; } -bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, - Pica::Shader::ShaderSetup& setup, +bool PipelineCache::UseProgrammableVertexShader(const Pica::RegsInternal& regs, + Pica::ShaderSetup& setup, const VertexLayout& layout) { // Enable the geometry-shader only if we 
are actually doing per-fragment lighting // and care about proper quaternions. Otherwise just use standard vertex+fragment shaders. @@ -443,7 +443,7 @@ void PipelineCache::UseTrivialVertexShader() { shader_hashes[ProgramType::VS] = 0; } -bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { +bool PipelineCache::UseFixedGeometryShader(const Pica::RegsInternal& regs) { if (!instance.UseGeometryShaders()) { UseTrivialGeometryShader(); return true; @@ -472,7 +472,7 @@ void PipelineCache::UseTrivialGeometryShader() { shader_hashes[ProgramType::GS] = 0; } -void PipelineCache::UseFragmentShader(const Pica::Regs& regs, +void PipelineCache::UseFragmentShader(const Pica::RegsInternal& regs, const Pica::Shader::UserConfig& user) { const FSConfig fs_config{regs, user, profile}; const auto [it, new_shader] = fragment_shaders.try_emplace(fs_config, instance); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4ef581493..949bb3250 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -14,12 +14,9 @@ #include "video_core/shader/generator/shader_gen.h" namespace Pica { -struct Regs; -} - -namespace Pica::Shader { +struct RegsInternal; struct ShaderSetup; -} +} // namespace Pica namespace Vulkan { @@ -54,20 +51,20 @@ public: bool BindPipeline(const PipelineInfo& info, bool wait_built = false); /// Binds a PICA decompiled vertex shader - bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, + bool UseProgrammableVertexShader(const Pica::RegsInternal& regs, Pica::ShaderSetup& setup, const VertexLayout& layout); /// Binds a passthrough vertex shader void UseTrivialVertexShader(); /// Binds a PICA decompiled geometry shader - bool UseFixedGeometryShader(const Pica::Regs& regs); + bool UseFixedGeometryShader(const Pica::RegsInternal& regs); /// Binds a passthrough geometry shader void 
UseTrivialGeometryShader(); /// Binds a fragment shader generated from PICA state - void UseFragmentShader(const Pica::Regs& regs, const Pica::Shader::UserConfig& user); + void UseFragmentShader(const Pica::RegsInternal& regs, const Pica::Shader::UserConfig& user); /// Binds a texture to the specified binding void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 401c8010d..1d03f0acc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -8,10 +8,8 @@ #include "common/math_util.h" #include "common/microprofile.h" #include "common/settings.h" -#include "video_core/pica_state.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_pipeline.h" -#include "video_core/regs_rasterizer.h" +#include "core/memory.h" +#include "video_core/pica/pica_core.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -56,13 +54,13 @@ struct DrawParams { } // Anonymous namespace -RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, +RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& pica, VideoCore::CustomTexManager& custom_tex_manager, VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window, const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, RenderpassCache& renderpass_cache, u32 image_count) - : RasterizerAccelerated{memory}, instance{instance}, scheduler{scheduler}, + : RasterizerAccelerated{memory, pica}, instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, pipeline_cache{instance, scheduler, renderpass_cache, pool}, runtime{instance, scheduler, renderpass_cache, pool, pipeline_cache.TextureProvider(), @@ -278,7 +276,7 @@ void 
RasterizerVulkan::SetupFixedAttribs() { if (vertex_attributes.IsDefaultAttribute(i)) { const u32 reg = regs.vs.GetRegisterForAttribute(i); if (!enable_attributes[reg]) { - const auto& attr = Pica::g_state.input_default_attributes.attr[i]; + const auto& attr = pica.input_default_attributes[i]; const std::array data = {attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(), attr.w.ToFloat32()}; @@ -323,7 +321,7 @@ void RasterizerVulkan::SetupFixedAttribs() { bool RasterizerVulkan::SetupVertexShader() { MICROPROFILE_SCOPE(Vulkan_VS); - return pipeline_cache.UseProgrammableVertexShader(regs, Pica::g_state.vs, + return pipeline_cache.UseProgrammableVertexShader(regs, pica.vs_setup, pipeline_info.vertex_layout); } @@ -741,19 +739,19 @@ void RasterizerVulkan::ClearAll(bool flush) { res_cache.ClearAll(flush); } -bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { +bool RasterizerVulkan::AccelerateDisplayTransfer(const Pica::DisplayTransferConfig& config) { return res_cache.AccelerateDisplayTransfer(config); } -bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { +bool RasterizerVulkan::AccelerateTextureCopy(const Pica::DisplayTransferConfig& config) { return res_cache.AccelerateTextureCopy(config); } -bool RasterizerVulkan::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { +bool RasterizerVulkan::AccelerateFill(const Pica::MemoryFillConfig& config) { return res_cache.AccelerateFill(config); } -bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, +bool RasterizerVulkan::AccelerateDisplay(const Pica::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { if (framebuffer_addr == 0) [[unlikely]] { @@ -935,7 +933,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() { for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) { if 
(fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) { std::array new_data; - const auto& source_lut = Pica::g_state.lighting.luts[index]; + const auto& source_lut = pica.lighting.luts[index]; std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), [](const auto& entry) { return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; @@ -960,7 +958,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() { if (fs_uniform_block_data.fog_lut_dirty || invalidate) { std::array new_data; - std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), + std::transform(pica.fog.lut.begin(), pica.fog.lut.end(), new_data.begin(), [](const auto& entry) { return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; }); @@ -981,7 +979,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() { } void RasterizerVulkan::SyncAndUploadLUTs() { - const auto& proctex = Pica::g_state.proctex; + const auto& proctex = pica.proctex; constexpr std::size_t max_size = sizeof(Common::Vec2f) * 128 * 3 + // proctex: noise + color + alpha sizeof(Common::Vec4f) * 256 + // proctex @@ -1000,7 +998,7 @@ void RasterizerVulkan::SyncAndUploadLUTs() { // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap auto sync_proctex_value_lut = [this, buffer = buffer, offset = offset, invalidate = invalidate, - &bytes_used](const std::array& lut, + &bytes_used](const std::array& lut, std::array& lut_data, int& lut_offset) { std::array new_data; std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { @@ -1120,7 +1118,7 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { if (sync_vs_pica) { VSPicaUniformData vs_uniforms; - vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs); + vs_uniforms.uniforms.SetFromRegs(regs.vs, pica.vs_setup); std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); pipeline_cache.SetBufferOffset(0, offset + used_bytes); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h 
b/src/video_core/renderer_vulkan/vk_rasterizer.h index cd7620c49..5cd795ecf 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -4,7 +4,6 @@ #pragma once -#include "core/hw/gpu.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" @@ -20,6 +19,12 @@ class CustomTexManager; class RendererBase; } // namespace VideoCore +namespace Pica { +struct DisplayTransferConfig; +struct MemoryFillConfig; +struct FramebufferConfig; +} // namespace Pica + namespace Vulkan { struct ScreenInfo; @@ -31,7 +36,7 @@ class DescriptorPool; class RasterizerVulkan : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerVulkan(Memory::MemorySystem& memory, + explicit RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& pica, VideoCore::CustomTexManager& custom_tex_manager, VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window, const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, @@ -48,10 +53,10 @@ public: void InvalidateRegion(PAddr addr, u32 size) override; void FlushAndInvalidateRegion(PAddr addr, u32 size) override; void ClearAll(bool flush) override; - bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; - bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; - bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; - bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, + bool AccelerateDisplayTransfer(const Pica::DisplayTransferConfig& config) override; + bool AccelerateTextureCopy(const Pica::DisplayTransferConfig& config) override; + bool AccelerateFill(const Pica::MemoryFillConfig& config) override; + bool AccelerateDisplay(const Pica::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info); 
bool AccelerateDrawBatch(bool is_indexed) override; diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp index efb97da06..c85035abc 100644 --- a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp @@ -914,7 +914,7 @@ void FragmentModule::WriteLogicOp() { } void FragmentModule::WriteBlending() { - if (!config.EmulateBlend()) [[likely]] { + if (!config.EmulateBlend() || profile.is_vulkan) [[likely]] { return; } @@ -1258,7 +1258,7 @@ void FragmentModule::DefineExtensions() { use_fragment_shader_barycentric = false; } } - if (config.EmulateBlend()) { + if (config.EmulateBlend() && !profile.is_vulkan) { if (profile.has_gl_ext_framebuffer_fetch) { out += "#extension GL_EXT_shader_framebuffer_fetch : enable\n"; out += "#define destFactor color\n"; diff --git a/src/video_core/shader/generator/glsl_shader_decompiler.cpp b/src/video_core/shader/generator/glsl_shader_decompiler.cpp index 4c8dffd64..017e41a43 100644 --- a/src/video_core/shader/generator/glsl_shader_decompiler.cpp +++ b/src/video_core/shader/generator/glsl_shader_decompiler.cpp @@ -23,7 +23,7 @@ using nihstro::RegisterType; using nihstro::SourceRegister; using nihstro::SwizzlePattern; -constexpr u32 PROGRAM_END = Pica::Shader::MAX_PROGRAM_CODE_LENGTH; +constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; class DecompileFail : public std::runtime_error { public: @@ -58,7 +58,7 @@ struct Subroutine { /// Analyzes shader code and produces a set of subroutines. class ControlFlowAnalyzer { public: - ControlFlowAnalyzer(const Pica::Shader::ProgramCode& program_code, u32 main_offset) + ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) : program_code(program_code) { // Recursively finds all subroutines. 
@@ -72,7 +72,7 @@ public: } private: - const Pica::Shader::ProgramCode& program_code; + const ProgramCode& program_code; std::set subroutines; std::map, ExitMethod> exit_method_map; @@ -265,9 +265,8 @@ constexpr auto GetSelectorSrc3 = GetSelectorSrc<&SwizzlePattern::GetSelectorSrc3 class GLSLGenerator { public: - GLSLGenerator(const std::set& subroutines, - const Pica::Shader::ProgramCode& program_code, - const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset, + GLSLGenerator(const std::set& subroutines, const ProgramCode& program_code, + const SwizzleData& swizzle_data, u32 main_offset, const RegGetter& inputreg_getter, const RegGetter& outputreg_getter, bool sanitize_mul) : subroutines(subroutines), program_code(program_code), swizzle_data(swizzle_data), @@ -921,8 +920,8 @@ private: private: const std::set& subroutines; - const Pica::Shader::ProgramCode& program_code; - const Pica::Shader::SwizzleData& swizzle_data; + const ProgramCode& program_code; + const SwizzleData& swizzle_data; const u32 main_offset; const RegGetter& inputreg_getter; const RegGetter& outputreg_getter; @@ -931,10 +930,9 @@ private: ShaderWriter shader; }; -std::string DecompileProgram(const Pica::Shader::ProgramCode& program_code, - const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset, - const RegGetter& inputreg_getter, const RegGetter& outputreg_getter, - bool sanitize_mul) { +std::string DecompileProgram(const ProgramCode& program_code, const SwizzleData& swizzle_data, + u32 main_offset, const RegGetter& inputreg_getter, + const RegGetter& outputreg_getter, bool sanitize_mul) { try { auto subroutines = ControlFlowAnalyzer(program_code, main_offset).MoveSubroutines(); diff --git a/src/video_core/shader/generator/glsl_shader_decompiler.h b/src/video_core/shader/generator/glsl_shader_decompiler.h index 933bc4df8..2eb84c7cf 100644 --- a/src/video_core/shader/generator/glsl_shader_decompiler.h +++ b/src/video_core/shader/generator/glsl_shader_decompiler.h @@ -6,15 +6,14 @@ 
#include #include -#include "common/common_types.h" -#include "video_core/shader/shader.h" +#include "video_core/pica/shader_setup.h" namespace Pica::Shader::Generator::GLSL { using RegGetter = std::function; -std::string DecompileProgram(const Pica::Shader::ProgramCode& program_code, - const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset, +std::string DecompileProgram(const Pica::ProgramCode& program_code, + const Pica::SwizzleData& swizzle_data, u32 main_offset, const RegGetter& inputreg_getter, const RegGetter& outputreg_getter, bool sanitize_mul); diff --git a/src/video_core/shader/generator/glsl_shader_gen.cpp b/src/video_core/shader/generator/glsl_shader_gen.cpp index a936851be..1f1a5208b 100644 --- a/src/video_core/shader/generator/glsl_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_shader_gen.cpp @@ -5,9 +5,10 @@ #include #include -#include "common/logging/log.h" +#include "video_core/pica/regs_rasterizer.h" #include "video_core/shader/generator/glsl_shader_decompiler.h" #include "video_core/shader/generator/glsl_shader_gen.h" +#include "video_core/shader/generator/shader_gen.h" using VSOutputAttributes = Pica::RasterizerRegs::VSOutputAttributes; @@ -141,7 +142,7 @@ std::string_view MakeLoadPrefix(AttribLoadFlags flag) { return ""; } -std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, +std::string GenerateVertexShader(const ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) { std::string out; if (separable_shader) { diff --git a/src/video_core/shader/generator/glsl_shader_gen.h b/src/video_core/shader/generator/glsl_shader_gen.h index 46e91c64c..20d826f90 100644 --- a/src/video_core/shader/generator/glsl_shader_gen.h +++ b/src/video_core/shader/generator/glsl_shader_gen.h @@ -4,9 +4,6 @@ #pragma once -#include "video_core/shader/generator/shader_gen.h" -#include "video_core/shader/shader.h" - // High precision may or may not be supported in GLES3. 
If it isn't, use medium precision instead. static constexpr char fragment_shader_precision_OES[] = R"( #if GL_ES @@ -24,6 +21,15 @@ precision mediump uimage2D; #endif )"; +namespace Pica { +struct ShaderSetup; +} + +namespace Pica::Shader::Generator { +struct PicaVSConfig; +struct PicaFixedGSConfig; +} // namespace Pica::Shader::Generator + namespace Pica::Shader::Generator::GLSL { /** @@ -37,7 +43,7 @@ std::string GenerateTrivialVertexShader(bool use_clip_planes, bool separable_sha * Generates the GLSL vertex shader program source code for the given VS program * @returns String of the shader source code; empty on failure */ -std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, +std::string GenerateVertexShader(const Pica::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader); /** diff --git a/src/video_core/shader/generator/pica_fs_config.cpp b/src/video_core/shader/generator/pica_fs_config.cpp index 3cdeb46f6..34dee8712 100644 --- a/src/video_core/shader/generator/pica_fs_config.cpp +++ b/src/video_core/shader/generator/pica_fs_config.cpp @@ -6,7 +6,7 @@ namespace Pica::Shader { -FramebufferConfig::FramebufferConfig(const Pica::Regs& regs, const Profile& profile) { +FramebufferConfig::FramebufferConfig(const Pica::RegsInternal& regs, const Profile& profile) { const auto& output_merger = regs.framebuffer.output_merger; scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode); depthmap_enable.Assign(regs.rasterizer.depthmap_enable); @@ -186,7 +186,7 @@ ProcTexConfig::ProcTexConfig(const Pica::TexturingRegs& regs) { lut_filter.Assign(regs.proctex_lut.filter); } -FSConfig::FSConfig(const Pica::Regs& regs, const UserConfig& user_, const Profile& profile) +FSConfig::FSConfig(const Pica::RegsInternal& regs, const UserConfig& user_, const Profile& profile) : framebuffer{regs, profile}, texture{regs.texturing, profile}, lighting{regs.lighting}, proctex{regs.texturing}, user{user_} {} diff --git 
a/src/video_core/shader/generator/pica_fs_config.h b/src/video_core/shader/generator/pica_fs_config.h index 6b18b735f..a4ebfa459 100644 --- a/src/video_core/shader/generator/pica_fs_config.h +++ b/src/video_core/shader/generator/pica_fs_config.h @@ -5,7 +5,7 @@ #pragma once #include "common/hash.h" -#include "video_core/regs.h" +#include "video_core/pica/regs_internal.h" #include "video_core/shader/generator/profile.h" namespace Pica::Shader { @@ -17,7 +17,7 @@ struct BlendConfig { }; struct FramebufferConfig { - explicit FramebufferConfig(const Pica::Regs& regs, const Profile& profile); + explicit FramebufferConfig(const Pica::RegsInternal& regs, const Profile& profile); union { u32 raw{}; @@ -158,7 +158,8 @@ union UserConfig { static_assert(std::has_unique_object_representations_v); struct FSConfig { - explicit FSConfig(const Pica::Regs& regs, const UserConfig& user, const Profile& profile); + explicit FSConfig(const Pica::RegsInternal& regs, const UserConfig& user, + const Profile& profile); [[nodiscard]] bool TevStageUpdatesCombinerBufferColor(u32 stage_index) const { return (stage_index < 4) && (texture.combiner_buffer_input & (1 << stage_index)); diff --git a/src/video_core/shader/generator/shader_gen.cpp b/src/video_core/shader/generator/shader_gen.cpp index 8654a6e3c..70be1bd40 100644 --- a/src/video_core/shader/generator/shader_gen.cpp +++ b/src/video_core/shader/generator/shader_gen.cpp @@ -4,12 +4,14 @@ #include "common/bit_set.h" #include "common/logging/log.h" +#include "common/settings.h" +#include "video_core/pica/regs_internal.h" +#include "video_core/pica/shader_setup.h" #include "video_core/shader/generator/shader_gen.h" -#include "video_core/video_core.h" namespace Pica::Shader::Generator { -void PicaGSConfigState::Init(const Pica::Regs& regs, bool use_clip_planes_) { +void PicaGSConfigState::Init(const Pica::RegsInternal& regs, bool use_clip_planes_) { use_clip_planes = use_clip_planes_; vs_output_attributes = 
Common::BitSet(regs.vs.output_mask).Count(); @@ -34,7 +36,7 @@ void PicaGSConfigState::Init(const Pica::Regs& regs, bool use_clip_planes_) { } } -void PicaVSConfigState::Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, +void PicaVSConfigState::Init(const Pica::RegsInternal& regs, Pica::ShaderSetup& setup, bool use_clip_planes_, bool use_geometry_shader_) { use_clip_planes = use_clip_planes_; use_geometry_shader = use_geometry_shader_; @@ -42,13 +44,13 @@ void PicaVSConfigState::Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup& program_hash = setup.GetProgramCodeHash(); swizzle_hash = setup.GetSwizzleDataHash(); main_offset = regs.vs.main_offset; - sanitize_mul = VideoCore::g_hw_shader_accurate_mul; + sanitize_mul = Settings::values.shaders_accurate_mul.GetValue(); num_outputs = 0; load_flags.fill(AttribLoadFlags::Float); output_map.fill(16); - for (int reg : Common::BitSet(regs.vs.output_mask)) { + for (u32 reg : Common::BitSet(regs.vs.output_mask)) { output_map[reg] = num_outputs++; } @@ -57,12 +59,12 @@ void PicaVSConfigState::Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup& } } -PicaVSConfig::PicaVSConfig(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, +PicaVSConfig::PicaVSConfig(const Pica::RegsInternal& regs, Pica::ShaderSetup& setup, bool use_clip_planes_, bool use_geometry_shader_) { state.Init(regs, setup, use_clip_planes_, use_geometry_shader_); } -PicaFixedGSConfig::PicaFixedGSConfig(const Pica::Regs& regs, bool use_clip_planes_) { +PicaFixedGSConfig::PicaFixedGSConfig(const Pica::RegsInternal& regs, bool use_clip_planes_) { state.Init(regs, use_clip_planes_); } diff --git a/src/video_core/shader/generator/shader_gen.h b/src/video_core/shader/generator/shader_gen.h index c6db249bf..02a9ca33c 100644 --- a/src/video_core/shader/generator/shader_gen.h +++ b/src/video_core/shader/generator/shader_gen.h @@ -5,8 +5,11 @@ #pragma once #include "common/hash.h" -#include "video_core/regs.h" -#include 
"video_core/shader/shader.h" + +namespace Pica { +struct RegsInternal; +struct ShaderSetup; +} // namespace Pica namespace Pica::Shader::Generator { @@ -41,7 +44,7 @@ DECLARE_ENUM_FLAG_OPERATORS(AttribLoadFlags) * PICA geometry shader. */ struct PicaGSConfigState { - void Init(const Pica::Regs& regs, bool use_clip_planes_); + void Init(const Pica::RegsInternal& regs, bool use_clip_planes_); bool use_clip_planes; @@ -62,7 +65,7 @@ struct PicaGSConfigState { * PICA vertex shader. */ struct PicaVSConfigState { - void Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, bool use_clip_planes_, + void Init(const Pica::RegsInternal& regs, Pica::ShaderSetup& setup, bool use_clip_planes_, bool use_geometry_shader_); bool use_clip_planes; @@ -88,7 +91,7 @@ struct PicaVSConfigState { * shader. */ struct PicaVSConfig : Common::HashableStruct { - explicit PicaVSConfig(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, + explicit PicaVSConfig(const Pica::RegsInternal& regs, Pica::ShaderSetup& setup, bool use_clip_planes_, bool use_geometry_shader_); }; @@ -97,7 +100,7 @@ struct PicaVSConfig : Common::HashableStruct { * shader pipeline */ struct PicaFixedGSConfig : Common::HashableStruct { - explicit PicaFixedGSConfig(const Pica::Regs& regs, bool use_clip_planes_); + explicit PicaFixedGSConfig(const Pica::RegsInternal& regs, bool use_clip_planes_); }; } // namespace Pica::Shader::Generator diff --git a/src/video_core/shader/generator/shader_uniforms.cpp b/src/video_core/shader/generator/shader_uniforms.cpp index 2a3e6beee..f5e471015 100644 --- a/src/video_core/shader/generator/shader_uniforms.cpp +++ b/src/video_core/shader/generator/shader_uniforms.cpp @@ -3,13 +3,13 @@ // Refer to the license.txt file included. 
#include +#include "video_core/pica/regs_shader.h" +#include "video_core/pica/shader_setup.h" #include "video_core/shader/generator/shader_uniforms.h" -#include "video_core/shader/shader.h" namespace Pica::Shader::Generator { -void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, - const Pica::Shader::ShaderSetup& setup) { +void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, const Pica::ShaderSetup& setup) { std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools), [](bool value) -> BoolAligned { return {value ? 1 : 0}; }); std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i), diff --git a/src/video_core/shader/generator/shader_uniforms.h b/src/video_core/shader/generator/shader_uniforms.h index b37083687..e5cc9ba0a 100644 --- a/src/video_core/shader/generator/shader_uniforms.h +++ b/src/video_core/shader/generator/shader_uniforms.h @@ -5,15 +5,12 @@ #pragma once #include "common/vector_math.h" -#include "video_core/regs_lighting.h" +#include "video_core/pica/regs_lighting.h" namespace Pica { struct ShaderRegs; -} - -namespace Pica::Shader { struct ShaderSetup; -} +} // namespace Pica namespace Pica::Shader::Generator { @@ -24,8 +21,8 @@ struct LightSrc { alignas(16) Common::Vec3f ambient; alignas(16) Common::Vec3f position; alignas(16) Common::Vec3f spot_direction; // negated - float dist_atten_bias; - float dist_atten_scale; + f32 dist_atten_bias; + f32 dist_atten_scale; }; /** diff --git a/src/video_core/shader/generator/spv_fs_shader_gen.cpp b/src/video_core/shader/generator/spv_fs_shader_gen.cpp index cbc812208..1703ce8dd 100644 --- a/src/video_core/shader/generator/spv_fs_shader_gen.cpp +++ b/src/video_core/shader/generator/spv_fs_shader_gen.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include +#include "video_core/shader/generator/pica_fs_config.h" #include "video_core/shader/generator/spv_fs_shader_gen.h" namespace Pica::Shader::Generator::SPIRV { diff --git a/src/video_core/shader/generator/spv_fs_shader_gen.h b/src/video_core/shader/generator/spv_fs_shader_gen.h index f37d9a409..875ac33bf 100644 --- a/src/video_core/shader/generator/spv_fs_shader_gen.h +++ b/src/video_core/shader/generator/spv_fs_shader_gen.h @@ -7,7 +7,13 @@ #include #include -#include "video_core/shader/generator/pica_fs_config.h" +#include "video_core/pica/regs_framebuffer.h" +#include "video_core/pica/regs_texturing.h" + +namespace Pica::Shader { +struct FSConfig; +struct Profile; +} // namespace Pica::Shader namespace Pica::Shader::Generator::SPIRV { diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 27bd8c8c3..180ca7755 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -2,166 +2,23 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include -#include #include "common/arch.h" -#include "common/assert.h" -#include "common/bit_set.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "video_core/regs_rasterizer.h" -#include "video_core/regs_shader.h" -#include "video_core/shader/shader.h" #include "video_core/shader/shader_interpreter.h" #if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) #include "video_core/shader/shader_jit.h" -#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) -#include "video_core/video_core.h" +#endif +#include "video_core/shader/shader.h" -namespace Pica::Shader { - -void OutputVertex::ValidateSemantics(const RasterizerRegs& regs) { - u32 num_attributes = regs.vs_output_total; - ASSERT(num_attributes <= 7); - for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) { - u32 output_register_map = regs.vs_output_attributes[attrib].raw; - for (std::size_t comp = 0; comp < 4; ++comp) { - u32 semantic = (output_register_map >> (8 * comp)) & 0x1F; - ASSERT_MSG(semantic < 24 || semantic == RasterizerRegs::VSOutputAttributes::INVALID, - "Invalid/unknown semantic id: {}", semantic); - } - } -} - -OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, - const AttributeBuffer& input) { - // Setup output data - union { - OutputVertex ret{}; - // Allow us to overflow OutputVertex to avoid branches, since - // RasterizerRegs::VSOutputAttributes::INVALID would write to slot 31, which - // would be out of bounds otherwise. - std::array vertex_slots_overflow; - }; - - // Some games use attributes without setting them in GPUREG_SH_OUTMAP_Oi - // Hardware tests have shown that they are initialized to 1.f in this case. 
- vertex_slots_overflow.fill(f24::One()); - - // Assert that OutputVertex has enough space for 24 semantic registers - static_assert(sizeof(std::array) == sizeof(ret), - "Struct and array have different sizes."); - - u32 num_attributes = regs.vs_output_total & 7; - for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) { - const auto output_register_map = regs.vs_output_attributes[attrib]; - vertex_slots_overflow[output_register_map.map_x] = input.attr[attrib][0]; - vertex_slots_overflow[output_register_map.map_y] = input.attr[attrib][1]; - vertex_slots_overflow[output_register_map.map_z] = input.attr[attrib][2]; - vertex_slots_overflow[output_register_map.map_w] = input.attr[attrib][3]; - } - - // The hardware takes the absolute and saturates vertex colors like this, *before* doing - // interpolation - for (u32 i = 0; i < 4; ++i) { - float c = std::fabs(ret.color[i].ToFloat32()); - ret.color[i] = f24::FromFloat32(c < 1.0f ? c : 1.0f); - } - - LOG_TRACE(HW_GPU, - "Output vertex: pos({:.2}, {:.2}, {:.2}, {:.2}), quat({:.2}, {:.2}, {:.2}, {:.2}), " - "col({:.2}, {:.2}, {:.2}, {:.2}), tc0({:.2}, {:.2}), view({:.2}, {:.2}, {:.2})", - ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), - ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), - ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(), - ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), - ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(), - ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); - - return ret; -} - -void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input) { - const u32 max_attribute = config.max_input_attribute_index; - - for (u32 attr = 0; attr <= max_attribute; ++attr) { - u32 reg = config.GetRegisterForAttribute(attr); - registers.input[reg] = input.attr[attr]; - } -} - -static void CopyRegistersToOutput(std::span, 16> regs, u32 mask, - AttributeBuffer& buffer) { 
- int output_i = 0; - for (int reg : Common::BitSet(mask)) { - buffer.attr[output_i++] = regs[reg]; - } -} - -void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { - CopyRegistersToOutput(registers.output, config.output_mask, output); -} - -UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {} - -GSEmitter::GSEmitter() { - handlers = new Handlers; -} - -GSEmitter::~GSEmitter() { - delete handlers; -} - -void GSEmitter::Emit(std::span, 16> output_regs) { - ASSERT(vertex_id < 3); - // TODO: This should be merged with UnitState::WriteOutput somehow - CopyRegistersToOutput(output_regs, output_mask, buffer[vertex_id]); - - if (prim_emit) { - if (winding) - handlers->winding_setter(); - for (std::size_t i = 0; i < buffer.size(); ++i) { - handlers->vertex_handler(buffer[i]); - } - } -} - -GSUnitState::GSUnitState() : UnitState(&emitter) {} - -void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) { - emitter.handlers->vertex_handler = std::move(vertex_handler); - emitter.handlers->winding_setter = std::move(winding_setter); -} - -void GSUnitState::ConfigOutput(const ShaderRegs& config) { - emitter.output_mask = config.output_mask; -} - -MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); +namespace Pica { +std::unique_ptr CreateEngine(bool use_jit) { #if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) -static std::unique_ptr jit_engine; -#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) -static InterpreterEngine interpreter_engine; - -ShaderEngine* GetEngine() { -#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) - // TODO(yuriks): Re-initialize on each change rather than being persistent - if (VideoCore::g_shader_jit_enabled) { - if (jit_engine == nullptr) { - jit_engine = std::make_unique(); - } - return jit_engine.get(); + if (use_jit) { + return std::make_unique(); } -#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) +#endif - return &interpreter_engine; + return std::make_unique(); } 
-void Shutdown() { -#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) - jit_engine.reset(); -#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) -} - -} // namespace Pica::Shader +} // namespace Pica diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 1b564de59..a8dd4c1c6 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -4,301 +4,12 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "common/common_funcs.h" #include "common/common_types.h" -#include "common/hash.h" -#include "common/vector_math.h" -#include "video_core/pica_types.h" -#include "video_core/regs_rasterizer.h" -#include "video_core/regs_shader.h" -namespace Pica::Shader { +namespace Pica { -constexpr u32 MAX_PROGRAM_CODE_LENGTH = 4096; -constexpr u32 MAX_SWIZZLE_DATA_LENGTH = 4096; -using ProgramCode = std::array; -using SwizzleData = std::array; - -struct AttributeBuffer { - alignas(16) Common::Vec4 attr[16]; - -private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const u32 file_version) { - ar& attr; - } -}; - -/// Handler type for receiving vertex outputs from vertex shader or geometry shader -using VertexHandler = std::function; - -/// Handler type for signaling to invert the vertex order of the next triangle -using WindingSetter = std::function; - -struct OutputVertex { - Common::Vec4 pos; - Common::Vec4 quat; - Common::Vec4 color; - Common::Vec2 tc0; - Common::Vec2 tc1; - f24 tc0_w; - INSERT_PADDING_WORDS(1); - Common::Vec3 view; - INSERT_PADDING_WORDS(1); - Common::Vec2 tc2; - - static void ValidateSemantics(const RasterizerRegs& regs); - static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, - const AttributeBuffer& output); - -private: - template - void serialize(Archive& ar, const u32) { - ar& pos; - ar& quat; - ar& color; - ar& tc0; - ar& tc1; - ar& tc0_w; - ar& view; - ar& tc2; - } - friend class boost::serialization::access; 
-}; -#define ASSERT_POS(var, pos) \ - static_assert(offsetof(OutputVertex, var) == pos * sizeof(f24), "Semantic at wrong " \ - "offset.") -ASSERT_POS(pos, RasterizerRegs::VSOutputAttributes::POSITION_X); -ASSERT_POS(quat, RasterizerRegs::VSOutputAttributes::QUATERNION_X); -ASSERT_POS(color, RasterizerRegs::VSOutputAttributes::COLOR_R); -ASSERT_POS(tc0, RasterizerRegs::VSOutputAttributes::TEXCOORD0_U); -ASSERT_POS(tc1, RasterizerRegs::VSOutputAttributes::TEXCOORD1_U); -ASSERT_POS(tc0_w, RasterizerRegs::VSOutputAttributes::TEXCOORD0_W); -ASSERT_POS(view, RasterizerRegs::VSOutputAttributes::VIEW_X); -ASSERT_POS(tc2, RasterizerRegs::VSOutputAttributes::TEXCOORD2_U); -#undef ASSERT_POS -static_assert(std::is_trivial_v, "Structure is not POD"); -static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); - -/** - * This structure contains state information for primitive emitting in geometry shader. - */ -struct GSEmitter { - std::array buffer; - u8 vertex_id; - bool prim_emit; - bool winding; - u32 output_mask; - - // Function objects are hidden behind a raw pointer to make the structure standard layout type, - // for JIT to use offsetof to access other members. - struct Handlers { - VertexHandler vertex_handler; - WindingSetter winding_setter; - }* handlers; - - GSEmitter(); - ~GSEmitter(); - void Emit(std::span, 16> output_regs); - -private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const u32 file_version) { - ar& buffer; - ar& vertex_id; - ar& prim_emit; - ar& winding; - ar& output_mask; - // Handlers are ignored because they're constant - } -}; -static_assert(std::is_standard_layout::value, "GSEmitter is not standard layout type"); - -/** - * This structure contains the state information that needs to be unique for a shader unit. The 3DS - * has four shader units that process shaders in parallel. 
At the present, Citra only implements a - * single shader unit that processes all shaders serially. Putting the state information in a struct - * here will make it easier for us to parallelize the shader processing later. - */ -struct UnitState { - explicit UnitState(GSEmitter* emitter = nullptr); - - // Two Address registers and one loop counter - // TODO: How many bits do these actually have? - s32 address_registers[3]; - - bool conditional_code[2]; - - struct Registers { - // The registers are accessed by the shader JIT using SSE instructions, and are therefore - // required to be 16-byte aligned. - alignas(16) std::array, 16> input; - alignas(16) std::array, 16> temporary; - alignas(16) std::array, 16> output; - - private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const u32 file_version) { - ar& input; - ar& temporary; - ar& output; - } - } registers; - static_assert(std::is_trivial_v, "Structure is not POD"); - - GSEmitter* emitter_ptr; - - static std::size_t InputOffset(s32 register_index) { - return offsetof(UnitState, registers.input) + register_index * sizeof(Common::Vec4); - } - - static std::size_t OutputOffset(s32 register_index) { - return offsetof(UnitState, registers.output) + register_index * sizeof(Common::Vec4); - } - - static std::size_t TemporaryOffset(s32 register_index) { - return offsetof(UnitState, registers.temporary) + - register_index * sizeof(Common::Vec4); - } - - /** - * Loads the unit state with an input vertex. - * - * @param config Shader configuration registers corresponding to the unit. - * @param input Attribute buffer to load into the input registers. 
- */ - void LoadInput(const ShaderRegs& config, const AttributeBuffer& input); - - void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); - -private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const u32 file_version) { - ar& registers; - ar& conditional_code; - ar& address_registers; - // emitter_ptr is only set by GSUnitState and is serialized there - } -}; - -/** - * This is an extended shader unit state that represents the special unit that can run both vertex - * shader and geometry shader. It contains an additional primitive emitter and utilities for - * geometry shader. - */ -struct GSUnitState : public UnitState { - GSUnitState(); - void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter); - void ConfigOutput(const ShaderRegs& config); - - GSEmitter emitter; - -private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const u32 file_version) { - ar& boost::serialization::base_object(*this); - ar& emitter; - } -}; - -struct Uniforms { - // The float uniforms are accessed by the shader JIT using SSE instructions, and are - // therefore required to be 16-byte aligned. 
- alignas(16) std::array, 96> f; - - std::array b; - std::array, 4> i; - - static std::size_t GetFloatUniformOffset(u32 index) { - return offsetof(Uniforms, f) + index * sizeof(Common::Vec4); - } - - static std::size_t GetBoolUniformOffset(u32 index) { - return offsetof(Uniforms, b) + index * sizeof(bool); - } - - static std::size_t GetIntUniformOffset(u32 index) { - return offsetof(Uniforms, i) + index * sizeof(Common::Vec4); - } - -private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const u32 file_version) { - ar& f; - ar& b; - ar& i; - } -}; - -struct ShaderSetup { - Uniforms uniforms; - - ProgramCode program_code; - SwizzleData swizzle_data; - - /// Data private to ShaderEngines - struct EngineData { - u32 entry_point; - /// Used by the JIT, points to a compiled shader object. - const void* cached_shader = nullptr; - } engine_data; - - void MarkProgramCodeDirty() { - program_code_hash_dirty = true; - } - - void MarkSwizzleDataDirty() { - swizzle_data_hash_dirty = true; - } - - u64 GetProgramCodeHash() { - if (program_code_hash_dirty) { - program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); - program_code_hash_dirty = false; - } - return program_code_hash; - } - - u64 GetSwizzleDataHash() { - if (swizzle_data_hash_dirty) { - swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); - swizzle_data_hash_dirty = false; - } - return swizzle_data_hash; - } - -private: - bool program_code_hash_dirty = true; - bool swizzle_data_hash_dirty = true; - u64 program_code_hash = 0xDEADC0DE; - u64 swizzle_data_hash = 0xDEADC0DE; - - friend class boost::serialization::access; - template - void serialize(Archive& ar, const u32 file_version) { - ar& uniforms; - ar& program_code; - ar& swizzle_data; - ar& program_code_hash_dirty; - ar& swizzle_data_hash_dirty; - ar& program_code_hash; - ar& swizzle_data_hash; - } -}; +struct ShaderSetup; +struct ShaderUnit; class ShaderEngine { public: @@ 
-316,11 +27,9 @@ public: * @param setup Shader engine state, must be setup with SetupBatch on each shader change. * @param state Shader unit state, must be setup with input data before each shader invocation. */ - virtual void Run(const ShaderSetup& setup, UnitState& state) const = 0; + virtual void Run(const ShaderSetup& setup, ShaderUnit& state) const = 0; }; -// TODO(yuriks): Remove and make it non-global state somewhere -ShaderEngine* GetEngine(); -void Shutdown(); +std::unique_ptr CreateEngine(bool use_jit); -} // namespace Pica::Shader +} // namespace Pica diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 324e1f07a..04d9446f1 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include -#include #include #include #include @@ -14,9 +13,9 @@ #include "common/logging/log.h" #include "common/microprofile.h" #include "common/vector_math.h" -#include "video_core/pica_state.h" +#include "video_core/pica/shader_setup.h" +#include "video_core/pica/shader_unit.h" #include "video_core/pica_types.h" -#include "video_core/shader/shader.h" #include "video_core/shader/shader_interpreter.h" using nihstro::Instruction; @@ -46,8 +45,8 @@ struct LoopStackElement { }; template -static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData& debug_data, - unsigned entry_point) { +static void RunInterpreter(const ShaderSetup& setup, ShaderUnit& state, + DebugData& debug_data, unsigned entry_point) { boost::circular_buffer if_stack(8); boost::circular_buffer call_stack(4); boost::circular_buffer loop_stack(4); @@ -136,10 +135,10 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData int index = source_reg.GetIndex(); switch (source_reg.GetRegisterType()) { case RegisterType::Input: - return &state.registers.input[index].x; + return &state.input[index].x; case 
RegisterType::Temporary: - return &state.registers.temporary[index].x; + return &state.temporary[index].x; case RegisterType::FloatUniform: if (address_register_index != 0) { @@ -202,9 +201,9 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } f24* dest = (instr.common.dest.Value() < 0x10) - ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] + ? &state.output[instr.common.dest.Value().GetIndex()][0] : (instr.common.dest.Value() < 0x20) - ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] + ? &state.temporary[instr.common.dest.Value().GetIndex()][0] : dummy_vec4_float24_zeros; debug_data.max_opdesc_id = @@ -537,9 +536,9 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } f24* dest = (instr.mad.dest.Value() < 0x10) - ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] + ? &state.output[instr.mad.dest.Value().GetIndex()][0] : (instr.mad.dest.Value() < 0x20) - ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] + ? 
&state.temporary[instr.mad.dest.Value().GetIndex()][0] : dummy_vec4_float24_zeros; Record(debug_data, iteration, src1); @@ -652,14 +651,14 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } case OpCode::Id::EMIT: { - GSEmitter* emitter = state.emitter_ptr; + auto* emitter = state.emitter_ptr; ASSERT_MSG(emitter, "Execute EMIT on VS"); - emitter->Emit(state.registers.output); + emitter->Emit(state.output); break; } case OpCode::Id::SETEMIT: { - GSEmitter* emitter = state.emitter_ptr; + auto* emitter = state.emitter_ptr; ASSERT_MSG(emitter, "Execute SETEMIT on VS"); emitter->vertex_id = instr.setemit.vertex_id; emitter->prim_emit = instr.setemit.prim_emit != 0; @@ -726,29 +725,29 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); - setup.engine_data.entry_point = entry_point; + setup.entry_point = entry_point; } -MICROPROFILE_DECLARE(GPU_Shader); +MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const { +void InterpreterEngine::Run(const ShaderSetup& setup, ShaderUnit& state) const { MICROPROFILE_SCOPE(GPU_Shader); DebugData dummy_debug_data; - RunInterpreter(setup, state, dummy_debug_data, setup.engine_data.entry_point); + RunInterpreter(setup, state, dummy_debug_data, setup.entry_point); } DebugData InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input, const ShaderRegs& config) const { - UnitState state; + ShaderUnit state; DebugData debug_data; // Setup input register table - state.registers.input.fill(Common::Vec4::AssignToAll(f24::Zero())); + state.input.fill(Common::Vec4::AssignToAll(f24::Zero())); state.LoadInput(config, input); - RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); + RunInterpreter(setup, state, 
debug_data, setup.entry_point); return debug_data; } diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 978f1e34b..feef45491 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -4,15 +4,20 @@ #pragma once +#include "video_core/pica/output_vertex.h" #include "video_core/shader/debug_data.h" #include "video_core/shader/shader.h" +namespace Pica { +struct ShaderRegs; +} + namespace Pica::Shader { class InterpreterEngine final : public ShaderEngine { public: - void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override; - void Run(const ShaderSetup& setup, UnitState& state) const override; + void SetupBatch(ShaderSetup& setup, u32 entry_point) override; + void Run(const ShaderSetup& setup, ShaderUnit& state) const override; /** * Produce debug information based on the given shader and input vertex diff --git a/src/video_core/shader/shader_jit.cpp b/src/video_core/shader/shader_jit.cpp index dfefe37f6..644586226 100644 --- a/src/video_core/shader/shader_jit.cpp +++ b/src/video_core/shader/shader_jit.cpp @@ -6,6 +6,7 @@ #if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) #include "common/assert.h" +#include "common/hash.h" #include "common/microprofile.h" #include "video_core/shader/shader.h" #include "video_core/shader/shader_jit.h" @@ -23,7 +24,7 @@ JitEngine::~JitEngine() = default; void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) { ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); - setup.engine_data.entry_point = entry_point; + setup.entry_point = entry_point; const u64 code_hash = setup.GetProgramCodeHash(); const u64 swizzle_hash = setup.GetSwizzleDataHash(); @@ -31,24 +32,24 @@ void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) { const u64 cache_key = Common::HashCombine(code_hash, swizzle_hash); auto iter = cache.find(cache_key); if (iter != cache.end()) { - setup.engine_data.cached_shader = iter->second.get(); + 
setup.cached_shader = iter->second.get(); } else { auto shader = std::make_unique(); shader->Compile(&setup.program_code, &setup.swizzle_data); - setup.engine_data.cached_shader = shader.get(); + setup.cached_shader = shader.get(); cache.emplace_hint(iter, cache_key, std::move(shader)); } } MICROPROFILE_DECLARE(GPU_Shader); -void JitEngine::Run(const ShaderSetup& setup, UnitState& state) const { - ASSERT(setup.engine_data.cached_shader != nullptr); +void JitEngine::Run(const ShaderSetup& setup, ShaderUnit& state) const { + ASSERT(setup.cached_shader != nullptr); MICROPROFILE_SCOPE(GPU_Shader); - const JitShader* shader = static_cast(setup.engine_data.cached_shader); - shader->Run(setup, state, setup.engine_data.entry_point); + const JitShader* shader = static_cast(setup.cached_shader); + shader->Run(setup, state, setup.entry_point); } } // namespace Pica::Shader diff --git a/src/video_core/shader/shader_jit.h b/src/video_core/shader/shader_jit.h index 7f0b4758a..2f3e77b02 100644 --- a/src/video_core/shader/shader_jit.h +++ b/src/video_core/shader/shader_jit.h @@ -22,7 +22,7 @@ public: ~JitEngine() override; void SetupBatch(ShaderSetup& setup, u32 entry_point) override; - void Run(const ShaderSetup& setup, UnitState& state) const override; + void Run(const ShaderSetup& setup, ShaderUnit& state) const override; private: std::unordered_map> cache; diff --git a/src/video_core/shader/shader_jit_a64_compiler.cpp b/src/video_core/shader/shader_jit_a64_compiler.cpp index 2afc30a3d..95027c751 100644 --- a/src/video_core/shader/shader_jit_a64_compiler.cpp +++ b/src/video_core/shader/shader_jit_a64_compiler.cpp @@ -15,9 +15,8 @@ #include "common/assert.h" #include "common/logging/log.h" #include "common/vector_math.h" -#include "video_core/pica_state.h" +#include "video_core/pica/shader_unit.h" #include "video_core/pica_types.h" -#include "video_core/shader/shader.h" #include "video_core/shader/shader_jit_a64_compiler.h" using namespace Common::A64; @@ -174,11 +173,11 @@ void 
JitShader::Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegiste break; case RegisterType::Input: src_ptr = STATE; - src_offset = UnitState::InputOffset(src_reg.GetIndex()); + src_offset = ShaderUnit::InputOffset(src_reg.GetIndex()); break; case RegisterType::Temporary: src_ptr = STATE; - src_offset = UnitState::TemporaryOffset(src_reg.GetIndex()); + src_offset = ShaderUnit::TemporaryOffset(src_reg.GetIndex()); break; default: UNREACHABLE_MSG("Encountered unknown source register type: {}", src_reg.GetRegisterType()); @@ -317,10 +316,10 @@ void JitShader::Compile_DestEnable(Instruction instr, QReg src) { std::size_t dest_offset_disp; switch (dest.GetRegisterType()) { case RegisterType::Output: - dest_offset_disp = UnitState::OutputOffset(dest.GetIndex()); + dest_offset_disp = ShaderUnit::OutputOffset(dest.GetIndex()); break; case RegisterType::Temporary: - dest_offset_disp = UnitState::TemporaryOffset(dest.GetIndex()); + dest_offset_disp = ShaderUnit::TemporaryOffset(dest.GetIndex()); break; default: UNREACHABLE_MSG("Encountered unknown destination register type: {}", @@ -628,13 +627,13 @@ void JitShader::Compile_NOP(Instruction instr) {} void JitShader::Compile_END(Instruction instr) { // Save conditional code - STRB(COND0.toW(), STATE, u32(offsetof(UnitState, conditional_code[0]))); - STRB(COND1.toW(), STATE, u32(offsetof(UnitState, conditional_code[1]))); + STRB(COND0.toW(), STATE, u32(offsetof(ShaderUnit, conditional_code[0]))); + STRB(COND1.toW(), STATE, u32(offsetof(ShaderUnit, conditional_code[1]))); // Save address/loop registers STP(ADDROFFS_REG_0.toW(), ADDROFFS_REG_1.toW(), STATE, - u32(offsetof(UnitState, address_registers))); - STR(LOOPCOUNT_REG.toW(), STATE, u32(offsetof(UnitState, address_registers[2]))); + u32(offsetof(ShaderUnit, address_registers))); + STR(LOOPCOUNT_REG.toW(), STATE, u32(offsetof(ShaderUnit, address_registers[2]))); ABI_PopRegisters(*this, ABI_ALL_CALLEE_SAVED, 16); RET(); @@ -804,14 +803,14 @@ void 
JitShader::Compile_JMP(Instruction instr) { } } -static void Emit(GSEmitter* emitter, Common::Vec4 (*output)[16]) { +static void Emit(GeometryEmitter* emitter, Common::Vec4 (*output)[16]) { emitter->Emit(*output); } void JitShader::Compile_EMIT(Instruction instr) { Label have_emitter, end; - LDR(XSCRATCH0, STATE, u32(offsetof(UnitState, emitter_ptr))); + LDR(XSCRATCH0, STATE, u32(offsetof(ShaderUnit, emitter_ptr))); CBNZ(XSCRATCH0, have_emitter); ABI_PushRegisters(*this, PersistentCallerSavedRegs()); @@ -824,7 +823,7 @@ void JitShader::Compile_EMIT(Instruction instr) { ABI_PushRegisters(*this, PersistentCallerSavedRegs()); MOV(ABI_PARAM1, XSCRATCH0); MOV(ABI_PARAM2, STATE); - ADD(ABI_PARAM2, ABI_PARAM2, u32(offsetof(UnitState, registers.output))); + ADD(ABI_PARAM2, ABI_PARAM2, u32(offsetof(ShaderUnit, output))); CallFarFunction(*this, Emit); ABI_PopRegisters(*this, PersistentCallerSavedRegs()); l(end); @@ -833,7 +832,7 @@ void JitShader::Compile_EMIT(Instruction instr) { void JitShader::Compile_SETE(Instruction instr) { Label have_emitter, end; - LDR(XSCRATCH0, STATE, u32(offsetof(UnitState, emitter_ptr))); + LDR(XSCRATCH0, STATE, u32(offsetof(ShaderUnit, emitter_ptr))); CBNZ(XSCRATCH0, have_emitter); @@ -846,11 +845,11 @@ void JitShader::Compile_SETE(Instruction instr) { l(have_emitter); MOV(XSCRATCH1.toW(), instr.setemit.vertex_id); - STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GSEmitter, vertex_id))); + STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, vertex_id))); MOV(XSCRATCH1.toW(), instr.setemit.prim_emit); - STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GSEmitter, prim_emit))); + STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, prim_emit))); MOV(XSCRATCH1.toW(), instr.setemit.winding); - STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GSEmitter, winding))); + STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, winding))); l(end); } @@ -943,12 +942,12 @@ void JitShader::Compile(const std::array* program_ // Load 
address/loop registers LDP(ADDROFFS_REG_0.toW(), ADDROFFS_REG_1.toW(), STATE, - u32(offsetof(UnitState, address_registers))); - LDR(LOOPCOUNT_REG.toW(), STATE, u32(offsetof(UnitState, address_registers[2]))); + u32(offsetof(ShaderUnit, address_registers))); + LDR(LOOPCOUNT_REG.toW(), STATE, u32(offsetof(ShaderUnit, address_registers[2]))); //// Load conditional code - LDRB(COND0.toW(), STATE, u32(offsetof(UnitState, conditional_code[0]))); - LDRB(COND1.toW(), STATE, u32(offsetof(UnitState, conditional_code[1]))); + LDRB(COND0.toW(), STATE, u32(offsetof(ShaderUnit, conditional_code[0]))); + LDRB(COND1.toW(), STATE, u32(offsetof(ShaderUnit, conditional_code[1]))); // Used to set a register to one FMOV(ONE.S4(), FImm8(false, 7, 0)); diff --git a/src/video_core/shader/shader_jit_a64_compiler.h b/src/video_core/shader/shader_jit_a64_compiler.h index b47ef3665..823dd460e 100644 --- a/src/video_core/shader/shader_jit_a64_compiler.h +++ b/src/video_core/shader/shader_jit_a64_compiler.h @@ -17,13 +17,17 @@ #include #include #include "common/common_types.h" -#include "video_core/shader/shader.h" +#include "video_core/pica/shader_setup.h" using nihstro::Instruction; using nihstro::OpCode; using nihstro::SourceRegister; using nihstro::SwizzlePattern; +namespace Pica { +struct ShaderUnit; +} + namespace Pica::Shader { /// Memory allocated for each compiled shader @@ -37,7 +41,7 @@ class JitShader : private oaknut::CodeBlock, public oaknut::CodeGenerator { public: JitShader(); - void Run(const ShaderSetup& setup, UnitState& state, u32 offset) const { + void Run(const ShaderSetup& setup, ShaderUnit& state, u32 offset) const { program(&setup.uniforms, &state, instruction_labels[offset].ptr()); } diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index e5b808146..f04492eeb 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -5,9 +5,6 @@ #include 
"common/arch.h" #if CITRA_ARCH(x86_64) -#include -#include -#include #include #include #include @@ -18,9 +15,8 @@ #include "common/x64/cpu_detect.h" #include "common/x64/xbyak_abi.h" #include "common/x64/xbyak_util.h" -#include "video_core/pica_state.h" +#include "video_core/pica/shader_unit.h" #include "video_core/pica_types.h" -#include "video_core/shader/shader.h" #include "video_core/shader/shader_jit_x64_compiler.h" using namespace Common::X64; @@ -125,7 +121,7 @@ constexpr Reg32 LOOPINC = edi; constexpr Reg64 COND0 = r13; /// Result of the previous CMP instruction for the Y-component comparison constexpr Reg64 COND1 = r14; -/// Pointer to the UnitState instance for the current VS unit +/// Pointer to the ShaderUnit instance for the current VS unit constexpr Reg64 STATE = r15; /// SIMD scratch register constexpr Xmm SCRATCH = xmm0; @@ -198,11 +194,11 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegiste break; case RegisterType::Input: src_ptr = STATE; - src_offset = UnitState::InputOffset(src_reg.GetIndex()); + src_offset = ShaderUnit::InputOffset(src_reg.GetIndex()); break; case RegisterType::Temporary: src_ptr = STATE; - src_offset = UnitState::TemporaryOffset(src_reg.GetIndex()); + src_offset = ShaderUnit::TemporaryOffset(src_reg.GetIndex()); break; default: UNREACHABLE_MSG("Encountered unknown source register type: {}", src_reg.GetRegisterType()); @@ -312,10 +308,10 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { std::size_t dest_offset_disp; switch (dest.GetRegisterType()) { case RegisterType::Output: - dest_offset_disp = UnitState::OutputOffset(dest.GetIndex()); + dest_offset_disp = ShaderUnit::OutputOffset(dest.GetIndex()); break; case RegisterType::Temporary: - dest_offset_disp = UnitState::TemporaryOffset(dest.GetIndex()); + dest_offset_disp = ShaderUnit::TemporaryOffset(dest.GetIndex()); break; default: UNREACHABLE_MSG("Encountered unknown destination register type: {}", @@ -669,13 +665,13 @@ void 
JitShader::Compile_NOP(Instruction instr) {} void JitShader::Compile_END(Instruction instr) { // Save conditional code - mov(byte[STATE + offsetof(UnitState, conditional_code[0])], COND0.cvt8()); - mov(byte[STATE + offsetof(UnitState, conditional_code[1])], COND1.cvt8()); + mov(byte[STATE + offsetof(ShaderUnit, conditional_code[0])], COND0.cvt8()); + mov(byte[STATE + offsetof(ShaderUnit, conditional_code[1])], COND1.cvt8()); // Save address/loop registers - mov(dword[STATE + offsetof(UnitState, address_registers[0])], ADDROFFS_REG_0.cvt32()); - mov(dword[STATE + offsetof(UnitState, address_registers[1])], ADDROFFS_REG_1.cvt32()); - mov(dword[STATE + offsetof(UnitState, address_registers[2])], LOOPCOUNT_REG); + mov(dword[STATE + offsetof(ShaderUnit, address_registers[0])], ADDROFFS_REG_0.cvt32()); + mov(dword[STATE + offsetof(ShaderUnit, address_registers[1])], ADDROFFS_REG_1.cvt32()); + mov(dword[STATE + offsetof(ShaderUnit, address_registers[2])], LOOPCOUNT_REG); ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16); ret(); @@ -870,13 +866,13 @@ void JitShader::Compile_JMP(Instruction instr) { } } -static void Emit(GSEmitter* emitter, Common::Vec4 (*output)[16]) { +static void Emit(GeometryEmitter* emitter, Common::Vec4 (*output)[16]) { emitter->Emit(*output); } void JitShader::Compile_EMIT(Instruction instr) { Label have_emitter, end; - mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); + mov(rax, qword[STATE + offsetof(ShaderUnit, emitter_ptr)]); test(rax, rax); jnz(have_emitter); @@ -890,7 +886,7 @@ void JitShader::Compile_EMIT(Instruction instr) { ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(ABI_PARAM1, rax); mov(ABI_PARAM2, STATE); - add(ABI_PARAM2, static_cast(offsetof(UnitState, registers.output))); + add(ABI_PARAM2, static_cast(offsetof(ShaderUnit, output))); CallFarFunction(*this, Emit); ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); L(end); @@ -898,7 +894,7 @@ void 
JitShader::Compile_EMIT(Instruction instr) { void JitShader::Compile_SETE(Instruction instr) { Label have_emitter, end; - mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); + mov(rax, qword[STATE + offsetof(ShaderUnit, emitter_ptr)]); test(rax, rax); jnz(have_emitter); @@ -909,9 +905,9 @@ void JitShader::Compile_SETE(Instruction instr) { jmp(end); L(have_emitter); - mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id); - mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit); - mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding); + mov(byte[rax + offsetof(GeometryEmitter, vertex_id)], instr.setemit.vertex_id); + mov(byte[rax + offsetof(GeometryEmitter, prim_emit)], instr.setemit.prim_emit); + mov(byte[rax + offsetof(GeometryEmitter, winding)], instr.setemit.winding); L(end); } @@ -1001,13 +997,13 @@ void JitShader::Compile(const std::array* program_ mov(STATE, ABI_PARAM2); // Load address/loop registers - movsxd(ADDROFFS_REG_0, dword[STATE + offsetof(UnitState, address_registers[0])]); - movsxd(ADDROFFS_REG_1, dword[STATE + offsetof(UnitState, address_registers[1])]); - mov(LOOPCOUNT_REG, dword[STATE + offsetof(UnitState, address_registers[2])]); + movsxd(ADDROFFS_REG_0, dword[STATE + offsetof(ShaderUnit, address_registers[0])]); + movsxd(ADDROFFS_REG_1, dword[STATE + offsetof(ShaderUnit, address_registers[1])]); + mov(LOOPCOUNT_REG, dword[STATE + offsetof(ShaderUnit, address_registers[2])]); // Load conditional code - mov(COND0, byte[STATE + offsetof(UnitState, conditional_code[0])]); - mov(COND1, byte[STATE + offsetof(UnitState, conditional_code[1])]); + mov(COND0, byte[STATE + offsetof(ShaderUnit, conditional_code[0])]); + mov(COND1, byte[STATE + offsetof(ShaderUnit, conditional_code[1])]); // Used to set a register to one static const __m128 one = {1.f, 1.f, 1.f, 1.f}; diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 
1e782b53c..911183296 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -10,19 +10,21 @@ #include #include #include -#include -#include #include #include #include #include "common/common_types.h" -#include "video_core/shader/shader.h" +#include "video_core/pica/shader_setup.h" using nihstro::Instruction; using nihstro::OpCode; using nihstro::SourceRegister; using nihstro::SwizzlePattern; +namespace Pica { +struct ShaderUnit; +} + namespace Pica::Shader { /// Memory allocated for each compiled shader @@ -36,7 +38,7 @@ class JitShader : public Xbyak::CodeGenerator { public: JitShader(); - void Run(const ShaderSetup& setup, UnitState& state, u32 offset) const { + void Run(const ShaderSetup& setup, ShaderUnit& state, u32 offset) const { program(&setup.uniforms, &state, instruction_labels[offset].getAddress()); } diff --git a/src/video_core/texture/texture_decode.cpp b/src/video_core/texture/texture_decode.cpp index 90ce45726..8c5bea703 100644 --- a/src/video_core/texture/texture_decode.cpp +++ b/src/video_core/texture/texture_decode.cpp @@ -5,10 +5,9 @@ #include "common/assert.h" #include "common/color.h" #include "common/logging/log.h" -#include "common/math_util.h" #include "common/swap.h" #include "common/vector_math.h" -#include "video_core/regs_texturing.h" +#include "video_core/pica/regs_texturing.h" #include "video_core/texture/etc1.h" #include "video_core/texture/texture_decode.h" #include "video_core/utils.h" diff --git a/src/video_core/texture/texture_decode.h b/src/video_core/texture/texture_decode.h index 9e6c216f2..67ee03e5d 100644 --- a/src/video_core/texture/texture_decode.h +++ b/src/video_core/texture/texture_decode.h @@ -6,7 +6,7 @@ #include "common/common_types.h" #include "common/vector_math.h" -#include "video_core/regs_texturing.h" +#include "video_core/pica/regs_texturing.h" namespace Pica::Texture { @@ -15,8 +15,8 @@ size_t CalculateTileSize(TexturingRegs::TextureFormat format); 
struct TextureInfo { PAddr physical_address; - unsigned int width; - unsigned int height; + u32 width; + u32 height; ptrdiff_t stride; TexturingRegs::TextureFormat format; diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp deleted file mode 100644 index f0a2501ff..000000000 --- a/src/video_core/vertex_loader.cpp +++ /dev/null @@ -1,161 +0,0 @@ -#include -#include "common/alignment.h" -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "common/vector_math.h" -#include "core/memory.h" -#include "video_core/debug_utils/debug_utils.h" -#include "video_core/pica_state.h" -#include "video_core/pica_types.h" -#include "video_core/regs_pipeline.h" -#include "video_core/shader/shader.h" -#include "video_core/vertex_loader.h" -#include "video_core/video_core.h" - -namespace Pica { - -void VertexLoader::Setup(const PipelineRegs& regs) { - ASSERT_MSG(!is_setup, "VertexLoader is not intended to be setup more than once."); - - const auto& attribute_config = regs.vertex_attributes; - num_total_attributes = attribute_config.GetNumTotalAttributes(); - - vertex_attribute_sources.fill(0xdeadbeef); - - for (int i = 0; i < 16; i++) { - vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i); - } - - // Setup attribute data from loaders - for (int loader = 0; loader < 12; ++loader) { - const auto& loader_config = attribute_config.attribute_loaders[loader]; - - u32 offset = 0; - - // TODO: What happens if a loader overwrites a previous one's data? 
- for (unsigned component = 0; component < loader_config.component_count; ++component) { - if (component >= 12) { - LOG_ERROR(HW_GPU, - "Overflow in the vertex attribute loader {} trying to load component {}", - loader, component); - continue; - } - - u32 attribute_index = loader_config.GetComponent(component); - if (attribute_index < 12) { - offset = Common::AlignUp(offset, - attribute_config.GetElementSizeInBytes(attribute_index)); - vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; - vertex_attribute_strides[attribute_index] = - static_cast(loader_config.byte_count); - vertex_attribute_formats[attribute_index] = - attribute_config.GetFormat(attribute_index); - vertex_attribute_elements[attribute_index] = - attribute_config.GetNumElements(attribute_index); - offset += attribute_config.GetStride(attribute_index); - } else if (attribute_index < 16) { - // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, - // respectively - offset = Common::AlignUp(offset, 4); - offset += (attribute_index - 11) * 4; - } else { - UNREACHABLE(); // This is truly unreachable due to the number of bits for each - // component - } - } - } - - is_setup = true; -} - -void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, - Shader::AttributeBuffer& input, - DebugUtils::MemoryAccessTracker& memory_accesses) { - ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); - - for (int i = 0; i < num_total_attributes; ++i) { - if (vertex_attribute_elements[i] != 0) { - // Load per-vertex data from the loader arrays - u32 source_addr = - base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; - - if (g_debug_context && Pica::g_debug_context->recorder) { - memory_accesses.AddAccess( - source_addr, - vertex_attribute_elements[i] * - ((vertex_attribute_formats[i] == PipelineRegs::VertexAttributeFormat::FLOAT) - ? 
4 - : (vertex_attribute_formats[i] == - PipelineRegs::VertexAttributeFormat::SHORT) - ? 2 - : 1)); - } - - switch (vertex_attribute_formats[i]) { - case PipelineRegs::VertexAttributeFormat::BYTE: { - const s8* srcdata = reinterpret_cast( - VideoCore::g_memory->GetPhysicalPointer(source_addr)); - for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - input.attr[i][comp] = f24::FromFloat32(srcdata[comp]); - } - break; - } - case PipelineRegs::VertexAttributeFormat::UBYTE: { - const u8* srcdata = reinterpret_cast( - VideoCore::g_memory->GetPhysicalPointer(source_addr)); - for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - input.attr[i][comp] = f24::FromFloat32(srcdata[comp]); - } - break; - } - case PipelineRegs::VertexAttributeFormat::SHORT: { - const s16* srcdata = reinterpret_cast( - VideoCore::g_memory->GetPhysicalPointer(source_addr)); - for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - input.attr[i][comp] = f24::FromFloat32(srcdata[comp]); - } - break; - } - case PipelineRegs::VertexAttributeFormat::FLOAT: { - const float* srcdata = reinterpret_cast( - VideoCore::g_memory->GetPhysicalPointer(source_addr)); - for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - input.attr[i][comp] = f24::FromFloat32(srcdata[comp]); - } - break; - } - } - - // Default attribute values set if array elements have < 4 components. This - // is *not* carried over from the default attribute settings even if they're - // enabled for this attribute. - for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { - input.attr[i][comp] = comp == 3 ? 
f24::One() : f24::Zero(); - } - - LOG_TRACE(HW_GPU, - "Loaded {} components of attribute {:x} for vertex {:x} (index {:x}) from " - "0x{:08x} + 0x{:08x} + 0x{:04x}: {} {} {} {}", - vertex_attribute_elements[i], i, vertex, index, base_address, - vertex_attribute_sources[i], vertex_attribute_strides[i] * vertex, - input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), - input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); - } else if (vertex_attribute_is_default[i]) { - // Load the default attribute if we're configured to do so - input.attr[i] = g_state.input_default_attributes.attr[i]; - LOG_TRACE( - HW_GPU, - "Loaded default attribute {:x} for vertex {:x} (index {:x}): ({}, {}, {}, {})", i, - vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), - input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); - } else { - // TODO(yuriks): In this case, no data gets loaded and the vertex - // remains with the last value it had. This isn't currently maintained - // as global state, however, and so won't work in Citra yet. 
- } - } -} - -} // namespace Pica diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h deleted file mode 100644 index 02db10aee..000000000 --- a/src/video_core/vertex_loader.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include -#include "common/common_types.h" -#include "video_core/regs_pipeline.h" - -namespace Pica { - -namespace DebugUtils { -class MemoryAccessTracker; -} - -namespace Shader { -struct AttributeBuffer; -} - -class VertexLoader { -public: - VertexLoader() = default; - explicit VertexLoader(const PipelineRegs& regs) { - Setup(regs); - } - - void Setup(const PipelineRegs& regs); - void LoadVertex(u32 base_address, int index, int vertex, Shader::AttributeBuffer& input, - DebugUtils::MemoryAccessTracker& memory_accesses); - - int GetNumTotalAttributes() const { - return num_total_attributes; - } - -private: - std::array vertex_attribute_sources; - std::array vertex_attribute_strides{}; - std::array vertex_attribute_formats; - std::array vertex_attribute_elements{}; - std::array vertex_attribute_is_default; - int num_total_attributes = 0; - bool is_setup = false; -}; - -} // namespace Pica diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index e8f090a0f..51aa62282 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -2,15 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include -#include "common/archives.h" #include "common/logging/log.h" #include "common/settings.h" -#include "core/core.h" -#include "core/frontend/emu_window.h" -#include "video_core/pica.h" -#include "video_core/pica_state.h" -#include "video_core/renderer_base.h" +#include "video_core/gpu.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_software/renderer_software.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -18,50 +12,21 @@ namespace VideoCore { -std::unique_ptr g_renderer{}; ///< Renderer plugin - -std::atomic g_shader_jit_enabled; -std::atomic g_hw_shader_enabled; -std::atomic g_hw_shader_accurate_mul; - -Memory::MemorySystem* g_memory; - -/// Initialize the video core -void Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window, - Core::System& system) { - g_memory = &system.Memory(); - Pica::Init(); - +std::unique_ptr CreateRenderer(Frontend::EmuWindow& emu_window, + Frontend::EmuWindow* secondary_window, + Pica::PicaCore& pica, Core::System& system) { const Settings::GraphicsAPI graphics_api = Settings::values.graphics_api.GetValue(); switch (graphics_api) { case Settings::GraphicsAPI::Software: - g_renderer = std::make_unique(system, emu_window); - break; + return std::make_unique(system, pica, emu_window); case Settings::GraphicsAPI::Vulkan: - g_renderer = std::make_unique(system, emu_window, secondary_window); - break; + return std::make_unique(system, pica, emu_window, secondary_window); case Settings::GraphicsAPI::OpenGL: - g_renderer = std::make_unique(system, emu_window, secondary_window); - break; + return std::make_unique(system, pica, emu_window, secondary_window); default: LOG_CRITICAL(Render, "Unknown graphics API {}, using OpenGL", graphics_api); - g_renderer = std::make_unique(system, emu_window, secondary_window); + return std::make_unique(system, pica, emu_window, secondary_window); } } -/// Shutdown the video core -void Shutdown() { - Pica::Shutdown(); - 
g_renderer.reset(); - - LOG_DEBUG(Render, "shutdown OK"); -} - -template -void serialize(Archive& ar, const unsigned int) { - ar& Pica::g_state; -} - } // namespace VideoCore - -SERIALIZE_IMPL(VideoCore) diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 94620b958..1275b0b46 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -4,8 +4,6 @@ #pragma once -#include -#include #include namespace Frontend { @@ -16,32 +14,16 @@ namespace Core { class System; } -namespace Memory { -class MemorySystem; +namespace Pica { +class PicaCore; } namespace VideoCore { class RendererBase; -extern std::unique_ptr g_renderer; ///< Renderer plugin - -// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from -// qt ui) -extern std::atomic g_shader_jit_enabled; -extern std::atomic g_hw_shader_enabled; -extern std::atomic g_hw_shader_accurate_mul; - -extern Memory::MemorySystem* g_memory; - -/// Initialize the video core -void Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window, - Core::System& system); - -/// Shutdown the video core -void Shutdown(); - -template -void serialize(Archive& ar, const unsigned int file_version); +std::unique_ptr CreateRenderer(Frontend::EmuWindow& emu_window, + Frontend::EmuWindow* secondary_window, + Pica::PicaCore& pica, Core::System& system); } // namespace VideoCore