Core Timing: Rework Core Timing to run all cores evenly.
This commit is contained in:
		
				
					committed by
					
						 FernandoS27
						FernandoS27
					
				
			
			
				
	
			
			
			
						parent
						
							e664c24355
						
					
				
				
					commit
					555866f8dc
				
			| @@ -116,7 +116,7 @@ public: | ||||
|         num_interpreted_instructions = 0; | ||||
|     } | ||||
|     u64 GetTicksRemaining() override { | ||||
|         return std::max(parent.system.CoreTiming().GetDowncount(), 0); | ||||
|         return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0LL); | ||||
|     } | ||||
|     u64 GetCNTPCT() override { | ||||
|         return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); | ||||
|   | ||||
| @@ -156,7 +156,7 @@ void ARM_Unicorn::Run() { | ||||
|     if (GDBStub::IsServerEnabled()) { | ||||
|         ExecuteInstructions(std::max(4000000, 0)); | ||||
|     } else { | ||||
|         ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0)); | ||||
|         ExecuteInstructions(std::max<s64>(system.CoreTiming().GetDowncount(), 0LL)); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -85,24 +85,16 @@ void Cpu::RunLoop(bool tight_loop) { | ||||
|     // instead advance to the next event and try to yield to the next thread | ||||
|     if (Kernel::GetCurrentThread() == nullptr) { | ||||
|         LOG_TRACE(Core, "Core-{} idling", core_index); | ||||
|  | ||||
|         if (IsMainCore()) { | ||||
|             // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling. | ||||
|             core_timing.Idle(); | ||||
|             core_timing.Advance(); | ||||
|         } | ||||
|  | ||||
|         core_timing.Idle(); | ||||
|         core_timing.Advance(); | ||||
|         PrepareReschedule(); | ||||
|     } else { | ||||
|         if (IsMainCore()) { | ||||
|             core_timing.Advance(); | ||||
|         } | ||||
|  | ||||
|         if (tight_loop) { | ||||
|             arm_interface->Run(); | ||||
|         } else { | ||||
|             arm_interface->Step(); | ||||
|         } | ||||
|         core_timing.Advance(); | ||||
|     } | ||||
|  | ||||
|     Reschedule(); | ||||
|   | ||||
| @@ -15,7 +15,7 @@ | ||||
|  | ||||
| namespace Core::Timing { | ||||
|  | ||||
| constexpr int MAX_SLICE_LENGTH = 20000; | ||||
| constexpr int MAX_SLICE_LENGTH = 10000; | ||||
|  | ||||
| struct CoreTiming::Event { | ||||
|     s64 time; | ||||
| @@ -38,10 +38,14 @@ CoreTiming::CoreTiming() = default; | ||||
| CoreTiming::~CoreTiming() = default; | ||||
|  | ||||
| void CoreTiming::Initialize() { | ||||
|     downcount = MAX_SLICE_LENGTH; | ||||
|     for (std::size_t core = 0; core < num_cpu_cores; core++) { | ||||
|         downcounts[core] = MAX_SLICE_LENGTH; | ||||
|         time_slice[core] = MAX_SLICE_LENGTH; | ||||
|     } | ||||
|     slice_length = MAX_SLICE_LENGTH; | ||||
|     global_timer = 0; | ||||
|     idled_cycles = 0; | ||||
|     current_context = 0; | ||||
|  | ||||
|     // The time between CoreTiming being initialized and the first call to Advance() is considered | ||||
|     // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before | ||||
| @@ -110,7 +114,7 @@ void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) { | ||||
| u64 CoreTiming::GetTicks() const { | ||||
|     u64 ticks = static_cast<u64>(global_timer); | ||||
|     if (!is_global_timer_sane) { | ||||
|         ticks += slice_length - downcount; | ||||
|         ticks += time_slice[current_context] - downcounts[current_context]; | ||||
|     } | ||||
|     return ticks; | ||||
| } | ||||
| @@ -120,7 +124,7 @@ u64 CoreTiming::GetIdleTicks() const { | ||||
| } | ||||
|  | ||||
| void CoreTiming::AddTicks(u64 ticks) { | ||||
|     downcount -= static_cast<int>(ticks); | ||||
|     downcounts[current_context] -= static_cast<s64>(ticks); | ||||
| } | ||||
|  | ||||
| void CoreTiming::ClearPendingEvents() { | ||||
| @@ -141,22 +145,36 @@ void CoreTiming::RemoveEvent(const EventType* event_type) { | ||||
|  | ||||
| void CoreTiming::ForceExceptionCheck(s64 cycles) { | ||||
|     cycles = std::max<s64>(0, cycles); | ||||
|     if (downcount <= cycles) { | ||||
|     if (downcounts[current_context] <= cycles) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int | ||||
|     // here. Account for cycles already executed by adjusting slice_length. | ||||
|     slice_length -= downcount - static_cast<int>(cycles); | ||||
|     downcount = static_cast<int>(cycles); | ||||
|     slice_length -= downcounts[current_context] - static_cast<int>(cycles); | ||||
|     downcounts[current_context] = static_cast<int>(cycles); | ||||
| } | ||||
|  | ||||
| std::optional<u64> CoreTiming::NextAvailableCore(const s64 needed_ticks) const { | ||||
|     const u64 original_context = current_context; | ||||
|     u64 next_context = (original_context + 1) % num_cpu_cores; | ||||
|     while (next_context != original_context) { | ||||
|         if (time_slice[next_context] >= needed_ticks) { | ||||
|             return {next_context}; | ||||
|         } else if (time_slice[next_context] >= 0) { | ||||
|             return {}; | ||||
|         } | ||||
|         next_context = (next_context + 1) % num_cpu_cores; | ||||
|     } | ||||
|     return {}; | ||||
| } | ||||
|  | ||||
| void CoreTiming::Advance() { | ||||
|     std::unique_lock<std::mutex> guard(inner_mutex); | ||||
|  | ||||
|     const int cycles_executed = slice_length - downcount; | ||||
|     const int cycles_executed = time_slice[current_context] - downcounts[current_context]; | ||||
|     time_slice[current_context] = std::max<s64>(0, downcounts[current_context]); | ||||
|     global_timer += cycles_executed; | ||||
|     slice_length = MAX_SLICE_LENGTH; | ||||
|  | ||||
|     is_global_timer_sane = true; | ||||
|  | ||||
| @@ -173,24 +191,40 @@ void CoreTiming::Advance() { | ||||
|  | ||||
|     // Still events left (scheduled in the future) | ||||
|     if (!event_queue.empty()) { | ||||
|         slice_length = static_cast<int>( | ||||
|             std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH)); | ||||
|         s64 needed_ticks = std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); | ||||
|         const auto next_core = NextAvailableCore(needed_ticks); | ||||
|         if (next_core) { | ||||
|             downcounts[*next_core] = needed_ticks; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     downcount = slice_length; | ||||
|     downcounts[current_context] = time_slice[current_context]; | ||||
| } | ||||
|  | ||||
| void CoreTiming::ResetRun() { | ||||
|     for (std::size_t core = 0; core < num_cpu_cores; core++) { | ||||
|         downcounts[core] = MAX_SLICE_LENGTH; | ||||
|         time_slice[core] = MAX_SLICE_LENGTH; | ||||
|     } | ||||
|     current_context = 0; | ||||
|     // Still events left (scheduled in the future) | ||||
|     if (!event_queue.empty()) { | ||||
|         s64 needed_ticks = std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); | ||||
|         downcounts[current_context] = needed_ticks; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void CoreTiming::Idle() { | ||||
|     idled_cycles += downcount; | ||||
|     downcount = 0; | ||||
|     idled_cycles += downcounts[current_context]; | ||||
|     downcounts[current_context] = 0; | ||||
| } | ||||
|  | ||||
| std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { | ||||
|     return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE}; | ||||
| } | ||||
|  | ||||
| int CoreTiming::GetDowncount() const { | ||||
|     return downcount; | ||||
| s64 CoreTiming::GetDowncount() const { | ||||
|     return downcounts[current_context]; | ||||
| } | ||||
|  | ||||
| } // namespace Core::Timing | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include <chrono> | ||||
| #include <functional> | ||||
| #include <mutex> | ||||
| #include <optional> | ||||
| #include <string> | ||||
| #include <unordered_map> | ||||
| #include <vector> | ||||
| @@ -104,7 +105,19 @@ public: | ||||
|  | ||||
|     std::chrono::microseconds GetGlobalTimeUs() const; | ||||
|  | ||||
|     int GetDowncount() const; | ||||
|     void ResetRun(); | ||||
|  | ||||
|     s64 GetDowncount() const; | ||||
|  | ||||
|     void SwitchContext(u64 new_context) { | ||||
|         current_context = new_context; | ||||
|     } | ||||
|  | ||||
|     bool CurrentContextCanRun() const { | ||||
|         return time_slice[current_context] > 0; | ||||
|     } | ||||
|  | ||||
|     std::optional<u64> NextAvailableCore(const s64 needed_ticks) const; | ||||
|  | ||||
| private: | ||||
|     struct Event; | ||||
| @@ -112,10 +125,15 @@ private: | ||||
|     /// Clear all pending events. This should ONLY be done on exit. | ||||
|     void ClearPendingEvents(); | ||||
|  | ||||
|     static constexpr u64 num_cpu_cores = 4; | ||||
|  | ||||
|     s64 global_timer = 0; | ||||
|     s64 idled_cycles = 0; | ||||
|     int slice_length = 0; | ||||
|     int downcount = 0; | ||||
|     s64 slice_length = 0; | ||||
|     std::array<s64, num_cpu_cores> downcounts{}; | ||||
|     // Slice of time assigned to each core per run. | ||||
|     std::array<s64, num_cpu_cores> time_slice{}; | ||||
|     u64 current_context = 0; | ||||
|  | ||||
|     // Are we in a function that has been called from Advance() | ||||
|     // If events are scheduled from a function that gets called from Advance(), | ||||
|   | ||||
| @@ -6,6 +6,7 @@ | ||||
| #include "core/arm/exclusive_monitor.h" | ||||
| #include "core/core.h" | ||||
| #include "core/core_cpu.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/cpu_core_manager.h" | ||||
| #include "core/gdbstub/gdbstub.h" | ||||
| #include "core/settings.h" | ||||
| @@ -122,13 +123,19 @@ void CpuCoreManager::RunLoop(bool tight_loop) { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { | ||||
|         cores[active_core]->RunLoop(tight_loop); | ||||
|         if (Settings::values.use_multi_core) { | ||||
|             // Cores 1-3 are run on other threads in this mode | ||||
|             break; | ||||
|     auto& core_timing = system.CoreTiming(); | ||||
|     core_timing.ResetRun(); | ||||
|     bool keep_running{}; | ||||
|     do { | ||||
|         keep_running = false; | ||||
|         for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { | ||||
|             core_timing.SwitchContext(active_core); | ||||
|             if (core_timing.CurrentContextCanRun()) { | ||||
|                 cores[active_core]->RunLoop(tight_loop); | ||||
|             } | ||||
|             keep_running |= core_timing.CurrentContextCanRun(); | ||||
|         } | ||||
|     } | ||||
|     } while (keep_running); | ||||
|  | ||||
|     if (GDBStub::IsServerEnabled()) { | ||||
|         GDBStub::SetCpuStepFlag(false); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user