Improve core timing accuracy (#5257)
* Improve core timing accuracy * remove wrong global_ticks, use biggest ticks over all cores for GetGlobalTicks * merge max slice length change
This commit is contained in:
		| @@ -148,7 +148,11 @@ System::ResultStatus System::RunLoop(bool tight_loop) { | |||||||
|     for (auto& cpu_core : cpu_cores) { |     for (auto& cpu_core : cpu_cores) { | ||||||
|         if (cpu_core->GetTimer().GetTicks() < global_ticks) { |         if (cpu_core->GetTimer().GetTicks() < global_ticks) { | ||||||
|             s64 delay = global_ticks - cpu_core->GetTimer().GetTicks(); |             s64 delay = global_ticks - cpu_core->GetTimer().GetTicks(); | ||||||
|             cpu_core->GetTimer().Advance(delay); |             kernel->SetRunningCPU(cpu_core.get()); | ||||||
|  |             cpu_core->GetTimer().Advance(); | ||||||
|  |             cpu_core->PrepareReschedule(); | ||||||
|  |             kernel->GetThreadManager(cpu_core->GetID()).Reschedule(); | ||||||
|  |             cpu_core->GetTimer().SetNextSlice(delay); | ||||||
|             if (max_delay < delay) { |             if (max_delay < delay) { | ||||||
|                 max_delay = delay; |                 max_delay = delay; | ||||||
|                 current_core_to_execute = cpu_core.get(); |                 current_core_to_execute = cpu_core.get(); | ||||||
| @@ -156,10 +160,14 @@ System::ResultStatus System::RunLoop(bool tight_loop) { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (max_delay > 0) { |     // jit sometimes overshoots by a few ticks which might lead to a minimal desync in the cores. | ||||||
|  |     // This small difference shouldn't make it necessary to sync the cores and would only cost | ||||||
|  |     // performance. Thus we don't sync delays below min_delay | ||||||
|  |     static constexpr s64 min_delay = 100; | ||||||
|  |     if (max_delay > min_delay) { | ||||||
|         LOG_TRACE(Core_ARM11, "Core {} running (delayed) for {} ticks", |         LOG_TRACE(Core_ARM11, "Core {} running (delayed) for {} ticks", | ||||||
|                   current_core_to_execute->GetID(), |                      current_core_to_execute->GetID(), | ||||||
|                   current_core_to_execute->GetTimer().GetDowncount()); |                      current_core_to_execute->GetTimer().GetDowncount()); | ||||||
|         if (running_core != current_core_to_execute) { |         if (running_core != current_core_to_execute) { | ||||||
|             running_core = current_core_to_execute; |             running_core = current_core_to_execute; | ||||||
|             kernel->SetRunningCPU(running_core); |             kernel->SetRunningCPU(running_core); | ||||||
| @@ -181,12 +189,15 @@ System::ResultStatus System::RunLoop(bool tight_loop) { | |||||||
|         // TODO: Make special check for idle since we can easily revert the time of idle cores |         // TODO: Make special check for idle since we can easily revert the time of idle cores | ||||||
|         s64 max_slice = Timing::MAX_SLICE_LENGTH; |         s64 max_slice = Timing::MAX_SLICE_LENGTH; | ||||||
|         for (const auto& cpu_core : cpu_cores) { |         for (const auto& cpu_core : cpu_cores) { | ||||||
|  |             kernel->SetRunningCPU(cpu_core.get()); | ||||||
|  |             cpu_core->GetTimer().Advance(); | ||||||
|  |             cpu_core->PrepareReschedule(); | ||||||
|  |             kernel->GetThreadManager(cpu_core->GetID()).Reschedule(); | ||||||
|             max_slice = std::min(max_slice, cpu_core->GetTimer().GetMaxSliceLength()); |             max_slice = std::min(max_slice, cpu_core->GetTimer().GetMaxSliceLength()); | ||||||
|         } |         } | ||||||
|         for (auto& cpu_core : cpu_cores) { |         for (auto& cpu_core : cpu_cores) { | ||||||
|             cpu_core->GetTimer().Advance(max_slice); |             cpu_core->GetTimer().SetNextSlice(max_slice); | ||||||
|         } |             auto start_ticks = cpu_core->GetTimer().GetTicks(); | ||||||
|         for (auto& cpu_core : cpu_cores) { |  | ||||||
|             LOG_TRACE(Core_ARM11, "Core {} running for {} ticks", cpu_core->GetID(), |             LOG_TRACE(Core_ARM11, "Core {} running for {} ticks", cpu_core->GetID(), | ||||||
|                       cpu_core->GetTimer().GetDowncount()); |                       cpu_core->GetTimer().GetDowncount()); | ||||||
|             running_core = cpu_core.get(); |             running_core = cpu_core.get(); | ||||||
| @@ -204,8 +215,8 @@ System::ResultStatus System::RunLoop(bool tight_loop) { | |||||||
|                     cpu_core->Step(); |                     cpu_core->Step(); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |             max_slice = cpu_core->GetTimer().GetTicks() - start_ticks; | ||||||
|         } |         } | ||||||
|         timing->AddToGlobalTicks(max_slice); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (GDBStub::IsServerEnabled()) { |     if (GDBStub::IsServerEnabled()) { | ||||||
|   | |||||||
| @@ -111,11 +111,15 @@ s64 Timing::GetTicks() const { | |||||||
| } | } | ||||||
|  |  | ||||||
| s64 Timing::GetGlobalTicks() const { | s64 Timing::GetGlobalTicks() const { | ||||||
|     return global_timer; |     const auto& timer = | ||||||
|  |         std::max_element(timers.cbegin(), timers.cend(), [](const auto& a, const auto& b) { | ||||||
|  |             return a->GetTicks() < b->GetTicks(); | ||||||
|  |         }); | ||||||
|  |     return (*timer)->GetTicks(); | ||||||
| } | } | ||||||
|  |  | ||||||
| std::chrono::microseconds Timing::GetGlobalTimeUs() const { | std::chrono::microseconds Timing::GetGlobalTimeUs() const { | ||||||
|     return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE_ARM11}; |     return std::chrono::microseconds{GetGlobalTicks() * 1000000 / BASE_CLOCK_RATE_ARM11}; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::shared_ptr<Timing::Timer> Timing::GetTimer(std::size_t cpu_id) { | std::shared_ptr<Timing::Timer> Timing::GetTimer(std::size_t cpu_id) { | ||||||
| @@ -161,21 +165,22 @@ void Timing::Timer::MoveEvents() { | |||||||
| } | } | ||||||
|  |  | ||||||
| s64 Timing::Timer::GetMaxSliceLength() const { | s64 Timing::Timer::GetMaxSliceLength() const { | ||||||
|     auto next_event = std::find_if(event_queue.begin(), event_queue.end(), |     const auto& next_event = event_queue.begin(); | ||||||
|                                    [&](const Event& e) { return e.time - executed_ticks > 0; }); |  | ||||||
|     if (next_event != event_queue.end()) { |     if (next_event != event_queue.end()) { | ||||||
|  |         ASSERT(next_event->time - executed_ticks > 0); | ||||||
|         return next_event->time - executed_ticks; |         return next_event->time - executed_ticks; | ||||||
|     } |     } | ||||||
|     return MAX_SLICE_LENGTH; |     return MAX_SLICE_LENGTH; | ||||||
| } | } | ||||||
|  |  | ||||||
| void Timing::Timer::Advance(s64 max_slice_length) { | void Timing::Timer::Advance() { | ||||||
|     MoveEvents(); |     MoveEvents(); | ||||||
|  |  | ||||||
|     s64 cycles_executed = slice_length - downcount; |     s64 cycles_executed = slice_length - downcount; | ||||||
|     idled_cycles = 0; |     idled_cycles = 0; | ||||||
|     executed_ticks += cycles_executed; |     executed_ticks += cycles_executed; | ||||||
|     slice_length = max_slice_length; |     slice_length = 0; | ||||||
|  |     downcount = 0; | ||||||
|  |  | ||||||
|     is_timer_sane = true; |     is_timer_sane = true; | ||||||
|  |  | ||||||
| @@ -191,6 +196,10 @@ void Timing::Timer::Advance(s64 max_slice_length) { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     is_timer_sane = false; |     is_timer_sane = false; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Timing::Timer::SetNextSlice(s64 max_slice_length) { | ||||||
|  |     slice_length = max_slice_length; | ||||||
|  |  | ||||||
|     // Still events left (scheduled in the future) |     // Still events left (scheduled in the future) | ||||||
|     if (!event_queue.empty()) { |     if (!event_queue.empty()) { | ||||||
|   | |||||||
| @@ -171,7 +171,13 @@ public: | |||||||
|         BOOST_SERIALIZATION_SPLIT_MEMBER() |         BOOST_SERIALIZATION_SPLIT_MEMBER() | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     static constexpr int MAX_SLICE_LENGTH = 20000; |     // currently Service::HID::pad_update_ticks is the smallest interval for an event that gets | ||||||
|  |     // always scheduled. Therefore we use this as orientation for the MAX_SLICE_LENGTH | ||||||
|  |     // For performance bigger slice lengths are desired, though this will lead to core desync | ||||||
|  |     // But we never want to schedule events into the current slice, because then cores might have to | ||||||
|  |     // run small slices to sync up again. This is especially important for events that are always | ||||||
|  |     // scheduled and repeated. | ||||||
|  |     static constexpr int MAX_SLICE_LENGTH = BASE_CLOCK_RATE_ARM11 / 234; | ||||||
|  |  | ||||||
|     class Timer { |     class Timer { | ||||||
|     public: |     public: | ||||||
| @@ -180,7 +186,9 @@ public: | |||||||
|  |  | ||||||
|         s64 GetMaxSliceLength() const; |         s64 GetMaxSliceLength() const; | ||||||
|  |  | ||||||
|         void Advance(s64 max_slice_length = MAX_SLICE_LENGTH); |         void Advance(); | ||||||
|  |  | ||||||
|  |         void SetNextSlice(s64 max_slice_length = MAX_SLICE_LENGTH); | ||||||
|  |  | ||||||
|         void Idle(); |         void Idle(); | ||||||
|  |  | ||||||
| @@ -227,6 +235,9 @@ public: | |||||||
|         void serialize(Archive& ar, const unsigned int) { |         void serialize(Archive& ar, const unsigned int) { | ||||||
|             MoveEvents(); |             MoveEvents(); | ||||||
|             // NOTE: ts_queue should be empty now |             // NOTE: ts_queue should be empty now | ||||||
|  |             // TODO(SaveState): Remove the next two lines when we break compatibility | ||||||
|  |             s64 x; | ||||||
|  |             ar& x; // to keep compatibility with old save states that stored global_timer | ||||||
|             ar& event_queue; |             ar& event_queue; | ||||||
|             ar& event_fifo_id; |             ar& event_fifo_id; | ||||||
|             ar& slice_length; |             ar& slice_length; | ||||||
| @@ -260,10 +271,6 @@ public: | |||||||
|  |  | ||||||
|     s64 GetGlobalTicks() const; |     s64 GetGlobalTicks() const; | ||||||
|  |  | ||||||
|     void AddToGlobalTicks(s64 ticks) { |  | ||||||
|         global_timer += ticks; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Updates the value of the cpu clock scaling to the new percentage. |      * Updates the value of the cpu clock scaling to the new percentage. | ||||||
|      */ |      */ | ||||||
| @@ -274,8 +281,6 @@ public: | |||||||
|     std::shared_ptr<Timer> GetTimer(std::size_t cpu_id); |     std::shared_ptr<Timer> GetTimer(std::size_t cpu_id); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     s64 global_timer = 0; |  | ||||||
|  |  | ||||||
|     // unordered_map stores each element separately as a linked list node so pointers to |     // unordered_map stores each element separately as a linked list node so pointers to | ||||||
|     // elements remain stable regardless of rehashes/resizing. |     // elements remain stable regardless of rehashes/resizing. | ||||||
|     std::unordered_map<std::string, TimingEventType> event_types = {}; |     std::unordered_map<std::string, TimingEventType> event_types = {}; | ||||||
| @@ -290,7 +295,6 @@ private: | |||||||
|     template <class Archive> |     template <class Archive> | ||||||
|     void serialize(Archive& ar, const unsigned int file_version) { |     void serialize(Archive& ar, const unsigned int file_version) { | ||||||
|         // event_types set during initialization of other things |         // event_types set during initialization of other things | ||||||
|         ar& global_timer; |  | ||||||
|         ar& timers; |         ar& timers; | ||||||
|         if (file_version == 0) { |         if (file_version == 0) { | ||||||
|             std::shared_ptr<Timer> x; |             std::shared_ptr<Timer> x; | ||||||
|   | |||||||
| @@ -111,7 +111,7 @@ void ThreadManager::SwitchContext(Thread* new_thread) { | |||||||
|     // Save context for previous thread |     // Save context for previous thread | ||||||
|     if (previous_thread) { |     if (previous_thread) { | ||||||
|         previous_process = previous_thread->owner_process; |         previous_process = previous_thread->owner_process; | ||||||
|         previous_thread->last_running_ticks = timing.GetGlobalTicks(); |         previous_thread->last_running_ticks = cpu->GetTimer().GetTicks(); | ||||||
|         cpu->SaveContext(previous_thread->context); |         cpu->SaveContext(previous_thread->context); | ||||||
|  |  | ||||||
|         if (previous_thread->status == ThreadStatus::Running) { |         if (previous_thread->status == ThreadStatus::Running) { | ||||||
| @@ -344,7 +344,7 @@ ResultVal<std::shared_ptr<Thread>> KernelSystem::CreateThread( | |||||||
|     thread->entry_point = entry_point; |     thread->entry_point = entry_point; | ||||||
|     thread->stack_top = stack_top; |     thread->stack_top = stack_top; | ||||||
|     thread->nominal_priority = thread->current_priority = priority; |     thread->nominal_priority = thread->current_priority = priority; | ||||||
|     thread->last_running_ticks = timing.GetGlobalTicks(); |     thread->last_running_ticks = timing.GetTimer(processor_id)->GetTicks(); | ||||||
|     thread->processor_id = processor_id; |     thread->processor_id = processor_id; | ||||||
|     thread->wait_objects.clear(); |     thread->wait_objects.clear(); | ||||||
|     thread->wait_address = 0; |     thread->wait_address = 0; | ||||||
|   | |||||||
| @@ -13,7 +13,8 @@ | |||||||
|  |  | ||||||
| // Numbers are chosen randomly to make sure the correct one is given. | // Numbers are chosen randomly to make sure the correct one is given. | ||||||
| static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; | static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; | ||||||
| static constexpr int MAX_SLICE_LENGTH = 20000; // Copied from CoreTiming internals | static constexpr int MAX_SLICE_LENGTH = | ||||||
|  |     BASE_CLOCK_RATE_ARM11 / 234; // Copied from CoreTiming internals | ||||||
|  |  | ||||||
| static std::bitset<CB_IDS.size()> callbacks_ran_flags; | static std::bitset<CB_IDS.size()> callbacks_ran_flags; | ||||||
| static u64 expected_callback = 0; | static u64 expected_callback = 0; | ||||||
| @@ -36,7 +37,9 @@ static void AdvanceAndCheck(Core::Timing& timing, u32 idx, int downcount, int ex | |||||||
|  |  | ||||||
|     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount() - |     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount() - | ||||||
|                                  cpu_downcount); // Pretend we executed X cycles of instructions. |                                  cpu_downcount); // Pretend we executed X cycles of instructions. | ||||||
|  |  | ||||||
|     timing.GetTimer(0)->Advance(); |     timing.GetTimer(0)->Advance(); | ||||||
|  |     timing.GetTimer(0)->SetNextSlice(); | ||||||
|  |  | ||||||
|     REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); |     REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); | ||||||
|     REQUIRE(downcount == timing.GetTimer(0)->GetDowncount()); |     REQUIRE(downcount == timing.GetTimer(0)->GetDowncount()); | ||||||
| @@ -53,6 +56,7 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") { | |||||||
|  |  | ||||||
|     // Enter slice 0 |     // Enter slice 0 | ||||||
|     timing.GetTimer(0)->Advance(); |     timing.GetTimer(0)->Advance(); | ||||||
|  |     timing.GetTimer(0)->SetNextSlice(); | ||||||
|  |  | ||||||
|     // D -> B -> C -> A -> E |     // D -> B -> C -> A -> E | ||||||
|     timing.ScheduleEvent(1000, cb_a, CB_IDS[0], 0); |     timing.ScheduleEvent(1000, cb_a, CB_IDS[0], 0); | ||||||
| @@ -106,6 +110,7 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") { | |||||||
|  |  | ||||||
|     // Enter slice 0 |     // Enter slice 0 | ||||||
|     timing.GetTimer(0)->Advance(); |     timing.GetTimer(0)->Advance(); | ||||||
|  |     timing.GetTimer(0)->SetNextSlice(); | ||||||
|     REQUIRE(1000 == timing.GetTimer(0)->GetDowncount()); |     REQUIRE(1000 == timing.GetTimer(0)->GetDowncount()); | ||||||
|  |  | ||||||
|     callbacks_ran_flags = 0; |     callbacks_ran_flags = 0; | ||||||
| @@ -113,6 +118,7 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") { | |||||||
|     lateness = 0; |     lateness = 0; | ||||||
|     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount()); |     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount()); | ||||||
|     timing.GetTimer(0)->Advance(); |     timing.GetTimer(0)->Advance(); | ||||||
|  |     timing.GetTimer(0)->SetNextSlice(); | ||||||
|     REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount()); |     REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount()); | ||||||
|     REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); |     REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); | ||||||
| } | } | ||||||
| @@ -125,6 +131,7 @@ TEST_CASE("CoreTiming[PredictableLateness]", "[core]") { | |||||||
|  |  | ||||||
|     // Enter slice 0 |     // Enter slice 0 | ||||||
|     timing.GetTimer(0)->Advance(); |     timing.GetTimer(0)->Advance(); | ||||||
|  |     timing.GetTimer(0)->SetNextSlice(); | ||||||
|  |  | ||||||
|     timing.ScheduleEvent(100, cb_a, CB_IDS[0], 0); |     timing.ScheduleEvent(100, cb_a, CB_IDS[0], 0); | ||||||
|     timing.ScheduleEvent(200, cb_b, CB_IDS[1], 0); |     timing.ScheduleEvent(200, cb_b, CB_IDS[1], 0); | ||||||
| @@ -161,6 +168,7 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") { | |||||||
|  |  | ||||||
|     // Enter slice 0 |     // Enter slice 0 | ||||||
|     timing.GetTimer(0)->Advance(); |     timing.GetTimer(0)->Advance(); | ||||||
|  |     timing.GetTimer(0)->SetNextSlice(); | ||||||
|  |  | ||||||
|     timing.ScheduleEvent(800, cb_a, CB_IDS[0], 0); |     timing.ScheduleEvent(800, cb_a, CB_IDS[0], 0); | ||||||
|     timing.ScheduleEvent(1000, cb_b, CB_IDS[1], 0); |     timing.ScheduleEvent(1000, cb_b, CB_IDS[1], 0); | ||||||
| @@ -174,14 +182,16 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") { | |||||||
|     REQUIRE(2 == reschedules); |     REQUIRE(2 == reschedules); | ||||||
|  |  | ||||||
|     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount()); |     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount()); | ||||||
|     timing.GetTimer(0)->Advance(); // cb_rs |     timing.GetTimer(0)->Advance(); | ||||||
|  |     timing.GetTimer(0)->SetNextSlice(); // cb_rs | ||||||
|     REQUIRE(1 == reschedules); |     REQUIRE(1 == reschedules); | ||||||
|     REQUIRE(200 == timing.GetTimer(0)->GetDowncount()); |     REQUIRE(200 == timing.GetTimer(0)->GetDowncount()); | ||||||
|  |  | ||||||
|     AdvanceAndCheck(timing, 2, 800); // cb_c |     AdvanceAndCheck(timing, 2, 800); // cb_c | ||||||
|  |  | ||||||
|     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount()); |     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount()); | ||||||
|     timing.GetTimer(0)->Advance(); // cb_rs |     timing.GetTimer(0)->Advance(); | ||||||
|  |     timing.GetTimer(0)->SetNextSlice(); // cb_rs | ||||||
|     REQUIRE(0 == reschedules); |     REQUIRE(0 == reschedules); | ||||||
|     REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount()); |     REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount()); | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user