Merge pull request #10935 from Morph1984/mwaitx
x64: Make use of monitorx instructions for power efficient sleeps (AMD)
This commit is contained in:
		| @@ -93,6 +93,7 @@ void AppendCPUInfo(FieldCollection& fc) { | ||||
|     add_field("CPU_Extension_x64_GFNI", caps.gfni); | ||||
|     add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); | ||||
|     add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); | ||||
|     add_field("CPU_Extension_x64_MONITORX", caps.monitorx); | ||||
|     add_field("CPU_Extension_x64_MOVBE", caps.movbe); | ||||
|     add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); | ||||
|     add_field("CPU_Extension_x64_POPCNT", caps.popcnt); | ||||
|   | ||||
| @@ -168,6 +168,7 @@ static CPUCaps Detect() { | ||||
|         __cpuid(cpu_id, 0x80000001); | ||||
|         caps.lzcnt = Common::Bit<5>(cpu_id[2]); | ||||
|         caps.fma4 = Common::Bit<16>(cpu_id[2]); | ||||
|         caps.monitorx = Common::Bit<29>(cpu_id[2]); | ||||
|     } | ||||
|  | ||||
|     if (max_ex_fn >= 0x80000007) { | ||||
|   | ||||
| @@ -63,6 +63,7 @@ struct CPUCaps { | ||||
|     bool gfni : 1; | ||||
|     bool invariant_tsc : 1; | ||||
|     bool lzcnt : 1; | ||||
|     bool monitorx : 1; | ||||
|     bool movbe : 1; | ||||
|     bool pclmulqdq : 1; | ||||
|     bool popcnt : 1; | ||||
|   | ||||
| @@ -13,36 +13,60 @@ | ||||
|  | ||||
| namespace Common::X64 { | ||||
|  | ||||
namespace {

// Upper bound, in TSC cycles, on how long a single power-saving pause may last.
// 100,000 cycles is short enough to stay responsive while still saving CPU resources:
//   1 GHz -> 100us
//   2 GHz -> 50us
//   4 GHz -> 25us
constexpr unsigned PauseCycles{100'000U};

} // Anonymous namespace
|  | ||||
| #ifdef _MSC_VER | ||||
| __forceinline static void TPAUSE() { | ||||
|     // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | ||||
|     // For reference: | ||||
|     // At 1 GHz, 100K cycles is 100us | ||||
|     // At 2 GHz, 100K cycles is 50us | ||||
|     // At 4 GHz, 100K cycles is 25us | ||||
|     static constexpr auto PauseCycles = 100'000; | ||||
|     _tpause(0, FencedRDTSC() + PauseCycles); | ||||
|     static constexpr auto RequestC02State = 0U; | ||||
|     _tpause(RequestC02State, FencedRDTSC() + PauseCycles); | ||||
| } | ||||
|  | ||||
| __forceinline static void MWAITX() { | ||||
|     static constexpr auto EnableWaitTimeFlag = 1U << 1; | ||||
|     static constexpr auto RequestC1State = 0U; | ||||
|  | ||||
|     // monitor_var should be aligned to a cache line. | ||||
|     alignas(64) u64 monitor_var{}; | ||||
|     _mm_monitorx(&monitor_var, 0, 0); | ||||
|     _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); | ||||
| } | ||||
| #else | ||||
| static void TPAUSE() { | ||||
|     // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | ||||
|     // For reference: | ||||
|     // At 1 GHz, 100K cycles is 100us | ||||
|     // At 2 GHz, 100K cycles is 50us | ||||
|     // At 4 GHz, 100K cycles is 25us | ||||
|     static constexpr auto PauseCycles = 100'000; | ||||
|     static constexpr auto RequestC02State = 0U; | ||||
|     const auto tsc = FencedRDTSC() + PauseCycles; | ||||
|     const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); | ||||
|     const auto edx = static_cast<u32>(tsc >> 32); | ||||
|     asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); | ||||
|     asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); | ||||
| } | ||||
|  | ||||
| static void MWAITX() { | ||||
|     static constexpr auto EnableWaitTimeFlag = 1U << 1; | ||||
|     static constexpr auto RequestC1State = 0U; | ||||
|  | ||||
|     // monitor_var should be aligned to a cache line. | ||||
|     alignas(64) u64 monitor_var{}; | ||||
|     asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); | ||||
|     asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); | ||||
| } | ||||
| #endif | ||||
|  | ||||
| void MicroSleep() { | ||||
|     static const bool has_waitpkg = GetCPUCaps().waitpkg; | ||||
|     static const bool has_monitorx = GetCPUCaps().monitorx; | ||||
|  | ||||
|     if (has_waitpkg) { | ||||
|         TPAUSE(); | ||||
|     } else if (has_monitorx) { | ||||
|         MWAITX(); | ||||
|     } else { | ||||
|         std::this_thread::yield(); | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user