From df7da8988791a121ba564a423dcd4f2be0db23e3 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 13 Dec 2024 12:50:30 -0800 Subject: [PATCH 01/16] x86/cpu: Remove unnecessary MwAIT leaf checks commit 262fba55708b60a063b30d103963477dc5026f8c upstream. The CPUID leaf dependency checker will remove X86_FEATURE_MWAIT if the CPUID level is below the required level (CPUID_MWAIT_LEAF). Thus, if you check X86_FEATURE_MWAIT you do not need to also check the CPUID level. Intel-SIG: commit 262fba55708b x86/cpu: Remove unnecessary MwAIT leaf checks Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20241213205030.9B42B458%40davehans-spike.ostc.intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/hpet.c | 3 --- arch/x86/kernel/smpboot.c | 2 -- drivers/acpi/acpi_pad.c | 2 -- drivers/idle/intel_idle.c | 3 --- 4 files changed, 10 deletions(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index e1dcb241b2e1..c51e103bca79 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -942,9 +942,6 @@ static bool __init mwait_pc10_supported(void) if (!cpu_feature_enabled(X86_FEATURE_MWAIT)) return false; - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return false; - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) && diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c3ac9e602558..88100d16d637 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1308,8 +1308,6 @@ static inline void mwait_play_dead(void) return; if (!this_cpu_has(X86_FEATURE_CLFLUSH)) return; - if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) - return; eax = CPUID_MWAIT_LEAF; ecx = 0; diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 71e25c798976..5af09a6f84e4 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -41,8 +41,6 @@ static void power_saving_mwait_init(void) if (!boot_cpu_has(X86_FEATURE_MWAIT)) return; - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return; cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 21dfe0d73ef4..60e1f0afffbc 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -2242,9 +2242,6 @@ static int __init intel_idle_init(void) return -ENODEV; } - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return -ENODEV; - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || -- Gitee From e1be53553b81d3d11e86cb2c293d930c04026f26 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Nov 2024 21:58:31 +0100 Subject: [PATCH 02/16] cpuidle: Do not return from cpuidle_play_dead() on callback failures commit f65ee094eda6e5897172a1276ffee88cf5489928 upstream. If the :enter_dead() idle state callback fails for a certain state, there may be still a shallower state for which it will work. Because the only caller of cpuidle_play_dead(), native_play_dead(), falls back to hlt_play_dead() if it returns an error, it should better try all of the idle states for which :enter_dead() is present before failing, so change it accordingly. Also notice that the :enter_dead() state callback is not expected to return on success (the CPU should be "dead" then), so make cpuidle_play_dead() ignore its return value. Intel-SIG: commit f65ee094eda6 cpuidle: Do not return from cpuidle_play_dead() on callback failures Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello Tested-by: Mario Limonciello # 6.12-rc7 Reviewed-by: Gautham R. Shenoy Link: https://patch.msgid.link/3318440.aeNJFYEL58@rjwysocki.net [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/cpuidle/cpuidle.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 6704d610573a..c075e0e1ab6a 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -69,11 +69,15 @@ int cpuidle_play_dead(void) if (!drv) return -ENODEV; - /* Find lowest-power state that supports long-term idle */ - for (i = drv->state_count - 1; i >= 0; i--) + for (i = drv->state_count - 1; i >= 0; i--) { if (drv->states[i].enter_dead) - return drv->states[i].enter_dead(dev, i); + drv->states[i].enter_dead(dev, i); + } + /* + * If :enter_dead() is successful, it will never return, so reaching + * here means that all of them failed above or were not present. + */ return -ENODEV; } -- Gitee From 655d52019a7b30e93a1c5a924303ac784a5cb745 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Nov 2024 22:00:25 +0100 Subject: [PATCH 03/16] cpuidle: Change :enter_dead() driver callback return type to void commit 9cf9f2e70bea4e66a2c8b8c4743489beb21258a8 upstream. After a previous change, cpuidle_play_dead(), which is the only caller of idle state :enter_dead() callbacks, ignores their return values, so they may as well be void. Intel-SIG: commit 9cf9f2e70bea cpuidle: Change :enter_dead() driver callback return type to void Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Reviewed-by: Gautham R. Shenoy Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/2285569.iZASKD2KPV@rjwysocki.net [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/acpi/processor_idle.c | 7 ++----- include/linux/cpuidle.h | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 86e026c49aeb..4ba9fe2400a1 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -588,7 +588,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx) * @dev: the target CPU * @index: the index of suggested state */ -static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) +static void acpi_idle_play_dead(struct cpuidle_device *dev, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); @@ -601,11 +601,8 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { io_idle(cx->address); } else - return -ENODEV; + return; } - - /* Never reached */ - return 0; } static __always_inline bool acpi_idle_fallback_to_c1(struct acpi_processor *pr) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3183aeb7f5b4..a9ee4fe55dcf 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -61,7 +61,7 @@ struct cpuidle_state { struct cpuidle_driver *drv, int index); - int (*enter_dead) (struct cpuidle_device *dev, int index); + void (*enter_dead) (struct cpuidle_device *dev, int index); /* * CPUs execute ->enter_s2idle with the local tick or entire timekeeping -- Gitee From 84f6452ed2bbf8f8417d89bb3fa9df0c9318fe0c Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 5 Feb 2025 17:52:08 +0200 Subject: [PATCH 04/16] x86/smp: Allow calling mwait_play_dead with an arbitrary hint commit a7dd183f0b3848c056bbeed78ef5d5c52fe94d83 upstream. Introduce a helper function to allow offlined CPUs to enter idle states with a specific MWAIT hint. The new helper will be used in subsequent patches by the acpi_idle and intel_idle drivers. No functional change intended. Intel-SIG: commit a7dd183f0b38 x86/smp: Allow calling mwait_play_dead with an arbitrary hint Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Patryk Wlazlyn Signed-off-by: Artem Bityutskiy Signed-off-by: Dave Hansen Reviewed-by: Gautham R. Shenoy Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/all/20250205155211.329780-2-artem.bityutskiy%40linux.intel.com [ Zhang Rui: resolve conflict (use CPUID_MWAIT_LEAF) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/include/asm/smp.h | 3 ++ arch/x86/kernel/smpboot.c | 88 ++++++++++++++++++++------------------ 2 files changed, 50 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index b36e8785c199..3d201d78455b 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -122,6 +122,7 @@ void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); void smp_kick_mwait_play_dead(void); +void mwait_play_dead(unsigned int eax_hint); void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); @@ -172,6 +173,8 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) { return (struct cpumask *)cpumask_of(0); } + +static inline void mwait_play_dead(unsigned int eax_hint) { } #endif /* CONFIG_SMP */ #ifdef CONFIG_DEBUG_NMI_SELFTEST diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 88100d16d637..6cb3e821d864 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1289,47 +1289,9 @@ void play_dead_common(void) local_irq_disable(); } -/* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ -static inline void mwait_play_dead(void) +void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - - eax = CPUID_MWAIT_LEAF; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } /* Set up state for the kexec() hack below */ md->status = CPUDEAD_MWAIT_WAIT; @@ -1350,7 +1312,7 @@ static inline void mwait_play_dead(void) mb(); __monitor(md, 0, 0); mb(); - __mwait(eax, 0); + __mwait(eax_hint, 0); if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) { /* @@ -1372,6 +1334,50 @@ static inline void mwait_play_dead(void) } } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ +static inline void mwait_play_dead_cpuid_hint(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + return; + if (!this_cpu_has(X86_FEATURE_MWAIT)) + return; + if (!this_cpu_has(X86_FEATURE_CLFLUSH)) + return; + + eax = CPUID_MWAIT_LEAF; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + + /* + * eax will be 0 if EDX enumeration is not valid. + * Initialized below to cstate, sub_cstate value when EDX is valid. + */ + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { + eax = 0; + } else { + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + } + + mwait_play_dead(eax); +} + /* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). @@ -1415,7 +1421,7 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead(); + mwait_play_dead_cpuid_hint(); if (cpuidle_play_dead()) hlt_play_dead(); } -- Gitee From b0c1428720df6b37e180395129a5f76d72d07091 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 5 Feb 2025 17:52:09 +0200 Subject: [PATCH 05/16] ACPI/processor_idle: Add FFH state handling commit 541ddf31e30022b8e6f44b3a943964e8f0989d15 upstream. Recent Intel platforms will depend on the idle driver to pass the correct hint for playing dead via mwait_play_dead_with_hint(). Expand the existing enter_dead interface with handling for FFH states and pass the MWAIT hint to the mwait_play_dead code. Intel-SIG: commit 541ddf31e300 ACPI/processor_idle: Add FFH state handling Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Suggested-by: Gautham R. Shenoy Signed-off-by: Patryk Wlazlyn Signed-off-by: Artem Bityutskiy Signed-off-by: Dave Hansen Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/all/20250205155211.329780-3-artem.bityutskiy%40linux.intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/acpi/cstate.c | 10 ++++++++++ drivers/acpi/processor_idle.c | 2 ++ include/acpi/processor.h | 5 +++++ 3 files changed, 17 insertions(+) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index a851e580689a..b047da0eb842 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * Initialize bm_flags based on the CPU cache properties @@ -204,6 +205,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); +void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) +{ + unsigned int cpu = smp_processor_id(); + struct cstate_entry *percpu_entry; + + percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); + mwait_play_dead(percpu_entry->states[cx->index].eax); +} + void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) { unsigned int cpu = smp_processor_id(); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 4ba9fe2400a1..bd088cff4917 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -600,6 +600,8 @@ static void acpi_idle_play_dead(struct cpuidle_device *dev, int index) raw_safe_halt(); else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { io_idle(cx->address); + } else if (cx->entry_method == ACPI_CSTATE_FFH) { + acpi_processor_ffh_play_dead(cx); } else return; } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 3f34ebb27525..ce5ab2e20a99 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -280,6 +280,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct acpi_processor_cx *cx, struct acpi_power_register *reg); void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cstate); +void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx); #else static inline void acpi_processor_power_init_bm_check(struct acpi_processor_flags @@ -300,6 +301,10 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx { return; } +static inline void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) +{ + return; +} #endif static inline int call_on_cpu(int cpu, long (*fn)(void *), void *arg, -- Gitee From 85f58e80f572c3c0dae4a9c7eeb50c785f08bb2e Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 5 Feb 2025 17:52:10 +0200 Subject: [PATCH 06/16] intel_idle: Provide the default enter_dead() handler commit fc4ca9537bc4e3141ba7e058700369ea242703df upstream. Recent Intel platforms require idle driver to provide information about the MWAIT hint used to enter the deepest idle state in the play_dead code. Provide the default enter_dead() handler for all of the platforms and allow overwriting with a custom handler for each platform if needed. Intel-SIG: commit fc4ca9537bc4 intel_idle: Provide the default enter_dead() handler Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Patryk Wlazlyn Signed-off-by: Artem Bityutskiy Signed-off-by: Dave Hansen Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/all/20250205155211.329780-4-artem.bityutskiy%40linux.intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/idle/intel_idle.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 60e1f0afffbc..1b3c6e6ee843 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -58,6 +58,7 @@ #include #include #include +#include #define INTEL_IDLE_VERSION "0.5.1" @@ -228,6 +229,15 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, return 0; } +static void intel_idle_enter_dead(struct cpuidle_device *dev, int index) +{ + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + struct cpuidle_state *state = &drv->states[index]; + unsigned long eax = flg2MWAIT(state->flags); + + mwait_play_dead(eax); +} + /* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. @@ -1727,6 +1737,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) mark_tsc_unstable("TSC halts in idle"); state->enter = intel_idle; + state->enter_dead = intel_idle_enter_dead; state->enter_s2idle = intel_idle_s2idle; } } @@ -2073,6 +2084,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) !cpuidle_state_table[cstate].enter_s2idle) break; + if (!cpuidle_state_table[cstate].enter_dead) + cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead; + /* If marked as unusable, skip this state. */ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { pr_debug("state %s is disabled\n", -- Gitee From f9f9a2c9216cacf9fe1910a22259e11ce6feae0e Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 5 Feb 2025 17:52:11 +0200 Subject: [PATCH 07/16] x86/smp: Eliminate mwait_play_dead_cpuid_hint() commit 96040f7273e2bc0be1871ad9ed4da7b504da9410 upstream. Currently, mwait_play_dead_cpuid_hint() looks up the MWAIT hint of the deepest idle state by inspecting CPUID leaf 0x5 with the assumption that, if the number of sub-states for a given major C-state is nonzero, those sub-states are always represented by consecutive numbers starting from 0. This assumption is not based on the documented platform behavior and in fact it is not met on recent Intel platforms. For example, Intel's Sierra Forest report two C-states with two substates each in cpuid leaf 0x5: Name* target cstate target subcstate (mwait hint) =========================================================== C1 0x00 0x00 C1E 0x00 0x01 -- 0x10 ---- C6S 0x20 0x22 C6P 0x20 0x23 -- 0x30 ---- /* No more (sub)states all the way down to the end. */ =========================================================== * Names of the cstates are not included in the CPUID leaf 0x5, they are taken from the product specific documentation. Notice that hints 0x20 and 0x21 are not defined for C-state 0x20 (C6), so the existing MWAIT hint lookup in mwait_play_dead_cpuid_hint() based on the CPUID leaf 0x5 contents does not work in this case. Instead of using MWAIT hint lookup that is not guaranteed to work, make native_play_dead() rely on the idle driver for the given platform to put CPUs going offline into appropriate idle state and, if that fails, fall back to hlt_play_dead(). Accordingly, drop mwait_play_dead_cpuid_hint() altogether and make native_play_dead() call cpuidle_play_dead() instead of it unconditionally with the assumption that it will not return if it is successful. Still, in case cpuidle_play_dead() fails, call hlt_play_dead() at the end. Intel-SIG: commit 96040f7273e2 x86/smp: Eliminate mwait_play_dead_cpuid_hint() Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Patryk Wlazlyn Signed-off-by: Artem Bityutskiy Signed-off-by: Dave Hansen Reviewed-by: Gautham R. Shenoy Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/all/20250205155211.329780-5-artem.bityutskiy%40linux.intel.com [ Zhang Rui: resolve conflict (use CPUID_MWAIT_LEAF) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/smpboot.c | 54 +++++---------------------------------- 1 file changed, 7 insertions(+), 47 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6cb3e821d864..9b818af170ed 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1289,6 +1289,10 @@ void play_dead_common(void) local_irq_disable(); } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1334,50 +1338,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } } -/* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ -static inline void mwait_play_dead_cpuid_hint(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - - eax = CPUID_MWAIT_LEAF; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } - - mwait_play_dead(eax); -} - /* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). @@ -1421,9 +1381,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead_cpuid_hint(); - if (cpuidle_play_dead()) - hlt_play_dead(); + /* Below returns only on error. */ + cpuidle_play_dead(); + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ -- Gitee From 79019ed508e1e395fc55b3397e20b62590a9faeb Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 16 Feb 2025 14:26:14 +0200 Subject: [PATCH 08/16] ACPI/processor_idle: Export acpi_processor_ffh_play_dead() commit 64aad4749d7911f8c5e69d93a929a269605dd3cb upstream. The kernel test robot reported the following build error: >> ERROR: modpost: "acpi_processor_ffh_play_dead" [drivers/acpi/processor.ko] undefined! Caused by this recently merged commit: 541ddf31e300 ("ACPI/processor_idle: Add FFH state handling") The build failure is due to an oversight in the 'CONFIG_ACPI_PROCESSOR=m' case, the function export is missing. Add it. Intel-SIG: commit 64aad4749d79 ACPI/processor_idle: Export acpi_processor_ffh_play_dead() Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502151207.FA9UO1iX-lkp@intel.com/ Fixes: 541ddf31e300 ("ACPI/processor_idle: Add FFH state handling") Signed-off-by: Artem Bityutskiy Signed-off-by: Ingo Molnar Cc: Dave Hansen Link: https://lore.kernel.org/r/de5bf4f116779efde315782a15146fdc77a4a044.camel@linux.intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/acpi/cstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index b047da0eb842..ea45b2d5454c 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -213,6 +213,7 @@ void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); mwait_play_dead(percpu_entry->states[cx->index].eax); } +EXPORT_SYMBOL_GPL(acpi_processor_ffh_play_dead); void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) { -- Gitee From 130d11e9065cd337a5429192ae2201aa60b9ae34 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 2 Mar 2025 16:48:51 -0800 Subject: [PATCH 09/16] x86/smp: Fix mwait_play_dead() and acpi_processor_ffh_play_dead() noreturn behavior commit 4e32645cd8f97a308300623f81c902747df6b97b upstream. Fix some related issues (done in a single patch to avoid introducing intermediate bisect warnings): 1) The SMP version of mwait_play_dead() doesn't return, but its !SMP counterpart does. Make its calling behavior consistent by resolving the !SMP version to a BUG(). It should never be called anyway, this just enforces that at runtime and enables its callers to be marked as __noreturn. 2) While the SMP definition of mwait_play_dead() is annotated as __noreturn, the declaration isn't. Nor is it listed in tools/objtool/noreturns.h. Fix that. 3) Similar to #1, the SMP version of acpi_processor_ffh_play_dead() doesn't return but its !SMP counterpart does. Make the !SMP version a BUG(). It should never be called. 4) acpi_processor_ffh_play_dead() doesn't return, but is lacking any __noreturn annotations. Fix that. This fixes the following objtool warnings: vmlinux.o: warning: objtool: acpi_processor_ffh_play_dead+0x67: mwait_play_dead() is missing a __noreturn annotation vmlinux.o: warning: objtool: acpi_idle_play_dead+0x3c: acpi_processor_ffh_play_dead() is missing a __noreturn annotation Intel-SIG: commit 4e32645cd8f9 x86/smp: Fix mwait_play_dead() and acpi_processor_ffh_play_dead() noreturn behavior Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Fixes: a7dd183f0b38 ("x86/smp: Allow calling mwait_play_dead with an arbitrary hint") Fixes: 541ddf31e300 ("ACPI/processor_idle: Add FFH state handling") Reported-by: Paul E. McKenney Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Tested-by: Paul E. McKenney Link: https://lore.kernel.org/r/e885c6fa9e96a61471b33e48c2162d28b15b14c5.1740962711.git.jpoimboe@kernel.org [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/include/asm/smp.h | 4 ++-- arch/x86/kernel/acpi/cstate.c | 2 +- include/acpi/processor.h | 6 +++--- tools/objtool/noreturns.h | 2 ++ 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 3d201d78455b..a6de0a8c1dc6 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -122,7 +122,7 @@ void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); void smp_kick_mwait_play_dead(void); -void mwait_play_dead(unsigned int eax_hint); +void __noreturn mwait_play_dead(unsigned int eax_hint); void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); @@ -174,7 +174,7 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) return (struct cpumask *)cpumask_of(0); } -static inline void mwait_play_dead(unsigned int eax_hint) { } +static inline void __noreturn mwait_play_dead(unsigned int eax_hint) { BUG(); } #endif /* CONFIG_SMP */ #ifdef CONFIG_DEBUG_NMI_SELFTEST diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index ea45b2d5454c..8d4c9cff0b5b 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -205,7 +205,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); -void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) +void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) { unsigned int cpu = smp_processor_id(); struct cstate_entry *percpu_entry; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index ce5ab2e20a99..034edf4178be 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -280,7 +280,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct acpi_processor_cx *cx, struct acpi_power_register *reg); void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cstate); -void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx); +void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx); #else static inline void acpi_processor_power_init_bm_check(struct acpi_processor_flags @@ -301,9 +301,9 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx { return; } -static inline void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) +static inline void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) { - return; + BUG(); } #endif diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 80a3e6acf31e..57cb18636e34 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -12,6 +12,7 @@ NORETURN(__reiserfs_panic) NORETURN(__stack_chk_fail) NORETURN(__ubsan_handle_builtin_unreachable) NORETURN(arch_call_rest_init) +NORETURN(acpi_processor_ffh_play_dead) NORETURN(arch_cpu_idle_dead) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) @@ -28,6 +29,7 @@ NORETURN(kunit_try_catch_throw) NORETURN(machine_real_restart) NORETURN(make_task_dead) NORETURN(mpt_halt_firmware) +NORETURN(mwait_play_dead) NORETURN(nmi_panic_self_stop) NORETURN(panic) NORETURN(panic_smp_self_stop) -- Gitee From 5aeee8a32c9dad689828051d7acb161225500a35 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 29 May 2025 15:40:43 +0200 Subject: [PATCH 10/16] Revert "x86/smp: Eliminate mwait_play_dead_cpuid_hint()" commit 70523f335734b0b42f97647556d331edf684c7dc upstream. Revert commit 96040f7273e2 ("x86/smp: Eliminate mwait_play_dead_cpuid_hint()") because it introduced a significant power regression on systems that start with "nosmt" in the kernel command line. Namely, on such systems, SMT siblings permanently go offline early, when cpuidle has not been initialized yet, so after the above commit, hlt_play_dead() is called for them. Later on, when the processor attempts to enter a deep package C-state, including PC10 which is requisite for reaching minimum power in suspend-to-idle, it is not able to do that because of the SMT siblings staying in C1 (which they have been put into by HLT). As a result, the idle power (including power in suspend-to-idle) rises quite dramatically on those systems with all of the possible consequences, which (needless to say) may not be expected by their users. This issue is hard to debug and potentially dangerous, so it needs to be addressed as soon as possible in a way that will work for 6.15.y, hence the revert. Of course, after this revert, the issue that commit 96040f7273e2 attempted to address will be back and it will need to be fixed again later. Intel-SIG: commit 70523f335734 Revert "x86/smp: Eliminate mwait_play_dead_cpuid_hint()" Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Fixes: 96040f7273e2 ("x86/smp: Eliminate mwait_play_dead_cpuid_hint()") Reported-by: Todd Brandt Tested-by: Todd Brandt Cc: 6.15+ # 6.15+ Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Link: https://patch.msgid.link/12674167.O9o76ZdvQC@rjwysocki.net [ Zhang Rui: resolve conflict (use CPUID_MWAIT_LEAF) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/smpboot.c | 54 ++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9b818af170ed..6cb3e821d864 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1289,10 +1289,6 @@ void play_dead_common(void) local_irq_disable(); } -/* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1338,6 +1334,50 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ +static inline void mwait_play_dead_cpuid_hint(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + return; + if (!this_cpu_has(X86_FEATURE_MWAIT)) + return; + if (!this_cpu_has(X86_FEATURE_CLFLUSH)) + return; + + eax = CPUID_MWAIT_LEAF; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + + /* + * eax will be 0 if EDX enumeration is not valid. + * Initialized below to cstate, sub_cstate value when EDX is valid. + */ + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { + eax = 0; + } else { + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + } + + mwait_play_dead(eax); +} + /* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). @@ -1381,9 +1421,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - /* Below returns only on error. */ - cpuidle_play_dead(); - hlt_play_dead(); + mwait_play_dead_cpuid_hint(); + if (cpuidle_play_dead()) + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ -- Gitee From c0f33044cb406cd381248f385cdba8369b6fe1ed Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:04:11 +0200 Subject: [PATCH 11/16] intel_idle: Use subsys_initcall_sync() for initialization commit c0f691388992c708436ab5f6e810865be6ddf5c6 upstream. It is not necessary to wait until the device_initcall() stage with intel_idle initialization. All of its dependencies are met after all subsys_initcall()s have run, so subsys_initcall_sync() can be used for initializing it. It is also better to ensure that intel_idle will always initialize before the ACPI processor driver that uses module_init() for its initialization. Intel-SIG: commit c0f691388992 intel_idle: Use subsys_initcall_sync() for initialization Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/2994397.e9J7NaK4W3@rjwysocki.net [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 1b3c6e6ee843..4dbec64cc600 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -2312,7 +2312,7 @@ static int __init intel_idle_init(void) return retval; } -device_initcall(intel_idle_init); +subsys_initcall_sync(intel_idle_init); /* * We are not really modular, but we used to support that. Meaning we also -- Gitee From 46386c190f662f5858ce61de7990f8a1ea26b408 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 7 Jun 2025 14:22:56 +0200 Subject: [PATCH 12/16] x86/smp: PM/hibernate: Split arch_resume_nosmt() commit 4c529a4a7260776bb4abe264498857b4537aa70d upstream. Move the inner part of the arch_resume_nosmt() code into a separate function called arch_cpu_rescan_dead_smt_siblings(), so it can be used in other places where "dead" SMT siblings may need to be taken online and offline again in order to get into deep idle states. No intentional functional impact. Intel-SIG: commit 4c529a4a7260 x86/smp: PM/hibernate: Split arch_resume_nosmt() Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/3361688.44csPzL39Z@rjwysocki.net [ rjw: Prevent build issues with CONFIG_SMP unset ] Signed-off-by: Rafael J. Wysocki [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/smp.c | 24 ++++++++++++++++++++++++ arch/x86/power/hibernate.c | 19 ++++++------------- include/linux/cpu.h | 3 +++ 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 96a771f9f930..1e8e6efe8ddf 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -295,3 +295,27 @@ struct smp_ops smp_ops = { .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); + +int arch_cpu_rescan_dead_smt_siblings(void) +{ + enum cpuhp_smt_control old = cpu_smt_control; + int ret; + + /* + * If SMT has been disabled and SMT siblings are in HLT, bring them back + * online and offline them again so that they end up in MWAIT proper. + * + * Called with hotplug enabled. + */ + if (old != CPU_SMT_DISABLED && old != CPU_SMT_FORCE_DISABLED) + return 0; + + ret = cpuhp_smt_enable(); + if (ret) + return ret; + + ret = cpuhp_smt_disable(old); + + return ret; +} +EXPORT_SYMBOL_GPL(arch_cpu_rescan_dead_smt_siblings); diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index d8af46e67750..ee3c56af548e 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -188,7 +188,8 @@ int relocate_restore_code(void) int arch_resume_nosmt(void) { - int ret = 0; + int ret; + /* * We reached this while coming out of hibernation. This means * that SMT siblings are sleeping in hlt, as mwait is not safe @@ -202,18 +203,10 @@ int arch_resume_nosmt(void) * Called with hotplug disabled. */ cpu_hotplug_enable(); - if (cpu_smt_control == CPU_SMT_DISABLED || - cpu_smt_control == CPU_SMT_FORCE_DISABLED) { - enum cpuhp_smt_control old = cpu_smt_control; - - ret = cpuhp_smt_enable(); - if (ret) - goto out; - ret = cpuhp_smt_disable(old); - if (ret) - goto out; - } -out: + + ret = arch_cpu_rescan_dead_smt_siblings(); + cpu_hotplug_disable(); + return ret; } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8e596dc11a0f..afbaf4762371 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -115,6 +115,7 @@ extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); void bringup_nonboot_cpus(unsigned int setup_max_cpus); +int arch_cpu_rescan_dead_smt_siblings(void); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -129,6 +130,8 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} +static inline int arch_cpu_rescan_dead_smt_siblings(void) { return 0; } + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; -- Gitee From eab8d8529f1057d480653e4e367ae42386ccb858 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:06:08 +0200 Subject: [PATCH 13/16] intel_idle: Rescan "dead" SMT siblings during initialization commit a430c11f401589a0f4f57fd398271a5d85142c7a upstream. Make intel_idle_init() call arch_cpu_rescan_dead_smt_siblings() after successfully registering intel_idle as the cpuidle driver so as to allow the "dead" SMT siblings (if any) to go into deep idle states. This is necessary for the processor to be able to reach deep package C-states (like PC10) going forward which is requisite for reducing power sufficiently in suspend-to-idle, among other things. Intel-SIG: commit a430c11f4015 intel_idle: Rescan "dead" SMT siblings during initialization Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/10669885.nUPlyArG6x@rjwysocki.net [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/idle/intel_idle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 4dbec64cc600..c576d614a8e5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -2302,6 +2302,8 @@ static int __init intel_idle_init(void) pr_debug("Local APIC timer is reliable in %s\n", boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); + arch_cpu_rescan_dead_smt_siblings(); + return 0; hp_setup_fail: -- Gitee From 8feaa879a0fd286d91202785861da83fa2429dd3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:07:31 +0200 Subject: [PATCH 14/16] ACPI: processor: Rescan "dead" SMT siblings during initialization commit f694481b1d3177144fcac4242eb750cfcb9f7bd5 upstream. Make acpi_processor_driver_init() call arch_cpu_rescan_dead_smt_siblings(), via a new wrapper function called acpi_idle_rescan_dead_smt_siblings(), after successfully initializing the driver, to allow the "dead" SMT siblings to go into deep idle states, which is necessary for the processor to be able to reach deep package C-states (like PC10) going forward, so that power can be reduced sufficiently in suspend-to-idle, among other things. However, do it only if the ACPI idle driver is the current cpuidle driver (otherwise it is assumed that another cpuidle driver will take care of this) and avoid doing it on architectures other than x86. Intel-SIG: commit f694481b1d31 ACPI: processor: Rescan "dead" SMT siblings during initialization Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/2005721.PYKUYFuaPT@rjwysocki.net [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/acpi/internal.h | 6 ++++++ drivers/acpi/processor_driver.c | 3 +++ drivers/acpi/processor_idle.c | 8 ++++++++ 3 files changed, 17 insertions(+) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 1e8ee97fc85f..a7fb8a3038bd 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -155,6 +155,12 @@ bool processor_physically_present(acpi_handle handle); static inline void acpi_early_processor_control_setup(void) {} #endif +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void); +#else +static inline void acpi_idle_rescan_dead_smt_siblings(void) {} +#endif + /* -------------------------------------------------------------------------- Embedded Controller -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 4bd16b3f0781..126643dde2d4 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -283,6 +283,9 @@ static int __init acpi_processor_driver_init(void) NULL, acpi_soft_cpu_dead); acpi_processor_throttling_init(); + + acpi_idle_rescan_dead_smt_siblings(); + return 0; err: driver_unregister(&acpi_processor_driver); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index bd088cff4917..24b56b074860 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -24,6 +24,8 @@ #include #include +#include "internal.h" + /* * Include the apic definitions for x86 to have the APIC timer related defines * available also for UP (on SMP it gets magically included via linux/smp.h). @@ -55,6 +57,12 @@ struct cpuidle_driver acpi_idle_driver = { }; #ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void) +{ + if (cpuidle_get_driver() == &acpi_idle_driver) + arch_cpu_rescan_dead_smt_siblings(); +} + static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate); -- Gitee From 161b5b02d296983f0ae6922ba3d634f167a5414e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:09:35 +0200 Subject: [PATCH 15/16] Reapply "x86/smp: Eliminate mwait_play_dead_cpuid_hint()" commit a18d098f2aab82abde1d59c56aef9beca01c892b upstream. Revert commit 70523f335734 ("Revert "x86/smp: Eliminate mwait_play_dead_cpuid_hint()"") to reapply the changes from commit 96040f7273e2 ("x86/smp: Eliminate mwait_play_dead_cpuid_hint()") reverted by it. Previously, these changes caused idle power to rise on systems booting with "nosmt" in the kernel command line because they effectively caused "dead" SMT siblings to remain in idle state C1 after executing the HLT instruction, which prevented the processor from reaching package idle states deeper than PC2 going forward. Now, the "dead" SMT siblings are rescanned after initializing a proper cpuidle driver for the processor (either intel_idle or ACPI idle), at which point they are able to enter a sufficiently deep idle state in native_play_dead() via cpuidle, so the code changes in question can be reapplied. Intel-SIG: commit a18d098f2aab Reapply "x86/smp: Eliminate mwait_play_dead_cpuid_hint()" Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/7813065.EvYhyI6sBW@rjwysocki.net [ Zhang Rui: resolve conflict (use CPUID_MWAIT_LEAF) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/smpboot.c | 54 +++++---------------------------------- 1 file changed, 7 insertions(+), 47 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6cb3e821d864..9b818af170ed 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1289,6 +1289,10 @@ void play_dead_common(void) local_irq_disable(); } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1334,50 +1338,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } } -/* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ -static inline void mwait_play_dead_cpuid_hint(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - - eax = CPUID_MWAIT_LEAF; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } - - mwait_play_dead(eax); -} - /* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). @@ -1421,9 +1381,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead_cpuid_hint(); - if (cpuidle_play_dead()) - hlt_play_dead(); + /* Below returns only on error. */ + cpuidle_play_dead(); + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ -- Gitee From dff95b9d7bb26fae86cfd2b8f9f620ec7d136f33 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 21 Sep 2025 10:56:40 +0200 Subject: [PATCH 16/16] x86/topology: Implement topology_is_core_online() to address SMT regression commit 2066f00e5b2dc061fb6d8c88fadaebc97f11feaa upstream. Christian reported that commit a430c11f4015 ("intel_idle: Rescan "dead" SMT siblings during initialization") broke the use case in which both 'nosmt' and 'maxcpus' are on the kernel command line because it onlines primary threads, which were offline due to the maxcpus limit. The initially proposed fix to skip primary threads in the loop is inconsistent. While it prevents the primary thread to be onlined, it then onlines the corresponding hyperthread(s), which does not really make sense. The CPU iterator in cpuhp_smt_enable() contains a check which excludes all threads of a core, when the primary thread is offline. The default implementation is a NOOP and therefore not effective on x86. Implement topology_is_core_online() on x86 to address this issue. This makes the behaviour consistent between x86 and PowerPC. Intel-SIG: commit 2066f00e5b2d x86/topology: Implement topology_is_core_online() to address SMT regression Fix the incorrect power/idle state of the offlined cpus. For GRR/SRF/CWF. Fixes: a430c11f4015 ("intel_idle: Rescan "dead" SMT siblings during initialization") Fixes: f694481b1d31 ("ACPI: processor: Rescan "dead" SMT siblings during initialization") Closes: https://lore.kernel.org/linux-pm/724616a2-6374-4ba3-8ce3-ea9c45e2ae3b@arm.com/ Reported-by: Christian Loehle Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Rafael J. Wysocki (Intel) Tested-by: Christian Loehle Cc: stable@vger.kernel.org Link: https://lore.kernel.org/12740505.O9o76ZdvQC@rafael.j.wysocki [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/include/asm/topology.h | 10 ++++++++++ arch/x86/kernel/cpu/topology.c | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 76b1d87f1531..ff09685f00ba 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -220,6 +220,16 @@ static inline bool topology_is_primary_thread(unsigned int cpu) return cpumask_test_cpu(cpu, cpu_primary_thread_mask); } +int topology_get_primary_thread(unsigned int cpu); + +static inline bool topology_is_core_online(unsigned int cpu) +{ + int pcpu = topology_get_primary_thread(cpu); + + return pcpu >= 0 ? cpu_online(pcpu) : false; +} +#define topology_is_core_online topology_is_core_online + #else /* CONFIG_SMP */ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index a8d1cc6d223d..cd0cfd42ea8b 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -371,6 +371,19 @@ unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_uni return topo_unit_count(lvlid, at_level, apic_maps[which_units].map); } +#ifdef CONFIG_SMP +int topology_get_primary_thread(unsigned int cpu) +{ + u32 apic_id = cpuid_to_apicid[cpu]; + + /* + * Get the core domain level APIC id, which is the primary thread + * and return the CPU number assigned to it. + */ + return topo_lookup_cpuid(topo_apicid(apic_id, TOPO_CORE_DOMAIN)); +} +#endif + #ifdef CONFIG_ACPI_HOTPLUG_CPU /** * topology_hotplug_apic - Handle a physical hotplugged APIC after boot -- Gitee