diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index b36e8785c199f4563d2d3727bafbe2ae62365149..a6de0a8c1dc64611928ce3356dce0f1577c4b8eb 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -122,6 +122,7 @@ void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); void smp_kick_mwait_play_dead(void); +void __noreturn mwait_play_dead(unsigned int eax_hint); void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); @@ -172,6 +173,8 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) { return (struct cpumask *)cpumask_of(0); } + +static inline void __noreturn mwait_play_dead(unsigned int eax_hint) { BUG(); } #endif /* CONFIG_SMP */ #ifdef CONFIG_DEBUG_NMI_SELFTEST diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 76b1d87f1531d1328338ad315c5950fd9a77f8a3..ff09685f00ba5212243ab66c4d87e365a2edff72 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -220,6 +220,16 @@ static inline bool topology_is_primary_thread(unsigned int cpu) return cpumask_test_cpu(cpu, cpu_primary_thread_mask); } +int topology_get_primary_thread(unsigned int cpu); + +static inline bool topology_is_core_online(unsigned int cpu) +{ + int pcpu = topology_get_primary_thread(cpu); + + return pcpu >= 0 ? cpu_online(pcpu) : false; +} +#define topology_is_core_online topology_is_core_online + #else /* CONFIG_SMP */ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index a851e580689a32099f123461a16d8ab673f5d0ab..8d4c9cff0b5bbc136f7f77c0378a0dee491981ea 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * Initialize bm_flags based on the CPU cache properties @@ -204,6 +205,16 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); +void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) +{ + unsigned int cpu = smp_processor_id(); + struct cstate_entry *percpu_entry; + + percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); + mwait_play_dead(percpu_entry->states[cx->index].eax); +} +EXPORT_SYMBOL_GPL(acpi_processor_ffh_play_dead); + void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) { unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index a8d1cc6d223df9fbe5b6f3aab5f72ba31f1b19e3..cd0cfd42ea8b0bbdb6cf0fabbb9f7453e7ff5d55 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -371,6 +371,19 @@ unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_uni return topo_unit_count(lvlid, at_level, apic_maps[which_units].map); } +#ifdef CONFIG_SMP +int topology_get_primary_thread(unsigned int cpu) +{ + u32 apic_id = cpuid_to_apicid[cpu]; + + /* + * Get the core domain level APIC id, which is the primary thread + * and return the CPU number assigned to it. + */ + return topo_lookup_cpuid(topo_apicid(apic_id, TOPO_CORE_DOMAIN)); +} +#endif + #ifdef CONFIG_ACPI_HOTPLUG_CPU /** * topology_hotplug_apic - Handle a physical hotplugged APIC after boot diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index e1dcb241b2e10458fcc74f65c211cf1d3a746b88..c51e103bca79a272d8a49dcd5637edf11aec5855 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -942,9 +942,6 @@ static bool __init mwait_pc10_supported(void) if (!cpu_feature_enabled(X86_FEATURE_MWAIT)) return false; - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return false; - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) && diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 96a771f9f930a6aba1b77967169808bce3b3eace..1e8e6efe8ddffa8d532294ca808644b94ef944b8 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -295,3 +295,27 @@ struct smp_ops smp_ops = { .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); + +int arch_cpu_rescan_dead_smt_siblings(void) +{ + enum cpuhp_smt_control old = cpu_smt_control; + int ret; + + /* + * If SMT has been disabled and SMT siblings are in HLT, bring them back + * online and offline them again so that they end up in MWAIT proper. + * + * Called with hotplug enabled. + */ + if (old != CPU_SMT_DISABLED && old != CPU_SMT_FORCE_DISABLED) + return 0; + + ret = cpuhp_smt_enable(); + if (ret) + return ret; + + ret = cpuhp_smt_disable(old); + + return ret; +} +EXPORT_SYMBOL_GPL(arch_cpu_rescan_dead_smt_siblings); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c3ac9e6025582426d18e53a5b5a62a426b4b18c6..9b818af170ed0d1dc338bdf9d45d9246dbab3523 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1293,45 +1293,9 @@ void play_dead_common(void) * We need to flush the caches before going to sleep, lest we have * dirty data in our caches when we come back up. */ -static inline void mwait_play_dead(void) +void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) - return; - - eax = CPUID_MWAIT_LEAF; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } /* Set up state for the kexec() hack below */ md->status = CPUDEAD_MWAIT_WAIT; @@ -1352,7 +1316,7 @@ static inline void mwait_play_dead(void) mb(); __monitor(md, 0, 0); mb(); - __mwait(eax, 0); + __mwait(eax_hint, 0); if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) { /* @@ -1417,9 +1381,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead(); - if (cpuidle_play_dead()) - hlt_play_dead(); + /* Below returns only on error. */ + cpuidle_play_dead(); + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index d8af46e6775034a49211c07431f3d35433c5ff29..ee3c56af548ece29e423232c2cf9d83db43c16d8 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -188,7 +188,8 @@ int relocate_restore_code(void) int arch_resume_nosmt(void) { - int ret = 0; + int ret; + /* * We reached this while coming out of hibernation. This means * that SMT siblings are sleeping in hlt, as mwait is not safe @@ -202,18 +203,10 @@ int arch_resume_nosmt(void) * Called with hotplug disabled. */ cpu_hotplug_enable(); - if (cpu_smt_control == CPU_SMT_DISABLED || - cpu_smt_control == CPU_SMT_FORCE_DISABLED) { - enum cpuhp_smt_control old = cpu_smt_control; - - ret = cpuhp_smt_enable(); - if (ret) - goto out; - ret = cpuhp_smt_disable(old); - if (ret) - goto out; - } -out: + + ret = arch_cpu_rescan_dead_smt_siblings(); + cpu_hotplug_disable(); + return ret; } diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 71e25c7989762870b9871472e4d7ddb503a3e95d..5af09a6f84e44e58ecef16ae4b7172df16d0f5e5 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -41,8 +41,6 @@ static void power_saving_mwait_init(void) if (!boot_cpu_has(X86_FEATURE_MWAIT)) return; - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return; cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 1e8ee97fc85f38c49182c2c574c8d254a33be79d..a7fb8a3038bdd793841c755683624b18a7ae6f25 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -155,6 +155,12 @@ bool processor_physically_present(acpi_handle handle); static inline void acpi_early_processor_control_setup(void) {} #endif +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void); +#else +static inline void acpi_idle_rescan_dead_smt_siblings(void) {} +#endif + /* -------------------------------------------------------------------------- Embedded Controller -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 4bd16b3f0781481f6cfd54d760f47f3ad54b405a..126643dde2d4a3c733ebee3b968f244664e860e8 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -283,6 +283,9 @@ static int __init acpi_processor_driver_init(void) NULL, acpi_soft_cpu_dead); acpi_processor_throttling_init(); + + acpi_idle_rescan_dead_smt_siblings(); + return 0; err: driver_unregister(&acpi_processor_driver); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 86e026c49aeb31e745d735ac8c8fcc811e1a23cc..24b56b074860dee414e31af3655288fac20d0c07 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -24,6 +24,8 @@ #include #include +#include "internal.h" + /* * Include the apic definitions for x86 to have the APIC timer related defines * available also for UP (on SMP it gets magically included via linux/smp.h). @@ -55,6 +57,12 @@ struct cpuidle_driver acpi_idle_driver = { }; #ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void) +{ + if (cpuidle_get_driver() == &acpi_idle_driver) + arch_cpu_rescan_dead_smt_siblings(); +} + static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate); @@ -588,7 +596,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx) * @dev: the target CPU * @index: the index of suggested state */ -static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) +static void acpi_idle_play_dead(struct cpuidle_device *dev, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); @@ -600,12 +608,11 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) raw_safe_halt(); else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { io_idle(cx->address); + } else if (cx->entry_method == ACPI_CSTATE_FFH) { + acpi_processor_ffh_play_dead(cx); } else - return -ENODEV; + return; } - - /* Never reached */ - return 0; } static __always_inline bool acpi_idle_fallback_to_c1(struct acpi_processor *pr) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 6704d610573ad653133df994daa2778c2173e053..c075e0e1ab6ae400916ba62ae0a6e70ba6b73d1e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -69,11 +69,15 @@ int cpuidle_play_dead(void) if (!drv) return -ENODEV; - /* Find lowest-power state that supports long-term idle */ - for (i = drv->state_count - 1; i >= 0; i--) + for (i = drv->state_count - 1; i >= 0; i--) { if (drv->states[i].enter_dead) - return drv->states[i].enter_dead(dev, i); + drv->states[i].enter_dead(dev, i); + } + /* + * If :enter_dead() is successful, it will never return, so reaching + * here means that all of them failed above or were not present. + */ return -ENODEV; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 21dfe0d73ef4b0467493931771e830d1a78c36de..c576d614a8e5b033f44fb928e2022171f4a7df09 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -58,6 +58,7 @@ #include #include #include +#include #define INTEL_IDLE_VERSION "0.5.1" @@ -228,6 +229,15 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, return 0; } +static void intel_idle_enter_dead(struct cpuidle_device *dev, int index) +{ + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + struct cpuidle_state *state = &drv->states[index]; + unsigned long eax = flg2MWAIT(state->flags); + + mwait_play_dead(eax); +} + /* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. @@ -1727,6 +1737,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) mark_tsc_unstable("TSC halts in idle"); state->enter = intel_idle; + state->enter_dead = intel_idle_enter_dead; state->enter_s2idle = intel_idle_s2idle; } } @@ -2073,6 +2084,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) !cpuidle_state_table[cstate].enter_s2idle) break; + if (!cpuidle_state_table[cstate].enter_dead) + cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead; + /* If marked as unusable, skip this state. */ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { pr_debug("state %s is disabled\n", @@ -2242,9 +2256,6 @@ static int __init intel_idle_init(void) return -ENODEV; } - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return -ENODEV; - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || @@ -2291,6 +2302,8 @@ static int __init intel_idle_init(void) pr_debug("Local APIC timer is reliable in %s\n", boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); + arch_cpu_rescan_dead_smt_siblings(); + return 0; hp_setup_fail: @@ -2301,7 +2314,7 @@ static int __init intel_idle_init(void) return retval; } -device_initcall(intel_idle_init); +subsys_initcall_sync(intel_idle_init); /* * We are not really modular, but we used to support that. Meaning we also diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 3f34ebb275253e281fbecaad602a0531b1e6e55d..034edf4178be2f90751b4e151d963188572a3021 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -280,6 +280,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct acpi_processor_cx *cx, struct acpi_power_register *reg); void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cstate); +void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx); #else static inline void acpi_processor_power_init_bm_check(struct acpi_processor_flags @@ -300,6 +301,10 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx { return; } +static inline void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) +{ + BUG(); +} #endif static inline int call_on_cpu(int cpu, long (*fn)(void *), void *arg, diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8e596dc11a0ff5ee80d57b92caf560790d17e52e..afbaf476237143e2f27b0670b34f6518975b5c5f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -115,6 +115,7 @@ extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); void bringup_nonboot_cpus(unsigned int setup_max_cpus); +int arch_cpu_rescan_dead_smt_siblings(void); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -129,6 +130,8 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} +static inline int arch_cpu_rescan_dead_smt_siblings(void) { return 0; } + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3183aeb7f5b4238a2b88349ae4bf8c492475460f..a9ee4fe55dcfcca8200cc69b9651f91ca1b6b696 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -61,7 +61,7 @@ struct cpuidle_state { struct cpuidle_driver *drv, int index); - int (*enter_dead) (struct cpuidle_device *dev, int index); + void (*enter_dead) (struct cpuidle_device *dev, int index); /* * CPUs execute ->enter_s2idle with the local tick or entire timekeeping diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 80a3e6acf31e4b962f7b79fb10d98192e08013f6..57cb18636e34c13d7df3e840e37e2ecfa1d0a71e 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -12,6 +12,7 @@ NORETURN(__reiserfs_panic) NORETURN(__stack_chk_fail) NORETURN(__ubsan_handle_builtin_unreachable) NORETURN(arch_call_rest_init) +NORETURN(acpi_processor_ffh_play_dead) NORETURN(arch_cpu_idle_dead) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) @@ -28,6 +29,7 @@ NORETURN(kunit_try_catch_throw) NORETURN(machine_real_restart) NORETURN(make_task_dead) NORETURN(mpt_halt_firmware) +NORETURN(mwait_play_dead) NORETURN(nmi_panic_self_stop) NORETURN(panic) NORETURN(panic_smp_self_stop)