From d86f4b0a137a579558b7d7c5aee57860221fa31a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 6 Dec 2024 16:12:02 +0000 Subject: [PATCH 01/22] x86/amd_nb, hwmon: (k10temp): Simplify amd_pci_dev_to_node_id() ANBZ: #28102 commit 7dd57db495d49c004fffc77265ffbaccf340aa20 upstream. amd_pci_dev_to_node_id() tries to find the AMD node ID of a device by searching and counting devices. The AMD node ID of an AMD node device is simply its slot number minus the AMD node 0 slot number. Simplify this function and move it to k10temp.c. [ Yazen: Update commit message and simplify function. ] Signed-off-by: Mario Limonciello Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20241206161210.163701-10-yazen.ghannam@amd.com Signed-off-by: Prithivish Shivdasani --- arch/x86/include/asm/amd_nb.h | 17 ----------------- drivers/hwmon/k10temp.c | 5 +++++ 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 288101025fb4..7611b0a82f97 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -54,23 +54,6 @@ bool hygon_f18h_m10h(void); u16 hygon_nb_num(void); int get_df_id(struct pci_dev *misc, u8 *id); -static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) -{ - struct pci_dev *misc; - int i; - - for (i = 0; i != amd_nb_num(); i++) { - misc = node_to_amd_nb(i)->misc; - - if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) && - PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn)) - return i; - } - - WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev)); - return 0; -} - static inline bool amd_gart_present(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index e9f00ece639e..4292befc6944 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -165,6 +165,11 @@ static void 
read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval) F15H_M60H_REPORTED_TEMP_CTRL_OFFSET, regval); } +static u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) +{ + return PCI_SLOT(pdev->devfn) - AMD_NODE0_PCI_SLOT; +} + static void read_tempreg_nb_zen(struct pci_dev *pdev, u32 *regval) { if (amd_smn_read(amd_pci_dev_to_node_id(pdev), -- Gitee From ca3c6867b4f56459507b6d88d832716eda4c6b42 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 24 Jun 2025 14:15:57 +0000 Subject: [PATCH 02/22] x86/mce: Ensure user polling settings are honored when restarting timer ANBZ: #28102 commit 00c092de6f28ebd32208aef83b02d61af2229b60 upstream Users can disable MCA polling by setting the "ignore_ce" parameter or by setting "check_interval=0". This tells the kernel to *not* start the MCE timer on a CPU. If the user did not disable CMCI, then storms can occur. When these happen, the MCE timer will be started with a fixed interval. After the storm subsides, the timer's next interval is set to check_interval. This disregards the user's input through "ignore_ce" and "check_interval". Furthermore, if "check_interval=0", then the new timer will run faster than expected. Create a new helper to check these conditions and use it when a CMCI storm ends. [ bp: Massage. 
] Fixes: 7eae17c4add5 ("x86/mce: Add per-bank CMCI storm mitigation") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250624-wip-mca-updates-v4-2-236dd74f645f@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/core.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 2924c1d8d809..e4c98229b054 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1704,6 +1704,11 @@ static void mc_poll_banks_default(void) void (*mc_poll_banks)(void) = mc_poll_banks_default; +static bool should_enable_timer(unsigned long iv) +{ + return !mca_cfg.ignore_ce && iv; +} + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1727,7 +1732,7 @@ static void mce_timer_fn(struct timer_list *t) if (mce_get_storm_mode()) { __start_timer(t, HZ); - } else { + } else if (should_enable_timer(iv)) { __this_cpu_write(mce_next_interval, iv); __start_timer(t, iv); } @@ -2112,11 +2117,10 @@ static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; - if (mca_cfg.ignore_ce || !iv) - return; - - this_cpu_write(mce_next_interval, iv); - __start_timer(t, iv); + if (should_enable_timer(iv)) { + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); + } } static void __mcheck_cpu_setup_timer(void) -- Gitee From 9695ee5c69baa787de68d1b2f0b941b38272f31e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 24 Jun 2025 14:16:00 +0000 Subject: [PATCH 03/22] x86/mce/amd: Rename threshold restart function ANBZ: #28102 commit 9af8b441cf6953f683b825fbf241a979ea7521e8 upstream It operates per block rather than per bank. So rename it for clarity. No functional changes. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250624-wip-mca-updates-v4-5-236dd74f645f@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/amd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 79b8a3e14aad..d301cd9c7ebc 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -427,8 +427,8 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) return 1; }; -/* Reprogram MCx_MISC MSR behind this threshold bank. */ -static void threshold_restart_bank(void *_tr) +/* Reprogram MCx_MISC MSR behind this threshold block. */ +static void threshold_restart_block(void *_tr) { struct thresh_restart *tr = _tr; u32 hi, lo; @@ -486,7 +486,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset) }; b->threshold_limit = THRESHOLD_MAX; - threshold_restart_bank(&tr); + threshold_restart_block(&tr); }; static int setup_APIC_mce_threshold(int reserved, int new) @@ -926,7 +926,7 @@ static void log_and_reset_block(struct threshold_block *block) /* Reset threshold block after logging error. 
*/ memset(&tr, 0, sizeof(tr)); tr.b = block; - threshold_restart_bank(&tr); + threshold_restart_block(&tr); } /* @@ -1000,7 +1000,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) memset(&tr, 0, sizeof(tr)); tr.b = b; - if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1)) + if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1)) return -ENODEV; return size; @@ -1025,7 +1025,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) b->threshold_limit = new; tr.b = b; - if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1)) + if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1)) return -ENODEV; return size; -- Gitee From e76919858484e505e8996403e8d85ee61611c054 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 24 Jun 2025 14:16:01 +0000 Subject: [PATCH 04/22] x86/mce/amd: Remove return value for mce_threshold_{create,remove}_device() ANBZ: #28102 commit 4d2161b9e8ba64076f520ec2f00eefb00722c15e upstream The return values are not checked, so set return type to 'void'. Also, move function declarations to internal.h, since these functions are only used within the MCE subsystem. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/20250624-wip-mca-updates-v4-6-236dd74f645f@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/include/asm/mce.h | 6 ------ arch/x86/kernel/cpu/mce/amd.c | 22 ++++++++++------------ arch/x86/kernel/cpu/mce/internal.h | 4 ++++ 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 08aeb5853a98..741a791b16e8 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -355,15 +355,9 @@ enum smca_bank_types { extern const char *smca_get_long_name(enum smca_bank_types t); extern bool amd_mce_is_memory_error(struct mce *m); -extern int mce_threshold_create_device(unsigned int cpu); -extern int mce_threshold_remove_device(unsigned int cpu); - void mce_amd_feature_init(struct cpuinfo_x86 *c); enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank); #else - -static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; -static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index d301cd9c7ebc..bbdf0f86b26c 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1301,12 +1301,12 @@ static void __threshold_remove_device(struct threshold_bank **bp) kfree(bp); } -int mce_threshold_remove_device(unsigned int cpu) +void mce_threshold_remove_device(unsigned int cpu) { struct threshold_bank **bp = this_cpu_read(threshold_banks); if (!bp) - return 0; + return; /* * Clear the pointer before cleaning up, so that the interrupt won't @@ -1315,7 +1315,7 @@ int mce_threshold_remove_device(unsigned int cpu) this_cpu_write(threshold_banks, NULL); 
__threshold_remove_device(bp); - return 0; + return; } /** @@ -1329,36 +1329,34 @@ int mce_threshold_remove_device(unsigned int cpu) * thread running on @cpu. The callback is invoked on all CPUs which are * online when the callback is installed or during a real hotplug event. */ -int mce_threshold_create_device(unsigned int cpu) +void mce_threshold_create_device(unsigned int cpu) { unsigned int numbanks, bank; struct threshold_bank **bp; - int err; if (!mce_flags.amd_threshold) - return 0; + return; bp = this_cpu_read(threshold_banks); if (bp) - return 0; + return; numbanks = this_cpu_read(mce_num_banks); bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL); if (!bp) - return -ENOMEM; + return; for (bank = 0; bank < numbanks; ++bank) { if (!(this_cpu_read(bank_map) & BIT_ULL(bank))) continue; - err = threshold_create_bank(bp, cpu, bank); - if (err) { + if (threshold_create_bank(bp, cpu, bank)) { __threshold_remove_device(bp); - return err; + return; } } this_cpu_write(threshold_banks, bp); if (thresholding_irq_en) mce_threshold_vector = amd_threshold_interrupt; - return 0; + return; } diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index fd295bfc8b12..7c75faa509e2 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -263,6 +263,8 @@ void mce_prep_record_common(struct mce *m); void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m); #ifdef CONFIG_X86_MCE_AMD +void mce_threshold_create_device(unsigned int cpu); +void mce_threshold_remove_device(unsigned int cpu); extern bool amd_filter_mce(struct mce *m); bool amd_mce_usable_address(struct mce *m); @@ -291,6 +293,8 @@ static __always_inline void smca_extract_err_addr(struct mce *m) } #else +static inline void mce_threshold_create_device(unsigned int cpu) { } +static inline void mce_threshold_remove_device(unsigned int cpu) { } static inline bool amd_filter_mce(struct mce *m) { return false; } static inline bool amd_mce_usable_address(struct 
mce *m) { return false; } static inline void smca_extract_err_addr(struct mce *m) { } -- Gitee From 88ee546e9dc5738d55a6e3ab70d3e2fb3e2002ee Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 25 Aug 2025 17:33:00 +0000 Subject: [PATCH 05/22] x86/mce/amd: Remove smca_banks_map ANBZ: #28102 commit b249288abde5190bb113ea5acef8af4ceac4957c upstream The MCx_MISC0[BlkPtr] field was used on legacy systems to hold a register offset for the next MCx_MISC* register. In this way, an implementation-specific number of registers can be discovered at runtime. The MCAX/SMCA register space simplifies this by always including the MCx_MISC[1-4] registers. The MCx_MISC0[BlkPtr] field is used to indicate (true/false) whether any MCx_MISC[1-4] registers are present. Currently, MCx_MISC0[BlkPtr] is checked early and cached to be used during sysfs init later. This is unnecessary as the MCx_MISC0 register is read again later anyway. Remove the smca_banks_map variable as it is effectively redundant, and use a direct register/bit check instead. [ bp: Zap smca_get_block_address() too. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Reviewed-by: Tony Luck Tested-by: Tony Luck Link: https://lore.kernel.org/20250825-wip-mca-updates-v5-3-865768a2eef8@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/amd.c | 50 +++++++---------------------------- 1 file changed, 9 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index bbdf0f86b26c..86c147044a58 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -260,9 +260,6 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); */ static DEFINE_PER_CPU(u64, bank_map); -/* Map of banks that have more than MCA_MISC0 available. 
*/ -static DEFINE_PER_CPU(u64, smca_misc_banks_map); - static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); @@ -272,28 +269,6 @@ static void default_deferred_error_interrupt(void) } void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; -static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) -{ - u32 low, high; - - /* - * For SMCA enabled processors, BLKPTR field of the first MISC register - * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). - */ - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - return; - - if (!(low & MCI_CONFIG_MCAX)) - return; - - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high)) - return; - - if (low & MASK_BLKPTR_LO) - per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank); - -} - static void smca_configure(unsigned int bank, unsigned int cpu) { u8 *bank_counts = this_cpu_ptr(smca_bank_counts); @@ -334,8 +309,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) wrmsr(smca_config, low, high); } - smca_set_misc_banks_map(bank, cpu); - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; @@ -533,18 +506,6 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } -static u32 smca_get_block_address(unsigned int bank, unsigned int block, - unsigned int cpu) -{ - if (!block) - return MSR_AMD64_SMCA_MCx_MISC(bank); - - if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank))) - return 0; - - return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); -} - static u32 get_block_address(u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block, unsigned int cpu) @@ -554,8 +515,15 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return addr; - if (mce_flags.smca) - return smca_get_block_address(bank, block, 
cpu); + if (mce_flags.smca) { + if (!block) + return MSR_AMD64_SMCA_MCx_MISC(bank); + + if (!(low & MASK_BLKPTR_LO)) + return 0; + + return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + } /* Fall back to method we used for older processors: */ switch (block) { -- Gitee From 41363286c3582bf4be7965e46b0c77434c56dcf9 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 24 Jun 2025 14:16:03 +0000 Subject: [PATCH 06/22] x86/mce/amd: Put list_head in threshold_bank ANBZ: #28102 commit c4bac5c640e3782bf30c07c4d82042d0202fe224 upstream The threshold_bank structure is a container for one or more threshold_block structures. Currently, the container has a single pointer to the 'first' threshold_block structure which then has a linked list of the remaining threshold_block structures. This results in an extra level of indirection where the 'first' block is checked before iterating over the remaining blocks. Remove the indirection by including the head of the block list in the threshold_bank structure which already acts as a container for all the bank's thresholding blocks. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Reviewed-by: Tony Luck Tested-by: Tony Luck Link: https://lore.kernel.org/20250624-wip-mca-updates-v4-8-236dd74f645f@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/amd.c | 43 ++++++++++------------------------- 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 86c147044a58..d7168468399f 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -249,7 +249,8 @@ struct threshold_block { struct threshold_bank { struct kobject *kobj; - struct threshold_block *blocks; + /* List of threshold blocks within this MCA bank. 
*/ + struct list_head miscj; }; static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); @@ -903,9 +904,9 @@ static void log_and_reset_block(struct threshold_block *block) */ static void amd_threshold_interrupt(void) { - struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL; - struct threshold_bank **bp = this_cpu_read(threshold_banks); + struct threshold_bank **bp = this_cpu_read(threshold_banks), *thr_bank; unsigned int bank, cpu = smp_processor_id(); + struct threshold_block *block, *tmp; /* * Validate that the threshold bank has been initialized already. The @@ -919,16 +920,11 @@ static void amd_threshold_interrupt(void) if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank))) continue; - first_block = bp[bank]->blocks; - if (!first_block) + thr_bank = bp[bank]; + if (!thr_bank) continue; - /* - * The first block is also the head of the list. Check it first - * before iterating over the rest. - */ - log_and_reset_block(first_block); - list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj) + list_for_each_entry_safe(block, tmp, &thr_bank->miscj, miscj) log_and_reset_block(block); } } @@ -1154,13 +1150,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb default_attrs[2] = NULL; } - INIT_LIST_HEAD(&b->miscj); - - /* This is safe as @tb is not visible yet */ - if (tb->blocks) - list_add(&b->miscj, &tb->blocks->miscj); - else - tb->blocks = b; + list_add(&b->miscj, &tb->miscj); err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b)); if (err) @@ -1211,6 +1201,8 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, goto out_free; } + INIT_LIST_HEAD(&b->miscj); + err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC)); if (err) goto out_kobj; @@ -1231,26 +1223,15 @@ static void threshold_block_release(struct kobject *kobj) kfree(to_block(kobj)); } -static void deallocate_threshold_blocks(struct threshold_bank *bank) +static 
void threshold_remove_bank(struct threshold_bank *bank) { struct threshold_block *pos, *tmp; - list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) { + list_for_each_entry_safe(pos, tmp, &bank->miscj, miscj) { list_del(&pos->miscj); kobject_put(&pos->kobj); } - kobject_put(&bank->blocks->kobj); -} - -static void threshold_remove_bank(struct threshold_bank *bank) -{ - if (!bank->blocks) - goto out_free; - - deallocate_threshold_blocks(bank); - -out_free: kobject_put(bank->kobj); kfree(bank); } -- Gitee From 046b6545b64d0d46ee6203f6b9aadf04ac85add9 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 25 Aug 2025 17:33:03 +0000 Subject: [PATCH 07/22] x86/mce: Remove __mcheck_cpu_init_early() ANBZ: #28102 commit 9f34032ec0deef58bd0eb7475f1981adfa998648 upstream The __mcheck_cpu_init_early() function was introduced so that some vendor-specific features are detected before the first MCA polling event done in __mcheck_cpu_init_generic(). Currently, __mcheck_cpu_init_early() is only used on AMD-based systems and additional code will be needed to support various system configurations. However, the current and future vendor-specific code should be done during vendor init. This keeps all the vendor code in a common location and simplifies the generic init flow. Move all the __mcheck_cpu_init_early() code into mce_amd_feature_init(). 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Reviewed-by: Tony Luck Reviewed-by: Nikolay Borisov Tested-by: Tony Luck Link: https://lore.kernel.org/20250825-wip-mca-updates-v5-6-865768a2eef8@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/amd.c | 4 ++++ arch/x86/kernel/cpu/mce/core.c | 14 -------------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index d7168468399f..0c55cdd306d4 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -661,6 +661,10 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) u32 low = 0, high = 0, address = 0; int offset = -1; + mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV); + mce_flags.succor = cpu_feature_enabled(X86_FEATURE_SUCCOR); + mce_flags.smca = cpu_feature_enabled(X86_FEATURE_SMCA); + mce_flags.amd_threshold = 1; for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (mce_flags.smca) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e4c98229b054..4f54d49f6baf 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2029,19 +2029,6 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) return 0; } -/* - * Init basic CPU features needed for early decoding of MCEs. 
- */ -static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c) -{ - if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) { - mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV); - mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR); - mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA); - mce_flags.amd_threshold = 1; - } -} - static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) { struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); @@ -2248,7 +2235,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) mca_cfg.initialized = 1; - __mcheck_cpu_init_early(c); __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_clear_banks(); -- Gitee From 260505ec43bb9df7b7dec888f31315eb796b9866 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 12 Dec 2024 22:00:57 +0800 Subject: [PATCH 08/22] x86/mce: Make several functions return bool ANBZ: #28102 commit c845cb8dbd2e1a804babfd13648026c3a7cfbc0b upstream Make several functions that return 0 or 1 return a boolean value for better readability. No functional changes are intended. 
Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tony Luck Reviewed-by: Nikolay Borisov Reviewed-by: Sohil Mehta Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20241212140103.66964-2-qiuxu.zhuo@intel.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/include/asm/mce.h | 4 ++-- arch/x86/kernel/cpu/mce/amd.c | 10 +++++----- arch/x86/kernel/cpu/mce/core.c | 22 +++++++++++----------- arch/x86/kernel/cpu/mce/intel.c | 9 +++++---- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 741a791b16e8..4458eac7dc70 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -254,7 +254,7 @@ static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} #endif -int mce_available(struct cpuinfo_x86 *c); +bool mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); bool mce_usable_address(struct mce *m); @@ -274,7 +274,7 @@ enum mcp_flags { void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); -int mce_notify_irq(void); +bool mce_notify_irq(void); DECLARE_PER_CPU(struct mce, injectm); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 0c55cdd306d4..4797bedbcacf 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -372,7 +372,7 @@ static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) return msr_high_bits & BIT(28); } -static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) +static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) { int msr = (hi & MASK_LVTOFF_HI) >> 20; @@ -380,7 +380,7 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, b->bank, b->block, b->address, hi, lo); - 
return 0; + return false; } if (apic != msr) { @@ -390,15 +390,15 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) * was set is reserved. Return early here: */ if (mce_flags.smca) - return 0; + return false; pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, apic, b->bank, b->block, b->address, hi, lo); - return 0; + return false; } - return 1; + return true; }; /* Reprogram MCx_MISC MSR behind this threshold block. */ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 4f54d49f6baf..a08037589cda 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -473,10 +473,10 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs } } -int mce_available(struct cpuinfo_x86 *c) +bool mce_available(struct cpuinfo_x86 *c) { if (mca_cfg.disabled) - return 0; + return false; return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } @@ -1769,7 +1769,7 @@ static void mce_timer_delete_all(void) * Can be called from interrupt context, but not from machine check/NMI * context. 
*/ -int mce_notify_irq(void) +bool mce_notify_irq(void) { /* Not more than two messages every minute */ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); @@ -1780,9 +1780,9 @@ int mce_notify_irq(void) if (__ratelimit(&ratelimit)) pr_info(HW_ERR "Machine check events logged\n"); - return 1; + return true; } - return 0; + return false; } EXPORT_SYMBOL_GPL(mce_notify_irq); @@ -2008,25 +2008,25 @@ static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) return true; } -static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) +static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) - return 0; + return false; switch (c->x86_vendor) { case X86_VENDOR_INTEL: intel_p5_mcheck_init(c); mce_flags.p5 = 1; - return 1; + return true; case X86_VENDOR_CENTAUR: winchip_mcheck_init(c); mce_flags.winchip = 1; - return 1; + return true; default: - return 0; + return false; } - return 0; + return false; } static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index aa8b07e9aa38..1b4dc49c5090 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -56,12 +56,12 @@ static DEFINE_SPINLOCK(cmci_poll_lock); #define CMCI_THRESHOLD 1 -static int cmci_supported(int *banks) +static bool cmci_supported(int *banks) { u64 cap; if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) - return 0; + return false; /* * Vendor check is not strictly needed, but the initial @@ -71,10 +71,11 @@ static int cmci_supported(int *banks) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR && boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) - return 0; + return false; if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) - return 0; + return false; + rdmsrl(MSR_IA32_MCG_CAP, cap); *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); return !!(cap & MCG_CMCI_P); -- Gitee From c1c59e70a83443d90c022c525898556af504d5d7 Mon Sep 
17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 8 Sep 2025 15:40:30 +0000 Subject: [PATCH 09/22] x86/mce: Set CR4.MCE last during init ANBZ: #28102 commit cfffcf97997bd35f4a59e035523d1762568bdbad upstream Set the CR4.MCE bit as the last step during init. This brings the MCA init order closer to what is described in the x86 docs.

x86 docs:
      AMD             Intel
                      MCG_CTL
      MCA_CONFIG      MCG_EXT_CTL
      MCi_CTL         MCi_CTL
      MCG_CTL
      CR4.MCE         CR4.MCE

Current Linux:
      AMD             Intel
      CR4.MCE         CR4.MCE
      MCG_CTL         MCG_CTL
      MCA_CONFIG      MCG_EXT_CTL
      MCi_CTL         MCi_CTL

Updated Linux:
      AMD             Intel
      MCG_CTL         MCG_CTL
      MCA_CONFIG      MCG_EXT_CTL
      MCi_CTL         MCi_CTL
      CR4.MCE         CR4.MCE

The new init flow will match Intel's docs, but there will still be a mismatch for AMD regarding MCG_CTL. However, there is no known issue with this ordering, so leave it for now. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index a08037589cda..497ceda4ba10 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1852,8 +1852,6 @@ static void __mcheck_cpu_init_generic(void) bitmap_fill(all_banks, MAX_NR_BANKS); machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks); - cr4_set_bits(X86_CR4_MCE); - rdmsrl(MSR_IA32_MCG_CAP, cap); if (cap & MCG_CTL_P) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); @@ -2240,6 +2238,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_clear_banks(); __mcheck_cpu_check_banks(); __mcheck_cpu_setup_timer(); + cr4_set_bits(X86_CR4_MCE); } /* @@ -2408,6 +2407,7 @@ static void mce_syscore_resume(void) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); __mcheck_cpu_init_clear_banks(); + cr4_set_bits(X86_CR4_MCE); }
static struct syscore_ops mce_syscore_ops = { @@ -2427,6 +2427,7 @@ static void mce_cpu_restart(void *data) __mcheck_cpu_init_generic(); __mcheck_cpu_init_clear_banks(); __mcheck_cpu_init_timer(); + cr4_set_bits(X86_CR4_MCE); } /* Reinit MCEs after user configuration changes */ -- Gitee From 65240dae34d58956c7a64c9aae9ccd54d293f6bf Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 8 Sep 2025 15:40:31 +0000 Subject: [PATCH 10/22] x86/mce: Define BSP-only init ANBZ: #28102 commit 669ce4984b729ad5b4c6249d4a8721ae52398bfb upstream Currently, MCA initialization is executed identically on each CPU as they are brought online. However, a number of MCA initialization tasks only need to be done once. Define a function to collect all 'global' init tasks and call this from the BSP only. Start with CPU features. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Reviewed-by: Tony Luck Reviewed-by: Nikolay Borisov Tested-by: Tony Luck Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/include/asm/mce.h | 2 ++ arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/mce/amd.c | 3 --- arch/x86/kernel/cpu/mce/core.c | 28 +++++++++++++++++++++------- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4458eac7dc70..7bdda0c7deb7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -219,12 +219,14 @@ struct cper_ia_proc_ctx; #ifdef CONFIG_X86_MCE int mcheck_init(void); +void mca_bsp_init(struct cpuinfo_x86 *c); void mcheck_cpu_init(struct cpuinfo_x86 *c); void mcheck_cpu_clear(struct cpuinfo_x86 *c); int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id); #else static inline int mcheck_init(void) { return 0; } +static inline void mca_bsp_init(struct cpuinfo_x86 *c) {} static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) 
{} static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6a66ea5c4234..b27077af3bac 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1701,6 +1701,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_LA57); detect_nopl(); + mca_bsp_init(c); } void __init init_cpu_devs(void) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 4797bedbcacf..adcca554c8db 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -661,9 +661,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) u32 low = 0, high = 0, address = 0; int offset = -1; - mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV); - mce_flags.succor = cpu_feature_enabled(X86_FEATURE_SUCCOR); - mce_flags.smca = cpu_feature_enabled(X86_FEATURE_SMCA); mce_flags.amd_threshold = 1; for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 497ceda4ba10..7473864a785d 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1826,13 +1826,6 @@ static void __mcheck_cpu_cap_init(void) this_cpu_write(mce_num_banks, b); __mcheck_cpu_mce_banks_init(); - - /* Use accurate RIP reporting if available. 
*/ - if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) - mca_cfg.rip_msr = MSR_IA32_MCG_EIP; - - if (cap & MCG_SER_P) - mca_cfg.ser = 1; } static void __mcheck_cpu_init_generic(void) @@ -2203,6 +2196,27 @@ DEFINE_IDTENTRY_RAW(exc_machine_check) } #endif +void mca_bsp_init(struct cpuinfo_x86 *c) +{ + u64 cap; + + if (!mce_available(c)) + return; + + mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV); + mce_flags.succor = cpu_feature_enabled(X86_FEATURE_SUCCOR); + mce_flags.smca = cpu_feature_enabled(X86_FEATURE_SMCA); + + rdmsrl(MSR_IA32_MCG_CAP, cap); + + /* Use accurate RIP reporting if available. */ + if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) + mca_cfg.rip_msr = MSR_IA32_MCG_EIP; + + if (cap & MCG_SER_P) + mca_cfg.ser = 1; +} + /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off: -- Gitee From d0007282f36fdcbe545688ceeda97442b2a089b1 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 8 Sep 2025 15:40:32 +0000 Subject: [PATCH 11/22] x86/mce: Define BSP-only SMCA init ANBZ: #28102 commit c6e465b8d45a1bc717d196ee769ee5a9060de8e2 upstream Currently, on AMD systems, MCA interrupt handler functions are set during CPU init. However, the functions only need to be set once for the whole system. Assign the handlers only during BSP init. Do so only for SMCA systems to maintain the old behavior for legacy systems. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/amd.c | 6 ++++++ arch/x86/kernel/cpu/mce/core.c | 3 +++ arch/x86/kernel/cpu/mce/internal.h | 2 ++ 3 files changed, 11 insertions(+) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index adcca554c8db..3de23784d50b 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -692,6 +692,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) deferred_error_interrupt_enable(c); } +void smca_bsp_init(void) +{ + mce_threshold_vector = amd_threshold_interrupt; + deferred_error_int_vector = amd_deferred_error_interrupt; +} + /* * DRAM ECC errors are reported in the Northbridge (bank 4) with * Extended Error Code 8. diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 7473864a785d..8f4aba1ede4e 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2207,6 +2207,9 @@ void mca_bsp_init(struct cpuinfo_x86 *c) mce_flags.succor = cpu_feature_enabled(X86_FEATURE_SUCCOR); mce_flags.smca = cpu_feature_enabled(X86_FEATURE_SMCA); + if (mce_flags.smca) + smca_bsp_init(); + rdmsrl(MSR_IA32_MCG_CAP, cap); /* Use accurate RIP reporting if available. 
*/ diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 7c75faa509e2..029f5c7f3618 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -292,12 +292,14 @@ static __always_inline void smca_extract_err_addr(struct mce *m) m->addr &= GENMASK_ULL(55, lsb); } +void smca_bsp_init(void); #else static inline void mce_threshold_create_device(unsigned int cpu) { } static inline void mce_threshold_remove_device(unsigned int cpu) { } static inline bool amd_filter_mce(struct mce *m) { return false; } static inline bool amd_mce_usable_address(struct mce *m) { return false; } static inline void smca_extract_err_addr(struct mce *m) { } +static inline void smca_bsp_init(void) { } #endif #ifdef CONFIG_X86_ANCIENT_MCE -- Gitee From f3fbc924b3c2439c8bd91d2789145fde9353acee Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 12 Dec 2024 22:01:00 +0800 Subject: [PATCH 12/22] x86/mce: Break up __mcheck_cpu_apply_quirks() ANBZ: #28102 commit 51a12c28bb9a043e9444db5bd214b00ec161a639 upstream Split each vendor specific part into its own helper function. [Backport Changes] In arch/x86/kernel/cpu/mce/core.c, within __mcheck_cpu_apply_quirks(), the vendor check for X86_VENDOR_CENTAUR and X86_VENDOR_ZHAOXIN was rewritten from an if condition to a switch-case style, while preserving the exact same vendor conditions and behavior. This is a structural change only and does not alter the existing behavior. 
Signed-off-by: Tony Luck Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Sohil Mehta Reviewed-by: Yazen Ghannam Tested-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20241212140103.66964-5-qiuxu.zhuo@intel.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/core.c | 171 ++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 78 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 8f4aba1ede4e..fd585bdc53fb 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1892,103 +1892,118 @@ static void __mcheck_cpu_check_banks(void) } } -/* Add per CPU specific workarounds here */ -static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) +static void apply_quirks_amd(struct cpuinfo_x86 *c) { struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); - struct mca_config *cfg = &mca_cfg; - - if (c->x86_vendor == X86_VENDOR_UNKNOWN) { - pr_info("unknown CPU type - not enabling MCE support\n"); - return false; - } /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { - /* - * disable GART TBL walk error reporting, which - * trips off incorrectly with the IOMMU & 3ware - * & Cerberus: - */ - clear_bit(10, (unsigned long *)&mce_banks[4].ctl); - } - if (c->x86 < 0x11 && cfg->bootlog < 0) { - /* - * Lots of broken BIOS around that don't clear them - * by default and leave crap in there. Don't log: - */ - cfg->bootlog = 0; - } + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { /* - * Various K7s with broken bank 0 around. Always disable - * by default. 
+ * disable GART TBL walk error reporting, which + * trips off incorrectly with the IOMMU & 3ware + * & Cerberus: */ - if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) - mce_banks[0].ctl = 0; + clear_bit(10, (unsigned long *)&mce_banks[4].ctl); + } + if (c->x86 < 0x11 && mca_cfg.bootlog < 0) { /* - * overflow_recov is supported for F15h Models 00h-0fh - * even though we don't have a CPUID bit for it. + * Lots of broken BIOS around that don't clear them + * by default and leave crap in there. Don't log: */ - if (c->x86 == 0x15 && c->x86_model <= 0xf) - mce_flags.overflow_recov = 1; + mca_cfg.bootlog = 0; + } - if (c->x86 >= 0x17 && c->x86 <= 0x1A) - mce_flags.zen_ifu_quirk = 1; + /* + * Various K7s with broken bank 0 around. Always disable + * by default. + */ + if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) + mce_banks[0].ctl = 0; - } + /* + * overflow_recov is supported for F15h Models 00h-0fh + * even though we don't have a CPUID bit for it. + */ + if (c->x86 == 0x15 && c->x86_model <= 0xf) + mce_flags.overflow_recov = 1; - if (c->x86_vendor == X86_VENDOR_INTEL) { - /* - * SDM documents that on family 6 bank 0 should not be written - * because it aliases to another special BIOS controlled - * register. - * But it's not aliased anymore on model 0x1a+ - * Don't ignore bank 0 completely because there could be a - * valid event later, merely don't write CTL0. - */ + if (c->x86 >= 0x17 && c->x86 <= 0x1A) + mce_flags.zen_ifu_quirk = 1; +} - if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) - mce_banks[0].init = false; +static void apply_quirks_intel(struct cpuinfo_x86 *c) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); - /* - * All newer Intel systems support MCE broadcasting. Enable - * synchronization with a one second timeout. 
- */ - if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && - cfg->monarch_timeout < 0) - cfg->monarch_timeout = USEC_PER_SEC; + /* + * SDM documents that on family 6 bank 0 should not be written + * because it aliases to another special BIOS controlled + * register. + * But it's not aliased anymore on model 0x1a+ + * Don't ignore bank 0 completely because there could be a + * valid event later, merely don't write CTL0. + */ + if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) + mce_banks[0].init = false; - /* - * There are also broken BIOSes on some Pentium M and - * earlier systems: - */ - if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0) - cfg->bootlog = 0; + /* + * All newer Intel systems support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && + mca_cfg.monarch_timeout < 0) + mca_cfg.monarch_timeout = USEC_PER_SEC; - if (c->x86_vfm == INTEL_SANDYBRIDGE_X) - mce_flags.snb_ifu_quirk = 1; + /* + * There are also broken BIOSes on some Pentium M and + * earlier systems: + */ + if (c->x86 == 6 && c->x86_model <= 13 && mca_cfg.bootlog < 0) + mca_cfg.bootlog = 0; - /* - * Skylake, Cascacde Lake and Cooper Lake require a quirk on - * rep movs. - */ - if (c->x86_vfm == INTEL_SKYLAKE_X) - mce_flags.skx_repmov_quirk = 1; + if (c->x86_vfm == INTEL_SANDYBRIDGE_X) + mce_flags.snb_ifu_quirk = 1; + + /* + * Skylake, Cascacde Lake and Cooper Lake require a quirk on + * rep movs. + */ + if (c->x86_vfm == INTEL_SKYLAKE_X) + mce_flags.skx_repmov_quirk = 1; +} + +static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c) +{ + /* + * All newer Zhaoxin CPUs support MCE broadcasting. Enable + * synchronization with a one second timeout. 
+ */ + if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { + if (mca_cfg.monarch_timeout < 0) + mca_cfg.monarch_timeout = USEC_PER_SEC; } +} - if (c->x86_vendor == X86_VENDOR_CENTAUR || - c->x86_vendor == X86_VENDOR_ZHAOXIN) { - /* - * All newer Zhaoxin CPUs support MCE broadcasting. Enable - * synchronization with a one second timeout. - */ - if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { - if (cfg->monarch_timeout < 0) - cfg->monarch_timeout = USEC_PER_SEC; - } - mca_cfg.bios_cmci_threshold = 1; +/* Add per CPU specific workarounds here */ +static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) +{ + struct mca_config *cfg = &mca_cfg; + + switch (c->x86_vendor) { + case X86_VENDOR_UNKNOWN: + pr_info("unknown CPU type - not enabling MCE support\n"); + return false; + case X86_VENDOR_AMD: + apply_quirks_amd(c); + break; + case X86_VENDOR_INTEL: + apply_quirks_intel(c); + break; + case X86_VENDOR_CENTAUR: + case X86_VENDOR_ZHAOXIN: + apply_quirks_zhaoxin(c); + break; } if (cfg->monarch_timeout < 0) -- Gitee From 1141ca2ab56d507e81ac10d2da5c1e1f18917888 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 8 Sep 2025 15:40:33 +0000 Subject: [PATCH 13/22] x86/mce: Do 'UNKNOWN' vendor check early ANBZ: #28102 commit a46b2bbe1e36e7faab5010f68324b7d191c5c09f upstream The 'UNKNOWN' vendor check is handled as a quirk that is run on each online CPU. However, all CPUs are expected to have the same vendor. Move the 'UNKNOWN' vendor check to the BSP-only init so it is done early and once. Remove the unnecessary return value from the quirks check. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Reviewed-by: Tony Luck Reviewed-by: Nikolay Borisov Tested-by: Tony Luck Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/core.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index fd585bdc53fb..9e47af9b7f78 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1986,14 +1986,11 @@ static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c) } /* Add per CPU specific workarounds here */ -static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) +static void __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { struct mca_config *cfg = &mca_cfg; switch (c->x86_vendor) { - case X86_VENDOR_UNKNOWN: - pr_info("unknown CPU type - not enabling MCE support\n"); - return false; case X86_VENDOR_AMD: apply_quirks_amd(c); break; @@ -2010,8 +2007,6 @@ static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) cfg->monarch_timeout = 0; if (cfg->bootlog != 0) cfg->panic_timeout = 30; - - return true; } static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) @@ -2218,6 +2213,12 @@ void mca_bsp_init(struct cpuinfo_x86 *c) if (!mce_available(c)) return; + if (c->x86_vendor == X86_VENDOR_UNKNOWN) { + mca_cfg.disabled = 1; + pr_info("unknown CPU type - not enabling MCE support\n"); + return; + } + mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV); mce_flags.succor = cpu_feature_enabled(X86_FEATURE_SUCCOR); mce_flags.smca = cpu_feature_enabled(X86_FEATURE_SMCA); @@ -2252,10 +2253,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_cap_init(); - if (!__mcheck_cpu_apply_quirks(c)) { - mca_cfg.disabled = 1; - return; - } + __mcheck_cpu_apply_quirks(c); if (!mce_gen_pool_init()) { mca_cfg.disabled = 1; -- Gitee From 
46f7f6b56cb5e10681f89205a9b2d370abb5e62e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 29 Aug 2024 15:00:42 -0700 Subject: [PATCH 14/22] x86/cpu/intel: Replace PAT erratum model/family magic numbers with symbolic IFM references ANBZ: #28102 commit fd82221a59fa5ce9dc7523e11c5e995104a28cb0 upstream There's an erratum that prevents the PAT from working correctly: https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/pentium-dual-core-specification-update.pdf # Document 316515 Version 010 The kernel currently disables PAT support on those CPUs, but it does it with some magic numbers. Replace the magic numbers with the new "IFM" macros. Make the check refer to the last affected CPU (INTEL_CORE_YONAH) rather than the first fixed one. This makes it easier to find the documentation of the erratum since Intel documents where it is broken and not where it is fixed. I don't think the Pentium Pro (or Pentium II) is actually affected. But the old check included them, so it can't hurt to keep doing the same. I'm also not completely sure about the "Pentium M" CPUs (models 0x9 and 0xd). But, again, they were included in the old checks and were close Pentium III derivatives, so are likely affected. While we're at it, revise the comment referring to the erratum name and make sure it is a quote of the language from the actual errata doc. That should make it easier to find in the future when the URL inevitably changes. Why bother with this in the first place? It actually gets rid of one of the very few remaining direct references to c->x86{,_model}. No change in functionality intended. 
Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Cc: Len Brown Link: https://lore.kernel.org/r/20240829220042.1007820-1-dave.hansen@linux.intel.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/include/asm/intel-family.h | 2 ++ arch/x86/kernel/cpu/intel.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index f5020203de0b..eacddbc9e4d7 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -47,6 +47,8 @@ /* Wildcard match for FAM6 so X86_MATCH_VFM(ANY) works */ #define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY) +#define INTEL_PENTIUM_PRO IFM(6, 0x01) + #define INTEL_FAM6_CORE_YONAH 0x0E #define INTEL_CORE_YONAH IFM(6, 0x0E) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fa43abfe7fda..4605e6f73093 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -330,16 +330,18 @@ static void early_init_intel(struct cpuinfo_x86 *c) } /* - * There is a known erratum on Pentium III and Core Solo - * and Core Duo CPUs. - * " Page with PAT set to WC while associated MTRR is UC - * may consolidate to UC " - * Because of this erratum, it is better to stick with - * setting WC in MTRR rather than using PAT on these CPUs. + * PAT is broken on early family 6 CPUs, the last of which + * is "Yonah" where the erratum is named "AN7": * - * Enable PAT WC only on P4, Core 2 or later CPUs. + * Page with PAT (Page Attribute Table) Set to USWC + * (Uncacheable Speculative Write Combine) While + * Associated MTRR (Memory Type Range Register) Is UC + * (Uncacheable) May Consolidate to UC + * + * Disable PAT and fall back to MTRR on these CPUs. 
*/ - if (c->x86 == 6 && c->x86_model < 15) + if (c->x86_vfm >= INTEL_PENTIUM_PRO && + c->x86_vfm <= INTEL_CORE_YONAH) clear_cpu_cap(c, X86_FEATURE_PAT); /* -- Gitee From b83547b1aa595e1d458a96e36d019c43eca52175 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 12 Dec 2024 22:01:01 +0800 Subject: [PATCH 15/22] x86/mce: Convert family/model mixed checks to VFM-based checks ANBZ: #28102 commit 359d7a98e3e3f88dbf45411427b284bb3bbbaea5 upstream Convert family/model mixed checks to VFM-based checks to make the code more compact. Simplify. [ bp: Drop the "what" from the commit message - it should be visible from the diff alone. ] Suggested-by: Sohil Mehta Suggested-by: Dave Hansen Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tony Luck Reviewed-by: Sohil Mehta Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20241212140103.66964-6-qiuxu.zhuo@intel.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 9e47af9b7f78..1e154afd003f 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1918,7 +1918,7 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c) * Various K7s with broken bank 0 around. Always disable * by default. */ - if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) + if (c->x86 == 6 && this_cpu_read(mce_num_banks)) mce_banks[0].ctl = 0; /* @@ -1936,6 +1936,10 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c) { struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + /* Older CPUs (prior to family 6) don't need quirks. 
*/ + if (c->x86_vfm < INTEL_PENTIUM_PRO) + return; + /* * SDM documents that on family 6 bank 0 should not be written * because it aliases to another special BIOS controlled @@ -1944,22 +1948,21 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c) * Don't ignore bank 0 completely because there could be a * valid event later, merely don't write CTL0. */ - if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) + if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks)) mce_banks[0].init = false; /* * All newer Intel systems support MCE broadcasting. Enable * synchronization with a one second timeout. */ - if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && - mca_cfg.monarch_timeout < 0) + if (c->x86_vfm >= INTEL_CORE_YONAH && mca_cfg.monarch_timeout < 0) mca_cfg.monarch_timeout = USEC_PER_SEC; /* * There are also broken BIOSes on some Pentium M and * earlier systems: */ - if (c->x86 == 6 && c->x86_model <= 13 && mca_cfg.bootlog < 0) + if (c->x86_vfm < INTEL_CORE_YONAH && mca_cfg.bootlog < 0) mca_cfg.bootlog = 0; if (c->x86_vfm == INTEL_SANDYBRIDGE_X) -- Gitee From f6439b8f1e1104e9c260064ccf92816df6d7c131 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 25 Aug 2025 17:33:02 +0000 Subject: [PATCH 16/22] x86/mce: Cleanup bank processing on init ANBZ: #28102 commit 0f134c53246366c00664b640f9edc9be5db255b3 upstream Unify the bank preparation into __mcheck_cpu_init_clear_banks(), rename that function to what it does now - prepares banks. Do this so that generic and vendor banks init goes first so that settings done during that init can take effect before the first bank polling takes place. Move __mcheck_cpu_check_banks() into __mcheck_cpu_init_prepare_banks() as it already loops over the banks. The MCP_DONTLOG flag is no longer needed, since the MCA polling function is now called only if boot-time logging should be done. [Backport Changes] 1. 
Since the commit 78255eb239733 ("Rename 'wrmsrl()' to 'wrmsrq()'"), which renamed wrmsrl() to wrmsrq() globally, is not available in the current source tree and backporting it would introduce large, unrelated changes. Therefore, the changes intended for wrmsrq() are instead applied to functionally equivalent wrmsrl() calls in this backport. Accordingly, in file arch/x86/kernel/cpu/mce/core.c, wrmsrl() calls are removed and added as needed to follow the upstream changes while preserving the original behavior in the following functions: 1. __mcheck_cpu_check_banks() 2. __mcheck_cpu_init_prepare_banks() Signed-off-by: Borislav Petkov Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Reviewed-by: Qiuxu Zhuo Reviewed-by: Tony Luck Tested-by: Tony Luck Link: https://lore.kernel.org/20250825-wip-mca-updates-v5-5-865768a2eef8@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/include/asm/mce.h | 3 +- arch/x86/kernel/cpu/mce/core.c | 63 ++++++++++------------------------ 2 files changed, 19 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 7bdda0c7deb7..604ab940f393 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -270,8 +270,7 @@ DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); enum mcp_flags { MCP_TIMESTAMP = BIT(0), /* log time stamp */ MCP_UC = BIT(1), /* log uncorrected errors */ - MCP_DONTLOG = BIT(2), /* only clear, don't log */ - MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */ + MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */ }; void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 1e154afd003f..555cf1acdbfb 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -772,9 +772,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) report_fault_event(-1, NULL, SLIGHT_FAULT, FE_MCE, "CE 
hardware failure"); - if (flags & MCP_DONTLOG) - goto clear_it; - mce_read_aux(&err, i); m->severity = mce_severity(m, NULL, NULL, false); /* @@ -1798,7 +1795,7 @@ static void __mcheck_cpu_mce_banks_init(void) /* * Init them all, __mcheck_cpu_apply_quirks() is going to apply * the required vendor quirks before - * __mcheck_cpu_init_clear_banks() does the final bank setup. + * __mcheck_cpu_init_prepare_banks() does the final bank setup. */ b->ctl = -1ULL; b->init = true; @@ -1830,56 +1827,30 @@ static void __mcheck_cpu_cap_init(void) static void __mcheck_cpu_init_generic(void) { - enum mcp_flags m_fl = 0; - mce_banks_t all_banks; u64 cap; - if (!mca_cfg.bootlog) - m_fl = MCP_DONTLOG; - - /* - * Log the machine checks left over from the previous reset. Log them - * only, do not start processing them. That will happen in mcheck_late_init() - * when all consumers have been registered on the notifier chain. - */ - bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks); - rdmsrl(MSR_IA32_MCG_CAP, cap); if (cap & MCG_CTL_P) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); } -static void __mcheck_cpu_init_clear_banks(void) +static void __mcheck_cpu_init_prepare_banks(void) { struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + u64 msrval; int i; - for (i = 0; i < this_cpu_read(mce_num_banks); i++) { - struct mce_bank *b = &mce_banks[i]; + /* + * Log the machine checks left over from the previous reset. Log them + * only, do not start processing them. That will happen in mcheck_late_init() + * when all consumers have been registered on the notifier chain. + */ + if (mca_cfg.bootlog) { + mce_banks_t all_banks; - if (!b->init) - continue; - wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl); - wrmsrl(mca_msr_reg(i, MCA_STATUS), 0); + bitmap_fill(all_banks, MAX_NR_BANKS); + machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks); } -} - -/* - * Do a final check to see if there are any unused/RAZ banks. 
- * - * This must be done after the banks have been initialized and any quirks have - * been applied. - * - * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs. - * Otherwise, a user who disables a bank will not be able to re-enable it - * without a system reboot. - */ -static void __mcheck_cpu_check_banks(void) -{ - struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); - u64 msrval; - int i; for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; @@ -1887,6 +1858,9 @@ static void __mcheck_cpu_check_banks(void) if (!b->init) continue; + wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl); + wrmsrl(mca_msr_reg(i, MCA_STATUS), 0); + rdmsrl(mca_msr_reg(i, MCA_CTL), msrval); b->init = !!msrval; } @@ -2268,8 +2242,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); - __mcheck_cpu_init_clear_banks(); - __mcheck_cpu_check_banks(); + __mcheck_cpu_init_prepare_banks(); __mcheck_cpu_setup_timer(); cr4_set_bits(X86_CR4_MCE); } @@ -2439,7 +2412,7 @@ static void mce_syscore_resume(void) { __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); - __mcheck_cpu_init_clear_banks(); + __mcheck_cpu_init_prepare_banks(); cr4_set_bits(X86_CR4_MCE); } @@ -2458,7 +2431,7 @@ static void mce_cpu_restart(void *data) if (!mce_available(raw_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); - __mcheck_cpu_init_clear_banks(); + __mcheck_cpu_init_prepare_banks(); __mcheck_cpu_init_timer(); cr4_set_bits(X86_CR4_MCE); } -- Gitee From 55e4f2803875d9d47404551e86444610f225efba Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 8 Sep 2025 15:40:34 +0000 Subject: [PATCH 17/22] x86/mce: Separate global and per-CPU quirks ANBZ: #28102 commit 7eee1e92684507f64ec6a75fecbd27e37174b888 upstream Many quirks are global configuration settings and a handful apply to each CPU. Move the per-CPU quirks to vendor init to execute them on each online CPU. 
Set the global quirks during BSP-only init so they're only executed once and early. [Backport Changes] The current upstream commit removes the __mcheck_cpu_apply_quirks() function and integrates the per-CPU case handling into mca_bsp_init(). However, the Anolis kernel contains an additional case statement to support CENTAUR systems that is not present upstream. Therefore, this commit retains the CENTAUR-specific case 'X86_VENDOR_CENTAUR' and integrates it into mca_bsp_init() along with the upstream changes, preserving the existing Anolis functionality. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Reviewed-by: Tony Luck Reviewed-by: Nikolay Borisov Tested-by: Tony Luck Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/amd.c | 24 +++++++++ arch/x86/kernel/cpu/mce/core.c | 87 +++++++++------------------------ arch/x86/kernel/cpu/mce/intel.c | 18 +++++++ 3 files changed, 66 insertions(+), 63 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 3de23784d50b..5dc297ace4f2 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -654,6 +654,28 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) wrmsrl(MSR_K7_HWCR, hwcr); } +static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + + /* This should be disabled by the BIOS, but isn't always */ + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { + /* + * disable GART TBL walk error reporting, which + * trips off incorrectly with the IOMMU & 3ware + * & Cerberus: + */ + clear_bit(10, (unsigned long *)&mce_banks[4].ctl); + } + + /* + * Various K7s with broken bank 0 around. Always disable + * by default. 
+ */ + if (c->x86 == 6 && this_cpu_read(mce_num_banks)) + mce_banks[0].ctl = 0; +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -661,6 +683,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) u32 low = 0, high = 0, address = 0; int offset = -1; + amd_apply_cpu_quirks(c); + mce_flags.amd_threshold = 1; for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 555cf1acdbfb..ec16693b0082 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1793,8 +1793,9 @@ static void __mcheck_cpu_mce_banks_init(void) struct mce_bank *b = &mce_banks[i]; /* - * Init them all, __mcheck_cpu_apply_quirks() is going to apply - * the required vendor quirks before + * Init them all by default. + * + * The required vendor quirks will be applied before * __mcheck_cpu_init_prepare_banks() does the final bank setup. */ b->ctl = -1ULL; @@ -1866,20 +1867,8 @@ static void __mcheck_cpu_init_prepare_banks(void) } } -static void apply_quirks_amd(struct cpuinfo_x86 *c) +static void amd_apply_global_quirks(struct cpuinfo_x86 *c) { - struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); - - /* This should be disabled by the BIOS, but isn't always */ - if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { - /* - * disable GART TBL walk error reporting, which - * trips off incorrectly with the IOMMU & 3ware - * & Cerberus: - */ - clear_bit(10, (unsigned long *)&mce_banks[4].ctl); - } - if (c->x86 < 0x11 && mca_cfg.bootlog < 0) { /* * Lots of broken BIOS around that don't clear them @@ -1888,13 +1877,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c) mca_cfg.bootlog = 0; } - /* - * Various K7s with broken bank 0 around. Always disable - * by default. 
- */ - if (c->x86 == 6 && this_cpu_read(mce_num_banks)) - mce_banks[0].ctl = 0; - /* * overflow_recov is supported for F15h Models 00h-0fh * even though we don't have a CPUID bit for it. @@ -1906,25 +1888,12 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c) mce_flags.zen_ifu_quirk = 1; } -static void apply_quirks_intel(struct cpuinfo_x86 *c) +static void intel_apply_global_quirks(struct cpuinfo_x86 *c) { - struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); - /* Older CPUs (prior to family 6) don't need quirks. */ if (c->x86_vfm < INTEL_PENTIUM_PRO) return; - /* - * SDM documents that on family 6 bank 0 should not be written - * because it aliases to another special BIOS controlled - * register. - * But it's not aliased anymore on model 0x1a+ - * Don't ignore bank 0 completely because there could be a - * valid event later, merely don't write CTL0. - */ - if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks)) - mce_banks[0].init = false; - /* * All newer Intel systems support MCE broadcasting. Enable * synchronization with a one second timeout. @@ -1950,7 +1919,7 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c) mce_flags.skx_repmov_quirk = 1; } -static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c) +static void zhaoxin_apply_global_quirks(struct cpuinfo_x86 *c) { /* * All newer Zhaoxin CPUs support MCE broadcasting. 
Enable @@ -1962,30 +1931,6 @@ static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c) } } -/* Add per CPU specific workarounds here */ -static void __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) -{ - struct mca_config *cfg = &mca_cfg; - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - apply_quirks_amd(c); - break; - case X86_VENDOR_INTEL: - apply_quirks_intel(c); - break; - case X86_VENDOR_CENTAUR: - case X86_VENDOR_ZHAOXIN: - apply_quirks_zhaoxin(c); - break; - } - - if (cfg->monarch_timeout < 0) - cfg->monarch_timeout = 0; - if (cfg->bootlog != 0) - cfg->panic_timeout = 30; -} - static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) @@ -2211,6 +2156,24 @@ void mca_bsp_init(struct cpuinfo_x86 *c) if (cap & MCG_SER_P) mca_cfg.ser = 1; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + amd_apply_global_quirks(c); + break; + case X86_VENDOR_INTEL: + intel_apply_global_quirks(c); + break; + case X86_VENDOR_CENTAUR: + case X86_VENDOR_ZHAOXIN: + zhaoxin_apply_global_quirks(c); + break; + } + + if (mca_cfg.monarch_timeout < 0) + mca_cfg.monarch_timeout = 0; + if (mca_cfg.bootlog != 0) + mca_cfg.panic_timeout = 30; } /* @@ -2230,8 +2193,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_cap_init(); - __mcheck_cpu_apply_quirks(c); - if (!mce_gen_pool_init()) { mca_cfg.disabled = 1; pr_emerg("Couldn't allocate MCE records pool!\n"); diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 1b4dc49c5090..94a1621a8171 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -364,8 +364,26 @@ static void intel_imc_init(struct cpuinfo_x86 *c) } } +static void intel_apply_cpu_quirks(struct cpuinfo_x86 *c) +{ + /* + * SDM documents that on family 6 bank 0 should not be written + * because it aliases to another special BIOS controlled + * register. 
+ * But it's not aliased anymore on model 0x1a+ + * Don't ignore bank 0 completely because there could be a + * valid event later, merely don't write CTL0. + * + * Older CPUs (prior to family 6) can't reach this point and already + * return early due to the check of __mcheck_cpu_ancient_init(). + */ + if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks)) + this_cpu_ptr(mce_banks_array)[0].init = false; +} + void mce_intel_feature_init(struct cpuinfo_x86 *c) { + intel_apply_cpu_quirks(c); intel_init_cmci(); intel_init_lmce(); intel_imc_init(c); -- Gitee From ff7627e69870fdb68dd7c3207a7a63ae6bb28214 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 8 Sep 2025 15:40:35 +0000 Subject: [PATCH 18/22] x86/mce: Move machine_check_poll() status checks to helper functions ANBZ: #28102 commit 91af6842e9945d064401ed2d6e91539a619760d1 upstream There are a number of generic and vendor-specific status checks in machine_check_poll(). These are used to determine if an error should be skipped. Move these into helper functions. Future vendor-specific checks will be added to the helpers. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Reviewed-by: Tony Luck Reviewed-by: Nikolay Borisov Tested-by: Tony Luck Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/core.c | 88 ++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index ec16693b0082..63113361ba89 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -672,6 +672,52 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) DEFINE_PER_CPU(unsigned, mce_poll_count); +/* + * Newer Intel systems that support software error + * recovery need to make additional checks. 
Other + * CPUs should skip over uncorrected errors, but log + * everything else. + */ +static bool ser_should_log_poll_error(struct mce *m) +{ + /* Log "not enabled" (speculative) errors */ + if (!(m->status & MCI_STATUS_EN)) + return true; + + /* + * Log UCNA (SDM: 15.6.3 "UCR Error Classification") + * UC == 1 && PCC == 0 && S == 0 + */ + if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S)) + return true; + + return false; +} + +static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err) +{ + struct mce *m = &err->m; + + /* If this entry is not valid, ignore it. */ + if (!(m->status & MCI_STATUS_VAL)) + return false; + + /* + * If we are logging everything (at CPU online) or this + * is a corrected error, then we must log it. + */ + if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC)) + return true; + + if (mca_cfg.ser) + return ser_should_log_poll_error(m); + + if (m->status & MCI_STATUS_UC) + return false; + + return true; +} + /* * Poll for corrected events or events that happened before reset. * Those are just logged through /dev/mcelog. @@ -723,48 +769,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) if (!mca_cfg.cmci_disabled) mce_track_storm(m); - /* If this entry is not valid, ignore it */ - if (!(m->status & MCI_STATUS_VAL)) + /* Verify that the error should be logged based on hardware conditions. */ + if (!should_log_poll_error(flags, &err)) continue; - /* - * If we are logging everything (at CPU online) or this - * is a corrected error, then we must log it. - */ - if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC)) - goto log_it; - - /* - * Newer Intel systems that support software error - * recovery need to make additional checks. Other - * CPUs should skip over uncorrected errors, but log - * everything else. 
- */ - if (!mca_cfg.ser) { - if (m->status & MCI_STATUS_UC) - continue; - goto log_it; - } - - /* Log "not enabled" (speculative) errors */ - if (!(m->status & MCI_STATUS_EN)) - goto log_it; - - /* - * Log UCNA (SDM: 15.6.3 "UCR Error Classification") - * UC == 1 && PCC == 0 && S == 0 - */ - if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S)) - goto log_it; - - /* - * Skip anything else. Presumption is that our read of this - * bank is racing with a machine check. Leave the log alone - * for do_machine_check() to deal with it. - */ - continue; - -log_it: if (m->status & MCI_STATUS_UC) report_fault_event(-1, NULL, SLIGHT_FAULT, FE_MCE, "UCE hardware failure"); -- Gitee From 73ac76e7489985162e73906ca9311a6e6c3da770 Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Tue, 22 Oct 2024 19:36:29 +0000 Subject: [PATCH 19/22] x86/MCE/AMD: Add support for new MCA_SYND{1,2} registers ANBZ: #28102 commit d4fca1358ea9096f2f6ed942e2cb3a820073dfc1 upstream Starting with Zen4, AMD's Scalable MCA systems incorporate two new registers: MCA_SYND1 and MCA_SYND2. These registers will include supplemental error information in addition to the existing MCA_SYND register. The data within these registers is considered valid if MCA_STATUS[SyndV] is set. Userspace error decoding tools like rasdaemon gather related hardware error information through the tracepoints. Therefore, export these two registers through the mce_record tracepoint so that tools like rasdaemon can parse them and output the supplemental error information like FRU text contained in them. [ bp: Massage. 
] Signed-off-by: Yazen Ghannam Signed-off-by: Avadhut Naik Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20241022194158.110073-4-avadhut.naik@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/include/asm/mce.h | 21 +++++++++++++++++++++ arch/x86/include/uapi/asm/mce.h | 3 ++- arch/x86/kernel/cpu/mce/amd.c | 5 ++++- arch/x86/kernel/cpu/mce/core.c | 9 ++++++++- drivers/edac/mce_amd.c | 8 ++++++-- include/trace/events/mce.h | 7 +++++-- 6 files changed, 46 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 604ab940f393..014147396b23 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -122,6 +122,9 @@ #define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 #define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a +/* Registers MISC2 to MISC4 are at offsets B to D. */ +#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e +#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f #define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x)) @@ -132,6 +135,8 @@ #define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) +#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x)) #define XEC(x, mask) (((x) >> 16) & mask) @@ -190,9 +195,25 @@ enum mce_notifier_prios { /** * struct mce_hw_err - Hardware Error Record. * @m: Machine Check record. + * @vendor: Vendor-specific error information. + * + * Vendor-specific fields should not be added to struct mce. 
Instead, vendors + * should export their vendor-specific data through their structure in the + * vendor union below. + * + * AMD's vendor data is parsed by error decoding tools for supplemental error + * information. Thus, current offsets of existing fields must be maintained. + * Only add new fields at the end of AMD's vendor structure. */ struct mce_hw_err { struct mce m; + + union vendor_info { + struct { + u64 synd1; /* MCA_SYND1 MSR */ + u64 synd2; /* MCA_SYND2 MSR */ + } amd; + } vendor; }; #define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m) diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index db9adc081c5a..cb6b48a7c22b 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -8,7 +8,8 @@ /* * Fields are zero when not available. Also, this struct is shared with * userspace mcelog and thus must keep existing fields at current offsets. - * Only add new fields to the end of the structure + * Only add new, shared fields to the end of the structure. + * Do not add vendor-specific fields. 
*/ struct mce { __u64 status; /* Bank's MCi_STATUS MSR */ diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 5dc297ace4f2..53e1e2b38fa9 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -814,8 +814,11 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) if (mce_flags.smca) { rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid); - if (m->status & MCI_STATUS_SYNDV) + if (m->status & MCI_STATUS_SYNDV) { rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd); + rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1); + rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2); + } } mce_log(&err); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 63113361ba89..38459a2e3d1d 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -201,6 +201,10 @@ static void __print_mce(struct mce_hw_err *err) if (mce_flags.smca) { if (m->synd) pr_cont("SYND %llx ", m->synd); + if (err->vendor.amd.synd1) + pr_cont("SYND1 %llx ", err->vendor.amd.synd1); + if (err->vendor.amd.synd2) + pr_cont("SYND2 %llx ", err->vendor.amd.synd2); if (m->ipid) pr_cont("IPID %llx ", m->ipid); } @@ -665,8 +669,11 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) if (mce_flags.smca) { m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i)); - if (m->status & MCI_STATUS_SYNDV) + if (m->status & MCI_STATUS_SYNDV) { m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); + err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i)); + err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i)); + } } } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index e1b289e88cbf..0979e6009afe 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1292,6 +1292,7 @@ static int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { struct mce *m = (struct mce *)data; + struct mce_hw_err *err = to_mce_hw_err(m); unsigned int fam 
= x86_family(m->cpuid); int ecc; @@ -1349,8 +1350,11 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (boot_cpu_has(X86_FEATURE_SMCA)) { pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid); - if (m->status & MCI_STATUS_SYNDV) - pr_cont(", Syndrome: 0x%016llx", m->synd); + if (m->status & MCI_STATUS_SYNDV) { + pr_cont(", Syndrome: 0x%016llx\n", m->synd); + pr_emerg(HW_ERR "Syndrome1: 0x%016llx, Syndrome2: 0x%016llx", + err->vendor.amd.synd1, err->vendor.amd.synd2); + } pr_cont("\n"); diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index 65aba1afcd07..c1c50df9ecfd 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -43,6 +43,7 @@ TRACE_EVENT(mce_record, __field( u8, bank ) __field( u8, cpuvendor ) __field( u32, microcode ) + __dynamic_array(u8, v_data, sizeof(err->vendor)) ), TP_fast_assign( @@ -65,9 +66,10 @@ TRACE_EVENT(mce_record, __entry->bank = err->m.bank; __entry->cpuvendor = err->m.cpuvendor; __entry->microcode = err->m.microcode; + memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor)); ), - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x", + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s", __entry->cpu, __entry->mcgcap, __entry->mcgstatus, __entry->bank, __entry->status, @@ -83,7 +85,8 @@ TRACE_EVENT(mce_record, __entry->walltime, __entry->socketid, __entry->apicid, - __entry->microcode) + __entry->microcode, + __print_dynamic_array(v_data, sizeof(u8))) ); #endif /* _TRACE_MCE_H */ -- Gitee From b9afb1284b2593fb3f1101b699f96a7d42b8dcd8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Apr 2025 22:29:02 
+0200 Subject: [PATCH 20/22] x86/msr: Rename 'mce_rdmsrl()' to 'mce_rdmsrq()' ANBZ: #28102 commit ebe29309c4d2821d5fdccd5393eba9c77540e260 upstream Suggested-by: "H. Peter Anvin" Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Juergen Gross Cc: Dave Hansen Cc: Xin Li Cc: Linus Torvalds Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/core.c | 32 +++++++++++++++--------------- arch/x86/kernel/cpu/mce/internal.h | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 38459a2e3d1d..d39b31d1a42d 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -373,7 +373,7 @@ void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) } /* MSR access wrappers used for error injection */ -noinstr u64 mce_rdmsrl(u32 msr) +noinstr u64 mce_rdmsrq(u32 msr) { DECLARE_ARGS(val, low, high); @@ -429,7 +429,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v) low = (u32)v; high = (u32)(v >> 32); - /* See comment in mce_rdmsrl() */ + /* See comment in mce_rdmsrq() */ asm volatile("1: wrmsr\n" "2:\n" _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR_IN_MCE) @@ -453,7 +453,7 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs instrumentation_end(); m = &err->m; - m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); + m->mcgstatus = mce_rdmsrq(MSR_IA32_MCG_STATUS); if (regs) { /* * Get the address of the instruction at the time of @@ -473,7 +473,7 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs } /* Use accurate RIP reporting if available. 
*/ if (mca_cfg.rip_msr) - m->ip = mce_rdmsrl(mca_cfg.rip_msr); + m->ip = mce_rdmsrq(mca_cfg.rip_msr); } } @@ -649,10 +649,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) struct mce *m = &err->m; if (m->status & MCI_STATUS_MISCV) - m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC)); + m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC)); if (m->status & MCI_STATUS_ADDRV) { - m->addr = mce_rdmsrl(mca_msr_reg(i, MCA_ADDR)); + m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR)); /* * Mask the reported address by the reported granularity. @@ -667,12 +667,12 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) } if (mce_flags.smca) { - m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i)); + m->ipid = mce_rdmsrq(MSR_AMD64_SMCA_MCx_IPID(i)); if (m->status & MCI_STATUS_SYNDV) { - m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); - err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i)); - err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i)); + m->synd = mce_rdmsrq(MSR_AMD64_SMCA_MCx_SYND(i)); + err->vendor.amd.synd1 = mce_rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(i)); + err->vendor.amd.synd2 = mce_rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(i)); } } } @@ -764,7 +764,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) m->bank = i; barrier(); - m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS)); + m->status = mce_rdmsrq(mca_msr_reg(i, MCA_STATUS)); /* * Update storm tracking here, before checking for the @@ -864,8 +864,8 @@ quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) */ static noinstr bool quirk_skylake_repmov(void) { - u64 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); - u64 misc_enable = mce_rdmsrl(MSR_IA32_MISC_ENABLE); + u64 mcgstatus = mce_rdmsrq(MSR_IA32_MCG_STATUS); + u64 misc_enable = mce_rdmsrq(MSR_IA32_MISC_ENABLE); u64 mc1_status; /* @@ -876,7 +876,7 @@ static noinstr bool quirk_skylake_repmov(void) !(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) return false; - mc1_status = mce_rdmsrl(MSR_IA32_MCx_STATUS(1)); 
+ mc1_status = mce_rdmsrq(MSR_IA32_MCx_STATUS(1)); /* Check for a software-recoverable data fetch error. */ if ((mc1_status & @@ -932,7 +932,7 @@ static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, un int i; for (i = 0; i < this_cpu_read(mce_num_banks); i++) { - m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS)); + m->status = mce_rdmsrq(mca_msr_reg(i, MCA_STATUS)); if (!(m->status & MCI_STATUS_VAL)) continue; @@ -1313,7 +1313,7 @@ __mc_scan_banks(struct mce_hw_err *err, struct pt_regs *regs, m->addr = 0; m->bank = i; - m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS)); + m->status = mce_rdmsrq(mca_msr_reg(i, MCA_STATUS)); if (!(m->status & MCI_STATUS_VAL)) continue; diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 029f5c7f3618..9f7ca6451235 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -316,7 +316,7 @@ static __always_inline void pentium_machine_check(struct pt_regs *regs) {} static __always_inline void winchip_machine_check(struct pt_regs *regs) {} #endif -noinstr u64 mce_rdmsrl(u32 msr); +noinstr u64 mce_rdmsrq(u32 msr); static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg) { -- Gitee From b0f4cee0d3b0db1e572b0f93cf5ec955165a63fd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Apr 2025 22:29:03 +0200 Subject: [PATCH 21/22] x86/msr: Rename 'mce_wrmsrl()' to 'mce_wrmsrq()' ANBZ: #28102 commit 8e44e83f57c3289a41507eb79a315400629978ae upstream Suggested-by: "H. 
Peter Anvin" Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Juergen Gross Cc: Dave Hansen Cc: Xin Li Cc: Linus Torvalds Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index d39b31d1a42d..3d87585ffa65 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -408,7 +408,7 @@ noinstr u64 mce_rdmsrq(u32 msr) return EAX_EDX_VAL(val, low, high); } -static noinstr void mce_wrmsrl(u32 msr, u64 v) +static noinstr void mce_wrmsrq(u32 msr, u64 v) { u32 low, high; @@ -806,7 +806,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) /* * Clear state for this bank. */ - mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0); + mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0); } /* @@ -887,8 +887,8 @@ static noinstr bool quirk_skylake_repmov(void) MCI_STATUS_ADDRV | MCI_STATUS_MISCV | MCI_STATUS_AR | MCI_STATUS_S)) { misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; - mce_wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - mce_wrmsrl(MSR_IA32_MCx_STATUS(1), 0); + mce_wrmsrq(MSR_IA32_MISC_ENABLE, misc_enable); + mce_wrmsrq(MSR_IA32_MCx_STATUS(1), 0); instrumentation_begin(); pr_err_once("Erratum detected, disable fast string copy instructions.\n"); @@ -1251,7 +1251,7 @@ static __always_inline void mce_clear_state(unsigned long *toclear) for (i = 0; i < this_cpu_read(mce_num_banks); i++) { if (arch_test_bit(i, toclear)) - mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0); + mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0); } } @@ -1669,7 +1669,7 @@ noinstr void do_machine_check(struct pt_regs *regs) instrumentation_end(); clear: - mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); + mce_wrmsrq(MSR_IA32_MCG_STATUS, 0); } EXPORT_SYMBOL_GPL(do_machine_check); -- Gitee From 388bec101a03c1035fead1a3209b037444702d37 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 8 Sep 2025 15:40:36 +0000 
Subject: [PATCH 22/22] x86/mce: Add a clear_bank() helper ANBZ: #28102 commit 5c6f123c419b6e20f84ac1683089a52f449273aa upstream Add a helper at the end of the MCA polling function to collect vendor and/or feature actions. Start with a basic skeleton for now. Actions for AMD thresholding and deferred errors will be added later. [ bp: Drop the obvious comment too. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com Signed-off-by: PrithivishS --- arch/x86/kernel/cpu/mce/amd.c | 5 +++++ arch/x86/kernel/cpu/mce/core.c | 15 ++++++++++----- arch/x86/kernel/cpu/mce/internal.h | 3 +++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 53e1e2b38fa9..8cb906044d0e 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -963,6 +963,11 @@ static void amd_threshold_interrupt(void) } } +void amd_clear_bank(struct mce *m) +{ + mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); +} + /* * Sysfs Interface */ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 3d87585ffa65..56cc26686541 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -408,7 +408,7 @@ noinstr u64 mce_rdmsrq(u32 msr) return EAX_EDX_VAL(val, low, high); } -static noinstr void mce_wrmsrq(u32 msr, u64 v) +noinstr void mce_wrmsrq(u32 msr, u64 v) { u32 low, high; @@ -725,6 +725,14 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err) return true; } +static void clear_bank(struct mce *m) +{ + if (m->cpuvendor == X86_VENDOR_AMD) + return amd_clear_bank(m); + + mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); +} + /* * Poll for corrected events or events that happened before reset. * Those are just logged through /dev/mcelog. 
@@ -803,10 +811,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_log(&err); clear_it: - /* - * Clear state for this bank. - */ - mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0); + clear_bank(m); } /* diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 9f7ca6451235..eb2bbea12b07 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -267,6 +267,7 @@ void mce_threshold_create_device(unsigned int cpu); void mce_threshold_remove_device(unsigned int cpu); extern bool amd_filter_mce(struct mce *m); bool amd_mce_usable_address(struct mce *m); +void amd_clear_bank(struct mce *m); /* * If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits @@ -298,6 +299,7 @@ static inline void mce_threshold_create_device(unsigned int cpu) { } static inline void mce_threshold_remove_device(unsigned int cpu) { } static inline bool amd_filter_mce(struct mce *m) { return false; } static inline bool amd_mce_usable_address(struct mce *m) { return false; } +static inline void amd_clear_bank(struct mce *m) { } static inline void smca_extract_err_addr(struct mce *m) { } static inline void smca_bsp_init(void) { } #endif @@ -317,6 +319,7 @@ static __always_inline void winchip_machine_check(struct pt_regs *regs) {} #endif noinstr u64 mce_rdmsrq(u32 msr); +noinstr void mce_wrmsrq(u32 msr, u64 v); static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg) { -- Gitee