diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-19 21:00:10 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-19 21:00:10 +0100 |
commit | 77286b868f93a590313b449b25a49ef2ad0c0308 (patch) | |
tree | 091b77c0ec73e90cbd2d1ac38d576bc860de2e11 /drivers/edac | |
parent | Merge tag 'kcsan-20241112-v6.13-rc1' of git://git.kernel.org/pub/scm/linux/ke... (diff) | |
parent | Merge branch 'edac-misc' into edac-updates (diff) | |
download | linux-77286b868f93a590313b449b25a49ef2ad0c0308.tar.xz linux-77286b868f93a590313b449b25a49ef2ad0c0308.zip |
Merge tag 'edac_updates_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov:
- Add support for Bluefield-2 SOCs to bluefield_edac
- Add support for Intel Panther Lake-H to igen6_edac
- Add polling support to igen6_edac as some Intel M100 chips have
trouble with error interrupts
- Add Kaby Lake-S support to ie31200_edac
- Fix memory source detection in the SKX common module which is used by
a couple of Intel EDAC drivers
- Add support for the NXP i.MX9 memory controller to fsl_edac
- The usual fixes and cleanups all over the place
* tag 'edac_updates_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
EDAC/igen6: Add polling support
EDAC/igen6: Initialize edac_op_state according to the configuration data
EDAC/igen6: Avoid segmentation fault on module unload
EDAC/ie31200: Add Kaby Lake-S dual-core host bridge ID
MAINTAINERS: Change FSL DDR EDAC maintainership
EDAC/{skx_common,i10nm}: Fix incorrect far-memory error source indicator
EDAC/skx_common: Differentiate memory error sources
EDAC/fsl_ddr: Add support for i.MX9 DDR controller
dt-bindings: memory: fsl: Add compatible string nxp,imx9-memory-controller
EDAC/fsl_ddr: Fix bad bit shift operations
EDAC/fsl_ddr: Move global variables into struct fsl_mc_pdata
EDAC/fsl_ddr: Pass down fsl_mc_pdata in ddr_in32() and ddr_out32()
RAS/AMD/ATL: Add debug prints for DF register reads
EDAC/bluefield: Use Arm SMC for EMI access on BlueField-2
EDAC/bluefield: Fix potential integer overflow
EDAC/igen6: Add Intel Panther Lake-H SoCs support
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/bluefield_edac.c | 170 | ||||
-rw-r--r-- | drivers/edac/fsl_ddr_edac.c | 141 | ||||
-rw-r--r-- | drivers/edac/fsl_ddr_edac.h | 13 | ||||
-rw-r--r-- | drivers/edac/i10nm_base.c | 1 | ||||
-rw-r--r-- | drivers/edac/ie31200_edac.c | 8 | ||||
-rw-r--r-- | drivers/edac/igen6_edac.c | 49 | ||||
-rw-r--r-- | drivers/edac/layerscape_edac.c | 1 | ||||
-rw-r--r-- | drivers/edac/skx_common.c | 57 | ||||
-rw-r--r-- | drivers/edac/skx_common.h | 8 |
9 files changed, 356 insertions, 92 deletions
diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c index 5b3164560648..739132e5ed8a 100644 --- a/drivers/edac/bluefield_edac.c +++ b/drivers/edac/bluefield_edac.c @@ -47,13 +47,22 @@ #define MLXBF_EDAC_MAX_DIMM_PER_MC 2 #define MLXBF_EDAC_ERROR_GRAIN 8 +#define MLXBF_WRITE_REG_32 (0x82000009) +#define MLXBF_READ_REG_32 (0x8200000A) +#define MLXBF_SIP_SVC_VERSION (0x8200ff03) + +#define MLXBF_SMCCC_ACCESS_VIOLATION (-4) + +#define MLXBF_SVC_REQ_MAJOR 0 +#define MLXBF_SVC_REQ_MINOR 3 + /* - * Request MLNX_SIP_GET_DIMM_INFO + * Request MLXBF_SIP_GET_DIMM_INFO * * Retrieve information about DIMM on a certain slot. * * Call register usage: - * a0: MLNX_SIP_GET_DIMM_INFO + * a0: MLXBF_SIP_GET_DIMM_INFO * a1: (Memory controller index) << 16 | (Dimm index in memory controller) * a2-7: not used. * @@ -61,7 +70,7 @@ * a0: MLXBF_DIMM_INFO defined below describing the DIMM. * a1-3: not used. */ -#define MLNX_SIP_GET_DIMM_INFO 0x82000008 +#define MLXBF_SIP_GET_DIMM_INFO 0x82000008 /* Format for the SMC response about the memory information */ #define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0) @@ -72,9 +81,15 @@ #define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24) struct bluefield_edac_priv { + /* pointer to device structure */ + struct device *dev; int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; void __iomem *emi_base; int dimm_per_mc; + /* access to secure regs supported */ + bool svc_sreg_support; + /* SMC table# for secure regs access */ + u32 sreg_tbl; }; static u64 smc_call1(u64 smc_op, u64 smc_arg) @@ -86,6 +101,71 @@ static u64 smc_call1(u64 smc_op, u64 smc_arg) return res.a0; } +static int secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl) +{ + struct arm_smccc_res res; + int status; + + arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr, + 0, 0, 0, 0, 0, &res); + + status = res.a0; + + if (status == SMCCC_RET_NOT_SUPPORTED || + status == MLXBF_SMCCC_ACCESS_VIOLATION) + return -1; + + *result = (u32)res.a1; + return 0; +} + +static int secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl) +{ + struct arm_smccc_res res; + int status; + + arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr, + 0, 0, 0, 0, &res); + + status = res.a0; + + if (status == SMCCC_RET_NOT_SUPPORTED || + status == MLXBF_SMCCC_ACCESS_VIOLATION) + return -1; + else + return 0; +} + +static int bluefield_edac_readl(struct bluefield_edac_priv *priv, u32 offset, u32 *result) +{ + void __iomem *addr; + int err = 0; + + addr = priv->emi_base + offset; + + if (priv->svc_sreg_support) + err = secure_readl(addr, result, priv->sreg_tbl); + else + *result = readl(addr); + + return err; +} + +static int bluefield_edac_writel(struct bluefield_edac_priv *priv, u32 offset, u32 data) +{ + void __iomem *addr; + int err = 0; + + addr = priv->emi_base + offset; + + if (priv->svc_sreg_support) + err = secure_writel(addr, data, priv->sreg_tbl); + else + writel(data, addr); + + return err; +} + /* * Gather the ECC information from the External Memory Interface registers * and report it to the edac handler. @@ -99,7 +179,7 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; enum hw_event_mc_err_type ecc_type; u64 ecc_dimm_addr; - int ecc_dimm; + int ecc_dimm, err; ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : HW_EVENT_ERR_UNCORRECTED; @@ -109,14 +189,19 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, * registers with information about the last ECC error occurrence. */ ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; - writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL); + err = bluefield_edac_writel(priv, MLXBF_ECC_LATCH_SEL, ecc_latch_select); + if (err) + dev_err(priv->dev, "ECC latch select write failed.\n"); /* * Verify that the ECC reported info in the registers is of the * same type as the one asked to report. If not, just report the * error without the detailed information. */ - dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM); + err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom); + if (err) + dev_err(priv->dev, "DRAM syndrom read failed.\n"); + serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); @@ -127,13 +212,21 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, return; } - dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO); + err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info); + if (err) + dev_err(priv->dev, "DRAM additional info read failed.\n"); + err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; - edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0); - edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1); + err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0); + if (err) + dev_err(priv->dev, "Error addr 0 read failed.\n"); + + err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1); + if (err) + dev_err(priv->dev, "Error addr 1 read failed.\n"); ecc_dimm_addr = ((u64)edea1 << 32) | edea0; @@ -147,6 +240,7 @@ static void bluefield_edac_check(struct mem_ctl_info *mci) { struct bluefield_edac_priv *priv = mci->pvt_info; u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; + int err; /* * The memory controller might not be initialized by the firmware @@ -155,7 +249,10 @@ static void bluefield_edac_check(struct mem_ctl_info *mci) if (mci->edac_cap == EDAC_FLAG_NONE) return; - ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT); + err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count); + if (err) + dev_err(priv->dev, "ECC count read failed.\n"); + single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); @@ -172,15 +269,18 @@ static void bluefield_edac_check(struct mem_ctl_info *mci) } /* Write to clear reported errors. */ - if (ecc_count) - writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR); + if (ecc_count) { + err = bluefield_edac_writel(priv, MLXBF_ECC_ERR, ecc_error); + if (err) + dev_err(priv->dev, "ECC Error write failed.\n"); + } } /* Initialize the DIMMs information for the given memory controller. */ static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) { struct bluefield_edac_priv *priv = mci->pvt_info; - int mem_ctrl_idx = mci->mc_idx; + u64 mem_ctrl_idx = mci->mc_idx; struct dimm_info *dimm; u64 smc_info, smc_arg; int is_empty = 1, i; @@ -189,7 +289,7 @@ static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) dimm = mci->dimms[i]; smc_arg = mem_ctrl_idx << 16 | i; - smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg); + smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg); if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { dimm->mtype = MEM_EMPTY; @@ -244,6 +344,7 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev) struct bluefield_edac_priv *priv; struct device *dev = &pdev->dev; struct edac_mc_layer layers[1]; + struct arm_smccc_res res; struct mem_ctl_info *mci; struct resource *emi_res; unsigned int mc_idx, dimm_count; @@ -279,13 +380,43 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev) return -ENOMEM; priv = mci->pvt_info; + priv->dev = dev; + + /* + * The "sec_reg_block" property in the ACPI table determines the method + * the driver uses to access the EMI registers: + * a) property is not present - directly access registers via readl/writel + * b) property is present - indirectly access registers via SMC calls + * (assuming required Silicon Provider service version found) + */ + if (device_property_read_u32(dev, "sec_reg_block", &priv->sreg_tbl)) { + priv->svc_sreg_support = false; + } else { + /* + * Check for minimum required Arm Silicon Provider (SiP) service + * version, ensuring support of required SMC function IDs. + */ + arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res); + if (res.a0 == MLXBF_SVC_REQ_MAJOR && + res.a1 >= MLXBF_SVC_REQ_MINOR) { + priv->svc_sreg_support = true; + } else { + dev_err(dev, "Required SMCs are not supported.\n"); + ret = -EINVAL; + goto err; + } + } priv->dimm_per_mc = dimm_count; - priv->emi_base = devm_ioremap_resource(dev, emi_res); - if (IS_ERR(priv->emi_base)) { - dev_err(dev, "failed to map EMI IO resource\n"); - ret = PTR_ERR(priv->emi_base); - goto err; + if (!priv->svc_sreg_support) { + priv->emi_base = devm_ioremap_resource(dev, emi_res); + if (IS_ERR(priv->emi_base)) { + dev_err(dev, "failed to map EMI IO resource\n"); + ret = PTR_ERR(priv->emi_base); + goto err; + } + } else { + priv->emi_base = (void __iomem *)emi_res->start; } mci->pdev = dev; @@ -320,7 +451,6 @@ err: edac_mc_free(mci); return ret; - } static void bluefield_edac_mc_remove(struct platform_device *pdev) diff --git a/drivers/edac/fsl_ddr_edac.c b/drivers/edac/fsl_ddr_edac.c index d148d262d0d4..e4eaec0aa81d 100644 --- a/drivers/edac/fsl_ddr_edac.c +++ b/drivers/edac/fsl_ddr_edac.c @@ -31,18 +31,30 @@ static int edac_mc_idx; -static u32 orig_ddr_err_disable; -static u32 orig_ddr_err_sbe; -static bool little_endian; +static inline void __iomem *ddr_reg_addr(struct fsl_mc_pdata *pdata, unsigned int off) +{ + if (pdata->flag == TYPE_IMX9 && off >= FSL_MC_DATA_ERR_INJECT_HI && off <= FSL_MC_ERR_SBE) + return pdata->inject_vbase + off - FSL_MC_DATA_ERR_INJECT_HI + + IMX9_MC_DATA_ERR_INJECT_OFF; + + if (pdata->flag == TYPE_IMX9 && off >= IMX9_MC_ERR_EN) + return pdata->inject_vbase + off - IMX9_MC_ERR_EN; -static inline u32 ddr_in32(void __iomem *addr) + return pdata->mc_vbase + off; +} + +static inline u32 ddr_in32(struct fsl_mc_pdata *pdata, unsigned int off) { - return little_endian ? ioread32(addr) : ioread32be(addr); + void __iomem *addr = ddr_reg_addr(pdata, off); + + return pdata->little_endian ? ioread32(addr) : ioread32be(addr); } -static inline void ddr_out32(void __iomem *addr, u32 value) +static inline void ddr_out32(struct fsl_mc_pdata *pdata, unsigned int off, u32 value) { - if (little_endian) + void __iomem *addr = ddr_reg_addr(pdata, off); + + if (pdata->little_endian) iowrite32(value, addr); else iowrite32be(value, addr); @@ -60,7 +72,7 @@ static ssize_t fsl_mc_inject_data_hi_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct fsl_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", - ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI)); + ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_HI)); } static ssize_t fsl_mc_inject_data_lo_show(struct device *dev, @@ -70,7 +82,7 @@ static ssize_t fsl_mc_inject_data_lo_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct fsl_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", - ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO)); + ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_LO)); } static ssize_t fsl_mc_inject_ctrl_show(struct device *dev, @@ -80,7 +92,7 @@ static ssize_t fsl_mc_inject_ctrl_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct fsl_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", - ddr_in32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT)); + ddr_in32(pdata, FSL_MC_ECC_ERR_INJECT)); } static ssize_t fsl_mc_inject_data_hi_store(struct device *dev, @@ -97,7 +109,7 @@ static ssize_t fsl_mc_inject_data_hi_store(struct device *dev, if (rc) return rc; - ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI, val); + ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_HI, val); return count; } return 0; @@ -117,7 +129,7 @@ static ssize_t fsl_mc_inject_data_lo_store(struct device *dev, if (rc) return rc; - ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO, val); + ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_LO, val); return count; } return 0; @@ -137,7 +149,7 @@ static ssize_t fsl_mc_inject_ctrl_store(struct device *dev, if (rc) return rc; - ddr_out32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT, val); + ddr_out32(pdata, FSL_MC_ECC_ERR_INJECT, val); return count; } return 0; @@ -286,7 +298,7 @@ static void fsl_mc_check(struct mem_ctl_info *mci) int bad_data_bit; int bad_ecc_bit; - err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT); + err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT); if (!err_detect) return; @@ -295,14 +307,14 @@ static void fsl_mc_check(struct mem_ctl_info *mci) /* no more processing if not ECC bit errors */ if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) { - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect); + ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect); return; } - syndrome = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ECC); + syndrome = ddr_in32(pdata, FSL_MC_CAPTURE_ECC); /* Mask off appropriate bits of syndrome based on bus width */ - bus_width = (ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG) & + bus_width = (ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG) & DSC_DBW_MASK) ? 32 : 64; if (bus_width == 64) syndrome &= 0xff; @@ -310,8 +322,8 @@ static void fsl_mc_check(struct mem_ctl_info *mci) syndrome &= 0xffff; err_addr = make64( - ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_EXT_ADDRESS), - ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ADDRESS)); + ddr_in32(pdata, FSL_MC_CAPTURE_EXT_ADDRESS), + ddr_in32(pdata, FSL_MC_CAPTURE_ADDRESS)); pfn = err_addr >> PAGE_SHIFT; for (row_index = 0; row_index < mci->nr_csrows; row_index++) { @@ -320,29 +332,33 @@ static void fsl_mc_check(struct mem_ctl_info *mci) break; } - cap_high = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_HI); - cap_low = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_LO); + cap_high = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_HI); + cap_low = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_LO); /* * Analyze single-bit errors on 64-bit wide buses * TODO: Add support for 32-bit wide buses */ if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) { + u64 cap = (u64)cap_high << 32 | cap_low; + u32 s = syndrome; + sbe_ecc_decode(cap_high, cap_low, syndrome, &bad_data_bit, &bad_ecc_bit); - if (bad_data_bit != -1) - fsl_mc_printk(mci, KERN_ERR, - "Faulty Data bit: %d\n", bad_data_bit); - if (bad_ecc_bit != -1) - fsl_mc_printk(mci, KERN_ERR, - "Faulty ECC bit: %d\n", bad_ecc_bit); + if (bad_data_bit >= 0) { + fsl_mc_printk(mci, KERN_ERR, "Faulty Data bit: %d\n", bad_data_bit); + cap ^= 1ULL << bad_data_bit; + } + + if (bad_ecc_bit >= 0) { + fsl_mc_printk(mci, KERN_ERR, "Faulty ECC bit: %d\n", bad_ecc_bit); + s ^= 1 << bad_ecc_bit; + } fsl_mc_printk(mci, KERN_ERR, "Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n", - cap_high ^ (1 << (bad_data_bit - 32)), - cap_low ^ (1 << bad_data_bit), - syndrome ^ (1 << bad_ecc_bit)); + upper_32_bits(cap), lower_32_bits(cap), s); } fsl_mc_printk(mci, KERN_ERR, @@ -367,7 +383,7 @@ static void fsl_mc_check(struct mem_ctl_info *mci) row_index, 0, -1, mci->ctl_name, ""); - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect); + ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect); } static irqreturn_t fsl_mc_isr(int irq, void *dev_id) @@ -376,7 +392,7 @@ static irqreturn_t fsl_mc_isr(int irq, void *dev_id) struct fsl_mc_pdata *pdata = mci->pvt_info; u32 err_detect; - err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT); + err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT); if (!err_detect) return IRQ_NONE; @@ -396,7 +412,7 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) u32 cs_bnds; int index; - sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG); + sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG); sdtype = sdram_ctl & DSC_SDTYPE_MASK; if (sdram_ctl & DSC_RD_EN) { @@ -431,6 +447,9 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) case 0x05000000: mtype = MEM_DDR4; break; + case 0x04000000: + mtype = MEM_LPDDR4; + break; default: mtype = MEM_UNKNOWN; break; @@ -444,7 +463,7 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) csrow = mci->csrows[index]; dimm = csrow->channels[0]->dimm; - cs_bnds = ddr_in32(pdata->mc_vbase + FSL_MC_CS_BNDS_0 + + cs_bnds = ddr_in32(pdata, FSL_MC_CS_BNDS_0 + (index * FSL_MC_CS_BNDS_OFS)); start = (cs_bnds & 0xffff0000) >> 16; @@ -464,7 +483,9 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) dimm->grain = 8; dimm->mtype = mtype; dimm->dtype = DEV_UNKNOWN; - if (sdram_ctl & DSC_X32_EN) + if (pdata->flag == TYPE_IMX9) + dimm->dtype = DEV_X16; + else if (sdram_ctl & DSC_X32_EN) dimm->dtype = DEV_X32; dimm->edac_mode = EDAC_SECDED; } @@ -476,6 +497,7 @@ int fsl_mc_err_probe(struct platform_device *op) struct edac_mc_layer layers[2]; struct fsl_mc_pdata *pdata; struct resource r; + u32 ecc_en_mask; u32 sdram_ctl; int res; @@ -503,11 +525,13 @@ int fsl_mc_err_probe(struct platform_device *op) mci->ctl_name = pdata->name; mci->dev_name = pdata->name; + pdata->flag = (unsigned long)device_get_match_data(&op->dev); + /* * Get the endianness of DDR controller registers. * Default is big endian. */ - little_endian = of_property_read_bool(op->dev.of_node, "little-endian"); + pdata->little_endian = of_property_read_bool(op->dev.of_node, "little-endian"); res = of_address_to_resource(op->dev.of_node, 0, &r); if (res) { @@ -531,8 +555,23 @@ int fsl_mc_err_probe(struct platform_device *op) goto err; } - sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG); - if (!(sdram_ctl & DSC_ECC_EN)) { + if (pdata->flag == TYPE_IMX9) { + pdata->inject_vbase = devm_platform_ioremap_resource_byname(op, "inject"); + if (IS_ERR(pdata->inject_vbase)) { + res = -ENOMEM; + goto err; + } + } + + if (pdata->flag == TYPE_IMX9) { + sdram_ctl = ddr_in32(pdata, IMX9_MC_ERR_EN); + ecc_en_mask = ERR_ECC_EN | ERR_INLINE_ECC; + } else { + sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG); + ecc_en_mask = DSC_ECC_EN; + } + + if ((sdram_ctl & ecc_en_mask) != ecc_en_mask) { /* no ECC */ pr_warn("%s: No ECC DIMMs discovered\n", __func__); res = -ENODEV; @@ -543,7 +582,8 @@ int fsl_mc_err_probe(struct platform_device *op) mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR | MEM_FLAG_DDR2 | MEM_FLAG_RDDR2 | MEM_FLAG_DDR3 | MEM_FLAG_RDDR3 | - MEM_FLAG_DDR4 | MEM_FLAG_RDDR4; + MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | + MEM_FLAG_LPDDR4; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; @@ -558,11 +598,11 @@ int fsl_mc_err_probe(struct platform_device *op) fsl_ddr_init_csrows(mci); /* store the original error disable bits */ - orig_ddr_err_disable = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DISABLE); - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, 0); + pdata->orig_ddr_err_disable = ddr_in32(pdata, FSL_MC_ERR_DISABLE); + ddr_out32(pdata, FSL_MC_ERR_DISABLE, 0); /* clear all error bits */ - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, ~0); + ddr_out32(pdata, FSL_MC_ERR_DETECT, ~0); res = edac_mc_add_mc_with_groups(mci, fsl_ddr_dev_groups); if (res) { @@ -571,15 +611,15 @@ int fsl_mc_err_probe(struct platform_device *op) } if (edac_op_state == EDAC_OPSTATE_INT) { - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, + ddr_out32(pdata, FSL_MC_ERR_INT_EN, DDR_EIE_MBEE | DDR_EIE_SBEE); /* store the original error management threshold */ - orig_ddr_err_sbe = ddr_in32(pdata->mc_vbase + - FSL_MC_ERR_SBE) & 0xff0000; + pdata->orig_ddr_err_sbe = ddr_in32(pdata, + FSL_MC_ERR_SBE) & 0xff0000; /* set threshold to 1 error per interrupt */ - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, 0x10000); + ddr_out32(pdata, FSL_MC_ERR_SBE, 0x10000); /* register interrupts */ pdata->irq = platform_get_irq(op, 0); @@ -620,12 +660,13 @@ void fsl_mc_err_remove(struct platform_device *op) edac_dbg(0, "\n"); if (edac_op_state == EDAC_OPSTATE_INT) { - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, 0); + ddr_out32(pdata, FSL_MC_ERR_INT_EN, 0); } - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, - orig_ddr_err_disable); - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, orig_ddr_err_sbe); + ddr_out32(pdata, FSL_MC_ERR_DISABLE, + pdata->orig_ddr_err_disable); + ddr_out32(pdata, FSL_MC_ERR_SBE, pdata->orig_ddr_err_sbe); + edac_mc_del_mc(&op->dev); edac_mc_free(mci); diff --git a/drivers/edac/fsl_ddr_edac.h b/drivers/edac/fsl_ddr_edac.h index c0994a2a003c..73618f79e587 100644 --- a/drivers/edac/fsl_ddr_edac.h +++ b/drivers/edac/fsl_ddr_edac.h @@ -39,6 +39,9 @@ #define FSL_MC_CAPTURE_EXT_ADDRESS 0x0e54 #define FSL_MC_ERR_SBE 0x0e58 +#define IMX9_MC_ERR_EN 0x1000 +#define IMX9_MC_DATA_ERR_INJECT_OFF 0x100 + #define DSC_MEM_EN 0x80000000 #define DSC_ECC_EN 0x20000000 #define DSC_RD_EN 0x10000000 @@ -46,6 +49,9 @@ #define DSC_DBW_32 0x00080000 #define DSC_DBW_64 0x00000000 +#define ERR_ECC_EN 0x80000000 +#define ERR_INLINE_ECC 0x40000000 + #define DSC_SDTYPE_MASK 0x07000000 #define DSC_X32_EN 0x00000020 @@ -65,11 +71,18 @@ #define DDR_EDI_SBED 0x4 /* single-bit ECC error disable */ #define DDR_EDI_MBED 0x8 /* multi-bit ECC error disable */ +#define TYPE_IMX9 0x1 /* MC used by iMX9 having registers changed */ + struct fsl_mc_pdata { char *name; int edac_idx; void __iomem *mc_vbase; + void __iomem *inject_vbase; int irq; + u32 orig_ddr_err_disable; + u32 orig_ddr_err_sbe; + bool little_endian; + unsigned long flag; }; int fsl_mc_err_probe(struct platform_device *op); void fsl_mc_err_remove(struct platform_device *op); diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index e2a954de913b..51556c72a967 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1036,6 +1036,7 @@ static int __init i10nm_init(void) return -ENODEV; cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); res_cfg = cfg; rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 9ef13570f2e5..4fc16922dc1a 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -19,7 +19,8 @@ * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller * 0c08: Xeon E3-1200 v3 Processor DRAM Controller * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers - * 5918: Xeon E3-1200 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 590f: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 5918: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers * 190f: 6th Gen Core Dual-Core Processor Host Bridge/DRAM Registers * 191f: 6th Gen Core Quad-Core Processor Host Bridge/DRAM Registers * 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers @@ -67,7 +68,8 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x190F #define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x1918 #define PCI_DEVICE_ID_INTEL_IE31200_HB_10 0x191F -#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x5918 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x590f +#define PCI_DEVICE_ID_INTEL_IE31200_HB_12 0x5918 /* Coffee Lake-S */ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK 0x3e00 @@ -88,6 +90,7 @@ ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \ ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \ ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \ + ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_12) || \ (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) @@ -587,6 +590,7 @@ static const struct pci_device_id ie31200_pci_tbl[] = { { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VEND_DEV(INTEL, IE31200_HB_12), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 189a2fc29e74..fdf3a84fe698 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -263,6 +263,11 @@ static struct work_struct ecclog_work; #define DID_ARL_UH_SKU2 0x7d20 #define DID_ARL_UH_SKU3 0x7d30 +/* Compute die IDs for Panther Lake-H with IBECC */ +#define DID_PTL_H_SKU1 0xb000 +#define DID_PTL_H_SKU2 0xb001 +#define DID_PTL_H_SKU3 0xb002 + static int get_mchbar(struct pci_dev *pdev, u64 *mchbar) { union { @@ -605,6 +610,9 @@ static const struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg }, { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg }, { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg }, { }, }; MODULE_DEVICE_TABLE(pci, igen6_pci_tbl); @@ -1170,6 +1178,20 @@ fail: return -ENODEV; } +static void igen6_check(struct mem_ctl_info *mci) +{ + struct igen6_imc *imc = mci->pvt_info; + u64 ecclog; + + /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ + ecclog = ecclog_read_and_clear(imc); + if (!ecclog) + return; + + if (!ecclog_gen_pool_add(imc->mc, ecclog)) + irq_work_queue(&ecclog_irq_work); +} + static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) { struct edac_mc_layer layers[2]; @@ -1211,6 +1233,8 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; mci->dev_name = pci_name(pdev); + if (edac_op_state == EDAC_OPSTATE_POLL) + mci->edac_check = igen6_check; mci->pvt_info = &igen6_pvt->imc[mc]; imc = mci->pvt_info; @@ -1245,6 +1269,7 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) imc->mci = mci; return 0; fail3: + mci->pvt_info = NULL; kfree(mci->ctl_name); fail2: edac_mc_free(mci); @@ -1269,6 +1294,7 @@ static void igen6_unregister_mcis(void) edac_mc_del_mc(mci->pdev); kfree(mci->ctl_name); + mci->pvt_info = NULL; edac_mc_free(mci); iounmap(imc->window); } @@ -1348,6 +1374,25 @@ static void unregister_err_handler(void) unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); } +static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent) +{ + /* + * Quirk: Certain SoCs' error reporting interrupts don't work. + * Force polling mode for them to ensure that memory error + * events can be handled. + */ + if (ent->device == DID_ADL_N_SKU4) { + edac_op_state = EDAC_OPSTATE_POLL; + return; + } + + /* Set the mode according to the configuration data. */ + if (cfg->machine_check) + edac_op_state = EDAC_OPSTATE_INT; + else + edac_op_state = EDAC_OPSTATE_NMI; +} + static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { u64 mchbar; @@ -1365,6 +1410,8 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto fail; + opstate_set(res_cfg, ent); + for (i = 0; i < res_cfg->num_imc; i++) { rc = igen6_register_mci(i, mchbar, pdev); if (rc) @@ -1448,8 +1495,6 @@ static int __init igen6_init(void) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) return -EBUSY; - edac_op_state = EDAC_OPSTATE_NMI; - rc = pci_register_driver(&igen6_driver); if (rc) return rc; diff --git a/drivers/edac/layerscape_edac.c b/drivers/edac/layerscape_edac.c index 0d42c1238908..9a0c92ebbc3c 100644 --- a/drivers/edac/layerscape_edac.c +++ b/drivers/edac/layerscape_edac.c @@ -21,6 +21,7 @@ static const struct of_device_id fsl_ddr_mc_err_of_match[] = { { .compatible = "fsl,qoriq-memory-controller", }, + { .compatible = "nxp,imx9-memory-controller", .data = (void *)TYPE_IMX9, }, {}, }; MODULE_DEVICE_TABLE(of, fsl_ddr_mc_err_of_match); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 85713646957b..6cf17af7d911 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -47,6 +47,7 @@ static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); static bool skx_mem_cfg_2lm; +static struct res_config *skx_res_cfg; int skx_adxl_get(void) { @@ -119,7 +120,7 @@ void skx_adxl_put(void) } EXPORT_SYMBOL_GPL(skx_adxl_put); -static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem) +static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) { struct skx_dev *d; int i, len = 0; @@ -135,8 +136,24 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me return false; } + /* + * GNR with a Flat2LM memory configuration may mistakenly classify + * a near-memory error(DDR5) as a far-memory error(CXL), resulting + * in the incorrect selection of decoded ADXL components. + * To address this, prefetch the decoded far-memory controller ID + * and adjust the error source to near-memory if the far-memory + * controller ID is invalid. + */ + if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + if (res->imc == -1) { + err_src = ERR_SRC_2LM_NM; + edac_dbg(0, "Adjust the error source to near-memory.\n"); + } + } + res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; - if (error_in_1st_level_mem) { + if (err_src == ERR_SRC_2LM_NM) { res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? @@ -191,6 +208,12 @@ void skx_set_mem_cfg(bool mem_cfg_2lm) } EXPORT_SYMBOL_GPL(skx_set_mem_cfg); +void skx_set_res_cfg(struct res_config *cfg) +{ + skx_res_cfg = cfg; +} +EXPORT_SYMBOL_GPL(skx_set_res_cfg); + void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { driver_decode = decode; @@ -620,31 +643,27 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, optype, skx_msg); } -static bool skx_error_in_1st_level_mem(const struct mce *m) +static enum error_source skx_error_source(const struct mce *m) { - u32 errcode; + u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; - if (!skx_mem_cfg_2lm) - return false; - - errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; - - return errcode == MCACOD_EXT_MEM_ERR; -} + if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR) + return ERR_SRC_NOT_MEMORY; -static bool skx_error_in_mem(const struct mce *m) -{ - u32 errcode; + if (!skx_mem_cfg_2lm) + return ERR_SRC_1LM; - errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; + if (errcode == MCACOD_EXT_MEM_ERR) + return ERR_SRC_2LM_NM; - return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR); + return ERR_SRC_2LM_FM; } int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) { struct mce *mce = (struct mce *)data; + enum error_source err_src; struct decoded_addr res; struct mem_ctl_info *mci; char *type; @@ -652,8 +671,10 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; + err_src = skx_error_source(mce); + /* Ignore unless this is memory related with an address */ - if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV)) + if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; memset(&res, 0, sizeof(res)); @@ -667,7 +688,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, /* Try driver decoder first */ if (!(driver_decode && driver_decode(&res))) { /* Then try firmware decoder (ACPI DSM methods) */ - if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))) + if (!(adxl_component_count && skx_adxl_decode(&res, err_src))) return NOTIFY_DONE; } diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index f945c1bf5ca4..54bba8a62f72 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -146,6 +146,13 @@ enum { INDEX_MAX }; +enum error_source { + ERR_SRC_1LM, + ERR_SRC_2LM_NM, + ERR_SRC_2LM_FM, + ERR_SRC_NOT_MEMORY, +}; + #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) @@ -234,6 +241,7 @@ int skx_adxl_get(void); void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); +void skx_set_res_cfg(struct res_config *cfg); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); |