diff options
author | Maarten Lankhorst <maarten.lankhorst@linux.intel.com> | 2021-07-27 12:48:17 +0200 |
---|---|---|
committer | Maarten Lankhorst <maarten.lankhorst@linux.intel.com> | 2021-07-27 12:48:17 +0200 |
commit | ca31fef11dc83e672415d5925a134749761329bd (patch) | |
tree | 8eb6a489e2d6dd117300f40ed8fc945a06bb6eee /drivers/edac | |
parent | drm/plane: Move drm_plane_enable_fb_damage_clips into core (diff) | |
parent | efi: sysfb_efi: fix build when EFI is not set (diff) | |
download | linux-ca31fef11dc83e672415d5925a134749761329bd.tar.xz linux-ca31fef11dc83e672415d5925a134749761329bd.zip |
Backmerge remote-tracking branch 'drm/drm-next' into drm-misc-next
Required bump from v5.13-rc3 to v5.14-rc3, and to pick up sysfb compilation fixes.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 3 | ||||
-rw-r--r-- | drivers/edac/altera_edac.c | 1 | ||||
-rw-r--r-- | drivers/edac/aspeed_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/i10nm_base.c | 174 | ||||
-rw-r--r-- | drivers/edac/igen6_edac.c | 374 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 70 | ||||
-rw-r--r-- | drivers/edac/pnd2_edac.c | 3 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 3 | ||||
-rw-r--r-- | drivers/edac/skx_base.c | 3 | ||||
-rw-r--r-- | drivers/edac/skx_common.c | 82 | ||||
-rw-r--r-- | drivers/edac/skx_common.h | 34 | ||||
-rw-r--r-- | drivers/edac/thunderx_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/ti_edac.c | 1 |
13 files changed, 696 insertions, 60 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 1e836e320edd..2fc4c3f91fd5 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -270,7 +270,8 @@ config EDAC_PND2 config EDAC_IGEN6 tristate "Intel client SoC Integrated MC" - depends on PCI && X86_64 && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG + depends on PCI && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG + depends on X86_64 && X86_MCE_INTEL help Support for error detection and correction on the Intel client SoC Integrated Memory Controller using In-Band ECC IP. diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 5f7fd79ec82f..61c21bd880a4 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -20,6 +20,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/of_platform.h> +#include <linux/panic_notifier.h> #include <linux/platform_device.h> #include <linux/regmap.h> #include <linux/types.h> diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c index a46da56d6d54..6bd5f8815919 100644 --- a/drivers/edac/aspeed_edac.c +++ b/drivers/edac/aspeed_edac.c @@ -254,8 +254,8 @@ static int init_csrows(struct mem_ctl_info *mci) return rc; } - dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n", - r.start, resource_size(&r), PAGE_SHIFT); + dev_dbg(mci->pdev, "dt: /memory node resources: first page %pR, PAGE_SHIFT macro=0x%x\n", + &r, PAGE_SHIFT); csrow->first_page = r.start >> PAGE_SHIFT; nr_pages = resource_size(&r) >> PAGE_SHIFT; diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 238a4ad1e526..6ce0ed2ffaaf 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -13,7 +13,7 @@ #include "edac_module.h" #include "skx_common.h" -#define I10NM_REVISION "v0.0.4" +#define I10NM_REVISION "v0.0.5" #define EDAC_MOD_STR "i10nm_edac" /* Debug macros */ @@ -24,19 +24,39 @@ pci_read_config_dword((d)->uracu, 0xd0, &(reg)) #define I10NM_GET_IMC_BAR(d, i, reg) \ pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) +#define I10NM_GET_SAD(d, offset, i, reg)\ + pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg)) +#define I10NM_GET_HBM_IMC_BAR(d, reg) \ + pci_read_config_dword((d)->uracu, 0xd4, &(reg)) +#define I10NM_GET_CAPID3_CFG(d, reg) \ + pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg)) #define I10NM_GET_DIMMMTR(m, i, j) \ - readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4) + readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \ + (i) * (m)->chan_mmio_sz + (j) * 4) #define I10NM_GET_MCDDRTCFG(m, i, j) \ - readl((m)->mbase + 0x20970 + (i) * (m)->chan_mmio_sz + (j) * 4) + readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \ + (i) * (m)->chan_mmio_sz + (j) * 4) #define I10NM_GET_MCMTR(m, i) \ - readl((m)->mbase + 0x20ef8 + (i) * (m)->chan_mmio_sz) + readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \ + (i) * (m)->chan_mmio_sz) #define I10NM_GET_AMAP(m, i) \ - readl((m)->mbase + 0x20814 + (i) * (m)->chan_mmio_sz) + readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \ + (i) * (m)->chan_mmio_sz) #define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23) #define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) #define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \ GET_BITFIELD(reg, 0, 10) + 1) << 12) +#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \ + ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) + +#define I10NM_HBM_IMC_MMIO_SIZE 0x9000 +#define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30) +#define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) + +#define I10NM_MAX_SAD 16 +#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0) +#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5) static struct list_head *i10nm_edac_list; @@ -63,7 +83,32 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, return pdev; } -static int i10nm_get_all_munits(void) +static bool i10nm_check_2lm(struct res_config *cfg) +{ + struct skx_dev *d; + u32 reg; + int i; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1], + PCI_SLOT(cfg->sad_all_devfn), + PCI_FUNC(cfg->sad_all_devfn)); + if (!d->sad_all) + continue; + + for (i = 0; i < I10NM_MAX_SAD; i++) { + I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg); + if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) { + edac_dbg(2, "2-level memory configuration.\n"); + return true; + } + } + } + + return false; +} + +static int i10nm_get_ddr_munits(void) { struct pci_dev *mdev; void __iomem *mbase; @@ -91,7 +136,7 @@ static int i10nm_get_all_munits(void) edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n", j++, base, reg); - for (i = 0; i < I10NM_NUM_IMC; i++) { + for (i = 0; i < I10NM_NUM_DDR_IMC; i++) { mdev = pci_get_dev_wrapper(d->seg, d->bus[0], 12 + i, 0); if (i == 0 && !mdev) { @@ -127,11 +172,97 @@ static int i10nm_get_all_munits(void) return 0; } +static bool i10nm_check_hbm_imc(struct skx_dev *d) +{ + u32 reg; + + if (I10NM_GET_CAPID3_CFG(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n"); + return false; + } + + return I10NM_IS_HBM_PRESENT(reg) != 0; +} + +static int i10nm_get_hbm_munits(void) +{ + struct pci_dev *mdev; + void __iomem *mbase; + u32 reg, off, mcmtr; + struct skx_dev *d; + int i, lmc; + u64 base; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3); + if (!d->pcu_cr3) + return -ENODEV; + + if (!i10nm_check_hbm_imc(d)) { + i10nm_printk(KERN_DEBUG, "No hbm memory\n"); + return -ENODEV; + } + + if (I10NM_GET_SCK_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get socket bar\n"); + return -ENODEV; + } + base = I10NM_GET_SCK_MMIO_BASE(reg); + + if (I10NM_GET_HBM_IMC_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n"); + return -ENODEV; + } + base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg); + + lmc = I10NM_NUM_DDR_IMC; + + for (i = 0; i < I10NM_NUM_HBM_IMC; i++) { + mdev = pci_get_dev_wrapper(d->seg, d->bus[0], + 12 + i / 4, 1 + i % 4); + if (i == 0 && !mdev) { + i10nm_printk(KERN_ERR, "No hbm mc found\n"); + return -ENODEV; + } + if (!mdev) + continue; + + d->imc[lmc].mdev = mdev; + off = i * I10NM_HBM_IMC_MMIO_SIZE; + + edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n", + lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE); + + mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); + if (!mbase) { + i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", + base + off); + return -ENOMEM; + } + + d->imc[lmc].mbase = mbase; + d->imc[lmc].hbm_mc = true; + + mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); + if (!I10NM_IS_HBM_IMC(mcmtr)) { + i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); + return -ENODEV; + } + + lmc++; + } + } + + return 0; +} + static struct res_config i10nm_cfg0 = { .type = I10NM, .decs_did = 0x3452, .busno_cfg_offset = 0xcc, .ddr_chan_mmio_sz = 0x4000, + .sad_all_devfn = PCI_DEVFN(29, 0), + .sad_all_offset = 0x108, }; static struct res_config i10nm_cfg1 = { @@ -139,6 +270,8 @@ static struct res_config i10nm_cfg1 = { .decs_did = 0x3452, .busno_cfg_offset = 0xd0, .ddr_chan_mmio_sz = 0x4000, + .sad_all_devfn = PCI_DEVFN(29, 0), + .sad_all_offset = 0x108, }; static struct res_config spr_cfg = { @@ -146,7 +279,10 @@ static struct res_config spr_cfg = { .decs_did = 0x3252, .busno_cfg_offset = 0xd0, .ddr_chan_mmio_sz = 0x8000, + .hbm_chan_mmio_sz = 0x4000, .support_ddr5 = true, + .sad_all_devfn = PCI_DEVFN(10, 0), + .sad_all_offset = 0x300, }; static const struct x86_cpu_id i10nm_cpuids[] = { @@ -179,13 +315,13 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, struct dimm_info *dimm; int i, j, ndimms; - for (i = 0; i < I10NM_NUM_CHANNELS; i++) { + for (i = 0; i < imc->num_channels; i++) { if (!imc->mbase) continue; ndimms = 0; amap = I10NM_GET_AMAP(imc, i); - for (j = 0; j < I10NM_NUM_DIMMS; j++) { + for (j = 0; j < imc->num_dimms; j++) { dimm = edac_get_dimm(mci, i, j, 0); mtr = I10NM_GET_DIMMMTR(imc, i, j); mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j); @@ -278,6 +414,9 @@ static int __init i10nm_init(void) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) return -EBUSY; + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + id = x86_match_cpu(i10nm_cpuids); if (!id) return -ENODEV; @@ -296,8 +435,11 @@ static int __init i10nm_init(void) return -ENODEV; } - rc = i10nm_get_all_munits(); - if (rc < 0) + skx_set_mem_cfg(i10nm_check_2lm(cfg)); + + rc = i10nm_get_ddr_munits(); + + if (i10nm_get_hbm_munits() && rc) goto fail; list_for_each_entry(d, i10nm_edac_list, list) { @@ -318,7 +460,15 @@ static int __init i10nm_init(void) d->imc[i].lmc = i; d->imc[i].src_id = src_id; d->imc[i].node_id = node_id; - d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; + if (d->imc[i].hbm_mc) { + d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; + d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS; + d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS; + } else { + d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; + d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS; + d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS; + } rc = skx_register_mci(&d->imc[i], d->imc[i].mdev, "Intel_10nm Socket", EDAC_MOD_STR, diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 6be9986fc6bd..a07bbfd075d0 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -22,11 +22,12 @@ #include <linux/io.h> #include <asm/mach_traps.h> #include <asm/nmi.h> +#include <asm/mce.h> #include "edac_mc.h" #include "edac_module.h" -#define IGEN6_REVISION "v2.4" +#define IGEN6_REVISION "v2.5" #define EDAC_MOD_STR "igen6_edac" #define IGEN6_NMI_NAME "igen6_ibecc" @@ -40,7 +41,7 @@ #define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo)) -#define NUM_IMC 1 /* Max memory controllers */ +#define NUM_IMC 2 /* Max memory controllers */ #define NUM_CHANNELS 2 /* Max channels */ #define NUM_DIMMS 2 /* Max DIMMs per channel */ @@ -54,6 +55,10 @@ #define CAPID_C_OFFSET 0xec #define CAPID_C_IBECC BIT(15) +/* Capability register E */ +#define CAPID_E_OFFSET 0xf0 +#define CAPID_E_IBECC BIT(12) + /* Error Status */ #define ERRSTS_OFFSET 0xc8 #define ERRSTS_CE BIT_ULL(6) @@ -70,7 +75,7 @@ #define IBECC_ACTIVATE_EN BIT(0) /* IBECC error log */ -#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + 0x170) +#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + res_cfg->ibecc_error_log_offset) #define ECC_ERROR_LOG_CE BIT_ULL(62) #define ECC_ERROR_LOG_UE BIT_ULL(63) #define ECC_ERROR_LOG_ADDR_SHIFT 5 @@ -84,39 +89,54 @@ #define MCHBAR_SIZE 0x10000 /* Parameters for the channel decode stage */ -#define MAD_INTER_CHANNEL_OFFSET 0x5000 +#define IMC_BASE (res_cfg->imc_base) +#define MAD_INTER_CHANNEL_OFFSET IMC_BASE #define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2) #define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3) #define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4) #define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29) /* Parameters for DRAM decode stage */ -#define MAD_INTRA_CH0_OFFSET 0x5004 +#define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4) #define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0) /* DIMM characteristics */ -#define MAD_DIMM_CH0_OFFSET 0x500c +#define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc) #define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29) #define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8) #define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29) #define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25) +/* Hash for memory controller selection */ +#define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8) +#define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3) + /* Hash for channel selection */ -#define CHANNEL_HASH_OFFSET 0X5024 +#define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24) /* Hash for enhanced channel selection */ -#define CHANNEL_EHASH_OFFSET 0X5028 +#define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28) #define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) #define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) #define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28) +/* Parameters for memory slice decode stage */ +#define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) +#define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) + static struct res_config { + bool machine_check; int num_imc; + u32 imc_base; + u32 cmf_base; + u32 cmf_size; + u32 ms_hash_offset; u32 ibecc_base; + u32 ibecc_error_log_offset; bool (*ibecc_available)(struct pci_dev *pdev); /* Convert error address logged in IBECC to system physical address */ - u64 (*err_addr_to_sys_addr)(u64 eaddr); + u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc); /* Convert error address logged in IBECC to integrated memory controller address */ - u64 (*err_addr_to_imc_addr)(u64 eaddr); + u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc); } *res_cfg; struct igen6_imc { @@ -125,6 +145,7 @@ struct igen6_imc { struct pci_dev *pdev; struct device dev; void __iomem *window; + u64 size; u64 ch_s_size; int ch_l_map; u64 dimm_s_size[NUM_CHANNELS]; @@ -134,6 +155,9 @@ struct igen6_imc { static struct igen6_pvt { struct igen6_imc imc[NUM_IMC]; + u64 ms_hash; + u64 ms_s_size; + int ms_l_map; } *igen6_pvt; /* The top of low usable DRAM */ @@ -183,6 +207,21 @@ static struct work_struct ecclog_work; #define DID_EHL_SKU14 0x4534 #define DID_EHL_SKU15 0x4536 +/* Compute die IDs for ICL-NNPI with IBECC */ +#define DID_ICL_SKU8 0x4581 +#define DID_ICL_SKU10 0x4585 +#define DID_ICL_SKU11 0x4589 +#define DID_ICL_SKU12 0x458d + +/* Compute die IDs for Tiger Lake with IBECC */ +#define DID_TGL_SKU 0x9a14 + +/* Compute die IDs for Alder Lake with IBECC */ +#define DID_ADL_SKU1 0x4601 +#define DID_ADL_SKU2 0x4602 +#define DID_ADL_SKU3 0x4621 +#define DID_ADL_SKU4 0x4641 + static bool ehl_ibecc_available(struct pci_dev *pdev) { u32 v; @@ -193,12 +232,12 @@ static bool ehl_ibecc_available(struct pci_dev *pdev) return !!(CAPID_C_IBECC & v); } -static u64 ehl_err_addr_to_sys_addr(u64 eaddr) +static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc) { return eaddr; } -static u64 ehl_err_addr_to_imc_addr(u64 eaddr) +static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc) { if (eaddr < igen6_tolud) return eaddr; @@ -212,12 +251,156 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr) return eaddr; } +static bool icl_ibecc_available(struct pci_dev *pdev) +{ + u32 v; + + if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v)) + return false; + + return !(CAPID_C_IBECC & v) && + (boot_cpu_data.x86_stepping >= 1); +} + +static bool tgl_ibecc_available(struct pci_dev *pdev) +{ + u32 v; + + if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v)) + return false; + + return !(CAPID_E_IBECC & v); +} + +static u64 mem_addr_to_sys_addr(u64 maddr) +{ + if (maddr < igen6_tolud) + return maddr; + + if (igen6_tom <= _4GB) + return maddr - igen6_tolud + _4GB; + + if (maddr < _4GB) + return maddr - igen6_tolud + igen6_tom; + + return maddr; +} + +static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit) +{ + u64 hash_addr = addr & mask, hash = hash_init; + u64 intlv = (addr >> intlv_bit) & 1; + int i; + + for (i = 6; i < 20; i++) + hash ^= (hash_addr >> i) & 1; + + return hash ^ intlv; +} + +static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc) +{ + u64 maddr, hash, mask, ms_s_size; + int intlv_bit; + u32 ms_hash; + + ms_s_size = igen6_pvt->ms_s_size; + if (eaddr >= ms_s_size) + return eaddr + ms_s_size; + + ms_hash = igen6_pvt->ms_hash; + + mask = MEM_SLICE_HASH_MASK(ms_hash); + intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6; + + maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) | + GET_BITFIELD(eaddr, 0, intlv_bit - 1); + + hash = mem_slice_hash(maddr, mask, mc, intlv_bit); + + return maddr | (hash << intlv_bit); +} + +static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc) +{ + u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc); + + return mem_addr_to_sys_addr(maddr); +} + +static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc) +{ + return eaddr; +} + +static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc) +{ + return mem_addr_to_sys_addr(eaddr); +} + +static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc) +{ + u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size; + struct igen6_imc *imc = &igen6_pvt->imc[mc]; + int intlv_bit; + u32 mc_hash; + + if (eaddr >= 2 * ms_s_size) + return eaddr - ms_s_size; + + mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET); + + intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6; + + imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit | + GET_BITFIELD(eaddr, 0, intlv_bit - 1); + + return imc_addr; +} + static struct res_config ehl_cfg = { - .num_imc = 1, - .ibecc_base = 0xdc00, - .ibecc_available = ehl_ibecc_available, - .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr, - .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, + .num_imc = 1, + .imc_base = 0x5000, + .ibecc_base = 0xdc00, + .ibecc_available = ehl_ibecc_available, + .ibecc_error_log_offset = 0x170, + .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, +}; + +static struct res_config icl_cfg = { + .num_imc = 1, + .imc_base = 0x5000, + .ibecc_base = 0xd800, + .ibecc_error_log_offset = 0x170, + .ibecc_available = icl_ibecc_available, + .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, +}; + +static struct res_config tgl_cfg = { + .machine_check = true, + .num_imc = 2, + .imc_base = 0x5000, + .cmf_base = 0x11000, + .cmf_size = 0x800, + .ms_hash_offset = 0xac, + .ibecc_base = 0xd400, + .ibecc_error_log_offset = 0x170, + .ibecc_available = tgl_ibecc_available, + .err_addr_to_sys_addr = tgl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr, +}; + +static struct res_config adl_cfg = { + .machine_check = true, + .num_imc = 2, + .imc_base = 0xd800, + .ibecc_base = 0xd400, + .ibecc_error_log_offset = 0x68, + .ibecc_available = tgl_ibecc_available, + .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; static const struct pci_device_id igen6_pci_tbl[] = { @@ -232,6 +415,15 @@ static const struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg }, + { PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg }, + { PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg }, + { PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg }, + { PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg }, + { PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg }, { }, }; MODULE_DEVICE_TABLE(pci, igen6_pci_tbl); @@ -490,8 +682,8 @@ static void ecclog_work_cb(struct work_struct *work) eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) << ECC_ERROR_LOG_ADDR_SHIFT; res.mc = node->mc; - res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr); - res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr); + res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc); + res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc); mci = igen6_pvt->imc[res.mc].mci; @@ -540,6 +732,57 @@ static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs) return NMI_HANDLED; } +static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *mce = (struct mce *)data; + char *type; + + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + + /* + * Ignore unless this is a memory related error. + * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here, + * since this bit isn't set on some CPU (e.g., Tiger Lake UP3). + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", + mce->extcpu, type, mce->mcgstatus, + mce->bank, mce->status); + edac_dbg(0, "TSC 0x%llx\n", mce->tsc); + edac_dbg(0, "ADDR 0x%llx\n", mce->addr); + edac_dbg(0, "MISC 0x%llx\n", mce->misc); + edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", + mce->cpuvendor, mce->cpuid, mce->time, + mce->socketid, mce->apicid); + /* + * We just use the Machine Check for the memory error notification. + * Each memory controller is associated with an IBECC instance. + * Directly read and clear the error information(error address and + * error type) on all the IBECC instances so that we know on which + * memory controller the memory error(s) occurred. + */ + if (!ecclog_handler()) + return NOTIFY_DONE; + + mce->kflags |= MCE_HANDLED_EDAC; + + return NOTIFY_DONE; +} + +static struct notifier_block ecclog_mce_dec = { + .notifier_call = ecclog_mce_handler, + .priority = MCE_PRIO_EDAC, +}; + static bool igen6_check_ecc(struct igen6_imc *imc) { u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET); @@ -573,6 +816,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci) imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm); imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm); imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra); + imc->size += imc->dimm_s_size[i]; + imc->size += imc->dimm_l_size[i]; ndimms = 0; for (j = 0; j < NUM_DIMMS; j++) { @@ -608,6 +853,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci) } } + edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20); + return 0; } @@ -857,6 +1104,80 @@ static void igen6_unregister_mcis(void) } } +static int igen6_mem_slice_setup(u64 mchbar) +{ + struct igen6_imc *imc = &igen6_pvt->imc[0]; + u64 base = mchbar + res_cfg->cmf_base; + u32 offset = res_cfg->ms_hash_offset; + u32 size = res_cfg->cmf_size; + u64 ms_s_size, ms_hash; + void __iomem *cmf; + int ms_l_map; + + edac_dbg(2, "\n"); + + if (imc[0].size < imc[1].size) { + ms_s_size = imc[0].size; + ms_l_map = 1; + } else { + ms_s_size = imc[1].size; + ms_l_map = 0; + } + + igen6_pvt->ms_s_size = ms_s_size; + igen6_pvt->ms_l_map = ms_l_map; + + edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n", + ms_s_size >> 20, ms_l_map); + + if (!size) + return 0; + + cmf = ioremap(base, size); + if (!cmf) { + igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base); + return -ENODEV; + } + + ms_hash = readq(cmf + offset); + igen6_pvt->ms_hash = ms_hash; + + edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash); + + iounmap(cmf); + + return 0; +} + +static int register_err_handler(void) +{ + int rc; + + if (res_cfg->machine_check) { + mce_register_decode_chain(&ecclog_mce_dec); + return 0; + } + + rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler, + 0, IGEN6_NMI_NAME); + if (rc) { + igen6_printk(KERN_ERR, "Failed to register NMI handler\n"); + return rc; + } + + return 0; +} + +static void unregister_err_handler(void) +{ + if (res_cfg->machine_check) { + mce_unregister_decode_chain(&ecclog_mce_dec); + return; + } + + unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); +} + static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { u64 mchbar; @@ -880,6 +1201,12 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto fail2; } + if (res_cfg->num_imc > 1) { + rc = igen6_mem_slice_setup(mchbar); + if (rc) + goto fail2; + } + ecclog_pool = ecclog_gen_pool_create(); if (!ecclog_pool) { rc = -ENOMEM; @@ -892,12 +1219,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Check if any pending errors before registering the NMI handler */ ecclog_handler(); - rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler, - 0, IGEN6_NMI_NAME); - if (rc) { - igen6_printk(KERN_ERR, "Failed to register NMI handler\n"); + rc = register_err_handler(); + if (rc) goto fail3; - } /* Enable error reporting */ rc = errcmd_enable_error_reporting(true); @@ -925,7 +1249,7 @@ static void igen6_remove(struct pci_dev *pdev) igen6_debug_teardown(); errcmd_enable_error_reporting(false); - unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); + unregister_err_handler(); irq_work_sync(&ecclog_irq_work); flush_work(&ecclog_work); gen_pool_destroy(ecclog_pool); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 5dd905a3f30c..27d56920b469 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -323,6 +323,21 @@ static const char * const smca_umc_mce_desc[] = { "AES SRAM ECC error", }; +static const char * const smca_umc2_mce_desc[] = { + "DRAM ECC error", + "Data poison error", + "SDP parity error", + "Reserved", + "Address/Command parity error", + "Write data parity error", + "DCQ SRAM ECC error", + "Reserved", + "Read data parity error", + "Rdb SRAM ECC error", + "RdRsp SRAM ECC error", + "LM32 MP errors", +}; + static const char * const smca_pb_mce_desc[] = { "An ECC error in the Parameter Block RAM array", }; @@ -400,6 +415,56 @@ static const char * const smca_pcie_mce_desc[] = { "CCIX Non-okay write response with data error", }; +static const char * const smca_pcie2_mce_desc[] = { + "SDP Parity Error logging", +}; + +static const char * const smca_xgmipcs_mce_desc[] = { + "Data Loss Error", + "Training Error", + "Flow Control Acknowledge Error", + "Rx Fifo Underflow Error", + "Rx Fifo Overflow Error", + "CRC Error", + "BER Exceeded Error", + "Tx Vcid Data Error", + "Replay Buffer Parity Error", + "Data Parity Error", + "Replay Fifo Overflow Error", + "Replay Fifo Underflow Error", + "Elastic Fifo Overflow Error", + "Deskew Error", + "Flow Control CRC Error", + "Data Startup Limit Error", + "FC Init Timeout Error", + "Recovery Timeout Error", + "Ready Serial Timeout Error", + "Ready Serial Attempt Error", + "Recovery Attempt Error", + "Recovery Relock Attempt Error", + "Replay Attempt Error", + "Sync Header Error", + "Tx Replay Timeout Error", + "Rx Replay Timeout Error", + "LinkSub Tx Timeout Error", + "LinkSub Rx Timeout Error", + "Rx CMD Pocket Error", +}; + +static const char * const smca_xgmiphy_mce_desc[] = { + "RAM ECC Error", + "ARC instruction buffer parity error", + "ARC data buffer parity error", + "PHY APB error", +}; + +static const char * const smca_waflphy_mce_desc[] = { + "RAM ECC Error", + "ARC instruction buffer parity error", + "ARC data buffer parity error", + "PHY APB error", +}; + struct smca_mce_desc { const char * const *descs; unsigned int num_descs; @@ -418,6 +483,7 @@ static struct smca_mce_desc smca_mce_descs[] = { [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, + [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) }, @@ -426,6 +492,10 @@ static struct smca_mce_desc smca_mce_descs[] = { [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) }, [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) }, + [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, + [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, + [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, + [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) }, }; static bool f12h_mc0_mce(u16 ec, u8 xec) diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 928f63a374c7..c94ca1f790c4 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1554,6 +1554,9 @@ static int __init pnd2_init(void) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) return -EBUSY; + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + id = x86_match_cpu(pnd2_cpuids); if (!id) return -ENODEV; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 93daa4297f2e..4c626fcd4dcb 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3510,6 +3510,9 @@ static int __init sbridge_init(void) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) return -EBUSY; + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + id = x86_match_cpu(sbridge_cpuids); if (!id) return -ENODEV; diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 6a4f0b27c654..4dbd46575bfb 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -656,6 +656,9 @@ static int __init skx_init(void) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) return -EBUSY; + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + id = x86_match_cpu(skx_cpuids); if (!id) return -ENODEV; diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 81c3e2ec6f56..5e83f59bef8a 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -23,10 +23,13 @@ #include "skx_common.h" static const char * const component_names[] = { - [INDEX_SOCKET] = "ProcessorSocketId", - [INDEX_MEMCTRL] = "MemoryControllerId", - [INDEX_CHANNEL] = "ChannelId", - [INDEX_DIMM] = "DimmSlotId", + [INDEX_SOCKET] = "ProcessorSocketId", + [INDEX_MEMCTRL] = "MemoryControllerId", + [INDEX_CHANNEL] = "ChannelId", + [INDEX_DIMM] = "DimmSlotId", + [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", + [INDEX_NM_CHANNEL] = "NmChannelId", + [INDEX_NM_DIMM] = "NmDimmSlotId", }; static int component_indices[ARRAY_SIZE(component_names)]; @@ -34,12 +37,14 @@ static int adxl_component_count; static const char * const *adxl_component_names; static u64 *adxl_values; static char *adxl_msg; +static unsigned long adxl_nm_bitmap; static char skx_msg[MSG_SIZE]; static skx_decode_f skx_decode; static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); +static bool skx_mem_cfg_2lm; int __init skx_adxl_get(void) { @@ -56,14 +61,25 @@ int __init skx_adxl_get(void) for (j = 0; names[j]; j++) { if (!strcmp(component_names[i], names[j])) { component_indices[i] = j; + + if (i >= INDEX_NM_FIRST) + adxl_nm_bitmap |= 1 << i; + break; } } - if (!names[j]) + if (!names[j] && i < INDEX_NM_FIRST) goto err; } + if (skx_mem_cfg_2lm) { + if (!adxl_nm_bitmap) + skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n"); + else + edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap); + } + adxl_component_names = names; while (*names++) adxl_component_count++; @@ -99,7 +115,7 @@ void __exit skx_adxl_put(void) kfree(adxl_msg); } -static bool skx_adxl_decode(struct decoded_addr *res) +static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem) { struct skx_dev *d; int i, len = 0; @@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res) } res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; - res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; - res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; - res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + if (error_in_1st_level_mem) { + res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? + (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; + res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? + (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; + res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? + (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; + } else { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; + res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + } - if (res->imc > NUM_IMC - 1) { + if (res->imc > NUM_IMC - 1 || res->imc < 0) { skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); return false; } @@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res) return true; } +void skx_set_mem_cfg(bool mem_cfg_2lm) +{ + skx_mem_cfg_2lm = mem_cfg_2lm; +} + void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { skx_decode = decode; @@ -313,9 +343,9 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, ranks = numrank(mtr); rows = numrow(mtr); - cols = numcol(mtr); + cols = imc->hbm_mc ? 6 : numcol(mtr); - if (cfg->support_ddr5 && (amap & 0x8)) { + if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) { banks = 32; mtype = MEM_DDR5; } else { @@ -344,8 +374,13 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, dimm->dtype = get_width(mtr); dimm->mtype = mtype; dimm->edac_mode = EDAC_SECDED; /* likely better than this */ - snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", - imc->src_id, imc->lmc, chan, dimmno); + + if (imc->hbm_mc) + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u", + imc->src_id, imc->lmc, chan); + else + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", + imc->src_id, imc->lmc, chan, dimmno); return 1; } @@ -578,6 +613,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, optype, skx_msg); } +static bool skx_error_in_1st_level_mem(const struct mce *m) +{ + u32 errcode; + + if (!skx_mem_cfg_2lm) + return false; + + errcode = GET_BITFIELD(m->status, 0, 15); + + if ((errcode & 0xef80) != 0x280) + return false; + + return true; +} + int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) { @@ -597,7 +647,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, res.addr = mce->addr; if (adxl_component_count) { - if (!skx_adxl_decode(&res)) + if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))) return NOTIFY_DONE; } else if (!skx_decode || !skx_decode(&res)) { return NOTIFY_DONE; @@ -658,6 +708,8 @@ void skx_remove(void) } if (d->util_all) pci_dev_put(d->util_all); + if (d->pcu_cr3) + pci_dev_put(d->pcu_cr3); if (d->sad_all) pci_dev_put(d->sad_all); if (d->uracu) diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index bf56bebff138..01f67e731766 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -9,6 +9,8 @@ #ifndef _SKX_COMM_EDAC_H #define _SKX_COMM_EDAC_H +#include <linux/bits.h> + #define MSG_SIZE 1024 /* @@ -30,9 +32,17 @@ #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ -#define I10NM_NUM_IMC 4 -#define I10NM_NUM_CHANNELS 2 -#define I10NM_NUM_DIMMS 2 +#define I10NM_NUM_DDR_IMC 4 +#define I10NM_NUM_DDR_CHANNELS 2 +#define I10NM_NUM_DDR_DIMMS 2 + +#define I10NM_NUM_HBM_IMC 16 +#define I10NM_NUM_HBM_CHANNELS 2 +#define I10NM_NUM_HBM_DIMMS 1 + +#define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC) +#define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) +#define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) @@ -54,12 +64,16 @@ struct skx_dev { struct pci_dev *sad_all; struct pci_dev *util_all; struct pci_dev *uracu; /* for i10nm CPU */ + struct pci_dev *pcu_cr3; /* for HBM memory detection */ u32 mcroute; struct skx_imc { struct mem_ctl_info *mci; struct pci_dev *mdev; /* for i10nm CPU */ void __iomem *mbase; /* for i10nm CPU */ int chan_mmio_sz; /* for i10nm CPU */ + int num_channels; /* channels per memory controller */ + int num_dimms; /* dimms per channel */ + bool hbm_mc; u8 mc; /* system wide mc# */ u8 lmc; /* socket relative mc# */ u8 src_id, node_id; @@ -92,9 +106,17 @@ enum { INDEX_MEMCTRL, INDEX_CHANNEL, INDEX_DIMM, + INDEX_NM_FIRST, + INDEX_NM_MEMCTRL = INDEX_NM_FIRST, + INDEX_NM_CHANNEL, + INDEX_NM_DIMM, INDEX_MAX }; +#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) +#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) +#define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) + struct decoded_addr { struct skx_dev *dev; u64 addr; @@ -122,7 +144,12 @@ struct res_config { int busno_cfg_offset; /* Per DDR channel memory-mapped I/O size */ int ddr_chan_mmio_sz; + /* Per HBM channel memory-mapped I/O size */ + int hbm_chan_mmio_sz; bool support_ddr5; + /* SAD device number and function number */ + unsigned int sad_all_devfn; + int sad_all_offset; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, @@ -133,6 +160,7 @@ typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int le int __init skx_adxl_get(void); void __exit skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); +void skx_set_mem_cfg(bool mem_cfg_2lm); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index 0eb5eb97fd74..f13674081cb6 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -1368,7 +1368,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev, name, 1, "CCPI", 1, 0, NULL, 0, idx); if (!edac_dev) { - dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret); + dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); return -ENOMEM; } ocx = edac_dev->pvt_info; @@ -1380,7 +1380,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev, ocx->regs = pcim_iomap_table(pdev)[0]; if (!ocx->regs) { - dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + dev_err(&pdev->dev, "Cannot map PCI resources\n"); ret = -ENODEV; goto err_free; } diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c index e7eae20f83d1..169f96e51c29 100644 --- a/drivers/edac/ti_edac.c +++ b/drivers/edac/ti_edac.c @@ -197,6 +197,7 @@ static const struct of_device_id ti_edac_of_match[] = { { .compatible = "ti,emif-dra7xx", .data = (void *)EMIF_TYPE_DRA7 }, {}, }; +MODULE_DEVICE_TABLE(of, ti_edac_of_match); static int _emif_get_id(struct device_node *node) { |