summaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
authorMaarten Lankhorst <maarten.lankhorst@linux.intel.com>2021-07-27 12:48:17 +0200
committerMaarten Lankhorst <maarten.lankhorst@linux.intel.com>2021-07-27 12:48:17 +0200
commitca31fef11dc83e672415d5925a134749761329bd (patch)
tree8eb6a489e2d6dd117300f40ed8fc945a06bb6eee /drivers/edac
parentdrm/plane: Move drm_plane_enable_fb_damage_clips into core (diff)
parentefi: sysfb_efi: fix build when EFI is not set (diff)
downloadlinux-ca31fef11dc83e672415d5925a134749761329bd.tar.xz
linux-ca31fef11dc83e672415d5925a134749761329bd.zip
Backmerge remote-tracking branch 'drm/drm-next' into drm-misc-next
Required bump from v5.13-rc3 to v5.14-rc3, and to pick up sysfb compilation fixes. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig3
-rw-r--r--drivers/edac/altera_edac.c1
-rw-r--r--drivers/edac/aspeed_edac.c4
-rw-r--r--drivers/edac/i10nm_base.c174
-rw-r--r--drivers/edac/igen6_edac.c374
-rw-r--r--drivers/edac/mce_amd.c70
-rw-r--r--drivers/edac/pnd2_edac.c3
-rw-r--r--drivers/edac/sb_edac.c3
-rw-r--r--drivers/edac/skx_base.c3
-rw-r--r--drivers/edac/skx_common.c82
-rw-r--r--drivers/edac/skx_common.h34
-rw-r--r--drivers/edac/thunderx_edac.c4
-rw-r--r--drivers/edac/ti_edac.c1
13 files changed, 696 insertions, 60 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 1e836e320edd..2fc4c3f91fd5 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -270,7 +270,8 @@ config EDAC_PND2
config EDAC_IGEN6
tristate "Intel client SoC Integrated MC"
- depends on PCI && X86_64 && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG
+ depends on PCI && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG
+ depends on X86_64 && X86_MCE_INTEL
help
Support for error detection and correction on the Intel
client SoC Integrated Memory Controller using In-Band ECC IP.
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 5f7fd79ec82f..61c21bd880a4 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -20,6 +20,7 @@
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
+#include <linux/panic_notifier.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/types.h>
diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c
index a46da56d6d54..6bd5f8815919 100644
--- a/drivers/edac/aspeed_edac.c
+++ b/drivers/edac/aspeed_edac.c
@@ -254,8 +254,8 @@ static int init_csrows(struct mem_ctl_info *mci)
return rc;
}
- dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n",
- r.start, resource_size(&r), PAGE_SHIFT);
+ dev_dbg(mci->pdev, "dt: /memory node resources: first page %pR, PAGE_SHIFT macro=0x%x\n",
+ &r, PAGE_SHIFT);
csrow->first_page = r.start >> PAGE_SHIFT;
nr_pages = resource_size(&r) >> PAGE_SHIFT;
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 238a4ad1e526..6ce0ed2ffaaf 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -13,7 +13,7 @@
#include "edac_module.h"
#include "skx_common.h"
-#define I10NM_REVISION "v0.0.4"
+#define I10NM_REVISION "v0.0.5"
#define EDAC_MOD_STR "i10nm_edac"
/* Debug macros */
@@ -24,19 +24,39 @@
pci_read_config_dword((d)->uracu, 0xd0, &(reg))
#define I10NM_GET_IMC_BAR(d, i, reg) \
pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
+#define I10NM_GET_SAD(d, offset, i, reg)\
+ pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
+#define I10NM_GET_HBM_IMC_BAR(d, reg) \
+ pci_read_config_dword((d)->uracu, 0xd4, &(reg))
+#define I10NM_GET_CAPID3_CFG(d, reg) \
+ pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg))
#define I10NM_GET_DIMMMTR(m, i, j) \
- readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4)
+ readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
+ (i) * (m)->chan_mmio_sz + (j) * 4)
#define I10NM_GET_MCDDRTCFG(m, i, j) \
- readl((m)->mbase + 0x20970 + (i) * (m)->chan_mmio_sz + (j) * 4)
+ readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
+ (i) * (m)->chan_mmio_sz + (j) * 4)
#define I10NM_GET_MCMTR(m, i) \
- readl((m)->mbase + 0x20ef8 + (i) * (m)->chan_mmio_sz)
+ readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
+ (i) * (m)->chan_mmio_sz)
#define I10NM_GET_AMAP(m, i) \
- readl((m)->mbase + 0x20814 + (i) * (m)->chan_mmio_sz)
+ readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
+ (i) * (m)->chan_mmio_sz)
#define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23)
#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12)
#define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \
GET_BITFIELD(reg, 0, 10) + 1) << 12)
+#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \
+ ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)
+
+#define I10NM_HBM_IMC_MMIO_SIZE 0x9000
+#define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30)
+#define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29)
+
+#define I10NM_MAX_SAD 16
+#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
+#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5)
static struct list_head *i10nm_edac_list;
@@ -63,7 +83,32 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
return pdev;
}
-static int i10nm_get_all_munits(void)
+static bool i10nm_check_2lm(struct res_config *cfg)
+{
+ struct skx_dev *d;
+ u32 reg;
+ int i;
+
+ list_for_each_entry(d, i10nm_edac_list, list) {
+ d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1],
+ PCI_SLOT(cfg->sad_all_devfn),
+ PCI_FUNC(cfg->sad_all_devfn));
+ if (!d->sad_all)
+ continue;
+
+ for (i = 0; i < I10NM_MAX_SAD; i++) {
+ I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
+ if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
+ edac_dbg(2, "2-level memory configuration.\n");
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static int i10nm_get_ddr_munits(void)
{
struct pci_dev *mdev;
void __iomem *mbase;
@@ -91,7 +136,7 @@ static int i10nm_get_all_munits(void)
edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
j++, base, reg);
- for (i = 0; i < I10NM_NUM_IMC; i++) {
+ for (i = 0; i < I10NM_NUM_DDR_IMC; i++) {
mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
12 + i, 0);
if (i == 0 && !mdev) {
@@ -127,11 +172,97 @@ static int i10nm_get_all_munits(void)
return 0;
}
+static bool i10nm_check_hbm_imc(struct skx_dev *d)
+{
+ u32 reg;
+
+ if (I10NM_GET_CAPID3_CFG(d, reg)) {
+ i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
+ return false;
+ }
+
+ return I10NM_IS_HBM_PRESENT(reg) != 0;
+}
+
+static int i10nm_get_hbm_munits(void)
+{
+ struct pci_dev *mdev;
+ void __iomem *mbase;
+ u32 reg, off, mcmtr;
+ struct skx_dev *d;
+ int i, lmc;
+ u64 base;
+
+ list_for_each_entry(d, i10nm_edac_list, list) {
+ d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3);
+ if (!d->pcu_cr3)
+ return -ENODEV;
+
+ if (!i10nm_check_hbm_imc(d)) {
+ i10nm_printk(KERN_DEBUG, "No hbm memory\n");
+ return -ENODEV;
+ }
+
+ if (I10NM_GET_SCK_BAR(d, reg)) {
+ i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
+ return -ENODEV;
+ }
+ base = I10NM_GET_SCK_MMIO_BASE(reg);
+
+ if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
+ i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
+ return -ENODEV;
+ }
+ base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
+
+ lmc = I10NM_NUM_DDR_IMC;
+
+ for (i = 0; i < I10NM_NUM_HBM_IMC; i++) {
+ mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
+ 12 + i / 4, 1 + i % 4);
+ if (i == 0 && !mdev) {
+ i10nm_printk(KERN_ERR, "No hbm mc found\n");
+ return -ENODEV;
+ }
+ if (!mdev)
+ continue;
+
+ d->imc[lmc].mdev = mdev;
+ off = i * I10NM_HBM_IMC_MMIO_SIZE;
+
+ edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
+ lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);
+
+ mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
+ if (!mbase) {
+ i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
+ base + off);
+ return -ENOMEM;
+ }
+
+ d->imc[lmc].mbase = mbase;
+ d->imc[lmc].hbm_mc = true;
+
+ mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
+ if (!I10NM_IS_HBM_IMC(mcmtr)) {
+ i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
+ return -ENODEV;
+ }
+
+ lmc++;
+ }
+ }
+
+ return 0;
+}
+
static struct res_config i10nm_cfg0 = {
.type = I10NM,
.decs_did = 0x3452,
.busno_cfg_offset = 0xcc,
.ddr_chan_mmio_sz = 0x4000,
+ .sad_all_devfn = PCI_DEVFN(29, 0),
+ .sad_all_offset = 0x108,
};
static struct res_config i10nm_cfg1 = {
@@ -139,6 +270,8 @@ static struct res_config i10nm_cfg1 = {
.decs_did = 0x3452,
.busno_cfg_offset = 0xd0,
.ddr_chan_mmio_sz = 0x4000,
+ .sad_all_devfn = PCI_DEVFN(29, 0),
+ .sad_all_offset = 0x108,
};
static struct res_config spr_cfg = {
@@ -146,7 +279,10 @@ static struct res_config spr_cfg = {
.decs_did = 0x3252,
.busno_cfg_offset = 0xd0,
.ddr_chan_mmio_sz = 0x8000,
+ .hbm_chan_mmio_sz = 0x4000,
.support_ddr5 = true,
+ .sad_all_devfn = PCI_DEVFN(10, 0),
+ .sad_all_offset = 0x300,
};
static const struct x86_cpu_id i10nm_cpuids[] = {
@@ -179,13 +315,13 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
struct dimm_info *dimm;
int i, j, ndimms;
- for (i = 0; i < I10NM_NUM_CHANNELS; i++) {
+ for (i = 0; i < imc->num_channels; i++) {
if (!imc->mbase)
continue;
ndimms = 0;
amap = I10NM_GET_AMAP(imc, i);
- for (j = 0; j < I10NM_NUM_DIMMS; j++) {
+ for (j = 0; j < imc->num_dimms; j++) {
dimm = edac_get_dimm(mci, i, j, 0);
mtr = I10NM_GET_DIMMMTR(imc, i, j);
mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
@@ -278,6 +414,9 @@ static int __init i10nm_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY;
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return -ENODEV;
+
id = x86_match_cpu(i10nm_cpuids);
if (!id)
return -ENODEV;
@@ -296,8 +435,11 @@ static int __init i10nm_init(void)
return -ENODEV;
}
- rc = i10nm_get_all_munits();
- if (rc < 0)
+ skx_set_mem_cfg(i10nm_check_2lm(cfg));
+
+ rc = i10nm_get_ddr_munits();
+
+ if (i10nm_get_hbm_munits() && rc)
goto fail;
list_for_each_entry(d, i10nm_edac_list, list) {
@@ -318,7 +460,15 @@ static int __init i10nm_init(void)
d->imc[i].lmc = i;
d->imc[i].src_id = src_id;
d->imc[i].node_id = node_id;
- d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
+ if (d->imc[i].hbm_mc) {
+ d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
+ d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS;
+ d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS;
+ } else {
+ d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
+ d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS;
+ d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS;
+ }
rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
"Intel_10nm Socket", EDAC_MOD_STR,
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index 6be9986fc6bd..a07bbfd075d0 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -22,11 +22,12 @@
#include <linux/io.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
+#include <asm/mce.h>
#include "edac_mc.h"
#include "edac_module.h"
-#define IGEN6_REVISION "v2.4"
+#define IGEN6_REVISION "v2.5"
#define EDAC_MOD_STR "igen6_edac"
#define IGEN6_NMI_NAME "igen6_ibecc"
@@ -40,7 +41,7 @@
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))
-#define NUM_IMC 1 /* Max memory controllers */
+#define NUM_IMC 2 /* Max memory controllers */
#define NUM_CHANNELS 2 /* Max channels */
#define NUM_DIMMS 2 /* Max DIMMs per channel */
@@ -54,6 +55,10 @@
#define CAPID_C_OFFSET 0xec
#define CAPID_C_IBECC BIT(15)
+/* Capability register E */
+#define CAPID_E_OFFSET 0xf0
+#define CAPID_E_IBECC BIT(12)
+
/* Error Status */
#define ERRSTS_OFFSET 0xc8
#define ERRSTS_CE BIT_ULL(6)
@@ -70,7 +75,7 @@
#define IBECC_ACTIVATE_EN BIT(0)
/* IBECC error log */
-#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + 0x170)
+#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE BIT_ULL(62)
#define ECC_ERROR_LOG_UE BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT 5
@@ -84,39 +89,54 @@
#define MCHBAR_SIZE 0x10000
/* Parameters for the channel decode stage */
-#define MAD_INTER_CHANNEL_OFFSET 0x5000
+#define IMC_BASE (res_cfg->imc_base)
+#define MAD_INTER_CHANNEL_OFFSET IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29)
/* Parameters for DRAM decode stage */
-#define MAD_INTRA_CH0_OFFSET 0x5004
+#define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0)
/* DIMM characteristics */
-#define MAD_DIMM_CH0_OFFSET 0x500c
+#define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25)
+/* Hash for memory controller selection */
+#define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8)
+#define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3)
+
/* Hash for channel selection */
-#define CHANNEL_HASH_OFFSET 0X5024
+#define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
-#define CHANNEL_EHASH_OFFSET 0X5028
+#define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28)
#define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28)
+/* Parameters for memory slice decode stage */
+#define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
+#define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
+
static struct res_config {
+ bool machine_check;
int num_imc;
+ u32 imc_base;
+ u32 cmf_base;
+ u32 cmf_size;
+ u32 ms_hash_offset;
u32 ibecc_base;
+ u32 ibecc_error_log_offset;
bool (*ibecc_available)(struct pci_dev *pdev);
/* Convert error address logged in IBECC to system physical address */
- u64 (*err_addr_to_sys_addr)(u64 eaddr);
+ u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
/* Convert error address logged in IBECC to integrated memory controller address */
- u64 (*err_addr_to_imc_addr)(u64 eaddr);
+ u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;
struct igen6_imc {
@@ -125,6 +145,7 @@ struct igen6_imc {
struct pci_dev *pdev;
struct device dev;
void __iomem *window;
+ u64 size;
u64 ch_s_size;
int ch_l_map;
u64 dimm_s_size[NUM_CHANNELS];
@@ -134,6 +155,9 @@ struct igen6_imc {
static struct igen6_pvt {
struct igen6_imc imc[NUM_IMC];
+ u64 ms_hash;
+ u64 ms_s_size;
+ int ms_l_map;
} *igen6_pvt;
/* The top of low usable DRAM */
@@ -183,6 +207,21 @@ static struct work_struct ecclog_work;
#define DID_EHL_SKU14 0x4534
#define DID_EHL_SKU15 0x4536
+/* Compute die IDs for ICL-NNPI with IBECC */
+#define DID_ICL_SKU8 0x4581
+#define DID_ICL_SKU10 0x4585
+#define DID_ICL_SKU11 0x4589
+#define DID_ICL_SKU12 0x458d
+
+/* Compute die IDs for Tiger Lake with IBECC */
+#define DID_TGL_SKU 0x9a14
+
+/* Compute die IDs for Alder Lake with IBECC */
+#define DID_ADL_SKU1 0x4601
+#define DID_ADL_SKU2 0x4602
+#define DID_ADL_SKU3 0x4621
+#define DID_ADL_SKU4 0x4641
+
static bool ehl_ibecc_available(struct pci_dev *pdev)
{
u32 v;
@@ -193,12 +232,12 @@ static bool ehl_ibecc_available(struct pci_dev *pdev)
return !!(CAPID_C_IBECC & v);
}
-static u64 ehl_err_addr_to_sys_addr(u64 eaddr)
+static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
return eaddr;
}
-static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
+static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
if (eaddr < igen6_tolud)
return eaddr;
@@ -212,12 +251,156 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
return eaddr;
}
+static bool icl_ibecc_available(struct pci_dev *pdev)
+{
+ u32 v;
+
+ if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
+ return false;
+
+ return !(CAPID_C_IBECC & v) &&
+ (boot_cpu_data.x86_stepping >= 1);
+}
+
+static bool tgl_ibecc_available(struct pci_dev *pdev)
+{
+ u32 v;
+
+ if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
+ return false;
+
+ return !(CAPID_E_IBECC & v);
+}
+
+static u64 mem_addr_to_sys_addr(u64 maddr)
+{
+ if (maddr < igen6_tolud)
+ return maddr;
+
+ if (igen6_tom <= _4GB)
+ return maddr - igen6_tolud + _4GB;
+
+ if (maddr < _4GB)
+ return maddr - igen6_tolud + igen6_tom;
+
+ return maddr;
+}
+
+static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
+{
+ u64 hash_addr = addr & mask, hash = hash_init;
+ u64 intlv = (addr >> intlv_bit) & 1;
+ int i;
+
+ for (i = 6; i < 20; i++)
+ hash ^= (hash_addr >> i) & 1;
+
+ return hash ^ intlv;
+}
+
+static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
+{
+ u64 maddr, hash, mask, ms_s_size;
+ int intlv_bit;
+ u32 ms_hash;
+
+ ms_s_size = igen6_pvt->ms_s_size;
+ if (eaddr >= ms_s_size)
+ return eaddr + ms_s_size;
+
+ ms_hash = igen6_pvt->ms_hash;
+
+ mask = MEM_SLICE_HASH_MASK(ms_hash);
+ intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;
+
+ maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) |
+ GET_BITFIELD(eaddr, 0, intlv_bit - 1);
+
+ hash = mem_slice_hash(maddr, mask, mc, intlv_bit);
+
+ return maddr | (hash << intlv_bit);
+}
+
+static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
+{
+ u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc);
+
+ return mem_addr_to_sys_addr(maddr);
+}
+
+static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
+{
+ return eaddr;
+}
+
+static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
+{
+ return mem_addr_to_sys_addr(eaddr);
+}
+
+static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
+{
+ u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size;
+ struct igen6_imc *imc = &igen6_pvt->imc[mc];
+ int intlv_bit;
+ u32 mc_hash;
+
+ if (eaddr >= 2 * ms_s_size)
+ return eaddr - ms_s_size;
+
+ mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);
+
+ intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;
+
+ imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit |
+ GET_BITFIELD(eaddr, 0, intlv_bit - 1);
+
+ return imc_addr;
+}
+
static struct res_config ehl_cfg = {
- .num_imc = 1,
- .ibecc_base = 0xdc00,
- .ibecc_available = ehl_ibecc_available,
- .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
- .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
+ .num_imc = 1,
+ .imc_base = 0x5000,
+ .ibecc_base = 0xdc00,
+ .ibecc_available = ehl_ibecc_available,
+ .ibecc_error_log_offset = 0x170,
+ .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
+};
+
+static struct res_config icl_cfg = {
+ .num_imc = 1,
+ .imc_base = 0x5000,
+ .ibecc_base = 0xd800,
+ .ibecc_error_log_offset = 0x170,
+ .ibecc_available = icl_ibecc_available,
+ .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
+};
+
+static struct res_config tgl_cfg = {
+ .machine_check = true,
+ .num_imc = 2,
+ .imc_base = 0x5000,
+ .cmf_base = 0x11000,
+ .cmf_size = 0x800,
+ .ms_hash_offset = 0xac,
+ .ibecc_base = 0xd400,
+ .ibecc_error_log_offset = 0x170,
+ .ibecc_available = tgl_ibecc_available,
+ .err_addr_to_sys_addr = tgl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr,
+};
+
+static struct res_config adl_cfg = {
+ .machine_check = true,
+ .num_imc = 2,
+ .imc_base = 0xd800,
+ .ibecc_base = 0xd400,
+ .ibecc_error_log_offset = 0x68,
+ .ibecc_available = tgl_ibecc_available,
+ .err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
static const struct pci_device_id igen6_pci_tbl[] = {
@@ -232,6 +415,15 @@ static const struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
+ { PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
@@ -490,8 +682,8 @@ static void ecclog_work_cb(struct work_struct *work)
eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
ECC_ERROR_LOG_ADDR_SHIFT;
res.mc = node->mc;
- res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr);
- res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr);
+ res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
+ res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
mci = igen6_pvt->imc[res.mc].mci;
@@ -540,6 +732,57 @@ static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
return NMI_HANDLED;
}
+static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct mce *mce = (struct mce *)data;
+ char *type;
+
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
+ /*
+ * Ignore unless this is a memory related error.
+ * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
+ * since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
+ */
+ if ((mce->status & 0xefff) >> 7 != 1)
+ return NOTIFY_DONE;
+
+ if (mce->mcgstatus & MCG_STATUS_MCIP)
+ type = "Exception";
+ else
+ type = "Event";
+
+ edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
+ mce->extcpu, type, mce->mcgstatus,
+ mce->bank, mce->status);
+ edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
+ edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
+ edac_dbg(0, "MISC 0x%llx\n", mce->misc);
+ edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
+ mce->cpuvendor, mce->cpuid, mce->time,
+ mce->socketid, mce->apicid);
+ /*
+ * We just use the Machine Check for the memory error notification.
+ * Each memory controller is associated with an IBECC instance.
+ * Directly read and clear the error information(error address and
+ * error type) on all the IBECC instances so that we know on which
+ * memory controller the memory error(s) occurred.
+ */
+ if (!ecclog_handler())
+ return NOTIFY_DONE;
+
+ mce->kflags |= MCE_HANDLED_EDAC;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ecclog_mce_dec = {
+ .notifier_call = ecclog_mce_handler,
+ .priority = MCE_PRIO_EDAC,
+};
+
static bool igen6_check_ecc(struct igen6_imc *imc)
{
u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
@@ -573,6 +816,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
+ imc->size += imc->dimm_s_size[i];
+ imc->size += imc->dimm_l_size[i];
ndimms = 0;
for (j = 0; j < NUM_DIMMS; j++) {
@@ -608,6 +853,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
}
}
+ edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);
+
return 0;
}
@@ -857,6 +1104,80 @@ static void igen6_unregister_mcis(void)
}
}
+static int igen6_mem_slice_setup(u64 mchbar)
+{
+ struct igen6_imc *imc = &igen6_pvt->imc[0];
+ u64 base = mchbar + res_cfg->cmf_base;
+ u32 offset = res_cfg->ms_hash_offset;
+ u32 size = res_cfg->cmf_size;
+ u64 ms_s_size, ms_hash;
+ void __iomem *cmf;
+ int ms_l_map;
+
+ edac_dbg(2, "\n");
+
+ if (imc[0].size < imc[1].size) {
+ ms_s_size = imc[0].size;
+ ms_l_map = 1;
+ } else {
+ ms_s_size = imc[1].size;
+ ms_l_map = 0;
+ }
+
+ igen6_pvt->ms_s_size = ms_s_size;
+ igen6_pvt->ms_l_map = ms_l_map;
+
+ edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
+ ms_s_size >> 20, ms_l_map);
+
+ if (!size)
+ return 0;
+
+ cmf = ioremap(base, size);
+ if (!cmf) {
+ igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base);
+ return -ENODEV;
+ }
+
+ ms_hash = readq(cmf + offset);
+ igen6_pvt->ms_hash = ms_hash;
+
+ edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash);
+
+ iounmap(cmf);
+
+ return 0;
+}
+
+static int register_err_handler(void)
+{
+ int rc;
+
+ if (res_cfg->machine_check) {
+ mce_register_decode_chain(&ecclog_mce_dec);
+ return 0;
+ }
+
+ rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
+ 0, IGEN6_NMI_NAME);
+ if (rc) {
+ igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static void unregister_err_handler(void)
+{
+ if (res_cfg->machine_check) {
+ mce_unregister_decode_chain(&ecclog_mce_dec);
+ return;
+ }
+
+ unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
+}
+
static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
u64 mchbar;
@@ -880,6 +1201,12 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto fail2;
}
+ if (res_cfg->num_imc > 1) {
+ rc = igen6_mem_slice_setup(mchbar);
+ if (rc)
+ goto fail2;
+ }
+
ecclog_pool = ecclog_gen_pool_create();
if (!ecclog_pool) {
rc = -ENOMEM;
@@ -892,12 +1219,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Check if any pending errors before registering the NMI handler */
ecclog_handler();
- rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
- 0, IGEN6_NMI_NAME);
- if (rc) {
- igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
+ rc = register_err_handler();
+ if (rc)
goto fail3;
- }
/* Enable error reporting */
rc = errcmd_enable_error_reporting(true);
@@ -925,7 +1249,7 @@ static void igen6_remove(struct pci_dev *pdev)
igen6_debug_teardown();
errcmd_enable_error_reporting(false);
- unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
+ unregister_err_handler();
irq_work_sync(&ecclog_irq_work);
flush_work(&ecclog_work);
gen_pool_destroy(ecclog_pool);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 5dd905a3f30c..27d56920b469 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -323,6 +323,21 @@ static const char * const smca_umc_mce_desc[] = {
"AES SRAM ECC error",
};
+static const char * const smca_umc2_mce_desc[] = {
+ "DRAM ECC error",
+ "Data poison error",
+ "SDP parity error",
+ "Reserved",
+ "Address/Command parity error",
+ "Write data parity error",
+ "DCQ SRAM ECC error",
+ "Reserved",
+ "Read data parity error",
+ "Rdb SRAM ECC error",
+ "RdRsp SRAM ECC error",
+ "LM32 MP errors",
+};
+
static const char * const smca_pb_mce_desc[] = {
"An ECC error in the Parameter Block RAM array",
};
@@ -400,6 +415,56 @@ static const char * const smca_pcie_mce_desc[] = {
"CCIX Non-okay write response with data error",
};
+static const char * const smca_pcie2_mce_desc[] = {
+ "SDP Parity Error logging",
+};
+
+static const char * const smca_xgmipcs_mce_desc[] = {
+ "Data Loss Error",
+ "Training Error",
+ "Flow Control Acknowledge Error",
+ "Rx Fifo Underflow Error",
+ "Rx Fifo Overflow Error",
+ "CRC Error",
+ "BER Exceeded Error",
+ "Tx Vcid Data Error",
+ "Replay Buffer Parity Error",
+ "Data Parity Error",
+ "Replay Fifo Overflow Error",
+ "Replay Fifo Underflow Error",
+ "Elastic Fifo Overflow Error",
+ "Deskew Error",
+ "Flow Control CRC Error",
+ "Data Startup Limit Error",
+ "FC Init Timeout Error",
+ "Recovery Timeout Error",
+ "Ready Serial Timeout Error",
+ "Ready Serial Attempt Error",
+ "Recovery Attempt Error",
+ "Recovery Relock Attempt Error",
+ "Replay Attempt Error",
+ "Sync Header Error",
+ "Tx Replay Timeout Error",
+ "Rx Replay Timeout Error",
+ "LinkSub Tx Timeout Error",
+ "LinkSub Rx Timeout Error",
+ "Rx CMD Pocket Error",
+};
+
+static const char * const smca_xgmiphy_mce_desc[] = {
+ "RAM ECC Error",
+ "ARC instruction buffer parity error",
+ "ARC data buffer parity error",
+ "PHY APB error",
+};
+
+static const char * const smca_waflphy_mce_desc[] = {
+ "RAM ECC Error",
+ "ARC instruction buffer parity error",
+ "ARC data buffer parity error",
+ "PHY APB error",
+};
+
struct smca_mce_desc {
const char * const *descs;
unsigned int num_descs;
@@ -418,6 +483,7 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
+ [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) },
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
[SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
@@ -426,6 +492,10 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
[SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
[SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
+ [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) },
+ [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) },
+ [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
+ [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) },
};
static bool f12h_mc0_mce(u16 ec, u8 xec)
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index 928f63a374c7..c94ca1f790c4 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -1554,6 +1554,9 @@ static int __init pnd2_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY;
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return -ENODEV;
+
id = x86_match_cpu(pnd2_cpuids);
if (!id)
return -ENODEV;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 93daa4297f2e..4c626fcd4dcb 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3510,6 +3510,9 @@ static int __init sbridge_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY;
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return -ENODEV;
+
id = x86_match_cpu(sbridge_cpuids);
if (!id)
return -ENODEV;
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 6a4f0b27c654..4dbd46575bfb 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -656,6 +656,9 @@ static int __init skx_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY;
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return -ENODEV;
+
id = x86_match_cpu(skx_cpuids);
if (!id)
return -ENODEV;
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 81c3e2ec6f56..5e83f59bef8a 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -23,10 +23,13 @@
#include "skx_common.h"
static const char * const component_names[] = {
- [INDEX_SOCKET] = "ProcessorSocketId",
- [INDEX_MEMCTRL] = "MemoryControllerId",
- [INDEX_CHANNEL] = "ChannelId",
- [INDEX_DIMM] = "DimmSlotId",
+ [INDEX_SOCKET] = "ProcessorSocketId",
+ [INDEX_MEMCTRL] = "MemoryControllerId",
+ [INDEX_CHANNEL] = "ChannelId",
+ [INDEX_DIMM] = "DimmSlotId",
+ [INDEX_NM_MEMCTRL] = "NmMemoryControllerId",
+ [INDEX_NM_CHANNEL] = "NmChannelId",
+ [INDEX_NM_DIMM] = "NmDimmSlotId",
};
static int component_indices[ARRAY_SIZE(component_names)];
@@ -34,12 +37,14 @@ static int adxl_component_count;
static const char * const *adxl_component_names;
static u64 *adxl_values;
static char *adxl_msg;
+static unsigned long adxl_nm_bitmap;
static char skx_msg[MSG_SIZE];
static skx_decode_f skx_decode;
static skx_show_retry_log_f skx_show_retry_rd_err_log;
static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list);
+static bool skx_mem_cfg_2lm;
int __init skx_adxl_get(void)
{
@@ -56,14 +61,25 @@ int __init skx_adxl_get(void)
for (j = 0; names[j]; j++) {
if (!strcmp(component_names[i], names[j])) {
component_indices[i] = j;
+
+ if (i >= INDEX_NM_FIRST)
+ adxl_nm_bitmap |= 1 << i;
+
break;
}
}
- if (!names[j])
+ if (!names[j] && i < INDEX_NM_FIRST)
goto err;
}
+ if (skx_mem_cfg_2lm) {
+ if (!adxl_nm_bitmap)
+ skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n");
+ else
+ edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap);
+ }
+
adxl_component_names = names;
while (*names++)
adxl_component_count++;
@@ -99,7 +115,7 @@ void __exit skx_adxl_put(void)
kfree(adxl_msg);
}
-static bool skx_adxl_decode(struct decoded_addr *res)
+static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
{
struct skx_dev *d;
int i, len = 0;
@@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res)
}
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
- res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
- res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
- res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
+ if (error_in_1st_level_mem) {
+ res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
+ (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
+ res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
+ (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
+ res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ?
+ (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
+ } else {
+ res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+ res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
+ res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
+ }
- if (res->imc > NUM_IMC - 1) {
+ if (res->imc > NUM_IMC - 1 || res->imc < 0) {
skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
return false;
}
@@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res)
return true;
}
+void skx_set_mem_cfg(bool mem_cfg_2lm)
+{
+ skx_mem_cfg_2lm = mem_cfg_2lm;
+}
+
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
{
skx_decode = decode;
@@ -313,9 +343,9 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
ranks = numrank(mtr);
rows = numrow(mtr);
- cols = numcol(mtr);
+ cols = imc->hbm_mc ? 6 : numcol(mtr);
- if (cfg->support_ddr5 && (amap & 0x8)) {
+ if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) {
banks = 32;
mtype = MEM_DDR5;
} else {
@@ -344,8 +374,13 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
dimm->dtype = get_width(mtr);
dimm->mtype = mtype;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
- snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
- imc->src_id, imc->lmc, chan, dimmno);
+
+ if (imc->hbm_mc)
+ snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u",
+ imc->src_id, imc->lmc, chan);
+ else
+ snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+ imc->src_id, imc->lmc, chan, dimmno);
return 1;
}
@@ -578,6 +613,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
optype, skx_msg);
}
+static bool skx_error_in_1st_level_mem(const struct mce *m)
+{
+ u32 errcode;
+
+ if (!skx_mem_cfg_2lm)
+ return false;
+
+ errcode = GET_BITFIELD(m->status, 0, 15);
+
+ if ((errcode & 0xef80) != 0x280)
+ return false;
+
+ return true;
+}
+
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -597,7 +647,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
res.addr = mce->addr;
if (adxl_component_count) {
- if (!skx_adxl_decode(&res))
+ if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
return NOTIFY_DONE;
} else if (!skx_decode || !skx_decode(&res)) {
return NOTIFY_DONE;
@@ -658,6 +708,8 @@ void skx_remove(void)
}
if (d->util_all)
pci_dev_put(d->util_all);
+ if (d->pcu_cr3)
+ pci_dev_put(d->pcu_cr3);
if (d->sad_all)
pci_dev_put(d->sad_all);
if (d->uracu)
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index bf56bebff138..01f67e731766 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -9,6 +9,8 @@
#ifndef _SKX_COMM_EDAC_H
#define _SKX_COMM_EDAC_H
+#include <linux/bits.h>
+
#define MSG_SIZE 1024
/*
@@ -30,9 +32,17 @@
#define SKX_NUM_CHANNELS 3 /* Channels per memory controller */
#define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */
-#define I10NM_NUM_IMC 4
-#define I10NM_NUM_CHANNELS 2
-#define I10NM_NUM_DIMMS 2
+#define I10NM_NUM_DDR_IMC 4
+#define I10NM_NUM_DDR_CHANNELS 2
+#define I10NM_NUM_DDR_DIMMS 2
+
+#define I10NM_NUM_HBM_IMC 16
+#define I10NM_NUM_HBM_CHANNELS 2
+#define I10NM_NUM_HBM_DIMMS 1
+
+#define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
+#define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
+#define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
@@ -54,12 +64,16 @@ struct skx_dev {
struct pci_dev *sad_all;
struct pci_dev *util_all;
struct pci_dev *uracu; /* for i10nm CPU */
+ struct pci_dev *pcu_cr3; /* for HBM memory detection */
u32 mcroute;
struct skx_imc {
struct mem_ctl_info *mci;
struct pci_dev *mdev; /* for i10nm CPU */
void __iomem *mbase; /* for i10nm CPU */
int chan_mmio_sz; /* for i10nm CPU */
+ int num_channels; /* channels per memory controller */
+ int num_dimms; /* dimms per channel */
+ bool hbm_mc;
u8 mc; /* system wide mc# */
u8 lmc; /* socket relative mc# */
u8 src_id, node_id;
@@ -92,9 +106,17 @@ enum {
INDEX_MEMCTRL,
INDEX_CHANNEL,
INDEX_DIMM,
+ INDEX_NM_FIRST,
+ INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
+ INDEX_NM_CHANNEL,
+ INDEX_NM_DIMM,
INDEX_MAX
};
+#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL)
+#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL)
+#define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM)
+
struct decoded_addr {
struct skx_dev *dev;
u64 addr;
@@ -122,7 +144,12 @@ struct res_config {
int busno_cfg_offset;
/* Per DDR channel memory-mapped I/O size */
int ddr_chan_mmio_sz;
+ /* Per HBM channel memory-mapped I/O size */
+ int hbm_chan_mmio_sz;
bool support_ddr5;
+ /* SAD device number and function number */
+ unsigned int sad_all_devfn;
+ int sad_all_offset;
};
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
@@ -133,6 +160,7 @@ typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int le
int __init skx_adxl_get(void);
void __exit skx_adxl_put(void);
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
+void skx_set_mem_cfg(bool mem_cfg_2lm);
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
int skx_get_node_id(struct skx_dev *d, u8 *id);
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index 0eb5eb97fd74..f13674081cb6 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -1368,7 +1368,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev,
name, 1, "CCPI", 1,
0, NULL, 0, idx);
if (!edac_dev) {
- dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret);
+ dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
return -ENOMEM;
}
ocx = edac_dev->pvt_info;
@@ -1380,7 +1380,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev,
ocx->regs = pcim_iomap_table(pdev)[0];
if (!ocx->regs) {
- dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+ dev_err(&pdev->dev, "Cannot map PCI resources\n");
ret = -ENODEV;
goto err_free;
}
diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c
index e7eae20f83d1..169f96e51c29 100644
--- a/drivers/edac/ti_edac.c
+++ b/drivers/edac/ti_edac.c
@@ -197,6 +197,7 @@ static const struct of_device_id ti_edac_of_match[] = {
{ .compatible = "ti,emif-dra7xx", .data = (void *)EMIF_TYPE_DRA7 },
{},
};
+MODULE_DEVICE_TABLE(of, ti_edac_of_match);
static int _emif_get_id(struct device_node *node)
{