summaryrefslogtreecommitdiffstats
path: root/drivers/edac/skx_common.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/skx_common.c')
-rw-r--r--drivers/edac/skx_common.c82
1 files changed, 67 insertions, 15 deletions
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 81c3e2ec6f56..5e83f59bef8a 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -23,10 +23,13 @@
#include "skx_common.h"
static const char * const component_names[] = {
- [INDEX_SOCKET] = "ProcessorSocketId",
- [INDEX_MEMCTRL] = "MemoryControllerId",
- [INDEX_CHANNEL] = "ChannelId",
- [INDEX_DIMM] = "DimmSlotId",
+ [INDEX_SOCKET] = "ProcessorSocketId",
+ [INDEX_MEMCTRL] = "MemoryControllerId",
+ [INDEX_CHANNEL] = "ChannelId",
+ [INDEX_DIMM] = "DimmSlotId",
+ [INDEX_NM_MEMCTRL] = "NmMemoryControllerId",
+ [INDEX_NM_CHANNEL] = "NmChannelId",
+ [INDEX_NM_DIMM] = "NmDimmSlotId",
};
static int component_indices[ARRAY_SIZE(component_names)];
@@ -34,12 +37,14 @@ static int adxl_component_count;
static const char * const *adxl_component_names;
static u64 *adxl_values;
static char *adxl_msg;
+static unsigned long adxl_nm_bitmap;
static char skx_msg[MSG_SIZE];
static skx_decode_f skx_decode;
static skx_show_retry_log_f skx_show_retry_rd_err_log;
static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list);
+static bool skx_mem_cfg_2lm;
int __init skx_adxl_get(void)
{
@@ -56,14 +61,25 @@ int __init skx_adxl_get(void)
for (j = 0; names[j]; j++) {
if (!strcmp(component_names[i], names[j])) {
component_indices[i] = j;
+
+ if (i >= INDEX_NM_FIRST)
+ adxl_nm_bitmap |= 1 << i;
+
break;
}
}
- if (!names[j])
+ if (!names[j] && i < INDEX_NM_FIRST)
goto err;
}
+ if (skx_mem_cfg_2lm) {
+ if (!adxl_nm_bitmap)
+ skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n");
+ else
+ edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap);
+ }
+
adxl_component_names = names;
while (*names++)
adxl_component_count++;
@@ -99,7 +115,7 @@ void __exit skx_adxl_put(void)
kfree(adxl_msg);
}
-static bool skx_adxl_decode(struct decoded_addr *res)
+static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
{
struct skx_dev *d;
int i, len = 0;
@@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res)
}
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
- res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
- res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
- res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
+ if (error_in_1st_level_mem) {
+ res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
+ (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
+ res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
+ (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
+ res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ?
+ (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
+ } else {
+ res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+ res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
+ res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
+ }
- if (res->imc > NUM_IMC - 1) {
+ if (res->imc > NUM_IMC - 1 || res->imc < 0) {
skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
return false;
}
@@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res)
return true;
}
+void skx_set_mem_cfg(bool mem_cfg_2lm)
+{
+ skx_mem_cfg_2lm = mem_cfg_2lm;
+}
+
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
{
skx_decode = decode;
@@ -313,9 +343,9 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
ranks = numrank(mtr);
rows = numrow(mtr);
- cols = numcol(mtr);
+ cols = imc->hbm_mc ? 6 : numcol(mtr);
- if (cfg->support_ddr5 && (amap & 0x8)) {
+ if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) {
banks = 32;
mtype = MEM_DDR5;
} else {
@@ -344,8 +374,13 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
dimm->dtype = get_width(mtr);
dimm->mtype = mtype;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
- snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
- imc->src_id, imc->lmc, chan, dimmno);
+
+ if (imc->hbm_mc)
+ snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u",
+ imc->src_id, imc->lmc, chan);
+ else
+ snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+ imc->src_id, imc->lmc, chan, dimmno);
return 1;
}
@@ -578,6 +613,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
optype, skx_msg);
}
+static bool skx_error_in_1st_level_mem(const struct mce *m)
+{
+ u32 errcode;
+
+ if (!skx_mem_cfg_2lm)
+ return false;
+
+ errcode = GET_BITFIELD(m->status, 0, 15);
+
+ if ((errcode & 0xef80) != 0x280)
+ return false;
+
+ return true;
+}
+
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -597,7 +647,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
res.addr = mce->addr;
if (adxl_component_count) {
- if (!skx_adxl_decode(&res))
+ if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
return NOTIFY_DONE;
} else if (!skx_decode || !skx_decode(&res)) {
return NOTIFY_DONE;
@@ -658,6 +708,8 @@ void skx_remove(void)
}
if (d->util_all)
pci_dev_put(d->util_all);
+ if (d->pcu_cr3)
+ pci_dev_put(d->pcu_cr3);
if (d->sad_all)
pci_dev_put(d->sad_all);
if (d->uracu)