diff options
author | Shashank Gupta <shashank.gupta@intel.com> | 2023-10-20 12:32:52 +0200 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2023-10-27 12:04:27 +0200 |
commit | 532d7f6bc458042571752168bcb5e1fdc576b8c4 (patch) | |
tree | 9a16682c85b3778ef75da42b1ae535017a0a88b3 /drivers/crypto | |
parent | crypto: qat - add handling of errors from ERRSOU3 for QAT GEN4 (diff) | |
download | linux-532d7f6bc458042571752168bcb5e1fdc576b8c4.tar.xz linux-532d7f6bc458042571752168bcb5e1fdc576b8c4.zip |
crypto: qat - add error counters
Introduce ras counters interface for counting QAT specific device
errors and expose them through the newly created qat_ras sysfs
group attribute.
This adds the following attributes:
- errors_correctable: number of correctable errors
- errors_nonfatal: number of uncorrectable non fatal errors
- errors_fatal: number of uncorrectable fatal errors
- reset_error_counters: resets all counters
These counters are initialized during device bring up and cleared
during device shutdown and are applicable only to QAT GEN4 devices.
Signed-off-by: Shashank Gupta <shashank.gupta@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Tero Kristo <tero.kristo@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'drivers/crypto')
6 files changed, 158 insertions, 0 deletions
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c index 2ccd1223f1ef..8f483d1197dd 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c @@ -418,6 +418,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err; } + accel_dev->ras_errors.enabled = true; adf_dbgfs_init(accel_dev); ret = adf_dev_up(accel_dev, true); diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 151fd3c01f62..58f3c181b2ce 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -12,6 +12,7 @@ intel_qat-objs := adf_cfg.o \ adf_admin.o \ adf_hw_arbiter.o \ adf_sysfs.o \ + adf_sysfs_ras_counters.o \ adf_gen2_hw_data.o \ adf_gen2_config.o \ adf_gen4_hw_data.o \ diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index 65d52a07e435..197e10eb5534 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -7,6 +7,7 @@ #include <linux/list.h> #include <linux/io.h> #include <linux/ratelimit.h> +#include <linux/types.h> #include "adf_cfg_common.h" #include "adf_pfvf_msg.h" @@ -81,6 +82,18 @@ enum dev_sku_info { DEV_SKU_UNKNOWN, }; +enum ras_errors { + ADF_RAS_CORR, + ADF_RAS_UNCORR, + ADF_RAS_FATAL, + ADF_RAS_ERRORS, +}; + +struct adf_error_counters { + atomic_t counter[ADF_RAS_ERRORS]; + bool enabled; +}; + static inline const char *get_sku_info(enum dev_sku_info info) { switch (info) { @@ -360,6 +373,7 @@ struct adf_accel_dev { u8 pf_compat_ver; } vf; }; + struct adf_error_counters ras_errors; struct mutex state_lock; /* protect state of the device */ bool is_vf; u32 accel_id; diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c index b3cf0720cf9a..00a32efdfc59 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_init.c +++ b/drivers/crypto/intel/qat/qat_common/adf_init.c @@ -9,6 +9,7 @@ #include "adf_common_drv.h" #include "adf_dbgfs.h" #include "adf_heartbeat.h" +#include "adf_sysfs_ras_counters.h" static LIST_HEAD(service_table); static DEFINE_MUTEX(service_lock); @@ -242,6 +243,7 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev) set_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status); adf_dbgfs_add(accel_dev); + adf_sysfs_start_ras(accel_dev); return 0; } @@ -268,6 +270,7 @@ static void adf_dev_stop(struct adf_accel_dev *accel_dev) return; adf_dbgfs_rm(accel_dev); + adf_sysfs_stop_ras(accel_dev); clear_bit(ADF_STATUS_STARTING, &accel_dev->status); clear_bit(ADF_STATUS_STARTED, &accel_dev->status); diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs_ras_counters.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs_ras_counters.c new file mode 100644 index 000000000000..cffe2d722995 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs_ras_counters.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation */ + +#include <linux/sysfs.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include "adf_common_drv.h" +#include "adf_sysfs_ras_counters.h" + +static ssize_t errors_correctable_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct adf_accel_dev *accel_dev; + unsigned long counter; + + accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev)); + if (!accel_dev) + return -EINVAL; + + counter = ADF_RAS_ERR_CTR_READ(accel_dev->ras_errors, ADF_RAS_CORR); + return scnprintf(buf, PAGE_SIZE, "%ld\n", counter); +} + +static ssize_t errors_nonfatal_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct adf_accel_dev *accel_dev; + unsigned long counter; + + accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev)); + if (!accel_dev) + return -EINVAL; + + counter = ADF_RAS_ERR_CTR_READ(accel_dev->ras_errors, ADF_RAS_UNCORR); + return scnprintf(buf, PAGE_SIZE, "%ld\n", counter); +} + +static ssize_t errors_fatal_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct adf_accel_dev *accel_dev; + unsigned long counter; + + accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev)); + if (!accel_dev) + return -EINVAL; + + counter = ADF_RAS_ERR_CTR_READ(accel_dev->ras_errors, ADF_RAS_FATAL); + return scnprintf(buf, PAGE_SIZE, "%ld\n", counter); +} + +static ssize_t reset_error_counters_store(struct device *dev, + struct device_attribute *dev_attr, + const char *buf, size_t count) +{ + struct adf_accel_dev *accel_dev; + + if (buf[0] != '1' || count != 2) + return -EINVAL; + + accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev)); + if (!accel_dev) + return -EINVAL; + + ADF_RAS_ERR_CTR_CLEAR(accel_dev->ras_errors); + + return count; +} + +static DEVICE_ATTR_RO(errors_correctable); +static DEVICE_ATTR_RO(errors_nonfatal); +static DEVICE_ATTR_RO(errors_fatal); +static DEVICE_ATTR_WO(reset_error_counters); + +static struct attribute *qat_ras_attrs[] = { + &dev_attr_errors_correctable.attr, + &dev_attr_errors_nonfatal.attr, + &dev_attr_errors_fatal.attr, + &dev_attr_reset_error_counters.attr, + NULL, +}; + +static struct attribute_group qat_ras_group = { + .attrs = qat_ras_attrs, + .name = "qat_ras", +}; + +void adf_sysfs_start_ras(struct adf_accel_dev *accel_dev) +{ + if (!accel_dev->ras_errors.enabled) + return; + + ADF_RAS_ERR_CTR_CLEAR(accel_dev->ras_errors); + + if (device_add_group(&GET_DEV(accel_dev), &qat_ras_group)) + dev_err(&GET_DEV(accel_dev), + "Failed to create qat_ras attribute group.\n"); +} + +void adf_sysfs_stop_ras(struct adf_accel_dev *accel_dev) +{ + if (!accel_dev->ras_errors.enabled) + return; + + device_remove_group(&GET_DEV(accel_dev), &qat_ras_group); + + ADF_RAS_ERR_CTR_CLEAR(accel_dev->ras_errors); +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs_ras_counters.h b/drivers/crypto/intel/qat/qat_common/adf_sysfs_ras_counters.h new file mode 100644 index 000000000000..99e9d9cf57f8 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs_ras_counters.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation */ + +#ifndef ADF_RAS_H +#define ADF_RAS_H + +#include <linux/bitops.h> +#include <linux/atomic.h> + +struct adf_accel_dev; + +void adf_sysfs_start_ras(struct adf_accel_dev *accel_dev); +void adf_sysfs_stop_ras(struct adf_accel_dev *accel_dev); + +#define ADF_RAS_ERR_CTR_READ(ras_errors, ERR) \ + atomic_read(&(ras_errors).counter[ERR]) + +#define ADF_RAS_ERR_CTR_CLEAR(ras_errors) \ + do { \ + for (int err = 0; err < ADF_RAS_ERRORS; ++err) \ + atomic_set(&(ras_errors).counter[err], 0); \ + } while (0) + +#define ADF_RAS_ERR_CTR_INC(ras_errors, ERR) \ + atomic_inc(&(ras_errors).counter[ERR]) + +#endif /* ADF_RAS_H */ |