diff options
author | Vallari Agrawal <vallari.agrawal@ibm.com> | 2024-10-30 15:02:41 +0100 |
---|---|---|
committer | Vallari Agrawal <vallari.agrawal@ibm.com> | 2024-11-11 12:53:04 +0100 |
commit | 7994fea43630b052fabb79c39241927dc7bb6678 (patch) | |
tree | 8540aec935070527214496efaab3d98eae082386 /monitoring | |
parent | monitoring: add 2 nvmeof alerts to prometheus_alerts.yaml (diff) | |
download | ceph-7994fea43630b052fabb79c39241927dc7bb6678.tar.xz ceph-7994fea43630b052fabb79c39241927dc7bb6678.zip |
monitoring: add 2 new nvmeof alerts
Add NVMeoFMissingListener and NVMeoFZeroListenerSubsystem
alerts to prometheus_alerts.libsonnet.
Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
Diffstat (limited to 'monitoring')
-rw-r--r-- | monitoring/ceph-mixin/prometheus_alerts.libsonnet | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/monitoring/ceph-mixin/prometheus_alerts.libsonnet b/monitoring/ceph-mixin/prometheus_alerts.libsonnet index cde1a736f8c..30b6b07d463 100644 --- a/monitoring/ceph-mixin/prometheus_alerts.libsonnet +++ b/monitoring/ceph-mixin/prometheus_alerts.libsonnet @@ -936,6 +936,26 @@ }, }, { + alert: 'NVMeoFMissingListener', + 'for': '10m', + expr: 'ceph_nvmeof_subsystem_listener_count == 0 and on(nqn) sum(ceph_nvmeof_subsystem_listener_count) by (nqn) > 0', + labels: { severity: 'warning', type: 'ceph_default' }, + annotations: { + summary: 'No listener added for {{ $labels.instance }} NVMe-oF Gateway to {{ $labels.nqn }} subsystem', + description: 'For every subsystem, each gateway should have a listener to balance traffic between gateways.', + }, + }, + { + alert: 'NVMeoFZeroListenerSubsystem', + 'for': '10m', + expr: 'sum(ceph_nvmeof_subsystem_listener_count) by (nqn) == 0', + labels: { severity: 'warning', type: 'ceph_default' }, + annotations: { + summary: 'No listeners added to {{ $labels.nqn }} subsystem', + description: 'NVMeoF gateway configuration incomplete; one of the subsystems have zero listeners.', + }, + }, + { alert: 'NVMeoFHighHostCPU', 'for': '10m', expr: '100-((100*(avg by(cluster,host) (label_replace(rate(node_cpu_seconds_total{mode="idle"}[5m]),"host","$1","instance","(.*):.*")) * on(cluster, host) group_right label_replace(ceph_nvmeof_gateway_info,"host","$1","instance","(.*):.*")))) >= %.2f' % [$._config.NVMeoFHighHostCPU], |