summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vallari Agrawal <vallari.agrawal@ibm.com> 2024-10-30 15:02:41 +0100
committer Vallari Agrawal <vallari.agrawal@ibm.com> 2024-11-11 12:53:04 +0100
commit 7994fea43630b052fabb79c39241927dc7bb6678 (patch)
tree 8540aec935070527214496efaab3d98eae082386
parent monitoring: add 2 nvmeof alerts to prometheus_alerts.yaml (diff)
download ceph-7994fea43630b052fabb79c39241927dc7bb6678.tar.xz
ceph-7994fea43630b052fabb79c39241927dc7bb6678.zip
monitoring: add 2 new nvmeof alerts
Add NVMeoFMissingListener and NVMeoFZeroListenerSubsystem alerts to prometheus_alerts.libsonnet.

Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
-rw-r--r-- monitoring/ceph-mixin/prometheus_alerts.libsonnet 20
1 files changed, 20 insertions, 0 deletions
diff --git a/monitoring/ceph-mixin/prometheus_alerts.libsonnet b/monitoring/ceph-mixin/prometheus_alerts.libsonnet
index cde1a736f8c..30b6b07d463 100644
--- a/monitoring/ceph-mixin/prometheus_alerts.libsonnet
+++ b/monitoring/ceph-mixin/prometheus_alerts.libsonnet
@@ -936,6 +936,26 @@
},
},
{
+ alert: 'NVMeoFMissingListener',
+ 'for': '10m',
+ expr: 'ceph_nvmeof_subsystem_listener_count == 0 and on(nqn) sum(ceph_nvmeof_subsystem_listener_count) by (nqn) > 0',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ summary: 'No listener added for {{ $labels.instance }} NVMe-oF Gateway to {{ $labels.nqn }} subsystem',
+ description: 'For every subsystem, each gateway should have a listener to balance traffic between gateways.',
+ },
+ },
+ {
+ alert: 'NVMeoFZeroListenerSubsystem',
+ 'for': '10m',
+ expr: 'sum(ceph_nvmeof_subsystem_listener_count) by (nqn) == 0',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ summary: 'No listeners added to {{ $labels.nqn }} subsystem',
+ description: 'NVMeoF gateway configuration incomplete; one of the subsystems have zero listeners.',
+ },
+ },
+ {
alert: 'NVMeoFHighHostCPU',
'for': '10m',
expr: '100-((100*(avg by(cluster,host) (label_replace(rate(node_cpu_seconds_total{mode="idle"}[5m]),"host","$1","instance","(.*):.*")) * on(cluster, host) group_right label_replace(ceph_nvmeof_gateway_info,"host","$1","instance","(.*):.*")))) >= %.2f' % [$._config.NVMeoFHighHostCPU],