diff options
Diffstat (limited to 'monitoring/ceph-mixin/prometheus_alerts.yml')
-rw-r--r-- | monitoring/ceph-mixin/prometheus_alerts.yml | 23 |
1 files changed, 16 insertions, 7 deletions
diff --git a/monitoring/ceph-mixin/prometheus_alerts.yml b/monitoring/ceph-mixin/prometheus_alerts.yml
index 3eb8a8db4fa..7c0da4d51a4 100644
--- a/monitoring/ceph-mixin/prometheus_alerts.yml
+++ b/monitoring/ceph-mixin/prometheus_alerts.yml
@@ -765,20 +765,29 @@ groups:
         labels:
           severity: "warning"
           type: "ceph_default"
+      - alert: "NVMeoFMultipleNamespacesOfRBDImage"
+        annotations:
+          description: "Each NVMeoF namespace must have a unique RBD pool and image, across all different gateway groups."
+          summary: "RBD image {{ $labels.pool_name }}/{{ $labels.rbd_name }} cannot be reused for multiple NVMeoF namespace "
+        expr: "count by(pool_name, rbd_name) (count by(bdev_name, pool_name, rbd_name) (ceph_nvmeof_bdev_metadata and on (bdev_name) ceph_nvmeof_subsystem_namespace_metadata)) > 1"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
       - alert: "NVMeoFTooManyGateways"
         annotations:
-          description: "You may create many gateways, but 4 is the tested limit"
+          description: "You may create many gateways, but 32 is the tested limit"
           summary: "Max supported gateways exceeded on cluster {{ $labels.cluster }}"
-        expr: "count(ceph_nvmeof_gateway_info) by (cluster) > 4.00"
+        expr: "count(ceph_nvmeof_gateway_info) by (cluster) > 32.00"
         for: "1m"
         labels:
           severity: "warning"
           type: "ceph_default"
       - alert: "NVMeoFMaxGatewayGroupSize"
         annotations:
-          description: "You may create many gateways in a gateway group, but 4 is the tested limit"
+          description: "You may create many gateways in a gateway group, but 8 is the tested limit"
           summary: "Max gateways within a gateway group ({{ $labels.group }}) exceeded on cluster {{ $labels.cluster }}"
-        expr: "count(ceph_nvmeof_gateway_info) by (cluster,group) > 4.00"
+        expr: "count(ceph_nvmeof_gateway_info) by (cluster,group) > 8.00"
         for: "1m"
         labels:
           severity: "warning"
@@ -823,7 +832,7 @@ groups:
         annotations:
           description: "Although you may continue to create namespaces in {{ $labels.gateway_host }}, the configuration may not be supported"
           summary: "The number of namespaces defined to the gateway exceeds supported values on cluster {{ $labels.cluster }}"
-        expr: "sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 1024.00"
+        expr: "sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 2048.00"
         for: "1m"
         labels:
           severity: "warning"
@@ -839,9 +848,9 @@ groups:
           type: "ceph_default"
       - alert: "NVMeoFHighClientCount"
         annotations:
-          description: "The supported limit for clients connecting to a subsystem is 32"
+          description: "The supported limit for clients connecting to a subsystem is 128"
           summary: "The number of clients connected to {{ $labels.nqn }} is too high on cluster {{ $labels.cluster }}"
-        expr: "ceph_nvmeof_subsystem_host_count > 32.00"
+        expr: "ceph_nvmeof_subsystem_host_count > 128.00"
         for: "1m"
         labels:
           severity: "warning"