summaryrefslogtreecommitdiffstats
path: root/qa
diff options
context:
space:
mode:
author: Adam King <47704447+adk3798@users.noreply.github.com> 2023-11-10 23:09:12 +0100
committer: GitHub <noreply@github.com> 2023-11-10 23:09:12 +0100
commit: 9e2d5c4972edc38186fd497109915744d6f69d92 (patch)
tree: 313618802e9df275e55f40b2dcbaf9092f51e3cc /qa
parentMerge pull request #54398 from phlogistonjohn/jjm-cephadm-reorg-ctr-binds (diff)
parentqa/cephadm: basic test for monitoring stack (diff)
downloadceph-9e2d5c4972edc38186fd497109915744d6f69d92.tar.xz
ceph-9e2d5c4972edc38186fd497109915744d6f69d92.zip
Merge pull request #49914 from adk3798/monitoring-teuth-test
qa/cephadm: basic test for monitoring stack
Reviewed-by: Ernesto Puerta <epuertat@redhat.com>
Reviewed-by: Juan Miguel Olmo Martínez <jolmomar@redhat.com>
Reviewed-by: Redouane Kachach <rkachach@redhat.com>
Diffstat (limited to 'qa')
-rw-r--r--qa/suites/orch/cephadm/workunits/task/test_monitoring_stack_basic.yaml55
1 files changed, 55 insertions, 0 deletions
diff --git a/qa/suites/orch/cephadm/workunits/task/test_monitoring_stack_basic.yaml b/qa/suites/orch/cephadm/workunits/task/test_monitoring_stack_basic.yaml
new file mode 100644
index 00000000000..62947ef65d9
--- /dev/null
+++ b/qa/suites/orch/cephadm/workunits/task/test_monitoring_stack_basic.yaml
@@ -0,0 +1,55 @@
+# Cluster layout: three hosts (a, b, c), each with one mon and one osd;
+# hosts a and b additionally carry a mgr.
+roles:
+- - host.a
+ - mon.a
+ - mgr.a
+ - osd.0
+- - host.b
+ - mon.b
+ - mgr.b
+ - osd.1
+- - host.c
+ - mon.c
+ - osd.2
+tasks:
+- install:
+- cephadm:
+- cephadm.shell:
+ host.a:
+ - |
+ # abort on the first failing command and trace every command in the log
+ set -ex
+ # deploy the four monitoring services; no explicit placement is given
+ for svc in node-exporter grafana alertmanager prometheus; do
+ ceph orch apply "$svc"
+ done
+ # fixed grace period before the services are inspected
+ sleep 240
+ # record the service, daemon and host state in the log
+ ceph orch ls
+ ceph orch ps
+ ceph orch host ls
+ # Resolve the mon daemon to stop later and the host/IP of each monitoring
+ # daemon. Each lookup is a single jq run with -e as the LAST pipeline stage:
+ # a null result or no output makes the assignment fail, so `set -e` aborts
+ # right here instead of carrying an empty variable into a later curl.
+ # `first` pins the lookup to one daemon so the value is always one hostname.
+ MON_DAEMON=$(ceph orch ps --daemon-type mon -f json | jq -er 'last | .daemon_name')
+ GRAFANA_HOST=$(ceph orch ps --daemon-type grafana -f json | jq -er 'first | .hostname')
+ PROM_HOST=$(ceph orch ps --daemon-type prometheus -f json | jq -er 'first | .hostname')
+ ALERTM_HOST=$(ceph orch ps --daemon-type alertmanager -f json | jq -er 'first | .hostname')
+ # map each hostname to the address the orchestrator has on record for it
+ GRAFANA_IP=$(ceph orch host ls -f json | jq -er --arg GRAFANA_HOST "$GRAFANA_HOST" '.[] | select(.hostname==$GRAFANA_HOST) | .addr')
+ PROM_IP=$(ceph orch host ls -f json | jq -er --arg PROM_HOST "$PROM_HOST" '.[] | select(.hostname==$PROM_HOST) | .addr')
+ ALERTM_IP=$(ceph orch host ls -f json | jq -er --arg ALERTM_HOST "$ALERTM_HOST" '.[] | select(.hostname==$ALERTM_HOST) | .addr')
+ # check each host node-exporter metrics endpoint is responsive
+ # -f makes curl exit non-zero on an HTTP error status (e.g. 404), so the
+ # loop really verifies the /metrics endpoint rather than just an open port;
+ # the scrape body is discarded because it is long and not inspected here.
+ ALL_HOST_IPS=$(ceph orch host ls -f json | jq -r '.[] | .addr')
+ # ALL_HOST_IPS is intentionally unquoted: one address per word
+ for ip in $ALL_HOST_IPS; do
+ curl -sf -o /dev/null "http://${ip}:9100/metrics"
+ done
+ # grafana is queried over HTTPS with certificate verification skipped:
+ # first print the health document, then require its database field to be "ok"
+ GRAFANA_HEALTH_URL="https://${GRAFANA_IP}:3000/api/health"
+ curl --insecure --silent "$GRAFANA_HEALTH_URL"
+ curl --insecure --silent "$GRAFANA_HEALTH_URL" | jq -e '.database == "ok"'
+ # take the selected mon daemon down so that an alert is triggered
+ ceph orch daemon stop "$MON_DAEMON"
+ # fixed wait before the alert state is queried
+ sleep 120
+ # prometheus: print, then verify, the config status; print the alert list,
+ # then require the CephMonDown alert to be in state "firing"
+ PROM_API="http://${PROM_IP}:9095/api/v1"
+ curl --silent "${PROM_API}/status/config"
+ curl --silent "${PROM_API}/status/config" | jq -e '.status == "success"'
+ curl --silent "${PROM_API}/alerts"
+ curl --silent "${PROM_API}/alerts" | jq -e '.data | .alerts | .[] | select(.labels | .alertname == "CephMonDown") | .state == "firing"'
+ # check alertmanager endpoints are responsive and mon down alert is active
+ # Uses API v2: the v1 API was removed in Alertmanager 0.27. v2 returns the
+ # alert list as a bare JSON array, so there is no `.data` wrapper to unwrap.
+ # -f on the plain probes makes an HTTP error status fail the script, since
+ # no jq check follows the status request.
+ curl -sf http://${ALERTM_IP}:9093/api/v2/status
+ curl -sf http://${ALERTM_IP}:9093/api/v2/alerts
+ curl -s http://${ALERTM_IP}:9093/api/v2/alerts | jq -e '.[] | select(.labels | .alertname == "CephMonDown") | .status | .state == "active"'