diff options
author | Adam King <47704447+adk3798@users.noreply.github.com> | 2023-11-10 23:09:12 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-10 23:09:12 +0100 |
commit | 9e2d5c4972edc38186fd497109915744d6f69d92 (patch) | |
tree | 313618802e9df275e55f40b2dcbaf9092f51e3cc /qa | |
parent | Merge pull request #54398 from phlogistonjohn/jjm-cephadm-reorg-ctr-binds (diff) | |
parent | qa/cephadm: basic test for monitoring stack (diff) | |
download | ceph-9e2d5c4972edc38186fd497109915744d6f69d92.tar.xz ceph-9e2d5c4972edc38186fd497109915744d6f69d92.zip |
Merge pull request #49914 from adk3798/monitoring-teuth-test
qa/cephadm: basic test for monitoring stack
Reviewed-by: Ernesto Puerta <epuertat@redhat.com>
Reviewed-by: Juan Miguel Olmo Martínez <jolmomar@redhat.com>
Reviewed-by: Redouane Kachach <rkachach@redhat.com>
Diffstat (limited to 'qa')
-rw-r--r-- | qa/suites/orch/cephadm/workunits/task/test_monitoring_stack_basic.yaml | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/qa/suites/orch/cephadm/workunits/task/test_monitoring_stack_basic.yaml b/qa/suites/orch/cephadm/workunits/task/test_monitoring_stack_basic.yaml
new file mode 100644
index 00000000000..62947ef65d9
--- /dev/null
+++ b/qa/suites/orch/cephadm/workunits/task/test_monitoring_stack_basic.yaml
@@ -0,0 +1,55 @@
+roles:
+- - host.a
+  - mon.a
+  - mgr.a
+  - osd.0
+- - host.b
+  - mon.b
+  - mgr.b
+  - osd.1
+- - host.c
+  - mon.c
+  - osd.2
+tasks:
+- install:
+- cephadm:
+- cephadm.shell:
+    host.a:
+      - |
+        # apply the monitoring services, probe their HTTP endpoints, then stop a mon and expect a CephMonDown alert
+        set -ex
+        ceph orch apply node-exporter
+        ceph orch apply grafana
+        ceph orch apply alertmanager
+        ceph orch apply prometheus
+        sleep 240  # fixed wait, presumably so the applied services have time to start
+        ceph orch ls
+        ceph orch ps
+        ceph orch host ls
+        MON_DAEMON=$(ceph orch ps --daemon-type mon -f json | jq -r 'last | .daemon_name')
+        GRAFANA_HOST=$(ceph orch ps --daemon-type grafana -f json | jq -e '.[]' | jq -r '.hostname')
+        PROM_HOST=$(ceph orch ps --daemon-type prometheus -f json | jq -e '.[]' | jq -r '.hostname')
+        ALERTM_HOST=$(ceph orch ps --daemon-type alertmanager -f json | jq -e '.[]' | jq -r '.hostname')
+        GRAFANA_IP=$(ceph orch host ls -f json | jq -r --arg GRAFANA_HOST "$GRAFANA_HOST" '.[] | select(.hostname==$GRAFANA_HOST) | .addr')
+        PROM_IP=$(ceph orch host ls -f json | jq -r --arg PROM_HOST "$PROM_HOST" '.[] | select(.hostname==$PROM_HOST) | .addr')
+        ALERTM_IP=$(ceph orch host ls -f json | jq -r --arg ALERTM_HOST "$ALERTM_HOST" '.[] | select(.hostname==$ALERTM_HOST) | .addr')
+        # check each host's node-exporter metrics endpoint is responsive (-f: an HTTP error status fails the test)
+        ALL_HOST_IPS=$(ceph orch host ls -f json | jq -r '.[] | .addr')
+        for ip in $ALL_HOST_IPS; do  # unquoted on purpose: split into one address per word
+          curl -sf "http://${ip}:9100/metrics"
+        done
+        # check grafana endpoints are responsive and database health is okay (-k: skip TLS certificate verification)
+        curl -k -s "https://${GRAFANA_IP}:3000/api/health"
+        curl -k -s "https://${GRAFANA_IP}:3000/api/health" | jq -e '.database == "ok"'
+        # stop mon daemon in order to trigger an alert
+        ceph orch daemon stop "$MON_DAEMON"
+        sleep 120  # fixed wait, presumably so the alert has time to fire and reach alertmanager
+        # check prometheus endpoints are responsive and mon down alert is firing (plain curl logs the reply, jq -e asserts on it)
+        curl -s "http://${PROM_IP}:9095/api/v1/status/config"
+        curl -s "http://${PROM_IP}:9095/api/v1/status/config" | jq -e '.status == "success"'
+        curl -s "http://${PROM_IP}:9095/api/v1/alerts"
+        curl -s "http://${PROM_IP}:9095/api/v1/alerts" | jq -e '.data | .alerts | .[] | select(.labels | .alertname == "CephMonDown") | .state == "firing"'
+        # check alertmanager endpoints are responsive and mon down alert is active
+        curl -s "http://${ALERTM_IP}:9093/api/v1/status"
+        curl -s "http://${ALERTM_IP}:9093/api/v1/alerts"
+        curl -s "http://${ALERTM_IP}:9093/api/v1/alerts" | jq -e '.data | .[] | select(.labels | .alertname == "CephMonDown") | .status | .state == "active"'