summary | refs | log | tree | commit | diff | stats
path: root/monitoring
diff options
context:
space:
mode:
Diffstat (limited to 'monitoring')
-rw-r--r-- monitoring/ceph-mixin/dashboards/rgw.libsonnet 12
-rw-r--r-- monitoring/ceph-mixin/dashboards_out/radosgw-overview.json 12
-rw-r--r-- monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature 6
3 files changed, 18 insertions, 12 deletions
diff --git a/monitoring/ceph-mixin/dashboards/rgw.libsonnet b/monitoring/ceph-mixin/dashboards/rgw.libsonnet
index 79a4b7a14eb..c0c548b79c8 100644
--- a/monitoring/ceph-mixin/dashboards/rgw.libsonnet
+++ b/monitoring/ceph-mixin/dashboards/rgw.libsonnet
@@ -298,7 +298,7 @@ local g = import 'grafonnet/grafana.libsonnet';
label_replace(
rate(ceph_rgw_op_get_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
rate(ceph_rgw_op_get_obj_lat_count{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
||| % $.matchers(),
@@ -314,7 +314,7 @@ local g = import 'grafonnet/grafana.libsonnet';
label_replace(
rate(ceph_rgw_op_put_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
rate(ceph_rgw_op_put_obj_lat_count{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
||| % $.matchers(),
@@ -331,7 +331,7 @@ local g = import 'grafonnet/grafana.libsonnet';
sum by (rgw_host) (
label_replace(
rate(ceph_rgw_req{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
)
@@ -351,7 +351,7 @@ local g = import 'grafonnet/grafana.libsonnet';
label_replace(
rate(ceph_rgw_op_get_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
rate(ceph_rgw_op_get_obj_lat_count{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
||| % $.matchers(),
@@ -385,7 +385,7 @@ local g = import 'grafonnet/grafana.libsonnet';
label_replace(sum by (instance_id) (
rate(ceph_rgw_op_get_obj_bytes{%(matchers)s}[$__rate_interval]) +
rate(ceph_rgw_op_put_obj_bytes{%(matchers)s}[$__rate_interval])) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
||| % $.matchers(),
@@ -404,7 +404,7 @@ local g = import 'grafonnet/grafana.libsonnet';
label_replace(
rate(ceph_rgw_op_put_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
rate(ceph_rgw_op_put_obj_lat_count{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
||| % $.matchers(),
diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json
index 5e185b63b7f..5bf8279c27c 100644
--- a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json
+++ b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json
@@ -108,14 +108,14 @@
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GET {{rgw_host}}",
"refId": "A"
},
{
- "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUT {{rgw_host}}",
@@ -210,7 +210,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
+ "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
@@ -305,7 +305,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
@@ -502,7 +502,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
@@ -597,7 +597,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
diff --git a/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature b/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature
index 8d96dcdd610..a34d5759437 100644
--- a/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature
+++ b/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature
@@ -7,6 +7,7 @@ Scenario: "Test Average GET Latencies"
| ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 |
| ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `GET {{rgw_host}}` shows:
| metrics | values |
| {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1.5 |
@@ -18,6 +19,7 @@ Scenario: "Test Average PUT Latencies"
| ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 30 50 |
| ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `PUT {{rgw_host}}` shows:
| metrics | values |
| {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1 |
@@ -28,6 +30,7 @@ Scenario: "Test Total Requests/sec by RGW Instance"
| ceph_rgw_req{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 |
| ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
+ And variable `rgw_servers` is `rgw.1`
Then Grafana panel `Total Requests/sec by RGW Instance` with legend `{{rgw_host}}` shows:
| metrics | values |
| {rgw_host="1"} | 1.5 |
@@ -39,6 +42,7 @@ Scenario: "Test GET Latencies by RGW Instance"
| ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 |
| ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `GET Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
| metrics | values |
| {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1.5 |
@@ -71,6 +75,7 @@ Scenario: "Test Bandwidth by RGW Instance"
| ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 |
When evaluation time is `1m`
And interval is `30s`
+ And variable `rgw_servers` is `rgw.1`
Then Grafana panel `Bandwidth by RGW Instance` with legend `{{rgw_host}}` shows:
| metrics | values |
| {ceph_daemon="rgw.1", instance_id="92806566", rgw_host="1"} | 2.25 |
@@ -83,6 +88,7 @@ Scenario: "Test PUT Latencies by RGW Instance"
| ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When evaluation time is `1m`
And interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `PUT Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
| metrics | values |
| {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1 |