author    | Nizamudeen A <nia@redhat.com> | 2024-05-02 10:25:37 +0200
committer | GitHub <noreply@github.com> | 2024-05-02 10:25:37 +0200
commit    | a8d01fff0050c90c634d1f645d10698a4d92ca47 (patch)
tree      | a7413094ac41e72b63ccdeb59146101a725f51aa /monitoring
parent    | Merge pull request #57151 from rhcs-dashboard/fix-65698-main (diff)
parent    | Cleanup of variables, queries and tests to enable showMultiCluster=True (diff)
Merge pull request #55495 from frittentheke/issue_64321
monitoring/ceph-mixin: Cleanup of variables, queries and tests (to fix showMultiCluster=True)
Reviewed-by: Aashish Sharma <aasharma@redhat.com>
Reviewed-by: Ankush Behl <cloudbehl@gmail.com>
Reviewed-by: Nizamudeen A <nia@redhat.com>
Diffstat (limited to 'monitoring')
38 files changed, 1457 insertions, 1692 deletions
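The pattern repeated in every hunk below is the same: plain selectors such as ceph_health_status{} become format strings with a %(matchers)s placeholder filled in via $.matchers(), so the cluster label from $._config.clusterLabel (and the $cluster dashboard variable added by $.addClusterTemplate()) is applied consistently; the DS_PROMETHEUS template variable is renamed to datasource, the per-dashboard $.addJobTemplate() calls are dropped, and existing label filters are reordered so %(matchers)s closes each selector. A minimal sketch of that mechanism is below; the real helper lives in monitoring/ceph-mixin/dashboards/utils.libsonnet, which is not part of this hunk set, so the field names and logic here are assumptions for illustration only.

```jsonnet
// Minimal sketch, not the repository's actual utils.libsonnet: illustrates how
// a matchers() helper could feed the '%(matchers)s' placeholders seen in the
// hunks below. Field names and logic are assumptions.
{
  _config:: {
    clusterLabel: 'cluster',
    showMultiCluster: true,  // flipped from false to true by this change in config.libsonnet
  },

  // Hypothetical helper: returns the mapping consumed by the '%(matchers)s'
  // placeholders used throughout the dashboard expressions.
  matchers():: {
    matchers: if $._config.showMultiCluster
              then '%s=~"$cluster"' % $._config.clusterLabel
              else '',
  },

  // Usage in the style seen below: renders to
  // 'ceph_health_status{cluster=~"$cluster"}' when showMultiCluster is true,
  // and to 'ceph_health_status{}' otherwise.
  exampleExpr:: 'ceph_health_status{%(matchers)s}' % $.matchers(),
}
```

Under this sketch, single-cluster deployments (showMultiCluster: false) collapse the placeholder to an empty matcher list and keep the old metric{} form, while multi-cluster setups filter every query by the $cluster dashboard variable.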
diff --git a/monitoring/ceph-mixin/config.libsonnet b/monitoring/ceph-mixin/config.libsonnet index b35e98957f0..c0af859e459 100644 --- a/monitoring/ceph-mixin/config.libsonnet +++ b/monitoring/ceph-mixin/config.libsonnet @@ -3,7 +3,7 @@ dashboardTags: ['ceph-mixin'], clusterLabel: 'cluster', - showMultiCluster: false, + showMultiCluster: true, CephNodeNetworkPacketDropsThreshold: 0.005, CephNodeNetworkPacketDropsPerSec: 10, diff --git a/monitoring/ceph-mixin/dashboards/ceph-cluster.libsonnet b/monitoring/ceph-mixin/dashboards/ceph-cluster.libsonnet index 4e916d32427..a6991f54cde 100644 --- a/monitoring/ceph-mixin/dashboards/ceph-cluster.libsonnet +++ b/monitoring/ceph-mixin/dashboards/ceph-cluster.libsonnet @@ -1,18 +1,17 @@ local g = import 'grafonnet/grafana.libsonnet'; -local u = import 'utils.libsonnet'; (import 'utils.libsonnet') { - 'ceph-cluster-advanced.json': u.dashboardSchema( + 'ceph-cluster-advanced.json': $.dashboardSchema( 'Ceph Cluster - Advanced', 'Ceph cluster overview', - '', + 'dn13KBeTv', 'now-6h', '1m', 38, $._config.dashboardTags, '' ).addAnnotation( - u.addAnnotationSchema( + $.addAnnotationSchema( 1, '-- Grafana --', true, // enable @@ -29,10 +28,12 @@ local u = import 'utils.libsonnet'; type='panel', id='heatmap', name='Heatmap', version='5.0.0' ).addRequired( type='panel', id='singlestat', name='Singlestat', version='5.0.0' - ). - addTemplate(g.template.datasource('DS_PROMETHEUS', 'prometheus', 'Prometheus', label='Data Source')). - addTemplate( - u.addCustomTemplate( + ).addTemplate( + g.template.datasource('datasource', 'prometheus', 'default', label='Data Source') + ).addTemplate( + $.addClusterTemplate() + ).addTemplate( + $.addCustomTemplate( name='interval', query='5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d', current='$__auto_interval_interval', @@ -60,11 +61,11 @@ local u = import 'utils.libsonnet'; ) ).addPanels( [ - u.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATE') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } }, - u.addStatPanel( + $.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATE') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } }, + $.addStatPanel( title='Ceph health status', unit='none', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 0, y: 1, w: 3, h: 3 }, colorMode='value', interval='1m', @@ -88,15 +89,15 @@ local u = import 'utils.libsonnet'; { color: 'rgba(237, 129, 40, 0.89)', value: 1 }, { color: 'rgba(245, 54, 54, 0.9)', value: 2 }, ]) - .addTarget(u.addTargetSchema( - expr='ceph_health_status{}', + .addTarget($.addTargetSchema( + expr='ceph_health_status{%(matchers)s}' % $.matchers(), instant=true, interval='$interval', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', step=300, )), - u.addGaugePanel( + $.addGaugePanel( title='Available Capacity', gridPosition={ h: 6, w: 3, x: 3, y: 1 }, unit='percentunit', @@ -112,18 +113,18 @@ local u = import 'utils.libsonnet'; { color: 'rgba(237, 129, 40, 0.89)', value: 0.1 }, { color: 'rgba(50, 172, 45, 0.97)', value: 0.3 }, ]) - .addTarget(u.addTargetSchema( - expr='(ceph_cluster_total_bytes{}-ceph_cluster_total_used_bytes{})/ceph_cluster_total_bytes{}', + .addTarget($.addTargetSchema( + expr='(ceph_cluster_total_bytes{%(matchers)s}-ceph_cluster_total_used_bytes{%(matchers)s})/ceph_cluster_total_bytes{%(matchers)s}' % $.matchers(), instant=true, interval='$interval', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', step=300 )), - u.addStatPanel( + $.addStatPanel( title='Cluster Capacity', unit='decbytes', - 
datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 6, y: 1, w: 3, h: 3 }, graphMode='area', decimals=2, @@ -138,18 +139,18 @@ local u = import 'utils.libsonnet'; { color: 'rgba(237, 129, 40, 0.89)', value: 0.025 }, { color: 'rgba(245, 54, 54, 0.9)', value: 1.0 }, ]) - .addTarget(u.addTargetSchema( - expr='ceph_cluster_total_bytes{}', + .addTarget($.addTargetSchema( + expr='ceph_cluster_total_bytes{%(matchers)s}' % $.matchers(), instant=true, interval='$interval', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', step=300 )), - u.addStatPanel( + $.addStatPanel( title='Write Throughput', unit='Bps', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 9, y: 1, w: 3, h: 3 }, decimals=1, color={ mode: 'thresholds' }, @@ -160,17 +161,17 @@ local u = import 'utils.libsonnet'; ]).addThresholds([ { color: 'green' }, ]) - .addTarget(u.addTargetSchema( - expr='sum(irate(ceph_osd_op_w_in_bytes{}[5m]))', + .addTarget($.addTargetSchema( + expr='sum(irate(ceph_osd_op_w_in_bytes{%(matchers)s}[5m]))' % $.matchers(), instant=true, interval='$interval', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', )), - u.addStatPanel( + $.addStatPanel( title='Read Throughput', unit='Bps', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 12, y: 1, w: 3, h: 3 }, decimals=1, color={ mode: 'thresholds' }, @@ -183,16 +184,16 @@ local u = import 'utils.libsonnet'; { color: 'rgba(237, 129, 40, 0.89)', value: 0 }, { color: '#9ac48a', value: 0 }, ]) - .addTarget(u.addTargetSchema( - expr='sum(irate(ceph_osd_op_r_out_bytes{}[5m]))', + .addTarget($.addTargetSchema( + expr='sum(irate(ceph_osd_op_r_out_bytes{%(matchers)s}[5m]))' % $.matchers(), instant=true, interval='$interval', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', )), - u.addStatPanel( + $.addStatPanel( title='OSDs', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 3, w: 6, x: 15, y: 1 }, color={ mode: 'thresholds' }, thresholdsMode='absolute', @@ -222,7 +223,7 @@ local u = import 'utils.libsonnet'; { color: 'red', value: 80 }, ]) .addTargets([ - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='All', decimals=2, @@ -231,12 +232,12 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Number Threshold', - expr='count(ceph_osd_metadata)', + expr='count(ceph_osd_metadata{%(matchers)s})' % $.matchers(), legendFormat='All', interval='$interval', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', ), - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='In', decimals=2, @@ -245,12 +246,12 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Number Threshold', - expr='count(ceph_osd_in)', + expr='count(ceph_osd_in{%(matchers)s})' % $.matchers(), legendFormat='In', interval='$interval', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', ), - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='Out', decimals=2, @@ -259,13 +260,13 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Number Threshold', - expr='sum(ceph_osd_in == bool 0)', + expr='sum(ceph_osd_in{%(matchers)s} == bool 0)' % $.matchers(), legendFormat='Out', interval='', warn=1, - datasource='${DS_PROMETHEUS}', + datasource='$datasource', ), - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='Up', decimals=2, @@ 
-274,12 +275,12 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Number Threshold', - expr='sum(ceph_osd_up)', + expr='sum(ceph_osd_up{%(matchers)s})' % $.matchers(), legendFormat='Up', interval='', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', ), - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='Down', decimals=2, @@ -288,17 +289,17 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Number Threshold', - expr='sum(ceph_osd_up == bool 0)', + expr='sum(ceph_osd_up{%(matchers)s} == bool 0)' % $.matchers(), legendFormat='Down', interval='', warn=1, - datasource='${DS_PROMETHEUS}', + datasource='$datasource', ), ]), - u.addStatPanel( + $.addStatPanel( title='MGRs', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 6, w: 3, x: 21, y: 1 }, color={ mode: 'thresholds' }, thresholdsMode='absolute', @@ -328,7 +329,7 @@ local u = import 'utils.libsonnet'; { color: 'red', value: 80 }, ]) .addTargets([ - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='Active', decimals=2, @@ -337,12 +338,12 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Number Threshold', - expr='count(ceph_mgr_status == 1) or vector(0)', + expr='count(ceph_mgr_status{%(matchers)s} == 1) or vector(0)' % $.matchers(), legendFormat='Active', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', instant=true, ), - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='Standby', decimals=2, @@ -351,16 +352,16 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Number Threshold', - expr='count(ceph_mgr_status == 0) or vector(0)', + expr='count(ceph_mgr_status{%(matchers)s} == 0) or vector(0)' % $.matchers(), legendFormat='Standby', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', instant=true, ), ]), - u.addStatPanel( + $.addStatPanel( title='Firing Alerts', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 3, w: 3, x: 0, y: 4 }, color={ mode: 'thresholds' }, thresholdsMode='absolute', @@ -398,7 +399,7 @@ local u = import 'utils.libsonnet'; ] }, ]) .addTargets([ - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='Active', decimals=2, @@ -407,12 +408,12 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Number Threshold', - expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="critical"}) OR vector(0)', + expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="critical", %(matchers)s}) OR vector(0)' % $.matchers(), legendFormat='Critical', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', instant=true, ), - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='Standby', decimals=2, @@ -421,16 +422,16 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Number Threshold', - expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="warning"}) OR vector(0)', + expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="warning", %(matchers)s}) OR vector(0)' % $.matchers(), legendFormat='Warning', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', instant=true, ), ]), - u.addStatPanel( + $.addStatPanel( title='Used Capacity', - 
datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 3, w: 3, x: 6, y: 4 }, color={ mode: 'thresholds' }, thresholdsMode='absolute', @@ -451,17 +452,17 @@ local u = import 'utils.libsonnet'; { color: 'rgba(245, 54, 54, 0.9)', value: 0.1 }, ]) .addTargets([ - u.addTargetSchema( - expr='ceph_cluster_total_used_bytes{}', + $.addTargetSchema( + expr='ceph_cluster_total_used_bytes{%(matchers)s}' % $.matchers(), legendFormat='', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', instant=true, ), ]), - u.addStatPanel( + $.addStatPanel( title='Write IOPS', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 3, w: 3, x: 9, y: 4 }, color={ mode: 'thresholds' }, thresholdsMode='absolute', @@ -480,17 +481,17 @@ local u = import 'utils.libsonnet'; { color: 'green', value: null }, ]) .addTargets([ - u.addTargetSchema( - expr='sum(irate(ceph_osd_op_w{}[1m]))', + $.addTargetSchema( + expr='sum(irate(ceph_osd_op_w{%(matchers)s}[1m]))' % $.matchers(), legendFormat='', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', instant=true, ), ]), - u.addStatPanel( + $.addStatPanel( title='Read IOPS', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 3, w: 3, x: 12, y: 4 }, color={ mode: 'thresholds' }, thresholdsMode='absolute', @@ -511,17 +512,17 @@ local u = import 'utils.libsonnet'; { color: '#9ac48a', value: 0 }, ]) .addTargets([ - u.addTargetSchema( - expr='sum(irate(ceph_osd_op_r{}[1m]))', + $.addTargetSchema( + expr='sum(irate(ceph_osd_op_r{%(matchers)s}[1m]))' % $.matchers(), legendFormat='', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', instant=true, ), ]), - u.addStatPanel( + $.addStatPanel( title='Monitors', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 3, w: 6, x: 15, y: 4 }, color={ mode: 'thresholds' }, thresholdsMode='absolute', @@ -551,7 +552,7 @@ local u = import 'utils.libsonnet'; { color: 'red', value: 80 }, ]) .addTargets([ - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='In Quorum', decimals=2, @@ -560,11 +561,11 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Text Only', - expr='sum(ceph_mon_quorum_status)', + expr='sum(ceph_mon_quorum_status{%(matchers)s})' % $.matchers(), legendFormat='In Quorum', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', ), - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='Total', crit=1, @@ -574,12 +575,12 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='When Alias Displayed', units='none', valueHandler='Text Only', - expr='count(ceph_mon_quorum_status)', + expr='count(ceph_mon_quorum_status{%(matchers)s})' % $.matchers(), legendFormat='Total', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', warn=2, ), - u.addTargetSchema( + $.addTargetSchema( aggregation='Last', alias='MONs out of Quorum', crit=1.6, @@ -589,22 +590,22 @@ local u = import 'utils.libsonnet'; displayValueWithAlias='Never', units='none', valueHandler='Number Threshold', - expr='count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status)', + expr='count(ceph_mon_quorum_status{%(matchers)s}) - sum(ceph_mon_quorum_status{%(matchers)s})' % $.matchers(), legendFormat='MONs out of Quorum', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', warn=1.1, range=true, ), ]), - u.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATS') + { gridPos: { x: 0, y: 7, w: 24, h: 1 } }, - u.addAlertListPanel( + 
$.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATS') + { gridPos: { x: 0, y: 7, w: 24, h: 1 } }, + $.addAlertListPanel( title='Alerts', datasource={ type: 'datasource', uid: 'grafana', }, gridPosition={ h: 8, w: 8, x: 0, y: 8 }, - alertInstanceLabelFilter='{alertname=~"^Ceph.+"}', + alertInstanceLabelFilter='{alertname=~"^Ceph.+", %(matchers)s}' % $.matchers(), alertName='', dashboardAlerts=false, groupBy=[], @@ -620,9 +621,9 @@ local u = import 'utils.libsonnet'; }, ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Capacity', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 8, w: 8, x: 8, y: 8 }, fillOpacity=40, pointSize=5, @@ -670,18 +671,18 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema( - expr='ceph_cluster_total_bytes{}', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='ceph_cluster_total_bytes{%(matchers)s}' % $.matchers(), + datasource='$datasource', interval='$interval', instant=false, legendFormat='Total Capacity', step=300, range=true, ), - u.addTargetSchema( - expr='ceph_cluster_total_used_bytes{}', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='ceph_cluster_total_used_bytes{%(matchers)s}' % $.matchers(), + datasource='$datasource', interval='$interval', instant=false, legendFormat='Used', @@ -691,9 +692,9 @@ local u = import 'utils.libsonnet'; ] ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Cluster Throughput', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 8, w: 8, x: 16, y: 8 }, fillOpacity=10, pointSize=5, @@ -714,17 +715,17 @@ local u = import 'utils.libsonnet'; ]) .addTargets( [ - u.addTargetSchema( - expr='sum(irate(ceph_osd_op_w_in_bytes{}[5m]))', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(irate(ceph_osd_op_w_in_bytes{%(matchers)s}[5m]))' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Write', step=300, range=true, ), - u.addTargetSchema( - expr='sum(irate(ceph_osd_op_r_out_bytes{}[5m]))', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(irate(ceph_osd_op_r_out_bytes{%(matchers)s}[5m]))' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Read', step=300, @@ -733,9 +734,9 @@ local u = import 'utils.libsonnet'; ] ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='IOPS', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 8, w: 8, x: 0, y: 16 }, fillOpacity=10, pointSize=5, @@ -757,17 +758,17 @@ local u = import 'utils.libsonnet'; ]) .addTargets( [ - u.addTargetSchema( - expr='sum(irate(ceph_osd_op_w{}[1m]))', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(irate(ceph_osd_op_w{%(matchers)s}[1m]))' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Write', step=300, range=true, ), - u.addTargetSchema( - expr='sum(irate(ceph_osd_op_r{}[1m]))', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(irate(ceph_osd_op_r{%(matchers)s}[1m]))' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Read', step=300, @@ -776,9 +777,9 @@ local u = import 'utils.libsonnet'; ] ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Pool Used Bytes', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 8, w: 8, x: 8, y: 16 }, fillOpacity=10, pointSize=5, @@ -800,9 +801,9 @@ local u = import 'utils.libsonnet'; ]) .addTargets( [ - u.addTargetSchema( - expr='(ceph_pool_bytes_used{}) *on (pool_id) 
group_left(name)(ceph_pool_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='(ceph_pool_bytes_used{%(matchers)s}) *on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='{{name}}', step=300, @@ -810,9 +811,9 @@ local u = import 'utils.libsonnet'; ] ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Pool Used RAW Bytes', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 8, w: 8, x: 16, y: 16 }, fillOpacity=10, pointSize=5, @@ -844,9 +845,9 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema( - expr='(ceph_pool_stored_raw{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='(ceph_pool_stored_raw{%(matchers)s}) *on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='', legendFormat='{{name}}', step=300, @@ -856,9 +857,9 @@ local u = import 'utils.libsonnet'; ] ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Pool Objects Quota', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 7, w: 8, x: 0, y: 24 }, fillOpacity=10, pointSize=5, @@ -880,9 +881,9 @@ local u = import 'utils.libsonnet'; ]) .addTargets( [ - u.addTargetSchema( - expr='(ceph_pool_quota_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='(ceph_pool_quota_objects{%(matchers)s}) *on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='', legendFormat='{{name}}', step=300, @@ -890,9 +891,9 @@ local u = import 'utils.libsonnet'; ] ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Pool Quota Bytes', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 7, w: 8, x: 8, y: 24 }, fillOpacity=10, pointSize=5, @@ -914,9 +915,9 @@ local u = import 'utils.libsonnet'; ]) .addTargets( [ - u.addTargetSchema( - expr='(ceph_pool_quota_bytes{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='(ceph_pool_quota_bytes{%(matchers)s}) *on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='', legendFormat='{{name}}', step=300, @@ -924,9 +925,9 @@ local u = import 'utils.libsonnet'; ] ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Objects Per Pool', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 7, w: 8, x: 16, y: 24 }, fillOpacity=10, pointSize=5, @@ -948,20 +949,20 @@ local u = import 'utils.libsonnet'; ]) .addTargets( [ - u.addTargetSchema( - expr='(ceph_pool_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='(ceph_pool_objects{%(matchers)s}) * on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='', legendFormat='{{name}}', ), ] ), - u.addRowSchema(collapse=false, showTitle=true, title='OBJECTS') + { gridPos: { x: 0, y: 31, w: 24, h: 1 } }, + $.addRowSchema(collapse=false, showTitle=true, title='OBJECTS') + { gridPos: { x: 0, y: 31, w: 24, h: 1 } }, - u.timeSeriesPanel( + $.timeSeriesPanel( title='OSD Type Count', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 12, w: 6, x: 0, y: 32 }, fillOpacity=10, pointSize=5, @@ -995,9 
+996,9 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema( - expr='sum(ceph_pool_objects)', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pool_objects{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Total', range=true, @@ -1006,9 +1007,9 @@ local u = import 'utils.libsonnet'; ] ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='PGs State', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 12, w: 8, x: 6, y: 32 }, fillOpacity=10, pointSize=5, @@ -1043,153 +1044,153 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema( - expr='sum(ceph_pg_active{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_active{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Active', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_clean{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_clean{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Clean', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_peering{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_peering{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Peering', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_degraded{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_degraded{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Degraded', range=true, step=300, ), - u.addTargetSchema( - expr='sum(ceph_pg_stale{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_stale{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Stale', range=true, step=300, ), - u.addTargetSchema( - expr='sum(ceph_unclean_pgs{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_unclean_pgs{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Unclean', range=true, step=300, ), - u.addTargetSchema( - expr='sum(ceph_pg_undersized{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_undersized{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Undersized', range=true, step=300, ), - u.addTargetSchema( - expr='sum(ceph_pg_incomplete{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_incomplete{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Incomplete', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_forced_backfill{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_forced_backfill{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Forced Backfill', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_forced_recovery{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_forced_recovery{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Forced Recovery', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_creating{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_creating{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', 
legendFormat='Creating', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_wait_backfill{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_wait_backfill{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Wait Backfill', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_deep{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_deep{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Deep', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_scrubbing{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_scrubbing{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Scrubbing', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_recovering{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_recovering{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Recovering', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_repair{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_repair{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Repair', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_down{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_down{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Down', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_peered{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_peered{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Peered', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_backfill{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_backfill{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Backfill', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_remapped{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_remapped{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Remapped', range=true, ), - u.addTargetSchema( - expr='sum(ceph_pg_backfill_toofull{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_backfill_toofull{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Backfill Toofull', range=true, @@ -1197,9 +1198,9 @@ local u = import 'utils.libsonnet'; ] ), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Stuck PGs', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 6, w: 10, x: 14, y: 32 }, fillOpacity=10, pointSize=5, @@ -1233,25 +1234,25 @@ local u = import 'utils.libsonnet'; ] ) .addTargets([ - u.addTargetSchema( - expr='sum(ceph_pg_degraded{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_degraded{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Degraded', range=true, step=300, ), - u.addTargetSchema( - expr='sum(ceph_pg_stale{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_stale{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Stale', range=true, step=300, ), - u.addTargetSchema( - 
expr='sum(ceph_pg_undersized{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(ceph_pg_undersized{%(matchers)s})' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='Undersized', range=true, @@ -1259,9 +1260,9 @@ local u = import 'utils.libsonnet'; ), ]), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Recovery Operations', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 6, w: 10, x: 14, y: 38 }, fillOpacity=10, pointSize=5, @@ -1283,19 +1284,19 @@ local u = import 'utils.libsonnet'; { color: 'red', value: 80 }, ]) .addTargets([ - u.addTargetSchema( - expr='sum(irate(ceph_osd_recovery_ops{}[$interval]))', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='sum(irate(ceph_osd_recovery_ops{%(matchers)s}[$interval]))' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='OPS', step=300, ), ]), - u.addRowSchema(false, true, 'LATENCY', collapsed=true) + $.addRowSchema(false, true, 'LATENCY', collapsed=true) .addPanels([ - u.heatMapPanel( + $.heatMapPanel( title='OSD Apply Latency Distribution', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 8, w: 12, x: 0, y: 42 }, colorMode='opacity', legendShow=true, @@ -1329,15 +1330,15 @@ local u = import 'utils.libsonnet'; yAxisMin='0', yBucketSize=10, pluginVersion='9.4.7', - ).addTarget(u.addTargetSchema( - expr='ceph_osd_apply_latency_ms{}', - datasource='${DS_PROMETHEUS}', + ).addTarget($.addTargetSchema( + expr='ceph_osd_apply_latency_ms{%(matchers)s}' % $.matchers(), + datasource='$datasource', interval='$interval', instant=false, )), - u.heatMapPanel( + $.heatMapPanel( title='OSD Commit Latency Distribution', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 8, w: 12, x: 12, y: 42 }, colorMode='opacity', legendShow=true, @@ -1378,15 +1379,15 @@ local u = import 'utils.libsonnet'; yAxisMin='0', yBucketSize=10, pluginVersion='9.4.7', - ).addTarget(u.addTargetSchema( - expr='ceph_osd_commit_latency_ms{}', - datasource='${DS_PROMETHEUS}', + ).addTarget($.addTargetSchema( + expr='ceph_osd_commit_latency_ms{%(matchers)s}' % $.matchers(), + datasource='$datasource', interval='$interval', instant=false, )), - u.heatMapPanel( + $.heatMapPanel( title='OSD Read Op Latency Distribution', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 8, w: 12, x: 0, y: 50 }, colorMode='opacity', legendShow=true, @@ -1428,16 +1429,16 @@ local u = import 'utils.libsonnet'; yAxisMin='0', yBucketSize=null, pluginVersion='9.4.7', - ).addTarget(u.addTargetSchema( - expr='rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0', - datasource='${DS_PROMETHEUS}', + ).addTarget($.addTargetSchema( + expr='rate(ceph_osd_op_r_latency_sum{%(matchers)s}[5m]) / rate(ceph_osd_op_r_latency_count{%(matchers)s}[5m]) >= 0' % $.matchers(), + datasource='$datasource', interval='$interval', instant=false, )), - u.heatMapPanel( + $.heatMapPanel( title='OSD Write Op Latency Distribution', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 8, w: 12, x: 12, y: 50 }, colorMode='opacity', legendShow=true, @@ -1479,16 +1480,16 @@ local u = import 'utils.libsonnet'; yAxisMin='0', yBucketSize=null, pluginVersion='9.4.7', - ).addTarget(u.addTargetSchema( - expr='rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0', - datasource='${DS_PROMETHEUS}', + ).addTarget($.addTargetSchema( + 
expr='rate(ceph_osd_op_w_latency_sum{%(matchers)s}[5m]) / rate(ceph_osd_op_w_latency_count{%(matchers)s}[5m]) >= 0' % $.matchers(), + datasource='$datasource', interval='$interval', legendFormat='', instant=false, )), - u.timeSeriesPanel( + $.timeSeriesPanel( title='Recovery Operations', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 7, w: 12, x: 0, y: 58 }, fillOpacity=10, pointSize=5, @@ -1510,21 +1511,21 @@ local u = import 'utils.libsonnet'; { color: 'red', value: 80 }, ]) .addTargets([ - u.addTargetSchema( - expr='avg(rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0)', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='avg(rate(ceph_osd_op_r_latency_sum{%(matchers)s}[5m]) / rate(ceph_osd_op_r_latency_count{%(matchers)s}[5m]) >= 0)' % $.matchers(), + datasource='$datasource', legendFormat='Read', ), - u.addTargetSchema( - expr='avg(rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0)', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='avg(rate(ceph_osd_op_w_latency_sum{%(matchers)s}[5m]) / rate(ceph_osd_op_w_latency_count{%(matchers)s}[5m]) >= 0)' % $.matchers(), + datasource='$datasource', legendFormat='Write', ), ]), - u.timeSeriesPanel( + $.timeSeriesPanel( title='AVG OSD Apply + Commit Latency', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 7, w: 12, x: 12, y: 58 }, fillOpacity=10, pointSize=5, @@ -1547,17 +1548,17 @@ local u = import 'utils.libsonnet'; { color: 'red', value: 80 }, ]) .addTargets([ - u.addTargetSchema( - expr='avg(ceph_osd_apply_latency_ms{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='avg(ceph_osd_apply_latency_ms{%(matchers)s})' % $.matchers(), + datasource='$datasource', legendFormat='apply', interval='$interval', metric='ceph_osd_perf_apply_latency_seconds', step=4, ), - u.addTargetSchema( - expr='avg(ceph_osd_commit_latency_ms{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='avg(ceph_osd_commit_latency_ms{%(matchers)s})' % $.matchers(), + datasource='$datasource', legendFormat='commit', interval='$interval', metric='ceph_osd_perf_commit_latency_seconds', @@ -1566,10 +1567,10 @@ local u = import 'utils.libsonnet'; ]), ]) + { gridPos: { x: 0, y: 44, w: 24, h: 1 } }, - u.addRowSchema(collapse=true, showTitle=true, title='', collapsed=false) + { gridPos: { x: 0, y: 45, w: 24, h: 1 } }, + $.addRowSchema(collapse=true, showTitle=true, title='', collapsed=false) + { gridPos: { x: 0, y: 45, w: 24, h: 1 } }, - u.addTableExtended( - datasource='${DS_PROMETHEUS}', + $.addTableExtended( + datasource='$datasource', title='Ceph Versions', gridPosition={ h: 6, w: 24, x: 0, y: 46 }, options={ @@ -1620,9 +1621,9 @@ local u = import 'utils.libsonnet'; }, }, ]).addTargets([ - u.addTargetSchema( - expr='count by (ceph_version)(ceph_osd_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='count by (ceph_version)(ceph_osd_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', format='table', hide=false, exemplar=false, @@ -1631,9 +1632,9 @@ local u = import 'utils.libsonnet'; legendFormat='OSD Services', range=false, ), - u.addTargetSchema( - expr='count by (ceph_version)(ceph_mon_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='count by (ceph_version)(ceph_mon_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', format='table', hide=false, exemplar=false, @@ -1642,9 +1643,9 @@ local u = import 
'utils.libsonnet'; legendFormat='Mon Services', range=false, ), - u.addTargetSchema( - expr='count by (ceph_version)(ceph_mds_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='count by (ceph_version)(ceph_mds_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', format='table', hide=false, exemplar=false, @@ -1652,9 +1653,9 @@ local u = import 'utils.libsonnet'; legendFormat='MDS Services', range=false, ), - u.addTargetSchema( - expr='count by (ceph_version)(ceph_rgw_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='count by (ceph_version)(ceph_rgw_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', format='table', hide=false, exemplar=false, @@ -1663,9 +1664,9 @@ local u = import 'utils.libsonnet'; legendFormat='RGW Services', range=false, ), - u.addTargetSchema( - expr='count by (ceph_version)(ceph_mgr_metadata{})', - datasource='${DS_PROMETHEUS}', + $.addTargetSchema( + expr='count by (ceph_version)(ceph_mgr_metadata{%(matchers)s})' % $.matchers(), + datasource='$datasource', format='table', hide=false, exemplar=false, diff --git a/monitoring/ceph-mixin/dashboards/cephfs.libsonnet b/monitoring/ceph-mixin/dashboards/cephfs.libsonnet index d12d9f4ddc1..11548ef2a9d 100644 --- a/monitoring/ceph-mixin/dashboards/cephfs.libsonnet +++ b/monitoring/ceph-mixin/dashboards/cephfs.libsonnet @@ -36,9 +36,6 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addClusterTemplate() ) .addTemplate( - $.addJobTemplate() - ) - .addTemplate( $.addTemplateSchema('mds_servers', '$datasource', 'label_values(ceph_mds_inodes{%(matchers)s}, ceph_daemon)' % $.matchers(), @@ -57,7 +54,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 'none', 'Reads(-) / Writes (+)', 0, - 'sum(rate(ceph_objecter_op_r{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % $.matchers(), + 'sum(rate(ceph_objecter_op_r{ceph_daemon=~"($mds_servers).*", %(matchers)s}[$__rate_interval]))' % $.matchers(), 'Read Ops', 0, 1, @@ -65,7 +62,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 9 ) .addTarget($.addTargetSchema( - 'sum(rate(ceph_objecter_op_w{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % $.matchers(), + 'sum(rate(ceph_objecter_op_w{ceph_daemon=~"($mds_servers).*", %(matchers)s}[$__rate_interval]))' % $.matchers(), 'Write Ops' )) .addSeriesOverride( @@ -78,7 +75,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 'none', 'Client Requests', 0, - 'ceph_mds_server_handle_client_request{%(matchers)s, ceph_daemon=~"($mds_servers).*"}' % $.matchers(), + 'ceph_mds_server_handle_client_request{ceph_daemon=~"($mds_servers).*", %(matchers)s}' % $.matchers(), '{{ceph_daemon}}', 12, 1, diff --git a/monitoring/ceph-mixin/dashboards/host.libsonnet b/monitoring/ceph-mixin/dashboards/host.libsonnet index 674f678a170..cf7e045695a 100644 --- a/monitoring/ceph-mixin/dashboards/host.libsonnet +++ b/monitoring/ceph-mixin/dashboards/host.libsonnet @@ -42,12 +42,9 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addClusterTemplate() ) .addTemplate( - $.addJobTemplate() - ) - .addTemplate( $.addTemplateSchema('osd_hosts', '$datasource', - 'label_values(ceph_disk_occupation{%(matchers)s}, exported_instance)' % $.matchers(), + 'label_values(ceph_osd_metadata{%(matchers)s}, hostname)' % $.matchers(), 1, true, 1, @@ -57,7 +54,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTemplate( $.addTemplateSchema('mon_hosts', '$datasource', - 'label_values(ceph_mon_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(), + 
'label_values(ceph_mon_metadata{%(matchers)s}, hostname)' % $.matchers(), 1, true, 1, @@ -67,7 +64,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTemplate( $.addTemplateSchema('mds_hosts', '$datasource', - 'label_values(ceph_mds_inodes{%(matchers)s}, ceph_daemon)' % $.matchers(), + 'label_values(ceph_mds_inodes{hostname, %(matchers)s})' % $.matchers(), 1, true, 1, @@ -77,7 +74,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTemplate( $.addTemplateSchema('rgw_hosts', '$datasource', - 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(), + 'label_values(ceph_rgw_metadata{hostname, %(matchers)s})' % $.matchers(), 1, true, 1, @@ -188,7 +185,7 @@ local g = import 'grafonnet/grafana.libsonnet'; "instance", "$1", "instance", "([^.:]*).*" ) * on(instance, device) group_left(ceph_daemon) label_replace( label_replace( - ceph_disk_occupation_human{%(matchers)s, instance=~"($osd_hosts).*"}, + ceph_disk_occupation_human{instance=~"($osd_hosts).*", %(matchers)s}, "device", "$1", "device", "/dev/(.*)" ), "instance", "$1", "instance", "([^.:]*).*" ) @@ -209,17 +206,17 @@ local g = import 'grafonnet/grafana.libsonnet'; ||| sum ( ( - rate(node_network_receive_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) or - rate(node_network_receive_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) + rate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or + rate(node_network_receive_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) ) unless on (device, instance) - label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)") + label_replace((node_bonding_slaves > 0), "device", "$1", "master", "(.+)") ) + sum ( ( - rate(node_network_transmit_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) or - rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) + rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or + rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) ) unless on (device, instance) - label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)") + label_replace((node_bonding_slaves > 0), "device", "$1", "master", "(.+)") ) |||, true, @@ -271,7 +268,7 @@ local g = import 'grafonnet/grafana.libsonnet'; rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) ) unless on (device, instance) - label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")) + label_replace((node_bonding_slaves > 0), "device", "$1", "master", "(.+)")) )) |||, '{{instance}}', @@ -313,17 +310,14 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addClusterTemplate() ) .addTemplate( - $.addJobTemplate() - ) - .addTemplate( $.addTemplateSchema('ceph_hosts', '$datasource', - if $._config.showMultiCluster then ('label_values({%(clusterMatcher)s}, instance)' % $.matchers()) else 'label_values(instance)', + 'label_values({__name__=~"ceph_.+_metadata", %(matchers)s}, hostname)' % 
$.matchers(), 1, - false, - 3, - 'Hostname', - '([^.:]*).*') + true, + 1, + null, + '([^.]*).*') ) .addPanels([ $.addRowSchema(false, true, '$ceph_hosts System Overview') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } }, @@ -332,7 +326,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 'OSDs', '', 'current', - "count(sum by (ceph_daemon) (ceph_osd_metadata{%(matchers)s, hostname='$ceph_hosts'}))" % $.matchers(), + 'count(sum by (ceph_daemon) (ceph_osd_metadata{%(matchers)s}))' % $.matchers(), null, 'time_series', 0, @@ -532,7 +526,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ||| sum( ceph_osd_stat_bytes{%(matchers)s} and - on (ceph_daemon) ceph_disk_occupation{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"} + on (ceph_daemon) ceph_disk_occupation{instance=~"($ceph_hosts)([\\\\.:].*)?", %(matchers)s} ) ||| % $.matchers(), null, @@ -709,7 +703,7 @@ local g = import 'grafonnet/grafana.libsonnet'; rate(node_disk_io_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) * 100 ), "instance", "$1", "instance", "([^:.]*).*" ) * on(instance, device) group_left(ceph_daemon) label_replace( - label_replace(ceph_disk_occupation_human{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"}, + label_replace(ceph_disk_occupation_human{instance=~"($ceph_hosts)([\\\\.:].*)?", %(matchers)s}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*" ) ||| % $.matchers(), @@ -786,7 +780,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addTargetSchema( ||| topk(10, - (sum by (instance)(ceph_daemon_health_metrics{type="SLOW_OPS", ceph_daemon=~"osd.*"})) + (sum by (instance)(ceph_daemon_health_metrics{type="SLOW_OPS", ceph_daemon=~"osd.*", %(matchers)s})) ) ||| % $.matchers(), '', diff --git a/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet b/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet index a7705654008..2559206fbe2 100644 --- a/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet +++ b/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet @@ -24,25 +24,30 @@ local g = import 'grafonnet/grafana.libsonnet'; ) ) .addTemplate( - g.template.datasource('DS_PROMETHEUS', 'prometheus', 'default', label='Data Source') + g.template.datasource('datasource', 'prometheus', 'default', label='Data Source') ) .addTemplate( - $.addTemplateSchema('Cluster', - '$DS_PROMETHEUS', - 'label_values(ceph_health_status, cluster)', - 2, - true, - 0, - null, - '', - current='All') + $.addTemplateSchema( + 'cluster', + '$datasource', + 'label_values(ceph_health_status, %s)' % $._config.clusterLabel, + 1, + true, + 1, + 'cluster', + '(.*)', + if !$._config.showMultiCluster then 'variable' else '', + multi=true, + allValues='.*', + ), ) + .addPanels([ $.addRowSchema(false, true, 'Clusters') + { gridPos: { x: 0, y: 1, w: 24, h: 1 } }, $.addStatPanel( title='Status', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 0, y: 2, w: 5, h: 7 }, graphMode='none', colorMode='value', @@ -87,23 +92,23 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets([ $.addTargetSchema( expr='count(ceph_health_status==0) or vector(0)', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', legendFormat='Healthy', ), $.addTargetSchema( expr='count(ceph_health_status==1)', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', legendFormat='Warning' ), $.addTargetSchema( expr='count(ceph_health_status==2)', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', legendFormat='Error' ), ]), $.addTableExtended( - 
datasource='${DS_PROMETHEUS}', + datasource='$datasource', title='Details', gridPosition={ h: 7, w: 19, x: 5, y: 2 }, options={ @@ -241,7 +246,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ]).addTargets([ $.addTargetSchema( expr='ceph_health_status', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -252,7 +257,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ), $.addTargetSchema( expr='ceph_mgr_metadata', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -262,8 +267,8 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='count(ALERTS{alertstate="firing", cluster=~"$Cluster"})', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='count(ALERTS{alertstate="firing", cluster=~"$cluster"})', + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -274,7 +279,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ), $.addTargetSchema( expr='sum by (cluster) (irate(ceph_pool_wr[$__interval])) \n+ sum by (cluster) (irate(ceph_pool_rd[$__interval])) ', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -285,7 +290,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ), $.addTargetSchema( expr='sum by (cluster) (irate(ceph_pool_rd_bytes[$__interval]))\n+ sum by (cluster) (irate(ceph_pool_wr_bytes[$__interval])) ', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -296,7 +301,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ), $.addTargetSchema( expr='ceph_cluster_by_class_total_used_bytes', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -311,7 +316,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addRowSchema(false, true, 'Overview') + { gridPos: { x: 0, y: 9, w: 24, h: 1 } }, $.addStatPanel( title='Cluster Count', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 0, y: 10, w: 3, h: 4 }, graphMode='none', colorMode='value', @@ -325,8 +330,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='count(ceph_health_status{cluster=~"$Cluster"}) or vector(0)', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='count(ceph_health_status{cluster=~"$cluster"}) or vector(0)', + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -352,15 +357,15 @@ local g = import 'grafonnet/grafana.libsonnet'; { color: 'red', value: 0.85 }, ]) .addTarget($.addTargetSchema( - expr='sum(ceph_cluster_total_used_bytes{cluster=~"$Cluster"}) / sum(ceph_cluster_total_bytes{cluster=~"$Cluster"})', + expr='sum(ceph_cluster_total_used_bytes{cluster=~"$cluster"}) / sum(ceph_cluster_total_bytes{cluster=~"$cluster"})', instant=true, legendFormat='Used', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', )), $.addStatPanel( title='Total Capacity', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 7, y: 10, w: 3, h: 4 }, graphMode='area', colorMode='none', @@ -374,8 +379,8 
@@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='sum(ceph_cluster_total_bytes{cluster=~"$Cluster"})', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='sum(ceph_cluster_total_bytes{cluster=~"$cluster"})', + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -388,7 +393,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='OSDs', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 10, y: 10, w: 3, h: 4 }, graphMode='area', colorMode='none', @@ -402,8 +407,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='count(ceph_osd_metadata{cluster=~"$Cluster"})', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='count(ceph_osd_metadata{cluster=~"$cluster"})', + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -416,7 +421,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='Hosts', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 13, y: 10, w: 3, h: 4 }, graphMode='area', colorMode='none', @@ -430,8 +435,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='count(sum by (hostname) (ceph_osd_metadata{cluster=~"$Cluster"}))', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='count(sum by (hostname) (ceph_osd_metadata{cluster=~"$cluster"}))', + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -444,7 +449,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='Client IOPS', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 16, y: 10, w: 4, h: 4 }, graphMode='area', colorMode='none', @@ -458,8 +463,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='sum(irate(ceph_pool_wr{cluster=~"$Cluster"}[$__interval]))', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='sum(irate(ceph_pool_wr{cluster=~"$cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '$datasource' }, hide=false, exemplar=false, instant=false, @@ -467,8 +472,8 @@ local g = import 'grafonnet/grafana.libsonnet'; range=true, ), $.addTargetSchema( - expr='sum(irate(ceph_pool_rd{cluster=~"$Cluster"}[$__interval]))', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='sum(irate(ceph_pool_rd{cluster=~"$cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '$datasource' }, hide=false, exemplar=false, legendFormat='Read', @@ -478,7 +483,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='OSD Latencies', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 20, y: 10, w: 4, h: 4 }, graphMode='area', colorMode='none', @@ -492,8 +497,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='avg(ceph_osd_apply_latency_ms{cluster=~"$Cluster"})', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='avg(ceph_osd_apply_latency_ms{cluster=~"$cluster"})', + datasource={ type: 'prometheus', uid: '$datasource' }, hide=false, exemplar=false, instant=false, @@ -501,8 +506,8 @@ local g = import 'grafonnet/grafana.libsonnet'; range=true, ), $.addTargetSchema( - expr='avg(ceph_osd_commit_latency_ms{cluster=~"$Cluster"})', - datasource={ type: 'prometheus', uid: 
'${DS_PROMETHEUS}' }, + expr='avg(ceph_osd_commit_latency_ms{cluster=~"$cluster"})', + datasource={ type: 'prometheus', uid: '$datasource' }, hide=false, exemplar=false, legendFormat='Commit', @@ -512,7 +517,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='Alert Count', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 0, y: 14, w: 3, h: 4 }, graphMode='none', colorMode='value', @@ -526,8 +531,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='count(ALERTS{alertstate="firing", cluster=~"$Cluster"}) or vector(0)', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='count(ALERTS{alertstate="firing", cluster=~"$cluster"}) or vector(0)', + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -540,7 +545,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='Total Used', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 7, y: 14, w: 3, h: 4 }, graphMode='area', colorMode='none', @@ -554,8 +559,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='sum(ceph_cluster_total_used_bytes{cluster=~"$Cluster"})', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='sum(ceph_cluster_total_used_bytes{cluster=~"$cluster"})', + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -568,7 +573,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='Capacity Prediction', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 10, y: 14, w: 3, h: 4 }, graphMode='none', colorMode='none', @@ -583,7 +588,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets([ $.addTargetSchema( expr='predict_linear(avg(increase(ceph_cluster_total_used_bytes{cluster=~"${Cluster}"}[1d]))[7d:1h],120)', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + datasource={ type: 'prometheus', uid: '$datasource' }, hide=false, exemplar=false, legendFormat='__auto', @@ -593,7 +598,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='Pools', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 13, y: 14, w: 3, h: 4 }, graphMode='area', colorMode='none', @@ -607,8 +612,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='count(ceph_pool_metadata{cluster=~"$Cluster"})', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='count(ceph_pool_metadata{cluster=~"$cluster"})', + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -621,7 +626,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='Client Bandwidth', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 16, y: 14, w: 4, h: 4 }, graphMode='area', colorMode='none', @@ -635,8 +640,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='sum(irate(ceph_pool_rd_bytes{cluster=~"$Cluster"}[$__interval]))', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='sum(irate(ceph_pool_rd_bytes{cluster=~"$cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '$datasource' }, hide=false, exemplar=false, instant=false, @@ -644,8 +649,8 @@ local g = import 'grafonnet/grafana.libsonnet'; range=true, ), $.addTargetSchema( - 
expr='sum(irate(ceph_pool_wr_bytes{cluster=~"$Cluster"}[$__interval]))', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='sum(irate(ceph_pool_wr_bytes{cluster=~"$cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '$datasource' }, hide=false, exemplar=false, legendFormat='Read', @@ -655,7 +660,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addStatPanel( title='Recovery Rate', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 20, y: 14, w: 4, h: 4 }, graphMode='area', colorMode='none', @@ -669,8 +674,8 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='sum(irate(ceph_osd_recovery_ops{cluster=~"$Cluster"}[$__interval]))', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='sum(irate(ceph_osd_recovery_ops{cluster=~"$cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '$datasource' }, hide=false, exemplar=false, instant=false, @@ -684,7 +689,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addPanels([ $.addStatPanel( title='Status', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ x: 0, y: 19, w: 5, h: 7 }, graphMode='area', colorMode='value', @@ -719,15 +724,15 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='count(ALERTS{alertstate="firing",severity="critical", cluster=~"$Cluster"}) OR vector(0)', - datasource='${DS_PROMETHEUS}', + expr='count(ALERTS{alertstate="firing",severity="critical", cluster=~"$cluster"}) OR vector(0)', + datasource='$datasource', legendFormat='Critical', instant=true, range=false ), $.addTargetSchema( - expr='count(ALERTS{alertstate="firing",severity="warning", cluster=~"$Cluster"}) OR vector(0)', - datasource='${DS_PROMETHEUS}', + expr='count(ALERTS{alertstate="firing",severity="warning", cluster=~"$cluster"}) OR vector(0)', + datasource='$datasource', legendFormat='Warning', instant=true, range=false @@ -736,7 +741,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addTableExtended( - datasource='${DS_PROMETHEUS}', + datasource='$datasource', title='Alerts', gridPosition={ h: 7, w: 19, x: 5, y: 19 }, options={ @@ -801,8 +806,8 @@ local g = import 'grafonnet/grafana.libsonnet'; }, ]).addTargets([ $.addTargetSchema( - expr='ALERTS{alertstate="firing", cluster=~"$Cluster"}', - datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + expr='ALERTS{alertstate="firing", %(matchers)s}}' % $.matchers(), + datasource={ type: 'prometheus', uid: '$datasource' }, format='table', hide=false, exemplar=false, @@ -844,7 +849,7 @@ local g = import 'grafonnet/grafana.libsonnet'; drawStyle='line', axisPlacement='auto', title='Top 5 - Capacity Utilization(%)', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 7, w: 8, x: 0, y: 30 }, fillOpacity=0, pointSize=5, @@ -869,7 +874,7 @@ local g = import 'grafonnet/grafana.libsonnet'; [ $.addTargetSchema( expr='topk(5, ceph_cluster_total_used_bytes/ceph_cluster_total_bytes)', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', instant=false, legendFormat='{{cluster}}', step=300, @@ -885,7 +890,7 @@ local g = import 'grafonnet/grafana.libsonnet'; drawStyle='line', axisPlacement='auto', title='Top 5 - Cluster IOPS', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 7, w: 8, x: 8, y: 30 }, fillOpacity=0, pointSize=5, @@ -910,7 +915,7 @@ local g = import 'grafonnet/grafana.libsonnet'; [ $.addTargetSchema( expr='topk(10, sum by (cluster) 
(irate(ceph_osd_op_w[$__interval])) \n+ sum by (cluster) (irate(ceph_osd_op_r[$__interval])) )', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', instant=false, legendFormat='{{cluster}}', step=300, @@ -926,7 +931,7 @@ local g = import 'grafonnet/grafana.libsonnet'; drawStyle='line', axisPlacement='auto', title='Top 10 - Capacity Utilization(%) by Pool', - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={ h: 7, w: 8, x: 16, y: 30 }, fillOpacity=0, pointSize=5, @@ -950,8 +955,8 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='topk(10, ceph_pool_bytes_used{cluster=~"$Cluster"}/ceph_pool_max_avail{cluster=~"$Cluster"} * on(pool_id, cluster) group_left(instance, name) ceph_pool_metadata{cluster=~"$Cluster"})', - datasource='${DS_PROMETHEUS}', + expr='topk(10, ceph_pool_bytes_used{%(matchers)s}}/ceph_pool_max_avail{%(matchers)s}} * on(pool_id, cluster) group_left(instance, name) ceph_pool_metadata{%(matchers)s}})' % $.matchers(), + datasource='$datasource', instant=false, legendFormat='{{cluster}} - {{name}}', step=300, diff --git a/monitoring/ceph-mixin/dashboards/osd.libsonnet b/monitoring/ceph-mixin/dashboards/osd.libsonnet index ca25b0630dd..2b066ea5ff0 100644 --- a/monitoring/ceph-mixin/dashboards/osd.libsonnet +++ b/monitoring/ceph-mixin/dashboards/osd.libsonnet @@ -42,9 +42,6 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTemplate( $.addClusterTemplate() ) - .addTemplate( - $.addJobTemplate() - ) .addPanels([ $.simpleGraphPanel( { '@95%ile': '#e0752d' }, @@ -317,7 +314,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 'count(ceph_bluefs_wal_total_bytes{%(matchers)s})' % $.matchers(), 'bluestore', 'time_series', 2 )) .addTarget($.addTargetSchema( - 'absent(ceph_bluefs_wal_total_bytes{job=~"$job"}) * count(ceph_osd_metadata{job=~"$job"})' % $.matchers(), 'filestore', 'time_series', 2 + 'absent(ceph_bluefs_wal_total_bytes{%(matchers)s}) * count(ceph_osd_metadata{%(matchers)s})' % $.matchers(), 'filestore', 'time_series', 2 )), $.pieChartPanel('OSD Size Summary', 'The pie chart shows the various OSD sizes used within the cluster', '$datasource', { x: 8, y: 8, w: 4, h: 8 }, 'table', 'bottom', true, ['percent'], { mode: 'single', sort: 'none' }, 'pie', ['percent', 'value'], 'palette-classic') .addTarget($.addTargetSchema( @@ -555,9 +552,6 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addClusterTemplate() ) .addTemplate( - $.addJobTemplate() - ) - .addTemplate( $.addTemplateSchema('osd', '$datasource', 'label_values(ceph_osd_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(), @@ -577,11 +571,11 @@ local g = import 'grafonnet/grafana.libsonnet'; 's', 'Read (-) / Write (+)', ||| - rate(ceph_osd_op_r_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) / + rate(ceph_osd_op_r_latency_sum{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval]) / on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval]) ||| % $.matchers(), ||| - rate(ceph_osd_op_w_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) / + rate(ceph_osd_op_w_latency_sum{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval]) / on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval]) ||| % $.matchers(), 'read', @@ -602,8 +596,8 @@ local g = import 'grafonnet/grafana.libsonnet'; '', 'short', 'Read (-) / Write (+)', - 'rate(ceph_osd_op_r{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(), - 'rate(ceph_osd_op_w{%(matchers)s, 
ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(), + 'rate(ceph_osd_op_r{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(), + 'rate(ceph_osd_op_w{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(), 'Reads', 'Writes', 6, @@ -619,8 +613,8 @@ local g = import 'grafonnet/grafana.libsonnet'; '', 'bytes', 'Read (-) / Write (+)', - 'rate(ceph_osd_op_r_out_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(), - 'rate(ceph_osd_op_w_in_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(), + 'rate(ceph_osd_op_r_out_bytes{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(), + 'rate(ceph_osd_op_w_in_bytes{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(), 'Read Bytes', 'Write Bytes', 12, @@ -640,12 +634,12 @@ local g = import 'grafonnet/grafana.libsonnet'; ||| ( label_replace( - rate(node_disk_read_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) / - rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]), + rate(node_disk_read_time_seconds_total[$__rate_interval]) / + rate(node_disk_reads_completed_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*" ) and on (instance, device) label_replace( label_replace( - ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, + ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)" ), "instance", "$1", "instance", "([^:.]*).*" ) @@ -654,12 +648,12 @@ local g = import 'grafonnet/grafana.libsonnet'; ||| ( label_replace( - rate(node_disk_write_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) / - rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]), + rate(node_disk_write_time_seconds_total[$__rate_interval]) / + rate(node_disk_writes_completed_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace( label_replace( - ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)" + ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)" ), "instance", "$1", "instance", "([^:.]*).*" ) ) @@ -681,22 +675,22 @@ local g = import 'grafonnet/grafana.libsonnet'; 'Read (-) / Write (+)', ||| label_replace( - rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]), + rate(node_disk_writes_completed_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*" ) and on (instance, device) label_replace( label_replace( - ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, + ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)" ), "instance", "$1", "instance", "([^:.]*).*" ) ||| % $.matchers(), ||| label_replace( - rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]), + rate(node_disk_reads_completed_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*" ) and on (instance, device) label_replace( label_replace( - ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, + ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)" ), "instance", "$1", "instance", "([^:.]*).*" ) @@ -718,20 +712,20 @@ local g = import 'grafonnet/grafana.libsonnet'; 'Read (-) / Write (+)', ||| label_replace( - rate(node_disk_read_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*" + 
rate(node_disk_read_bytes_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*" ) and on (instance, device) label_replace( label_replace( - ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, + ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)" ), "instance", "$1", "instance", "([^:.]*).*" ) ||| % $.matchers(), ||| label_replace( - rate(node_disk_written_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*" + rate(node_disk_written_bytes_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*" ) and on (instance, device) label_replace( label_replace( - ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, + ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)" ), "instance", "$1", "instance", "([^:.]*).*" ) @@ -763,11 +757,11 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTarget($.addTargetSchema( ||| label_replace( - rate(node_disk_io_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]), + rate(node_disk_io_time_seconds_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*" ) and on (instance, device) label_replace( label_replace( - ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)" + ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)" ), "instance", "$1", "instance", "([^:.]*).*" ) ||| % $.matchers(), diff --git a/monitoring/ceph-mixin/dashboards/pool.libsonnet b/monitoring/ceph-mixin/dashboards/pool.libsonnet index 87839963fbc..06832114046 100644 --- a/monitoring/ceph-mixin/dashboards/pool.libsonnet +++ b/monitoring/ceph-mixin/dashboards/pool.libsonnet @@ -30,9 +30,6 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addClusterTemplate() ) .addTemplate( - $.addJobTemplate() - ) - .addTemplate( g.template.custom(label='TopK', name='topk', current='15', @@ -57,7 +54,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 'Pools with Compression', 'Count of the pools that have compression enabled', 'current', - 'count(ceph_pool_metadata{%(matchers)s, compression_mode!="none"})' % $.matchers(), + 'count(ceph_pool_metadata{compression_mode!="none", %(matchers)s})' % $.matchers(), null, '', 3, @@ -510,7 +507,7 @@ local g = import 'grafonnet/grafana.libsonnet'; true ), $.addTargetSchema( - 'ceph_pool_metadata{%(matchers)s, compression_mode!="none"}' % $.matchers(), 'K', 'table', 1, true + 'ceph_pool_metadata{compression_mode!="none", %(matchers)s}' % $.matchers(), 'K', 'table', 1, true ), $.addTargetSchema('', 'L', '', '', null), ] @@ -623,9 +620,6 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addClusterTemplate() ) .addTemplate( - $.addJobTemplate() - ) - .addTemplate( $.addTemplateSchema('pool_name', '$datasource', 'label_values(ceph_pool_metadata{%(matchers)s}, name)' % $.matchers(), @@ -648,7 +642,7 @@ local g = import 'grafonnet/grafana.libsonnet'; '.7,.8', ||| (ceph_pool_stored{%(matchers)s} / (ceph_pool_stored{%(matchers)s} + ceph_pool_max_avail{%(matchers)s})) * - on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} + on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} ||| % $.matchers(), 'time_series', 0, @@ -668,7 +662,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 'current', ||| (ceph_pool_max_avail{%(matchers)s} / deriv(ceph_pool_stored{%(matchers)s}[6h])) * - on(pool_id) 
group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} > 0 + on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} > 0 ||| % $.matchers(), 'time_series', 7, @@ -689,7 +683,7 @@ local g = import 'grafonnet/grafana.libsonnet'; null, ||| deriv(ceph_pool_objects{%(matchers)s}[1m]) * - on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} + on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} ||| % $.matchers(), 'Objects per second', 12, @@ -709,7 +703,7 @@ local g = import 'grafonnet/grafana.libsonnet'; null, ||| rate(ceph_pool_rd{%(matchers)s}[$__rate_interval]) * - on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} ||| % $.matchers(), 'reads', 0, @@ -722,7 +716,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addTargetSchema( ||| rate(ceph_pool_wr{%(matchers)s}[$__rate_interval]) * - on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} + on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} ||| % $.matchers(), 'writes' ) @@ -739,7 +733,7 @@ local g = import 'grafonnet/grafana.libsonnet'; null, ||| rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) + - on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} + on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} ||| % $.matchers(), 'reads', 12, @@ -752,7 +746,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addTargetSchema( ||| rate(ceph_pool_wr_bytes{%(matchers)s}[$__rate_interval]) + - on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} ||| % $.matchers(), 'writes' ) @@ -769,7 +763,7 @@ local g = import 'grafonnet/grafana.libsonnet'; null, ||| ceph_pool_objects{%(matchers)s} * - on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} ||| % $.matchers(), 'Number of Objects', 0, diff --git a/monitoring/ceph-mixin/dashboards/rbd.libsonnet b/monitoring/ceph-mixin/dashboards/rbd.libsonnet index d8cf6cf8485..bcb8a28cfe5 100644 --- a/monitoring/ceph-mixin/dashboards/rbd.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rbd.libsonnet @@ -1,5 +1,4 @@ local g = import 'grafonnet/grafana.libsonnet'; -local u = import 'utils.libsonnet'; local info_rbd_stats = std.join( '', @@ -68,22 +67,20 @@ local info_rbd_stats = std.join( $.addClusterTemplate() ) .addTemplate( - $.addJobTemplate() - ) - .addTemplate( $.addTemplateSchema('pool', '$datasource', - 'label_values(pool)', + 'label_values(ceph_rbd_read_ops{%(matchers)s}, pool)' % $.matchers(), 1, false, 0, '', '') ) + .addTemplate( $.addTemplateSchema('image', '$datasource', - 'label_values(image)', + 'label_values(ceph_rbd_read_ops{%(matchers)s, pool="$pool"}, image)' % $.matchers(), 1, false, 0, @@ -95,9 +92,9 @@ local info_rbd_stats = std.join( 'IOPS', info_rbd_stats, 'iops', - 'rate(ceph_rbd_write_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers() + 'rate(ceph_rbd_write_ops{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers() , - 
'rate(ceph_rbd_read_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(), + 'rate(ceph_rbd_read_ops{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers(), 0, 0, 8, @@ -107,8 +104,8 @@ local info_rbd_stats = std.join( 'Throughput', info_rbd_stats, 'Bps', - 'rate(ceph_rbd_write_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(), - 'rate(ceph_rbd_read_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(), + 'rate(ceph_rbd_write_bytes{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers(), + 'rate(ceph_rbd_read_bytes{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers(), 8, 0, 8, @@ -119,12 +116,12 @@ local info_rbd_stats = std.join( info_rbd_stats, 'ns', ||| - rate(ceph_rbd_write_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) / - rate(ceph_rbd_write_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) + rate(ceph_rbd_write_latency_sum{pool="$pool", image="$image", %(matchers)s}[$__rate_interval]) / + rate(ceph_rbd_write_latency_count{pool="$pool", image="$image", %(matchers)s}[$__rate_interval]) ||| % $.matchers(), ||| - rate(ceph_rbd_read_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) / - rate(ceph_rbd_read_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) + rate(ceph_rbd_read_latency_sum{pool="$pool", image="$image", %(matchers)s}[$__rate_interval]) / + rate(ceph_rbd_read_latency_count{pool="$pool", image="$image", %(matchers)s}[$__rate_interval]) ||| % $.matchers(), 16, 0, @@ -204,9 +201,6 @@ local info_rbd_stats = std.join( .addTemplate( $.addClusterTemplate() ) - .addTemplate( - $.addJobTemplate() - ) .addPanels([ RbdOverviewPanel( 'IOPS', diff --git a/monitoring/ceph-mixin/dashboards/rgw-s3-analytics.libsonnet b/monitoring/ceph-mixin/dashboards/rgw-s3-analytics.libsonnet index c470dbe5f19..720ffcb60a1 100644 --- a/monitoring/ceph-mixin/dashboards/rgw-s3-analytics.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rgw-s3-analytics.libsonnet @@ -29,9 +29,13 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTemplate( + $.addClusterTemplate() + ) + + .addTemplate( $.addTemplateSchema('rgw_servers', '$datasource', - 'label_values(ceph_rgw_metadata{}, ceph_daemon)', + 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(), 2, true, 0, @@ -63,7 +67,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='__auto', range=true @@ -87,7 +91,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='__auto', range=true @@ -111,7 +115,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='sum(ceph_rgw_op_put_obj_ops *\n on 
(instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='__auto', range=true @@ -135,7 +139,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ]) .addTargets([ $.addTargetSchema( - expr='sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='__auto', range=true @@ -151,48 +155,48 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='List Objects', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='List Buckets', range=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Put Objects', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Get Objects', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Delete Objects', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Delete Buckets', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) 
ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Copy Objects', range=true @@ -210,28 +214,28 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Put Objects', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Get Objects', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Delete Objects', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Copy Objects', range=true @@ -248,48 +252,48 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='List Object', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='List Bucket', range=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Put Object', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) 
ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Get Object', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Delete Object', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Delete Bucket', range=false, instant=true ), $.addTargetSchema( - expr='sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', legendFormat='Copy Object', range=true @@ -443,7 +447,7 @@ local g = import 'grafonnet/grafana.libsonnet'; }, ]).addTargets([ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -454,7 +458,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -465,7 +469,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -476,7 +480,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 
'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -646,7 +650,7 @@ local g = import 'grafonnet/grafana.libsonnet'; }, ]).addTargets([ $.addTargetSchema( - expr='ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -657,7 +661,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -668,7 +672,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -679,7 +683,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -690,7 +694,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -850,7 +854,7 @@ local g = import 'grafonnet/grafana.libsonnet'; }, ]).addTargets([ $.addTargetSchema( - expr='ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -861,7 +865,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', 
hide=false, @@ -872,7 +876,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -883,7 +887,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1053,7 +1057,7 @@ local g = import 'grafonnet/grafana.libsonnet'; }, ]).addTargets([ $.addTargetSchema( - expr='ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1064,7 +1068,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1075,7 +1079,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1086,7 +1090,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1097,7 +1101,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + expr='ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1121,7 +1125,7 @@ local g = import 
'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)', + expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(), datasource='${datasource}', legendFormat='{{ceph_daemon}} - {{bucket}}', range=false, @@ -1140,7 +1144,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)', + expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(), datasource='${datasource}', legendFormat='{{ceph_daemon}} - {{bucket}}', range=false, @@ -1159,7 +1163,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)', + expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(), datasource='${datasource}', legendFormat='{{ceph_daemon}} - {{bucket}}', range=false, @@ -1178,7 +1182,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)', + expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(), datasource='${datasource}', legendFormat='{{ceph_daemon}} - {{bucket}}', range=false, @@ -1220,7 +1224,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1264,7 +1268,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1308,7 +1312,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) 
((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1352,7 +1356,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1396,7 +1400,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1440,7 +1444,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1484,7 +1488,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1528,7 +1532,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1572,7 +1576,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) 
ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1741,7 +1745,7 @@ local g = import 'grafonnet/grafana.libsonnet'; }, ]).addTargets([ $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1752,7 +1756,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1763,7 +1767,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1774,7 +1778,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1785,7 +1789,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -1810,7 +1814,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)\n', + expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)\n' % $.matchers(), datasource='${datasource}', legendFormat='{{ceph_daemon}} - {{user}}', range=false, @@ -1829,7 +1833,7 @@ local 
g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)\n', + expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)\n' % $.matchers(), datasource='${datasource}', legendFormat='{{ceph_daemon}} - {{user}}', range=false, @@ -1848,7 +1852,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)', + expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(), datasource='${datasource}', legendFormat='{{ceph_daemon}} - {{user}}', range=false, @@ -1867,7 +1871,7 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addTargets([ $.addTargetSchema( - expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)', + expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(), datasource='${datasource}', legendFormat='{{ceph_daemon}} - {{user}}', range=false, @@ -1909,7 +1913,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1953,7 +1957,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -1997,7 +2001,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -2041,7 +2045,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left 
(ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -2085,7 +2089,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -2129,7 +2133,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -2173,7 +2177,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -2217,7 +2221,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -2261,7 +2265,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTargets( [ $.addTargetSchema( - expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource='${datasource}', format='time_series', instant=false, @@ -2386,7 +2390,7 @@ local g = import 'grafonnet/grafana.libsonnet'; }, ]).addTargets([ $.addTargetSchema( - expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -2397,7 
+2401,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -2408,7 +2412,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -2419,7 +2423,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, @@ -2430,7 +2434,7 @@ local g = import 'grafonnet/grafana.libsonnet'; range=false, ), $.addTargetSchema( - expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})', + expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(), datasource={ type: 'prometheus', uid: '${datasource}' }, format='table', hide=false, diff --git a/monitoring/ceph-mixin/dashboards/rgw.libsonnet b/monitoring/ceph-mixin/dashboards/rgw.libsonnet index 56ba1f0a749..c7c447ca71a 100644 --- a/monitoring/ceph-mixin/dashboards/rgw.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rgw.libsonnet @@ -1,5 +1,4 @@ local g = import 'grafonnet/grafana.libsonnet'; -local u = import 'utils.libsonnet'; (import 'utils.libsonnet') { 'radosgw-sync-overview.json': @@ -59,9 +58,7 @@ local u = import 'utils.libsonnet'; .addTemplate( $.addClusterTemplate() ) - .addTemplate( - $.addJobTemplate() - ) + .addTemplate( $.addTemplateSchema( 'rgw_servers', @@ -70,8 +67,8 @@ local u = import 'utils.libsonnet'; 1, true, 1, - '', - 'RGW Server' + null, + 'rgw.(.*)' ) ) .addPanels([ @@ -197,9 +194,6 @@ local u = import 'utils.libsonnet'; $.addClusterTemplate() ) .addTemplate( - $.addJobTemplate() - ) - .addTemplate( $.addTemplateSchema( 'rgw_servers', '$datasource', @@ -208,7 +202,7 @@ local u = import 'utils.libsonnet'; true, 1, '', - 'RGW Server' + '.*' ) ) .addTemplate( @@ -714,9 +708,6 @@ local u = import 'utils.libsonnet'; $.addClusterTemplate() ) .addTemplate( - $.addJobTemplate() - ) - .addTemplate( $.addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(), @@ -738,13 +729,13 @@ local u = import 'utils.libsonnet'; sum by (instance_id) ( 
rate(ceph_rgw_op_get_obj_lat_sum{%(matchers)s}[$__rate_interval]) / rate(ceph_rgw_op_get_obj_lat_count{%(matchers)s}[$__rate_interval]) - ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), ||| sum by (instance_id) ( rate(ceph_rgw_op_put_obj_lat_sum{%(matchers)s}[$__rate_interval]) / rate(ceph_rgw_op_put_obj_lat_count{%(matchers)s}[$__rate_interval]) - ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), 'GET {{ceph_daemon}}', 'PUT {{ceph_daemon}}', @@ -761,12 +752,12 @@ local u = import 'utils.libsonnet'; 'short', ||| rate(ceph_rgw_op_get_obj_bytes{%(matchers)s}[$__rate_interval]) * - on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), ||| rate(ceph_rgw_op_put_obj_bytes{%(matchers)s}[$__rate_interval]) * on (instance_id) group_left (ceph_daemon) - ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), 'GETs {{ceph_daemon}}', 'PUTs {{ceph_daemon}}', @@ -789,11 +780,11 @@ local u = import 'utils.libsonnet'; 'short', ||| rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) * - on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s,ceph_daemon=~"$rgw_servers"} + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), ||| rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) * - on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), 'Requests Failed {{ceph_daemon}}', 'GETs {{ceph_daemon}}', @@ -807,7 +798,7 @@ local u = import 'utils.libsonnet'; $.addTargetSchema( ||| rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) * - on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), 'PUTs {{ceph_daemon}}' ), @@ -819,7 +810,7 @@ local u = import 'utils.libsonnet'; rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) + rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) ) - ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), 'Other {{ceph_daemon}}' ), @@ -874,21 +865,21 @@ local u = import 'utils.libsonnet'; .addTarget($.addTargetSchema( ||| rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) * - on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), 'Failures {{ceph_daemon}}' )) .addTarget($.addTargetSchema( ||| rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) * - on (instance_id) group_left 
(ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), 'GETs {{ceph_daemon}}' )) .addTarget($.addTargetSchema( ||| rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) * - on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), 'PUTs {{ceph_daemon}}' )) @@ -901,7 +892,7 @@ local u = import 'utils.libsonnet'; rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) ) ) * on (instance_id) group_left (ceph_daemon) - ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"} + ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s} ||| % $.matchers(), 'Other (DELETE,LIST) {{ceph_daemon}}' )), diff --git a/monitoring/ceph-mixin/dashboards/utils.libsonnet b/monitoring/ceph-mixin/dashboards/utils.libsonnet index 58768b4f40a..333a444dda1 100644 --- a/monitoring/ceph-mixin/dashboards/utils.libsonnet +++ b/monitoring/ceph-mixin/dashboards/utils.libsonnet @@ -220,44 +220,28 @@ local timeSeries = import 'timeseries_panel.libsonnet'; unit: unit, valueMaps: valueMaps, }, + matchers():: - local jobMatcher = 'job=~"$job"'; local clusterMatcher = '%s=~"$cluster"' % $._config.clusterLabel; { // Common labels - jobMatcher: jobMatcher, - clusterMatcher: (if $._config.showMultiCluster then clusterMatcher else ''), - matchers: jobMatcher + - (if $._config.showMultiCluster then ', ' + clusterMatcher else ''), + matchers: (if $._config.showMultiCluster then clusterMatcher + ', ' else ''), }, + addClusterTemplate():: $.addTemplateSchema( 'cluster', '$datasource', - 'label_values(ceph_osd_metadata, %s)' % $._config.clusterLabel, + 'label_values(ceph_health_status, %s)' % $._config.clusterLabel, 1, - true, + false, 1, 'cluster', '(.*)', if !$._config.showMultiCluster then 'variable' else '', - multi=true, - allValues='.+', - ), - - addJobTemplate():: - $.addTemplateSchema( - 'job', - '$datasource', - 'label_values(ceph_osd_metadata{%(clusterMatcher)s}, job)' % $.matchers(), - 1, - true, - 1, - 'job', - '(.*)', - multi=true, - allValues='.+', + multi=false, + allValues=null, ), overviewStyle(alias, @@ -495,7 +479,7 @@ local timeSeries = import 'timeseries_panel.libsonnet'; addGaugePanel(title='', description='', transparent=false, - datasource='${DS_PROMETHEUS}', + datasource='$datasource', gridPosition={}, pluginVersion='9.1.3', unit='percentunit', diff --git a/monitoring/ceph-mixin/dashboards_out/.lint b/monitoring/ceph-mixin/dashboards_out/.lint deleted file mode 100644 index 6352e858f28..00000000000 --- a/monitoring/ceph-mixin/dashboards_out/.lint +++ /dev/null @@ -1,5 +0,0 @@ -exclusions: - template-instance-rule: - reason: "Instance template not needed because of ceph-mgr leader election." - target-instance-rule: - reason: "Instance matcher not needed because of ceph-mgr leader election." 
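Note on the utils.libsonnet hunk above: the patch collapses the old job+cluster matcher pair into a single cluster matcher (with a trailing ", " when showMultiCluster is enabled) and switches the cluster template to a non-multi query on ceph_health_status. As a rough, standalone sketch (not part of this patch — config and matchers() here are simplified stand-ins for $._config and $.matchers()) of how the reworked helper expands inside the '%(matchers)s' query templates used throughout the dashboards:

local config = { showMultiCluster: true, clusterLabel: 'cluster' };
local matchers() = {
  // Same shape as the new $.matchers(): only a cluster matcher, no job matcher.
  local clusterMatcher = '%s=~"$cluster"' % config.clusterLabel,
  matchers: (if config.showMultiCluster then clusterMatcher + ', ' else ''),
};

{
  // Renders to: ceph_health_status{cluster=~"$cluster", } when showMultiCluster=true,
  // or ceph_health_status{} when it is false — consistent with the selectors seen
  // in the regenerated dashboards_out JSON in the hunks that follow.
  example: 'ceph_health_status{%(matchers)s}' % matchers(),
}

Under this reading, the trailing ", " inside the rendered selectors (e.g. ceph_health_status{cluster=~"$cluster", }) is expected output of the helper, not a typo in the generated dashboards.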
diff --git a/monitoring/ceph-mixin/dashboards_out/ceph-cluster-advanced.json b/monitoring/ceph-mixin/dashboards_out/ceph-cluster-advanced.json index 216e02ed1a3..ff31ebf23ba 100644 --- a/monitoring/ceph-mixin/dashboards_out/ceph-cluster-advanced.json +++ b/monitoring/ceph-mixin/dashboards_out/ceph-cluster-advanced.json @@ -70,7 +70,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -148,8 +148,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "ceph_health_status{}", + "datasource": "$datasource", + "expr": "ceph_health_status{cluster=~\"$cluster\", }", "format": "time_series", "instant": true, "interval": "$interval", @@ -164,7 +164,7 @@ "type": "stat" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -226,8 +226,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "(ceph_cluster_total_bytes{}-ceph_cluster_total_used_bytes{})/ceph_cluster_total_bytes{}", + "datasource": "$datasource", + "expr": "(ceph_cluster_total_bytes{cluster=~\"$cluster\", }-ceph_cluster_total_used_bytes{cluster=~\"$cluster\", })/ceph_cluster_total_bytes{cluster=~\"$cluster\", }", "format": "time_series", "instant": true, "interval": "$interval", @@ -243,7 +243,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -306,8 +306,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "ceph_cluster_total_bytes{}", + "datasource": "$datasource", + "expr": "ceph_cluster_total_bytes{cluster=~\"$cluster\", }", "format": "time_series", "instant": true, "interval": "$interval", @@ -323,7 +323,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -377,8 +377,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(irate(ceph_osd_op_w_in_bytes{}[5m]))", + "datasource": "$datasource", + "expr": "sum(irate(ceph_osd_op_w_in_bytes{cluster=~\"$cluster\", }[5m]))", "format": "time_series", "instant": true, "interval": "$interval", @@ -393,7 +393,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -455,8 +455,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(irate(ceph_osd_op_r_out_bytes{}[5m]))", + "datasource": "$datasource", + "expr": "sum(irate(ceph_osd_op_r_out_bytes{cluster=~\"$cluster\", }[5m]))", "format": "time_series", "instant": true, "interval": "$interval", @@ -478,7 +478,7 @@ "warn": "rgba(237, 129, 40, 0.9)" }, "cornerRadius": 0, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "displayName": "", "fieldConfig": { @@ -536,12 +536,12 @@ { "aggregation": "Last", "alias": "All", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_osd_metadata)", + "expr": "count(ceph_osd_metadata{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -553,12 +553,12 @@ { "aggregation": "Last", "alias": "In", - "datasource": "${DS_PROMETHEUS}", + "datasource": 
"$datasource", "decimals": 2, "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_osd_in)", + "expr": "count(ceph_osd_in{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -570,12 +570,12 @@ { "aggregation": "Last", "alias": "Out", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Warning / Critical", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "sum(ceph_osd_in == bool 0)", + "expr": "sum(ceph_osd_in{cluster=~\"$cluster\", } == bool 0)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -588,12 +588,12 @@ { "aggregation": "Last", "alias": "Up", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "sum(ceph_osd_up)", + "expr": "sum(ceph_osd_up{cluster=~\"$cluster\", })", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -605,12 +605,12 @@ { "aggregation": "Last", "alias": "Down", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Warning / Critical", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "sum(ceph_osd_up == bool 0)", + "expr": "sum(ceph_osd_up{cluster=~\"$cluster\", } == bool 0)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -634,7 +634,7 @@ "warn": "rgba(237, 129, 40, 0.9)" }, "cornerRadius": 1, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "displayName": "", "fieldConfig": { @@ -692,12 +692,12 @@ { "aggregation": "Last", "alias": "Active", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_mgr_status == 1) or vector(0)", + "expr": "count(ceph_mgr_status{cluster=~\"$cluster\", } == 1) or vector(0)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -709,12 +709,12 @@ { "aggregation": "Last", "alias": "Standby", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_mgr_status == 0) or vector(0)", + "expr": "count(ceph_mgr_status{cluster=~\"$cluster\", } == 0) or vector(0)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -737,7 +737,7 @@ "warn": "rgba(237, 129, 40, 0.9)" }, "cornerRadius": 1, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "displayName": "", "fieldConfig": { @@ -827,12 +827,12 @@ { "aggregation": "Last", "alias": "Active", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"critical\"}) OR vector(0)", + "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"critical\", cluster=~\"$cluster\", }) OR vector(0)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -844,12 +844,12 @@ { "aggregation": "Last", "alias": "Standby", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, 
"displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"warning\"}) OR vector(0)", + "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"warning\", cluster=~\"$cluster\", }) OR vector(0)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -865,7 +865,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "displayName": "", "fieldConfig": { @@ -929,8 +929,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "ceph_cluster_total_used_bytes{}", + "datasource": "$datasource", + "expr": "ceph_cluster_total_used_bytes{cluster=~\"$cluster\", }", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -944,7 +944,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "displayName": "", "fieldConfig": { @@ -1000,8 +1000,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(irate(ceph_osd_op_w{}[1m]))", + "datasource": "$datasource", + "expr": "sum(irate(ceph_osd_op_w{cluster=~\"$cluster\", }[1m]))", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -1015,7 +1015,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "displayName": "", "fieldConfig": { @@ -1079,8 +1079,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(irate(ceph_osd_op_r{}[1m]))", + "datasource": "$datasource", + "expr": "sum(irate(ceph_osd_op_r{cluster=~\"$cluster\", }[1m]))", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -1101,7 +1101,7 @@ "warn": "rgba(237, 129, 40, 0.9)" }, "cornerRadius": 1, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "displayName": "", "fieldConfig": { @@ -1159,12 +1159,12 @@ { "aggregation": "Last", "alias": "In Quorum", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "sum(ceph_mon_quorum_status)", + "expr": "sum(ceph_mon_quorum_status{cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "In Quorum", @@ -1176,12 +1176,12 @@ "aggregation": "Last", "alias": "Total", "crit": 1, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_mon_quorum_status)", + "expr": "count(ceph_mon_quorum_status{cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", @@ -1194,12 +1194,12 @@ "aggregation": "Last", "alias": "MONs out of Quorum", "crit": 1.6000000000000001, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "decimals": 2, "displayAliasType": "Warning / Critical", "displayType": "Annotation", "displayValueWithAlias": "Never", - "expr": "count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status)", + "expr": "count(ceph_mon_quorum_status{cluster=~\"$cluster\", }) - sum(ceph_mon_quorum_status{cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "MONs out of Quorum", @@ -1248,7 +1248,7 @@ "limit": 10, "onlyAlertsOnDashboard": true, "options": { - 
"alertInstanceLabelFilter": "{alertname=~\"^Ceph.+\"}", + "alertInstanceLabelFilter": "{alertname=~\"^Ceph.+\", cluster=~\"$cluster\", }", "alertName": "", "dashboardAlerts": false, "groupBy": [ ], @@ -1271,7 +1271,7 @@ "type": "alertlist" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -1393,8 +1393,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "ceph_cluster_total_bytes{}", + "datasource": "$datasource", + "expr": "ceph_cluster_total_bytes{cluster=~\"$cluster\", }", "format": "time_series", "instant": false, "interval": "$interval", @@ -1405,8 +1405,8 @@ "step": 300 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "ceph_cluster_total_used_bytes{}", + "datasource": "$datasource", + "expr": "ceph_cluster_total_used_bytes{cluster=~\"$cluster\", }", "format": "time_series", "instant": false, "interval": "$interval", @@ -1421,7 +1421,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -1502,8 +1502,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(irate(ceph_osd_op_w_in_bytes{}[5m]))", + "datasource": "$datasource", + "expr": "sum(irate(ceph_osd_op_w_in_bytes{cluster=~\"$cluster\", }[5m]))", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -1513,8 +1513,8 @@ "step": 300 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(irate(ceph_osd_op_r_out_bytes{}[5m]))", + "datasource": "$datasource", + "expr": "sum(irate(ceph_osd_op_r_out_bytes{cluster=~\"$cluster\", }[5m]))", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -1528,7 +1528,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -1609,8 +1609,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(irate(ceph_osd_op_w{}[1m]))", + "datasource": "$datasource", + "expr": "sum(irate(ceph_osd_op_w{cluster=~\"$cluster\", }[1m]))", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -1620,8 +1620,8 @@ "step": 300 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(irate(ceph_osd_op_r{}[1m]))", + "datasource": "$datasource", + "expr": "sum(irate(ceph_osd_op_r{cluster=~\"$cluster\", }[1m]))", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -1635,7 +1635,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -1711,8 +1711,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "(ceph_pool_bytes_used{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "datasource": "$datasource", + "expr": "(ceph_pool_bytes_used{cluster=~\"$cluster\", }) *on (pool_id) group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -1725,7 +1725,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -1813,8 +1813,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "(ceph_pool_stored_raw{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "datasource": "$datasource", + "expr": "(ceph_pool_stored_raw{cluster=~\"$cluster\", }) *on (pool_id) 
group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })", "format": "time_series", "hide": false, "interval": "", @@ -1829,7 +1829,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -1905,8 +1905,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "(ceph_pool_quota_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "datasource": "$datasource", + "expr": "(ceph_pool_quota_objects{cluster=~\"$cluster\", }) *on (pool_id) group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -1919,7 +1919,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -1995,8 +1995,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "(ceph_pool_quota_bytes{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "datasource": "$datasource", + "expr": "(ceph_pool_quota_bytes{cluster=~\"$cluster\", }) *on (pool_id) group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -2009,7 +2009,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -2085,8 +2085,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "(ceph_pool_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "datasource": "$datasource", + "expr": "(ceph_pool_objects{cluster=~\"$cluster\", }) * on (pool_id) group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -2117,7 +2117,7 @@ "type": "row" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -2208,8 +2208,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pool_objects)", + "datasource": "$datasource", + "expr": "sum(ceph_pool_objects{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2223,7 +2223,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -2316,8 +2316,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_active{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_active{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2326,8 +2326,8 @@ "refId": "A" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_clean{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_clean{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2336,8 +2336,8 @@ "refId": "B" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_peering{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_peering{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2346,8 +2346,8 @@ "refId": "C" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_degraded{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_degraded{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", 
"intervalFactor": 1, @@ -2357,8 +2357,8 @@ "step": 300 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_stale{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_stale{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2368,8 +2368,8 @@ "step": 300 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_unclean_pgs{})", + "datasource": "$datasource", + "expr": "sum(ceph_unclean_pgs{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2379,8 +2379,8 @@ "step": 300 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_undersized{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_undersized{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2390,8 +2390,8 @@ "step": 300 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_incomplete{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_incomplete{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2400,8 +2400,8 @@ "refId": "H" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_forced_backfill{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_forced_backfill{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2410,8 +2410,8 @@ "refId": "I" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_forced_recovery{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_forced_recovery{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2420,8 +2420,8 @@ "refId": "J" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_creating{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_creating{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2430,8 +2430,8 @@ "refId": "K" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_wait_backfill{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_wait_backfill{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2440,8 +2440,8 @@ "refId": "L" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_deep{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_deep{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2450,8 +2450,8 @@ "refId": "M" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_scrubbing{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_scrubbing{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2460,8 +2460,8 @@ "refId": "N" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_recovering{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_recovering{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2470,8 +2470,8 @@ "refId": "O" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_repair{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_repair{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2480,8 +2480,8 @@ "refId": "P" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_down{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_down{cluster=~\"$cluster\", })", 
"format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2490,8 +2490,8 @@ "refId": "Q" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_peered{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_peered{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2500,8 +2500,8 @@ "refId": "R" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_backfill{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_backfill{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2510,8 +2510,8 @@ "refId": "S" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_remapped{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_remapped{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2520,8 +2520,8 @@ "refId": "T" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_backfill_toofull{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_backfill_toofull{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2534,7 +2534,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -2628,8 +2628,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_degraded{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_degraded{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2639,8 +2639,8 @@ "step": 300 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_stale{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_stale{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2650,8 +2650,8 @@ "step": 300 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_pg_undersized{})", + "datasource": "$datasource", + "expr": "sum(ceph_pg_undersized{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2665,7 +2665,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -2740,8 +2740,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(irate(ceph_osd_recovery_ops{}[$interval]))", + "datasource": "$datasource", + "expr": "sum(irate(ceph_osd_recovery_ops{cluster=~\"$cluster\", }[$interval]))", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -2777,7 +2777,7 @@ "mode": "opacity" }, "dataFormat": "timeseries", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { @@ -2855,8 +2855,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "ceph_osd_apply_latency_ms{}", + "datasource": "$datasource", + "expr": "ceph_osd_apply_latency_ms{cluster=~\"$cluster\", }", "format": "time_series", "instant": false, "interval": "$interval", @@ -2902,7 +2902,7 @@ "mode": "opacity" }, "dataFormat": "timeseries", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { @@ -2979,8 +2979,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "ceph_osd_commit_latency_ms{}", + "datasource": "$datasource", + "expr": 
"ceph_osd_commit_latency_ms{cluster=~\"$cluster\", }", "format": "time_series", "instant": false, "interval": "$interval", @@ -3026,7 +3026,7 @@ "mode": "opacity" }, "dataFormat": "timeseries", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { @@ -3104,8 +3104,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0", + "datasource": "$datasource", + "expr": "rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[5m]) / rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[5m]) >= 0", "format": "time_series", "instant": false, "interval": "$interval", @@ -3151,7 +3151,7 @@ "mode": "opacity" }, "dataFormat": "timeseries", - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { @@ -3229,8 +3229,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0", + "datasource": "$datasource", + "expr": "rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[5m]) / rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[5m]) >= 0", "format": "time_series", "instant": false, "interval": "$interval", @@ -3264,7 +3264,7 @@ "yBucketSize": null }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -3339,16 +3339,16 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "avg(rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0)", + "datasource": "$datasource", + "expr": "avg(rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[5m]) / rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[5m]) >= 0)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read", "refId": "A" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "avg(rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0)", + "datasource": "$datasource", + "expr": "avg(rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[5m]) / rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[5m]) >= 0)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write", @@ -3359,7 +3359,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -3437,8 +3437,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "avg(ceph_osd_apply_latency_ms{})", + "datasource": "$datasource", + "expr": "avg(ceph_osd_apply_latency_ms{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -3448,8 +3448,8 @@ "step": 4 }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "avg(ceph_osd_commit_latency_ms{})", + "datasource": "$datasource", + "expr": "avg(ceph_osd_commit_latency_ms{cluster=~\"$cluster\", })", "format": "time_series", "interval": "$interval", "intervalFactor": 1, @@ -3492,7 +3492,7 @@ }, { "columns": [ ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { @@ -3552,9 +3552,9 @@ "styles": "", "targets": [ { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "exemplar": false, - "expr": "count by (ceph_version)(ceph_osd_metadata{})", + "expr": "count by 
(ceph_version)(ceph_osd_metadata{cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -3565,9 +3565,9 @@ "refId": "A" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "exemplar": false, - "expr": "count by (ceph_version)(ceph_mon_metadata{})", + "expr": "count by (ceph_version)(ceph_mon_metadata{cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -3578,9 +3578,9 @@ "refId": "B" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "exemplar": false, - "expr": "count by (ceph_version)(ceph_mds_metadata{})", + "expr": "count by (ceph_version)(ceph_mds_metadata{cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -3590,9 +3590,9 @@ "refId": "C" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "exemplar": false, - "expr": "count by (ceph_version)(ceph_rgw_metadata{})", + "expr": "count by (ceph_version)(ceph_rgw_metadata{cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -3603,9 +3603,9 @@ "refId": "D" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "exemplar": false, - "expr": "count by (ceph_version)(ceph_mgr_metadata{})", + "expr": "count by (ceph_version)(ceph_mgr_metadata{cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -3655,12 +3655,12 @@ "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": "Data Source", - "name": "DS_PROMETHEUS", + "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, @@ -3668,6 +3668,26 @@ "type": "datasource" }, { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_health_status, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { "auto": true, "auto_count": 10, "auto_min": "1m", @@ -3788,5 +3808,6 @@ }, "timezone": "", "title": "Ceph Cluster - Advanced", + "uid": "dn13KBeTv", "version": 0 } diff --git a/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json b/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json index 50c850ab197..f65ce4da613 100644 --- a/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json @@ -113,14 +113,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", + "expr": "sum(rate(ceph_objecter_op_r{ceph_daemon=~\"($mds_servers).*\", cluster=~\"$cluster\", }[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read Ops", "refId": "A" }, { - "expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", + "expr": "sum(rate(ceph_objecter_op_w{ceph_daemon=~\"($mds_servers).*\", cluster=~\"$cluster\", }[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write Ops", @@ -215,7 +215,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_mds_server_handle_client_request{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}", + "expr": "ceph_mds_server_handle_client_request{ceph_daemon=~\"($mds_servers).*\", cluster=~\"$cluster\", }", "format": "time_series", 
"intervalFactor": 1, "legendFormat": "{{ceph_daemon}}", @@ -283,36 +283,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -332,7 +312,7 @@ "multi": false, "name": "mds_servers", "options": [ ], - "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)", + "query": "label_values(ceph_mds_inodes{cluster=~\"$cluster\", }, ceph_daemon)", "refresh": 1, "regex": "", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/host-details.json b/monitoring/ceph-mixin/dashboards_out/host-details.json index e85bc64ec83..ef357d34b64 100644 --- a/monitoring/ceph-mixin/dashboards_out/host-details.json +++ b/monitoring/ceph-mixin/dashboards_out/host-details.json @@ -123,7 +123,7 @@ "tableColumn": "", "targets": [ { - "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{job=~\"$job\", hostname='$ceph_hosts'}))", + "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{cluster=~\"$cluster\", }))", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -651,7 +651,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(\n ceph_osd_stat_bytes{job=~\"$job\"} and\n on (ceph_daemon) ceph_disk_occupation{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n", + "expr": "sum(\n ceph_osd_stat_bytes{cluster=~\"$cluster\", } and\n on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", cluster=~\"$cluster\", }\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -854,14 +854,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) writes", "refId": "A" }, { - "expr": "label_replace(\n (\n 
rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"},\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{cluster=~\"$cluster\", },\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) reads", @@ -961,14 +961,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", + "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) write", "refId": "A" }, { - "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", + "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) read", @@ -1158,7 +1158,7 @@ "steppedLine": false, "targets": [ { 
- "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}})", @@ -1304,7 +1304,7 @@ "styles": "", "targets": [ { - "expr": "topk(10,\n (sum by (instance)(ceph_daemon_health_metrics{type=\"SLOW_OPS\", ceph_daemon=~\"osd.*\"}))\n)\n", + "expr": "topk(10,\n (sum by (instance)(ceph_daemon_health_metrics{type=\"SLOW_OPS\", ceph_daemon=~\"osd.*\", cluster=~\"$cluster\", }))\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1357,36 +1357,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -1401,15 +1381,15 @@ "current": { }, "datasource": "$datasource", "hide": 0, - "includeAll": false, - "label": "Hostname", + "includeAll": true, + "label": null, "multi": false, "name": "ceph_hosts", "options": [ ], - "query": "label_values(instance)", + "query": "label_values({__name__=~\"ceph_.+_metadata\", cluster=~\"$cluster\", }, hostname)", "refresh": 1, - "regex": "([^.:]*).*", - "sort": 3, + "regex": "([^.]*).*", + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", diff --git a/monitoring/ceph-mixin/dashboards_out/hosts-overview.json b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json index ef8f12abe93..adbf676f5e9 100644 --- a/monitoring/ceph-mixin/dashboards_out/hosts-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json @@ -104,7 +104,7 @@ "tableColumn": "", "targets": [ { - "expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\"}))", + "expr": "count(sum by (hostname) (ceph_osd_metadata{cluster=~\"$cluster\", }))", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -436,7 +436,7 @@ "tableColumn": 
"", "targets": [ { - "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n", + "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($osd_hosts).*\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -519,7 +519,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n", + "expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((node_bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((node_bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -687,7 +687,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", 
\"master\", \"(.+)\"))\n))\n", + "expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((node_bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", @@ -755,36 +755,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -804,7 +784,7 @@ "multi": false, "name": "osd_hosts", "options": [ ], - "query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)", + "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\", }, hostname)", "refresh": 1, "regex": "([^.]*).*", "sort": 1, @@ -824,7 +804,7 @@ "multi": false, "name": "mon_hosts", "options": [ ], - "query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)", + "query": "label_values(ceph_mon_metadata{cluster=~\"$cluster\", }, hostname)", "refresh": 1, "regex": "mon.(.*)", "sort": 1, @@ -844,7 +824,7 @@ "multi": false, "name": "mds_hosts", "options": [ ], - "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)", + "query": "label_values(ceph_mds_inodes{hostname, cluster=~\"$cluster\", })", "refresh": 1, "regex": "mds.(.*)", "sort": 1, @@ -864,7 +844,7 @@ "multi": false, "name": "rgw_hosts", "options": [ ], - "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "query": "label_values(ceph_rgw_metadata{hostname, cluster=~\"$cluster\", })", "refresh": 1, "regex": "rgw.(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json b/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json index ff8bcdd0254..70fbcf92bf1 100644 --- a/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json @@ -45,7 +45,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -163,7 +163,7 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "expr": "count(ceph_health_status==0) or vector(0)", "format": "time_series", "intervalFactor": 1, @@ -171,7 +171,7 @@ "refId": "A" }, { - "datasource": "${DS_PROMETHEUS}", + 
"datasource": "$datasource", "expr": "count(ceph_health_status==1)", "format": "time_series", "intervalFactor": 1, @@ -179,7 +179,7 @@ "refId": "B" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "expr": "count(ceph_health_status==2)", "format": "time_series", "intervalFactor": 1, @@ -193,7 +193,7 @@ }, { "columns": [ ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { @@ -352,7 +352,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, "expr": "ceph_health_status", @@ -368,7 +368,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, "expr": "ceph_mgr_metadata", @@ -384,10 +384,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "count(ALERTS{alertstate=\"firing\", cluster=~\"$Cluster\"})", + "expr": "count(ALERTS{alertstate=\"firing\", cluster=~\"$cluster\"})", "format": "table", "hide": false, "instant": true, @@ -400,7 +400,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, "expr": "sum by (cluster) (irate(ceph_pool_wr[$__interval])) \n+ sum by (cluster) (irate(ceph_pool_rd[$__interval])) ", @@ -416,7 +416,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, "expr": "sum by (cluster) (irate(ceph_pool_rd_bytes[$__interval]))\n+ sum by (cluster) (irate(ceph_pool_wr_bytes[$__interval])) ", @@ -432,7 +432,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, "expr": "ceph_cluster_by_class_total_used_bytes", @@ -550,7 +550,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -600,10 +600,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "count(ceph_health_status{cluster=~\"$Cluster\"}) or vector(0)", + "expr": "count(ceph_health_status{cluster=~\"$cluster\"}) or vector(0)", "format": "table", "hide": false, "instant": true, @@ -619,7 +619,7 @@ "type": "stat" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -671,8 +671,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "sum(ceph_cluster_total_used_bytes{cluster=~\"$Cluster\"}) / sum(ceph_cluster_total_bytes{cluster=~\"$Cluster\"})", + "datasource": "$datasource", + "expr": "sum(ceph_cluster_total_used_bytes{cluster=~\"$cluster\"}) / sum(ceph_cluster_total_bytes{cluster=~\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -686,7 +686,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -732,10 +732,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "sum(ceph_cluster_total_bytes{cluster=~\"$Cluster\"})", + "expr": "sum(ceph_cluster_total_bytes{cluster=~\"$cluster\"})", "format": "table", "hide": false, "instant": false, @@ -752,7 +752,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -798,10 
+798,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "count(ceph_osd_metadata{cluster=~\"$Cluster\"})", + "expr": "count(ceph_osd_metadata{cluster=~\"$cluster\"})", "format": "table", "hide": false, "instant": false, @@ -818,7 +818,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -864,10 +864,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "count(sum by (hostname) (ceph_osd_metadata{cluster=~\"$Cluster\"}))", + "expr": "count(sum by (hostname) (ceph_osd_metadata{cluster=~\"$cluster\"}))", "format": "table", "hide": false, "instant": false, @@ -884,7 +884,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -930,10 +930,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "sum(irate(ceph_pool_wr{cluster=~\"$Cluster\"}[$__interval]))", + "expr": "sum(irate(ceph_pool_wr{cluster=~\"$cluster\"}[$__interval]))", "format": "time_series", "hide": false, "instant": false, @@ -945,10 +945,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "sum(irate(ceph_pool_rd{cluster=~\"$Cluster\"}[$__interval]))", + "expr": "sum(irate(ceph_pool_rd{cluster=~\"$cluster\"}[$__interval]))", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -963,7 +963,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -1009,10 +1009,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "avg(ceph_osd_apply_latency_ms{cluster=~\"$Cluster\"})", + "expr": "avg(ceph_osd_apply_latency_ms{cluster=~\"$cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -1024,10 +1024,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "avg(ceph_osd_commit_latency_ms{cluster=~\"$Cluster\"})", + "expr": "avg(ceph_osd_commit_latency_ms{cluster=~\"$cluster\"})", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -1042,7 +1042,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -1092,10 +1092,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "count(ALERTS{alertstate=\"firing\", cluster=~\"$Cluster\"}) or vector(0)", + "expr": "count(ALERTS{alertstate=\"firing\", cluster=~\"$cluster\"}) or vector(0)", "format": "table", "hide": false, "instant": true, @@ -1112,7 +1112,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -1158,10 +1158,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "sum(ceph_cluster_total_used_bytes{cluster=~\"$Cluster\"})", + "expr": "sum(ceph_cluster_total_used_bytes{cluster=~\"$cluster\"})", "format": "table", "hide": false, "instant": false, @@ -1178,7 +1178,7 @@ }, { "colors": null, - "datasource": 
"${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -1224,7 +1224,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, "expr": "predict_linear(avg(increase(ceph_cluster_total_used_bytes{cluster=~\"${Cluster}\"}[1d]))[7d:1h],120)", @@ -1242,7 +1242,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -1288,10 +1288,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "count(ceph_pool_metadata{cluster=~\"$Cluster\"})", + "expr": "count(ceph_pool_metadata{cluster=~\"$cluster\"})", "format": "table", "hide": false, "instant": false, @@ -1308,7 +1308,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -1354,10 +1354,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "sum(irate(ceph_pool_rd_bytes{cluster=~\"$Cluster\"}[$__interval]))", + "expr": "sum(irate(ceph_pool_rd_bytes{cluster=~\"$cluster\"}[$__interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1369,10 +1369,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "sum(irate(ceph_pool_wr_bytes{cluster=~\"$Cluster\"}[$__interval]))", + "expr": "sum(irate(ceph_pool_wr_bytes{cluster=~\"$cluster\"}[$__interval]))", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -1387,7 +1387,7 @@ }, { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -1433,10 +1433,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "sum(irate(ceph_osd_recovery_ops{cluster=~\"$Cluster\"}[$__interval]))", + "expr": "sum(irate(ceph_osd_recovery_ops{cluster=~\"$cluster\"}[$__interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1463,7 +1463,7 @@ "panels": [ { "colors": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -1557,8 +1557,8 @@ "pluginVersion": "9.4.7", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "count(ALERTS{alertstate=\"firing\",severity=\"critical\", cluster=~\"$Cluster\"}) OR vector(0)", + "datasource": "$datasource", + "expr": "count(ALERTS{alertstate=\"firing\",severity=\"critical\", cluster=~\"$cluster\"}) OR vector(0)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -1567,8 +1567,8 @@ "refId": "A" }, { - "datasource": "${DS_PROMETHEUS}", - "expr": "count(ALERTS{alertstate=\"firing\",severity=\"warning\", cluster=~\"$Cluster\"}) OR vector(0)", + "datasource": "$datasource", + "expr": "count(ALERTS{alertstate=\"firing\",severity=\"warning\", cluster=~\"$cluster\"}) OR vector(0)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -1583,7 +1583,7 @@ }, { "columns": [ ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { @@ -1642,10 +1642,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "$datasource" }, "exemplar": false, - "expr": "ALERTS{alertstate=\"firing\", cluster=~\"$Cluster\"}", + "expr": 
"ALERTS{alertstate=\"firing\", cluster=~\"$cluster\", }}", "format": "table", "hide": false, "instant": true, @@ -1760,7 +1760,7 @@ "id": 23, "panels": [ { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -1835,7 +1835,7 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "expr": "topk(5, ceph_cluster_total_used_bytes/ceph_cluster_total_bytes)", "format": "time_series", "instant": false, @@ -1850,7 +1850,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -1925,7 +1925,7 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "expr": "topk(10, sum by (cluster) (irate(ceph_osd_op_w[$__interval])) \n+ sum by (cluster) (irate(ceph_osd_op_r[$__interval])) )", "format": "time_series", "instant": false, @@ -1940,7 +1940,7 @@ "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -2015,8 +2015,8 @@ "pluginVersion": "9.1.3", "targets": [ { - "datasource": "${DS_PROMETHEUS}", - "expr": "topk(10, ceph_pool_bytes_used{cluster=~\"$Cluster\"}/ceph_pool_max_avail{cluster=~\"$Cluster\"} * on(pool_id, cluster) group_left(instance, name) ceph_pool_metadata{cluster=~\"$Cluster\"})", + "datasource": "$datasource", + "expr": "topk(10, ceph_pool_bytes_used{cluster=~\"$cluster\", }}/ceph_pool_max_avail{cluster=~\"$cluster\", }} * on(pool_id, cluster) group_left(instance, name) ceph_pool_metadata{cluster=~\"$cluster\", }})", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2055,7 +2055,7 @@ }, "hide": 0, "label": "Data Source", - "name": "DS_PROMETHEUS", + "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, @@ -2063,22 +2063,19 @@ "type": "datasource" }, { - "allValue": null, - "current": { - "text": "All", - "value": "All" - }, - "datasource": "$DS_PROMETHEUS", + "allValue": ".*", + "current": { }, + "datasource": "$datasource", "hide": 0, "includeAll": true, - "label": null, - "multi": false, - "name": "Cluster", + "label": "cluster", + "multi": true, + "name": "cluster", "options": [ ], "query": "label_values(ceph_health_status, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, + "refresh": 1, + "regex": "(.*)", + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", diff --git a/monitoring/ceph-mixin/dashboards_out/osd-device-details.json b/monitoring/ceph-mixin/dashboards_out/osd-device-details.json index 359f5522cd0..60f1ecc5ad0 100644 --- a/monitoring/ceph-mixin/dashboards_out/osd-device-details.json +++ b/monitoring/ceph-mixin/dashboards_out/osd-device-details.json @@ -113,14 +113,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n", + "expr": "rate(ceph_osd_op_r_latency_sum{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "read", "refId": "A" }, { - "expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n", + "expr": 
"rate(ceph_osd_op_w_latency_sum{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "write", @@ -220,14 +220,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "expr": "rate(ceph_osd_op_r{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", "refId": "A" }, { - "expr": "rate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "expr": "rate(ceph_osd_op_w{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", @@ -327,14 +327,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "expr": "rate(ceph_osd_op_r_out_bytes{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read Bytes", "refId": "A" }, { - "expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "expr": "rate(ceph_osd_op_w_in_bytes{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write Bytes", @@ -453,14 +453,14 @@ "steppedLine": false, "targets": [ { - "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n", + "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total[$__rate_interval]) /\n rate(node_disk_reads_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}/{{device}} Reads", "refId": "A" }, { - "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n", + "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total[$__rate_interval]) /\n rate(node_disk_writes_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n", "format": "time_series", "intervalFactor": 1, 
"legendFormat": "{{instance}}/{{device}} Writes", @@ -560,14 +560,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_writes_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}} Writes", "refId": "A" }, { - "expr": "label_replace(\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_reads_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}} Reads", @@ -667,14 +667,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(node_disk_read_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_read_bytes_total[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}} {{device}} Reads", "refId": "A" }, { - "expr": "label_replace(\n rate(node_disk_written_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_written_bytes_total[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", 
"format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}} {{device}} Writes", @@ -769,7 +769,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(node_disk_io_time_seconds_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_io_time_seconds_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}}", @@ -837,36 +837,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -886,7 +866,7 @@ "multi": false, "name": "osd", "options": [ ], - "query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)", + "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\", }, ceph_daemon)", "refresh": 1, "regex": "(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/osds-overview.json b/monitoring/ceph-mixin/dashboards_out/osds-overview.json index b64307d32bc..948f0d721b6 100644 --- a/monitoring/ceph-mixin/dashboards_out/osds-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/osds-overview.json @@ -103,21 +103,21 @@ "steppedLine": false, "targets": [ { - "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n", + "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) * 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "AVG read", "refId": "A" }, { - "expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n", + "expr": "max(\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) * 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "MAX read", "refId": "B" }, { - "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) 
rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n )\n)\n", + "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n * 1000\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "@95%ile", @@ -263,7 +263,7 @@ "styles": "", "targets": [ { - "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n", + "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000\n )\n ))\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -350,21 +350,21 @@ "steppedLine": false, "targets": [ { - "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n)\n", + "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n * 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "AVG write", "refId": "A" }, { - "expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n)\n", + "expr": "max(\n rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "MAX write", "refId": "B" }, { - "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n))\n", + "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000\n))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "@95%ile write", @@ -545,7 +545,7 @@ "styles": "", "targets": [ { - "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000)\n ))\n)\n", + "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000)\n ))\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -627,7 +627,7 @@ }, "targets": [ { - "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\"})", + "expr": "count by (device_class) (ceph_osd_metadata{cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device_class}}", @@ -686,14 +686,14 @@ }, "targets": [ { - "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})", + "expr": "count(ceph_bluefs_wal_total_bytes{cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 2, "legendFormat": "bluestore", "refId": "A" }, { - 
"expr": "absent(ceph_bluefs_wal_total_bytes{job=~\"$job\"}) * count(ceph_osd_metadata{job=~\"$job\"})", + "expr": "absent(ceph_bluefs_wal_total_bytes{cluster=~\"$cluster\", }) * count(ceph_osd_metadata{cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 2, "legendFormat": "filestore", @@ -752,63 +752,63 @@ }, "targets": [ { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} < 1099511627776)", + "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } < 1099511627776)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<1TB", "refId": "A" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 1099511627776 < 2199023255552)", + "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 1099511627776 < 2199023255552)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<2TB", "refId": "B" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 2199023255552 < 3298534883328)", + "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 2199023255552 < 3298534883328)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<3TB", "refId": "C" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 3298534883328 < 4398046511104)", + "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 3298534883328 < 4398046511104)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<4TB", "refId": "D" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 4398046511104 < 6597069766656)", + "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 4398046511104 < 6597069766656)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<6TB", "refId": "E" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 6597069766656 < 8796093022208)", + "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 6597069766656 < 8796093022208)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<8TB", "refId": "F" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 8796093022208 < 10995116277760)", + "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 8796093022208 < 10995116277760)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<10TB", "refId": "G" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 10995116277760 < 13194139533312)", + "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 10995116277760 < 13194139533312)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<12TB", "refId": "H" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 13194139533312)", + "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 13194139533312)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<12TB+", @@ -869,7 +869,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_osd_numpg{job=~\"$job\"}", + "expr": "ceph_osd_numpg{cluster=~\"$cluster\", }", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -975,7 +975,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\"})\n)\n", + "expr": "sum(ceph_bluestore_onode_hits{cluster=~\"$cluster\", }) / (\n sum(ceph_bluestore_onode_hits{cluster=~\"$cluster\", }) +\n sum(ceph_bluestore_onode_misses{cluster=~\"$cluster\", })\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -1066,14 +1066,14 @@ "steppedLine": false, "targets": [ { - "expr": 
"round(sum(rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", "refId": "A" }, { - "expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", @@ -1282,36 +1282,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/pool-detail.json b/monitoring/ceph-mixin/dashboards_out/pool-detail.json index b43e38ccb4a..5e5bf6e9b8b 100644 --- a/monitoring/ceph-mixin/dashboards_out/pool-detail.json +++ b/monitoring/ceph-mixin/dashboards_out/pool-detail.json @@ -104,7 +104,7 @@ "tableColumn": "", "targets": [ { - "expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + ceph_pool_max_avail{job=~\"$job\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "expr": "(ceph_pool_stored{cluster=~\"$cluster\", } / (ceph_pool_stored{cluster=~\"$cluster\", } + ceph_pool_max_avail{cluster=~\"$cluster\", })) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -186,7 +186,7 @@ "tableColumn": "", "targets": [ { - "expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"} > 0\n", + "expr": "(ceph_pool_max_avail{cluster=~\"$cluster\", } / deriv(ceph_pool_stored{cluster=~\"$cluster\", }[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", } > 0\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -261,7 +261,7 @@ "steppedLine": false, "targets": [ { - "expr": "deriv(ceph_pool_objects{job=~\"$job\"}[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "expr": "deriv(ceph_pool_objects{cluster=~\"$cluster\", }[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Objects per second", @@ -364,14 +364,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "expr": "rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval]) *\n on(pool_id) 
group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "reads", "refId": "A" }, { - "expr": "rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "expr": "rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "writes", @@ -474,14 +474,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "expr": "rate(ceph_pool_rd_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "reads", "refId": "A" }, { - "expr": "rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "expr": "rate(ceph_pool_wr_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "writes", @@ -579,7 +579,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_pool_objects{job=~\"$job\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "expr": "ceph_pool_objects{cluster=~\"$cluster\", } *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Number of Objects", @@ -647,36 +647,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -696,7 +676,7 @@ "multi": false, "name": "pool_name", "options": [ ], - "query": "label_values(ceph_pool_metadata{job=~\"$job\"}, name)", + "query": "label_values(ceph_pool_metadata{cluster=~\"$cluster\", }, name)", "refresh": 1, "regex": "", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/pool-overview.json b/monitoring/ceph-mixin/dashboards_out/pool-overview.json index a819ca53793..fa32b3368cf 100644 --- a/monitoring/ceph-mixin/dashboards_out/pool-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/pool-overview.json @@ -85,7 +85,7 @@ "tableColumn": "", "targets": [ { - "expr": "count(ceph_pool_metadata{job=~\"$job\"})", + "expr": "count(ceph_pool_metadata{cluster=~\"$cluster\", })", 
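Most of the dashboards_out/ files in this diff repeat the templating change shown above: the hidden, multi-select cluster variable driven by label_values(ceph_osd_metadata, cluster) and the separate job variable are replaced by a single visible cluster variable populated from label_values(ceph_health_status, cluster). A sketch of the resulting variable definition, written as jsonnet with field values copied from the generated JSON in this diff (the real output also keeps the legacy tagValuesQuery/tags/tagsQuery fields):

    // Sketch of the per-dashboard "cluster" template variable after this change.
    {
      allValue: null,
      current: {},
      datasource: '$datasource',
      hide: 0,
      includeAll: false,
      label: 'cluster',
      multi: false,
      name: 'cluster',
      options: [],
      query: 'label_values(ceph_health_status, cluster)',
      refresh: 1,
      regex: '(.*)',
      sort: 1,
      type: 'query',
      useTags: false,
    }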
"format": "table", "instant": true, "intervalFactor": 1, @@ -168,7 +168,7 @@ "tableColumn": "", "targets": [ { - "expr": "count(ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"})", + "expr": "count(ceph_pool_metadata{compression_mode!=\"none\", cluster=~\"$cluster\", })", "format": "", "intervalFactor": 1, "legendFormat": "", @@ -250,7 +250,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\"})", + "expr": "sum(ceph_osd_stat_bytes{cluster=~\"$cluster\", })", "format": "", "intervalFactor": 1, "legendFormat": "", @@ -332,7 +332,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_pool_bytes_used{job=~\"$job\"})", + "expr": "sum(ceph_pool_bytes_used{cluster=~\"$cluster\", })", "format": "", "instant": true, "intervalFactor": 1, @@ -415,7 +415,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_pool_stored{job=~\"$job\"})", + "expr": "sum(ceph_pool_stored{cluster=~\"$cluster\", })", "format": "", "instant": true, "intervalFactor": 1, @@ -498,7 +498,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"}\n)\n", + "expr": "sum(\n ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } -\n ceph_pool_compress_bytes_used{cluster=~\"$cluster\", }\n)\n", "format": "", "intervalFactor": 1, "legendFormat": "", @@ -580,7 +580,7 @@ "tableColumn": "", "targets": [ { - "expr": "(\n sum(ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n sum(ceph_pool_stored_raw{job=~\"$job\"} and ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n) * 100\n", + "expr": "(\n sum(ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0) /\n sum(ceph_pool_stored_raw{cluster=~\"$cluster\", } and ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0)\n) * 100\n", "format": "table", "intervalFactor": 1, "legendFormat": "", @@ -662,7 +662,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n / sum(ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n)\n", + "expr": "sum(\n ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0)\n / sum(ceph_pool_compress_bytes_used{cluster=~\"$cluster\", } > 0\n)\n", "format": "", "intervalFactor": 1, "legendFormat": "", @@ -1161,7 +1161,7 @@ "styles": "", "targets": [ { - "expr": "(\n ceph_pool_compress_under_bytes{job=~\"$job\"} /\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n ) * 100 > 0.5\n)\n", + "expr": "(\n ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } /\n ceph_pool_compress_bytes_used{cluster=~\"$cluster\", } > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0) /\n ceph_pool_stored_raw{cluster=~\"$cluster\", }\n ) * 100 > 0.5\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1169,7 +1169,7 @@ "refId": "A" }, { - "expr": "ceph_pool_max_avail{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n", + "expr": "ceph_pool_max_avail{cluster=~\"$cluster\", } *\n on(pool_id) group_left(name) ceph_pool_metadata{cluster=~\"$cluster\", }\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1177,7 +1177,7 @@ "refId": "B" }, { - "expr": "(\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n) * 100\n", + "expr": "(\n (ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0) /\n 
ceph_pool_stored_raw{cluster=~\"$cluster\", }\n) * 100\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1185,7 +1185,7 @@ "refId": "C" }, { - "expr": "ceph_pool_percent_used{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n", + "expr": "ceph_pool_percent_used{cluster=~\"$cluster\", } *\n on(pool_id) group_left(name) ceph_pool_metadata{cluster=~\"$cluster\", }\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1193,7 +1193,7 @@ "refId": "D" }, { - "expr": "ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n", + "expr": "ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } -\n ceph_pool_compress_bytes_used{cluster=~\"$cluster\", } > 0\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1201,7 +1201,7 @@ "refId": "E" }, { - "expr": "delta(ceph_pool_stored{job=~\"$job\"}[5d])", + "expr": "delta(ceph_pool_stored{cluster=~\"$cluster\", }[5d])", "format": "table", "instant": true, "intervalFactor": 1, @@ -1209,7 +1209,7 @@ "refId": "F" }, { - "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])\n + rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n", + "expr": "rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval])\n + rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval])\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1217,7 +1217,7 @@ "refId": "G" }, { - "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n", + "expr": "rate(ceph_pool_rd_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{cluster=~\"$cluster\", }[$__rate_interval])\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1225,7 +1225,7 @@ "refId": "H" }, { - "expr": "ceph_pool_metadata{job=~\"$job\"}", + "expr": "ceph_pool_metadata{cluster=~\"$cluster\", }", "format": "table", "instant": true, "intervalFactor": 1, @@ -1233,7 +1233,7 @@ "refId": "I" }, { - "expr": "ceph_pool_stored{job=~\"$job\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\"}", + "expr": "ceph_pool_stored{cluster=~\"$cluster\", } * on(pool_id) group_left ceph_pool_metadata{cluster=~\"$cluster\", }", "format": "table", "instant": true, "intervalFactor": 1, @@ -1241,7 +1241,7 @@ "refId": "J" }, { - "expr": "ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"}", + "expr": "ceph_pool_metadata{compression_mode!=\"none\", cluster=~\"$cluster\", }", "format": "table", "instant": true, "intervalFactor": 1, @@ -1348,14 +1348,14 @@ "steppedLine": false, "targets": [ { - "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"})\n", + "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{cluster=~\"$cluster\", })\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}} ", "refId": "A" }, { - "expr": "topk($topk,\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"}\n)\n", + "expr": "topk($topk,\n rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval]) +\n on(pool_id) group_left(instance,name) 
ceph_pool_metadata{cluster=~\"$cluster\", }\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}} - write", @@ -1450,7 +1450,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\"}\n)\n", + "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{cluster=~\"$cluster\", }[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{cluster=~\"$cluster\", }\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}}", @@ -1545,7 +1545,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_pool_bytes_used{job=~\"$job\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\"}", + "expr": "ceph_pool_bytes_used{cluster=~\"$cluster\", } * on(pool_id) group_right ceph_pool_metadata{cluster=~\"$cluster\", }", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}}", @@ -1613,36 +1613,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json b/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json index 1e41040c9ad..35de6b09b75 100644 --- a/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json @@ -114,14 +114,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GET {{ceph_daemon}}", "refId": "A" }, { - "expr": "sum by (instance_id) (\n rate(ceph_rgw_op_put_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "sum by (instance_id) (\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval])\n) * on (instance_id) group_left 
(ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUT {{ceph_daemon}}", @@ -216,14 +216,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_op_get_obj_bytes{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_op_put_obj_bytes{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs {{ceph_daemon}}", @@ -324,28 +324,28 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\",ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_failed_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Requests Failed {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs {{ceph_daemon}}", "refId": "B" }, { - "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs {{ceph_daemon}}", "refId": "C" }, { - "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) -\n (\n rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Other {{ceph_daemon}}", @@ -517,28 +517,28 @@ }, "targets": [ { - "expr": 
"rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_failed_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Failures {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs {{ceph_daemon}}", "refId": "B" }, { - "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs {{ceph_daemon}}", "refId": "C" }, { - "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) -\n (\n rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}", @@ -574,36 +574,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -623,7 +603,7 @@ "multi": false, "name": "rgw_servers", "options": [ ], - "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)", "refresh": 1, "regex": "", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json index 3cb8a7c76ac..5e185b63b7f 100644 --- a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json +++ 
b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json @@ -108,14 +108,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GET {{rgw_host}}", "refId": "A" }, { - "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUT {{rgw_host}}", @@ -210,7 +210,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n", + "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -305,7 +305,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -400,14 +400,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(ceph_rgw_op_get_obj_bytes{job=~\"$job\"}[$__rate_interval]))", + "expr": "sum(rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs", "refId": "A" }, { - "expr": "sum(rate(ceph_rgw_op_put_obj_bytes{job=~\"$job\"}[$__rate_interval]))", + "expr": "sum(rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]))", "format": "time_series", 
"intervalFactor": 1, "legendFormat": "PUTs", @@ -502,7 +502,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -597,7 +597,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -1199,36 +1199,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -1248,9 +1228,9 @@ "multi": false, "name": "rgw_servers", "options": [ ], - "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)", "refresh": 1, - "regex": "RGW Server", + "regex": ".*", "sort": 1, "tagValuesQuery": "", "tags": [ ], diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json b/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json index 09227b95830..94dcaa4325e 100644 --- a/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json @@ -89,7 +89,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[$__rate_interval]))", + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{cluster=~\"$cluster\", }[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{source_zone}}", @@ -184,7 +184,7 @@ "steppedLine": false, 
"targets": [ { - "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[$__rate_interval]))", + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{cluster=~\"$cluster\", }[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{source_zone}}", @@ -279,7 +279,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[$__rate_interval]))", + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{source_zone}}", @@ -374,7 +374,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[$__rate_interval]))", + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{cluster=~\"$cluster\", }[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{source_zone}}", @@ -443,36 +443,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -488,13 +468,13 @@ "datasource": "$datasource", "hide": 0, "includeAll": true, - "label": "", + "label": null, "multi": false, "name": "rgw_servers", "options": [ ], - "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)", "refresh": 1, - "regex": "RGW Server", + "regex": "rgw.(.*)", "sort": 1, "tagValuesQuery": "", "tags": [ ], diff --git a/monitoring/ceph-mixin/dashboards_out/rbd-details.json b/monitoring/ceph-mixin/dashboards_out/rbd-details.json index 36cd346a50c..500c51f4ba9 100644 --- a/monitoring/ceph-mixin/dashboards_out/rbd-details.json +++ b/monitoring/ceph-mixin/dashboards_out/rbd-details.json @@ -89,14 +89,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "expr": "rate(ceph_rbd_write_ops{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Write", "refId": "A" }, { - "expr": "rate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "expr": "rate(ceph_rbd_read_ops{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Read", @@ -191,14 +191,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "expr": "rate(ceph_rbd_write_bytes{pool=\"$pool\", image=\"$image\", 
cluster=~\"$cluster\", }[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Write", "refId": "A" }, { - "expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "expr": "rate(ceph_rbd_read_bytes{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Read", @@ -293,14 +293,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", + "expr": "rate(ceph_rbd_write_latency_sum{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Write", "refId": "A" }, { - "expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", + "expr": "rate(ceph_rbd_read_latency_sum{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Read", @@ -368,36 +368,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -417,7 +397,7 @@ "multi": false, "name": "pool", "options": [ ], - "query": "label_values(pool)", + "query": "label_values(ceph_rbd_read_ops{cluster=~\"$cluster\", }, pool)", "refresh": 1, "regex": "", "sort": 0, @@ -437,7 +417,7 @@ "multi": false, "name": "image", "options": [ ], - "query": "label_values(image)", + "query": "label_values(ceph_rbd_read_ops{cluster=~\"$cluster\", , pool=\"$pool\"}, image)", "refresh": 1, "regex": "", "sort": 0, diff --git a/monitoring/ceph-mixin/dashboards_out/rbd-overview.json b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json index dcef22ad37b..34666c67b29 100644 --- a/monitoring/ceph-mixin/dashboards_out/rbd-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json @@ -101,14 +101,14 @@ "steppedLine": false, "targets": [ { - "expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_rbd_write_ops{cluster=~\"$cluster\", }[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", "refId": "A" }, { - "expr": 
"round(sum(rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_rbd_read_ops{cluster=~\"$cluster\", }[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", @@ -203,14 +203,14 @@ "steppedLine": false, "targets": [ { - "expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_rbd_write_bytes{cluster=~\"$cluster\", }[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write", "refId": "A" }, { - "expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_rbd_read_bytes{cluster=~\"$cluster\", }[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read", @@ -305,14 +305,14 @@ "steppedLine": false, "targets": [ { - "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n", + "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{cluster=~\"$cluster\", }[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{cluster=~\"$cluster\", }[$__rate_interval]))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write", "refId": "A" }, { - "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n", + "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{cluster=~\"$cluster\", }[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{cluster=~\"$cluster\", }[$__rate_interval]))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read", @@ -483,7 +483,7 @@ "styles": "", "targets": [ { - "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])\n ))\n )\n)\n", + "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{cluster=~\"$cluster\", }[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{cluster=~\"$cluster\", }[$__rate_interval])\n ))\n )\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -632,7 +632,7 @@ "styles": "", "targets": [ { - "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n", + "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{cluster=~\"$cluster\", }[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -781,7 +781,7 @@ "styles": "", "targets": [ { - "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n", + "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{cluster=~\"$cluster\", }[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n 
clamp_min(rate(ceph_rbd_read_latency_count{cluster=~\"$cluster\", }[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -828,36 +828,16 @@ "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, - "includeAll": true, + "hide": 0, + "includeAll": false, "label": "cluster", - "multi": true, + "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(ceph_osd_metadata, cluster)", - "refresh": 1, - "regex": "(.*)", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(ceph_osd_metadata{}, job)", + "query": "label_values(ceph_health_status, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/rgw-s3-analytics.json b/monitoring/ceph-mixin/dashboards_out/rgw-s3-analytics.json index 6a41df1cfaa..397279f5455 100644 --- a/monitoring/ceph-mixin/dashboards_out/rgw-s3-analytics.json +++ b/monitoring/ceph-mixin/dashboards_out/rgw-s3-analytics.json @@ -94,7 +94,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "__auto", @@ -157,7 +157,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "__auto", @@ -220,7 +220,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "__auto", @@ -283,7 +283,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "__auto", @@ -333,7 +333,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) 
ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -343,7 +343,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "List Buckets", @@ -352,7 +352,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -362,7 +362,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -372,7 +372,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -382,7 +382,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -392,7 +392,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "Copy Objects", @@ -455,7 +455,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -465,7 +465,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": 
"time_series", "instant": true, "intervalFactor": 1, @@ -475,7 +475,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -485,7 +485,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "Copy Objects", @@ -548,7 +548,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -558,7 +558,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "List Bucket", @@ -567,7 +567,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -577,7 +577,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -587,7 +587,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -597,7 +597,7 @@ }, { "datasource": "${datasource}", - "expr": "sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -607,7 +607,7 @@ }, { "datasource": 
"${datasource}", - "expr": "sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "intervalFactor": 1, "legendFormat": "Copy Object", @@ -692,7 +692,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -708,7 +708,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -724,7 +724,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -740,7 +740,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -938,7 +938,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -954,7 +954,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -970,7 +970,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left 
(ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -986,7 +986,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1002,7 +1002,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1212,7 +1212,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1228,7 +1228,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1244,7 +1244,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1260,7 +1260,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1460,7 +1460,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1476,7 +1476,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", 
"format": "table", "hide": false, "instant": true, @@ -1492,7 +1492,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1508,7 +1508,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1524,7 +1524,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }", "format": "table", "hide": false, "instant": true, @@ -1727,7 +1727,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)", + "expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -1795,7 +1795,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)", + "expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -1842,7 +1842,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)", + "expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -1889,7 +1889,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)", + "expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -1996,7 +1996,7 @@ "targets": [ { "datasource": 
"${datasource}", - "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2104,7 +2104,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2212,7 +2212,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2320,7 +2320,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2428,7 +2428,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2536,7 +2536,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2644,7 +2644,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2752,7 +2752,7 @@ 
"targets": [ { "datasource": "${datasource}", - "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2860,7 +2860,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2946,7 +2946,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -2962,7 +2962,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -2978,7 +2978,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -2994,7 +2994,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -3010,7 +3010,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -3222,7 +3222,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "topk(5, \n sum 
by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)\n", + "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)\n", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -3290,7 +3290,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)\n", + "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)\n", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -3337,7 +3337,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)", + "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -3384,7 +3384,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)", + "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -3491,7 +3491,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -3599,7 +3599,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -3707,7 +3707,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, 
@@ -3815,7 +3815,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -3923,7 +3923,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -4031,7 +4031,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -4139,7 +4139,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -4247,7 +4247,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -4355,7 +4355,7 @@ "targets": [ { "datasource": "${datasource}", - "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -4441,7 +4441,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -4457,7 +4457,7 @@ "uid": "${datasource}" }, 
"exemplar": false, - "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -4473,7 +4473,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -4489,7 +4489,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -4505,7 +4505,7 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})", + "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })", "format": "table", "hide": false, "instant": true, @@ -4635,12 +4635,32 @@ "current": { }, "datasource": "$datasource", "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_health_status, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "rgw_servers", "options": [ ], - "query": "label_values(ceph_rgw_metadata{}, ceph_daemon)", + "query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)", "refresh": 2, "regex": "", "sort": 0, diff --git a/monitoring/ceph-mixin/tests_dashboards/__init__.py b/monitoring/ceph-mixin/tests_dashboards/__init__.py index 45147e5c324..ea41d01be6b 100644 --- a/monitoring/ceph-mixin/tests_dashboards/__init__.py +++ b/monitoring/ceph-mixin/tests_dashboards/__init__.py @@ -49,17 +49,17 @@ class PromqlTest: The workflow of testing would be something like: # add prometheus query to test - self.set_expression('bonding_slaves > 0') + self.set_expression('node_bonding_slaves > 0') # add some prometheus input series - self.add_series('bonding_slaves{master="bond0"}', '2') - self.add_series('bonding_slaves{master="bond1"}', '3') + self.add_series('node_bonding_slaves{master="bond0"}', '2') + self.add_series('node_bonding_slaves{master="bond1"}', '3') self.add_series('node_network_receive_bytes{instance="127.0.0.1", device="eth1"}', "10 100 230 22") # expected output of the query - 
self.add_exp_samples('bonding_slaves{master="bond0"}', 2) - self.add_exp_samples('bonding_slaves{master="bond1"}', 3) + self.add_exp_samples('node_bonding_slaves{master="bond0"}', 2) + self.add_exp_samples('node_bonding_slaves{master="bond1"}', 3) # at last, always call promtool with: self.assertTrue(self.run_promtool()) @@ -150,10 +150,10 @@ class PromqlTest: '$osd_hosts', you should change this to a real value. Example: - > self.set_expression('bonding_slaves{master="$osd_hosts"} > 0') + > self.set_expression('node_bonding_slaves{master="$osd_hosts"} > 0') > self.set_variable('osd_hosts', '127.0.0.1') > print(self.query) - > bonding_slaves{master="127.0.0.1"} > 0 + > node_bonding_slaves{master="127.0.0.1"} > 0 Args: variable(str): Variable name diff --git a/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature b/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature index c00107ec7ae..f39cb4be59e 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature +++ b/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature @@ -3,16 +3,17 @@ Feature: Ceph Cluster Dashboard Scenario: "Test cluster health" Given the following series: | metrics | values | - | ceph_health_status{} | 1.0 | - Then Grafana panel `Health Status` with legend `EMPTY` shows: + | ceph_health_status{job="ceph",cluster="mycluster"} | 1 | + Then Grafana panel `Ceph health status` with legend `EMPTY` shows: | metrics | values | - | ceph_health_status{} | 1.0 | + | ceph_health_status{job="ceph",cluster="mycluster"} | 1 | Scenario: "Test Firing Alerts Warning" Given the following series: | metrics | values | - | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning"} | 1 | - | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical"} | 1 | + | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning", cluster="mycluster"} | 1 | + | ALERTS{alertstate="firing",alertname="Ceph.1", severity="critical", cluster="someothercluster"} | 1 | + | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical", cluster="mycluster"} | 1 | Then Grafana panel `Firing Alerts` with legend `Warning` shows: | metrics | values | | {} | 1 | @@ -20,8 +21,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Firing Alerts Critical" Given the following series: | metrics | values | - | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning"} | 1 | - | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical"} | 1 | + | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning", cluster="mycluster"} | 1 | + | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning", cluster="someothercluster"} | 1 | + | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical", cluster="mycluster"} | 1 | Then Grafana panel `Firing Alerts` with legend `Critical` shows: | metrics | values | | {} | 1 | @@ -29,33 +31,33 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Available Capacity" Given the following series: | metrics | values | - | ceph_cluster_total_bytes{}| 100 | - | ceph_cluster_total_used_bytes{}| 70 | + | ceph_cluster_total_bytes{job="ceph",cluster="mycluster"}| 100 | + | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster"}| 70 | Then Grafana panel `Available Capacity` with legend `EMPTY` shows: | metrics | values | - | {} | 0.3 | + | {job="ceph",cluster="mycluster"} | 0.3 | Scenario: "Test Cluster Capacity" Given the following series: | metrics | values | - | ceph_cluster_total_bytes{}| 100 | + | 
ceph_cluster_total_bytes{job="ceph",cluster="mycluster"}| 100 | Then Grafana panel `Cluster Capacity` with legend `EMPTY` shows: | metrics | values | - | ceph_cluster_total_bytes{} | 100 | + | ceph_cluster_total_bytes{job="ceph",cluster="mycluster"} | 100 | Scenario: "Test Used Capacity" Given the following series: | metrics | values | - | ceph_cluster_total_used_bytes{}| 100 | + | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster"}| 100 | Then Grafana panel `Used Capacity` with legend `EMPTY` shows: | metrics | values | - | ceph_cluster_total_used_bytes{} | 100 | + | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster"} | 100 | Scenario: "Test Write Throughput" Given the following series: | metrics | values | - | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.0"} | 500 500 500 | - | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.1"} | 500 120 110 | + | ceph_osd_op_w_in_bytes{job="ceph", cluster="mycluster", osd="osd.0"} | 500 500 500 | + | ceph_osd_op_w_in_bytes{job="ceph", cluster="mycluster", osd="osd.1"} | 500 120 110 | Then Grafana panel `Write Throughput` with legend `EMPTY` shows: | metrics | values | | {} | 2 | @@ -63,8 +65,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Write IOPS" Given the following series: | metrics | values | - | ceph_osd_op_w{cluster="mycluster", osd="osd.0"} | 500 500 500 | - | ceph_osd_op_w{cluster="mycluster", osd="osd.1"} | 500 120 110 | + | ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.0"} | 500 500 500 | + | ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.1"} | 500 120 110 | Then Grafana panel `Write IOPS` with legend `EMPTY` shows: | metrics | values | | {} | 2 | @@ -72,8 +74,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Read Throughput" Given the following series: | metrics | values | - | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.0"} | 500 500 500 | - | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.1"} | 500 120 110 | + | ceph_osd_op_r_out_bytes{job="ceph", cluster="mycluster", osd="osd.0"} | 500 500 500 | + | ceph_osd_op_r_out_bytes{job="ceph", cluster="mycluster", osd="osd.1"} | 500 120 110 | Then Grafana panel `Read Throughput` with legend `EMPTY` shows: | metrics | values | | {} | 2 | @@ -81,8 +83,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Read IOPS" Given the following series: | metrics | values | - | ceph_osd_op_r{cluster="mycluster", osd="osd.0"} | 500 500 500 | - | ceph_osd_op_r{cluster="mycluster", osd="osd.1"} | 500 120 110 | + | ceph_osd_op_r{job="ceph", cluster="mycluster", osd="osd.0"} | 500 500 500 | + | ceph_osd_op_r{job="ceph", cluster="mycluster", osd="osd.1"} | 500 120 110 | Then Grafana panel `Read IOPS` with legend `EMPTY` shows: | metrics | values | | {} | 2 | @@ -90,9 +92,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test OSDs All" Given the following series: | metrics | values | - | ceph_osd_metadata{cluster="mycluster", osd="osd.0"} | 1 | - | ceph_osd_metadata{cluster="mycluster", osd="osd.2"} | 1 | - | ceph_osd_metadata{cluster="mycluster", osd="osd.3"} | 1 | + | ceph_osd_metadata{job="ceph", cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_metadata{job="ceph", cluster="mycluster", osd="osd.2"} | 1 | + | ceph_osd_metadata{job="ceph", cluster="mycluster", osd="osd.3"} | 1 | Then Grafana panel `OSDs` with legend `All` shows: | metrics | values | | {} | 3 | @@ -100,9 +102,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test OSDs In" Given the following series: | metrics | values | - | ceph_osd_in{cluster="mycluster", osd="osd.0"} | 1 | - 
| ceph_osd_in{cluster="mycluster", osd="osd.1"} | 1 | - | ceph_osd_in{cluster="mycluster", osd="osd.2"} | 1 | + | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.1"} | 1 | + | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.2"} | 1 | Then Grafana panel `OSDs` with legend `In` shows: | metrics | values | | {} | 3 | @@ -110,9 +112,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test OSDs Out" Given the following series: | metrics | values | - | ceph_osd_in{cluster="mycluster", osd="osd.0"} | 1 | - | ceph_osd_in{cluster="mycluster", osd="osd.1"} | 0 | - | ceph_osd_in{cluster="mycluster", osd="osd.2"} | 0 | + | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.1"} | 0 | + | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.2"} | 0 | Then Grafana panel `OSDs` with legend `Out` shows: | metrics | values | | {} | 2 | @@ -120,9 +122,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test OSDs Up" Given the following series: | metrics | values | - | ceph_osd_up{cluster="mycluster", osd="osd.0"} | 1 | - | ceph_osd_up{cluster="mycluster", osd="osd.1"} | 0 | - | ceph_osd_up{cluster="mycluster", osd="osd.2"} | 0 | + | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.1"} | 0 | + | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.2"} | 0 | Then Grafana panel `OSDs` with legend `Up` shows: | metrics | values | | {} | 1 | @@ -130,9 +132,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test OSDs Down" Given the following series: | metrics | values | - | ceph_osd_up{cluster="mycluster", osd="osd.0"} | 1 | - | ceph_osd_up{cluster="mycluster", osd="osd.1"} | 0 | - | ceph_osd_up{cluster="mycluster", osd="osd.2"} | 0 | + | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.1"} | 0 | + | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.2"} | 0 | Then Grafana panel `OSDs` with legend `Down` shows: | metrics | values | | {} | 2 | @@ -140,9 +142,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test MGRs Standby" Given the following series: | metrics | values | - | ceph_mgr_status{cluster="mycluster", osd="osd.0"} | 1 | - | ceph_mgr_status{cluster="mycluster", osd="osd.1"} | 0 | - | ceph_mgr_status{cluster="mycluster", osd="osd.2"} | 0 | + | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 | + | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 | Then Grafana panel `MGRs` with legend `Standby` shows: | metrics | values | | {} | 2 | @@ -150,8 +152,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test MGRs Active" Given the following series: | metrics | values | - | ceph_mgr_status{cluster="mycluster", osd="osd.0"} | 1 | - | ceph_mgr_status{cluster="mycluster", osd="osd.1"} | 0 | + | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 | Then Grafana panel `MGRs` with legend `Active` shows: | metrics | values | | {} | 1 | @@ -159,9 +161,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Monitors Total" Given the following series: | metrics | values | - | ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 | - | ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 | - | ceph_mon_quorum_status{cluster="mycluster", 
osd="osd.2"} | 0 | + | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 | + | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 | Then Grafana panel `Monitors` with legend `Total` shows: | metrics | values | | {} | 3 | @@ -169,9 +171,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Monitors In Quorum" Given the following series: | metrics | values | - | ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 | - | ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 | - | ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 | + | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 | + | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 | Then Grafana panel `Monitors` with legend `In Quorum` shows: | metrics | values | | {} | 1 | @@ -179,9 +181,9 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Monitors out of Quorum" Given the following series: | metrics | values | - | ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 | - | ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 | - | ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 | + | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 | + | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 | Then Grafana panel `Monitors` with legend `MONs out of Quorum` shows: | metrics | values | | {} | 2 | @@ -189,24 +191,24 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Total Capacity" Given the following series: | metrics | values | - | ceph_cluster_total_bytes{cluster="mycluster", osd="osd.0"} | 100 | + | ceph_cluster_total_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 100 | Then Grafana panel `Capacity` with legend `Total Capacity` shows: | metrics | values | - | ceph_cluster_total_bytes{cluster="mycluster", osd="osd.0"} | 100 | + | ceph_cluster_total_bytes{job="ceph", cluster="mycluster", osd="osd.0"} | 100 | Scenario: "Test Used Capacity" Given the following series: | metrics | values | - | ceph_cluster_total_used_bytes{cluster="mycluster", osd="osd.0"} | 100 | + | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 100 | Then Grafana panel `Capacity` with legend `Used` shows: | metrics | values | - | ceph_cluster_total_used_bytes{cluster="mycluster", osd="osd.0"} | 100 | + | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 100 | Scenario: "Test Cluster Throughput Write" Given the following series: | metrics | values | - | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.0"} | 1000 1000| - | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.1"} | 2000 1500 | + | ceph_osd_op_w_in_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000| + | ceph_osd_op_w_in_bytes{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 | Then Grafana panel `Cluster Throughput` with legend `Write` shows: | metrics | values | | {} | 25 | @@ -214,8 +216,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Cluster Throughput Read" Given the following series: | metrics | values | - | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.0"} | 1000 1000| - | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.1"} | 2000 1500 | + | 
ceph_osd_op_r_out_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000| + | ceph_osd_op_r_out_bytes{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 | Then Grafana panel `Cluster Throughput` with legend `Read` shows: | metrics | values | | {} | 25 | @@ -223,8 +225,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test IOPS Read" Given the following series: | metrics | values | - | ceph_osd_op_r{cluster="mycluster", osd="osd.0"} | 1000 1000| - | ceph_osd_op_r{cluster="mycluster", osd="osd.1"} | 2000 1500 | + | ceph_osd_op_r{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000| + | ceph_osd_op_r{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 | Then Grafana panel `IOPS` with legend `Read` shows: | metrics | values | | {} | 25 | @@ -232,8 +234,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test IOPS Write" Given the following series: | metrics | values | - | ceph_osd_op_w{cluster="mycluster", osd="osd.0"} | 1000 1000| - | ceph_osd_op_w{cluster="mycluster", osd="osd.1"} | 2000 1500 | + | ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000| + | ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 | Then Grafana panel `IOPS` with legend `Write` shows: | metrics | values | | {} | 25 | @@ -241,83 +243,83 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Pool Used Bytes" Given the following series: | metrics | values | - | ceph_pool_bytes_used{cluster="mycluster", pool_id="1"} | 10000 | - | ceph_pool_bytes_used{cluster="mycluster", pool_id="2"} | 20000 | - | ceph_pool_bytes_used{cluster="mycluster", pool_id="3"} | 30000 | - | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 2000 | - | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 4000 | - | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 6000 | + | ceph_pool_bytes_used{job="ceph", cluster="mycluster", pool_id="1"} | 10000 | + | ceph_pool_bytes_used{job="ceph", cluster="mycluster", pool_id="2"} | 20000 | + | ceph_pool_bytes_used{job="ceph", cluster="mycluster", pool_id="3"} | 30000 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 2000 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 4000 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 6000 | Then Grafana panel `Pool Used Bytes` with legend `{{name}}` shows: | metrics | values | - | {cluster="mycluster", name="pool1", pool_id="1"} | 20000000 | - | {cluster="mycluster", name="pool2", pool_id="2"} | 80000000 | - | {cluster="mycluster", name="pool3", pool_id="3"} | 180000000 | + | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 20000000 | + | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 80000000 | + | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 180000000 | Scenario: "Test Pool Used RAW Bytes" Given the following series: | metrics | values | - | ceph_pool_stored_raw{cluster="mycluster", pool_id="1"} | 10000 | - | ceph_pool_stored_raw{cluster="mycluster", pool_id="2"} | 20000 | - | ceph_pool_stored_raw{cluster="mycluster", pool_id="3"} | 30000 | - | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 2000 | - | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 4000 | - | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 6000 | + | ceph_pool_stored_raw{job="ceph", cluster="mycluster", pool_id="1"} | 10000 | + | ceph_pool_stored_raw{job="ceph", 
cluster="mycluster", pool_id="2"} | 20000 | + | ceph_pool_stored_raw{job="ceph", cluster="mycluster", pool_id="3"} | 30000 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 2000 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 4000 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 6000 | Then Grafana panel `Pool Used RAW Bytes` with legend `{{name}}` shows: | metrics | values | - | {cluster="mycluster", name="pool1", pool_id="1"} | 20000000 | - | {cluster="mycluster", name="pool2", pool_id="2"} | 80000000 | - | {cluster="mycluster", name="pool3", pool_id="3"} | 180000000 | + | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 20000000 | + | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 80000000 | + | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 180000000 | Scenario: "Test Pool Objects Quota" Given the following series: | metrics | values | - | ceph_pool_quota_objects{cluster="mycluster", pool_id="1"} | 10 | - | ceph_pool_quota_objects{cluster="mycluster", pool_id="2"} | 20 | - | ceph_pool_quota_objects{cluster="mycluster", pool_id="3"} | 30 | - | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 10 | - | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 15 | - | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 15 | + | ceph_pool_quota_objects{job="ceph", cluster="mycluster", pool_id="1"} | 10 | + | ceph_pool_quota_objects{job="ceph", cluster="mycluster", pool_id="2"} | 20 | + | ceph_pool_quota_objects{job="ceph", cluster="mycluster", pool_id="3"} | 30 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 10 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 15 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 15 | Then Grafana panel `Pool Objects Quota` with legend `{{name}}` shows: | metrics | values | - | {cluster="mycluster", name="pool1", pool_id="1"} | 100 | - | {cluster="mycluster", name="pool2", pool_id="2"} | 300 | - | {cluster="mycluster", name="pool3", pool_id="3"} | 450| + | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 100 | + | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 300 | + | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 450| Scenario: "Test Pool Quota Bytes" Given the following series: | metrics | values | - | ceph_pool_quota_bytes{cluster="mycluster", pool_id="1"} | 100 | - | ceph_pool_quota_bytes{cluster="mycluster", pool_id="2"} | 200 | - | ceph_pool_quota_bytes{cluster="mycluster", pool_id="3"} | 300 | - | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 100 | - | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 150 | - | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 150 | + | ceph_pool_quota_bytes{job="ceph", cluster="mycluster", pool_id="1"} | 100 | + | ceph_pool_quota_bytes{job="ceph", cluster="mycluster", pool_id="2"} | 200 | + | ceph_pool_quota_bytes{job="ceph", cluster="mycluster", pool_id="3"} | 300 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 100 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 150 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 150 | Then Grafana panel `Pool Quota Bytes` with legend `{{name}}` 
shows: | metrics | values | - | {cluster="mycluster", name="pool1", pool_id="1"} | 10000 | - | {cluster="mycluster", name="pool2", pool_id="2"} | 30000 | - | {cluster="mycluster", name="pool3", pool_id="3"} | 45000 | + | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 10000 | + | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 30000 | + | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 45000 | Scenario: "Test Objects Per Pool" Given the following series: | metrics | values | - | ceph_pool_objects{cluster="mycluster", pool_id="1"} | 100 | - | ceph_pool_objects{cluster="mycluster", pool_id="2"} | 200 | - | ceph_pool_objects{cluster="mycluster", pool_id="3"} | 300 | - | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 100 | - | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 150 | - | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 150 | + | ceph_pool_objects{job="ceph", cluster="mycluster", pool_id="1"} | 100 | + | ceph_pool_objects{job="ceph", cluster="mycluster", pool_id="2"} | 200 | + | ceph_pool_objects{job="ceph", cluster="mycluster", pool_id="3"} | 300 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 100 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 150 | + | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 150 | Then Grafana panel `Objects Per Pool` with legend `{{name}}` shows: | metrics | values | - | {cluster="mycluster", name="pool1", pool_id="1"} | 10000 | - | {cluster="mycluster", name="pool2", pool_id="2"} | 30000 | - | {cluster="mycluster", name="pool3", pool_id="3"} | 45000| + | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 10000 | + | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 30000 | + | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 45000| Scenario: "Test OSD Type Count" Given the following series: | metrics | values | - | ceph_pool_objects{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pool_objects{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pool_objects{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pool_objects{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `OSD Type Count` with legend `Total` shows: | metrics | values | | {} | 30 | @@ -325,8 +327,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Backfill Toofull" Given the following series: | metrics | values | - | ceph_pg_backfill_toofull{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_backfill_toofull{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_backfill_toofull{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_backfill_toofull{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Backfill Toofull` shows: | metrics | values | | {} | 30 | @@ -334,8 +336,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Remapped" Given the following series: | metrics | values | - | ceph_pg_remapped{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_remapped{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_remapped{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_remapped{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Remapped` shows: | metrics | values | | {} | 30 | @@ -343,8 +345,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Backfill" 
Given the following series: | metrics | values | - | ceph_pg_backfill{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_backfill{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_backfill{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_backfill{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Backfill` shows: | metrics | values | | {} | 30 | @@ -352,8 +354,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Peered" Given the following series: | metrics | values | - | ceph_pg_peered{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_peered{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_peered{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_peered{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Peered` shows: | metrics | values | | {} | 30 | @@ -361,8 +363,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Down" Given the following series: | metrics | values | - | ceph_pg_down{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_down{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_down{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_down{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Down` shows: | metrics | values | | {} | 30 | @@ -370,8 +372,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Repair" Given the following series: | metrics | values | - | ceph_pg_repair{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_repair{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_repair{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_repair{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Repair` shows: | metrics | values | | {} | 30 | @@ -379,8 +381,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Recovering" Given the following series: | metrics | values | - | ceph_pg_recovering{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_recovering{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_recovering{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_recovering{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Recovering` shows: | metrics | values | | {} | 30 | @@ -388,8 +390,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Deep" Given the following series: | metrics | values | - | ceph_pg_deep{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_deep{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_deep{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_deep{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Deep` shows: | metrics | values | | {} | 30 | @@ -397,8 +399,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Wait Backfill" Given the following series: | metrics | values | - | ceph_pg_wait_backfill{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_wait_backfill{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_wait_backfill{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_wait_backfill{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Wait Backfill` shows: | metrics | values | | {} | 30 | @@ -406,8 +408,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Creating" Given the following series: | metrics | values | - | 
ceph_pg_creating{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_creating{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_creating{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_creating{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Creating` shows: | metrics | values | | {} | 30 | @@ -415,8 +417,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Forced Recovery" Given the following series: | metrics | values | - | ceph_pg_forced_recovery{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_forced_recovery{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_forced_recovery{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_forced_recovery{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Forced Recovery` shows: | metrics | values | | {} | 30 | @@ -424,8 +426,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Forced Backfill" Given the following series: | metrics | values | - | ceph_pg_forced_backfill{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_forced_backfill{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_forced_backfill{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_forced_backfill{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Forced Backfill` shows: | metrics | values | | {} | 30 | @@ -433,8 +435,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Incomplete" Given the following series: | metrics | values | - | ceph_pg_incomplete{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_incomplete{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_incomplete{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_incomplete{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Incomplete` shows: | metrics | values | | {} | 30 | @@ -442,8 +444,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test PGs State Undersized" Given the following series: | metrics | values | - | ceph_pg_undersized{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_undersized{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `PGs State` with legend `Undersized` shows: | metrics | values | | {} | 30 | @@ -451,8 +453,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Stuck PGs Undersized" Given the following series: | metrics | values | - | ceph_pg_undersized{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_undersized{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `Stuck PGs` with legend `Undersized` shows: | metrics | values | | {} | 30 | @@ -460,8 +462,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Stuck PGs Stale" Given the following series: | metrics | values | - | ceph_pg_stale{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_stale{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_stale{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_stale{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `Stuck PGs` with legend `Stale` shows: | metrics | values | | {} | 30 | @@ -469,8 +471,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test 
Stuck PGs Degraded" Given the following series: | metrics | values | - | ceph_pg_degraded{cluster="mycluster", osd="osd.0"} | 10 | - | ceph_pg_degraded{cluster="mycluster", osd="osd.1"} | 20 | + | ceph_pg_degraded{job="ceph", cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_degraded{job="ceph", cluster="mycluster", osd="osd.1"} | 20 | Then Grafana panel `Stuck PGs` with legend `Degraded` shows: | metrics | values | | {} | 30 | @@ -478,8 +480,8 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Recovery Operations" Given the following series: | metrics | values | - | ceph_osd_recovery_ops{cluster="mycluster", osd="osd.0"}| 250 200 | - | ceph_osd_recovery_ops{cluster="mycluster", osd="osd.1"} | 800 100 | + | ceph_osd_recovery_ops{job="ceph", cluster="mycluster", osd="osd.0"}| 250 200 | + | ceph_osd_recovery_ops{job="ceph", cluster="mycluster", osd="osd.1"} | 800 100 | When variable `interval` is `120s` Then Grafana panel `Recovery Operations` with legend `OPS` shows: | metrics | values | @@ -488,39 +490,49 @@ Feature: Ceph Cluster Dashboard Scenario: "Test Ceph Versions OSD" Given the following series: | metrics | values | - | ceph_osd_metadata{cluster="mycluster", osd="osd.0"}| 17 | + | ceph_osd_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", ceph_daemon="osd.0", device_class="ssd"} | 1 | + | ceph_osd_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", ceph_daemon="osd.1", device_class="hdd"} | 1 | Then Grafana panel `Ceph Versions` with legend `OSD Services` shows: | metrics | values | - | {} | 1 | + | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 | + | {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 | Scenario: "Test Ceph Versions Mon" Given the following series: | metrics | values | - | ceph_mon_metadata{cluster="mycluster", osd="osd.0"}| 17 | + | ceph_mon_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", hostname="somehostname"}| 1 | + | ceph_mon_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", hostname="someotherhostname"}| 1 | Then Grafana panel `Ceph Versions` with legend `Mon Services` shows: | metrics | values | - | {} | 1 | + | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 | + | {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 | Scenario: "Test Ceph Versions MDS" Given the following series: | metrics | values | - | ceph_mds_metadata{cluster="mycluster", osd="osd.0"}| 17 | + | ceph_mds_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", hostname="someotherhostname", ceph_daemon="mds.someotherhostname",fs_id="1"}| 1 | + | ceph_mds_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", hostname="somehostname", ceph_daemon="mds.somehostname",fs_id="1"}| 1 | Then Grafana panel `Ceph Versions` with legend `MDS Services` shows: | metrics | values | - | {} | 1 | + | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 | + | 
{ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 | Scenario: "Test Ceph Versions RGW" Given the following series: | metrics | values | - | ceph_rgw_metadata{cluster="mycluster", osd="osd.0"}| 17 | + | ceph_rgw_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", ceph_daemon="rgw.somehostname", hostname="somehostname"}| 1 | + | ceph_rgw_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", ceph_daemon="rgw.someotherhostname", hostname="someotherhostname"}| 1 | Then Grafana panel `Ceph Versions` with legend `RGW Services` shows: | metrics | values | - | {} | 1 | + | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 | + | {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 | Scenario: "Test Ceph Versions MGR" Given the following series: | metrics | values | - | ceph_mgr_metadata{cluster="mycluster", osd="osd.0"}| 17 | + | ceph_mgr_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", ceph_daemon="mgr.somehostname", hostname="somehostname"}| 1 | + | ceph_mgr_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", ceph_daemon="mgr.someotherhostname", hostname="someotherhostname"}| 1 | Then Grafana panel `Ceph Versions` with legend `MGR Services` shows: | metrics | values | - | {} | 1 |
\ No newline at end of file + | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 | + | {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/environment.py b/monitoring/ceph-mixin/tests_dashboards/features/environment.py index aa040ef9411..921474015c8 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/environment.py +++ b/monitoring/ceph-mixin/tests_dashboards/features/environment.py @@ -113,6 +113,7 @@ def step_impl(context, panel_name, legend): legend = '' query_id = panel_name + '-' + legend if query_id not in global_context.query_map: + print(f"QueryMap: {global_context.query_map}") raise KeyError((f'Query with legend {legend} in panel "{panel_name}"' 'couldn\'t be found')) diff --git a/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature b/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature index 51e3c5819ba..e1a543dab34 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature +++ b/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature @@ -3,9 +3,9 @@ Feature: Host Details Dashboard Scenario: "Test OSD" Given the following series: | metrics | values | - | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.0",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 | - | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.1",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 | - | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.2",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 | + | ceph_osd_metadata{job="ceph",cluster="mycluster",back_iface="",ceph_daemon="osd.0",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 | + | ceph_osd_metadata{job="ceph",cluster="mycluster",back_iface="",ceph_daemon="osd.1",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 | + | ceph_osd_metadata{job="ceph",cluster="mycluster",back_iface="",ceph_daemon="osd.2",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 | When variable `ceph_hosts` is `127.0.0.1` Then Grafana panel `OSDs` with legend `EMPTY` shows: | metrics | values | @@ -16,54 +16,54 @@ Scenario: "Test OSD" Scenario: "Test Disk IOPS - Writes - Several OSDs per device" Given the following series: | metrics | values 
| - | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | node_disk_writes_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_writes_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `ceph_hosts` is `localhost` Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows: | metrics | values | - | {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 | - | {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 | + | {job="node",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 | + | {job="node",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 | Scenario: "Test Disk IOPS - Writes - Single OSD per device" Given the following series: | metrics | values | - | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | node_disk_writes_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_writes_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `ceph_hosts` is `localhost` Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows: | metrics | values | - | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | - | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 | + | {job="node", ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | + | {job="node", ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 | Scenario: "Test Disk IOPS - Reads - Several OSDs per device" Given the following series: | metrics | values | - | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | 
node_disk_reads_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_reads_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `ceph_hosts` is `localhost` Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows: | metrics | values | - | {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 | - | {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 | + | {job="node",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 | + | {job="node",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 | Scenario: "Test Disk IOPS - Reads - Single OSD per device" Given the following series: | metrics | values | - | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | node_disk_reads_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_reads_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `ceph_hosts` is `localhost` Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows: | metrics | values | - | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | - | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 | + | {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | + | {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 | # IOPS Panel - end @@ -72,44 +72,44 @@ Scenario: "Test Disk IOPS - Reads - Single OSD per device" Scenario: "Test disk throughput - read" Given the following series: | metrics | values | - | node_disk_read_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_read_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | node_disk_read_bytes_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_read_bytes_total{job="node",device="sdb",instance="localhost:9100"} | 100+600x1 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 
1.0 | When variable `ceph_hosts` is `localhost` Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) read` shows: | metrics | values | - | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | - | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 | + | {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | + | {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 | Scenario: "Test disk throughput - write" Given the following series: | metrics | values | - | node_disk_written_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_written_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | node_disk_written_bytes_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_written_bytes_total{job="node",device="sdb",instance="localhost:9100"} | 100+600x1 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `ceph_hosts` is `localhost` Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) write` shows: | metrics | values | - | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | - | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 | + | {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | + | {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 | # Node disk bytes written/read panel - end Scenario: "Test $ceph_hosts Disk Latency panel" Given the following series: | metrics | values | - | node_disk_write_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_write_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | - | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | - | node_disk_read_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_read_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | - | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | node_disk_write_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_write_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | node_disk_writes_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_writes_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | 
node_disk_read_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_read_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | node_disk_reads_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_reads_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `ceph_hosts` is `localhost` Then Grafana panel `$ceph_hosts Disk Latency` with legend `{{device}}({{ceph_daemon}})` shows: | metrics | values | @@ -119,13 +119,13 @@ Scenario: "Test $ceph_hosts Disk Latency panel" Scenario: "Test $ceph_hosts Disk utilization" Given the following series: | metrics | values | - | node_disk_io_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_io_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | node_disk_io_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_io_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `ceph_hosts` is `localhost` Then Grafana panel `$ceph_hosts Disk utilization` with legend `{{device}}({{ceph_daemon}})` shows: | metrics | values | - | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 100 | - | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 100 | + | {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 100 | + | {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 100 | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature b/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature index 6c5eceaed3f..f2945d423dd 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature +++ b/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature @@ -3,10 +3,10 @@ Feature: Hosts Overview Dashboard Scenario: "Test network load succeeds" Given the following series: | metrics | values | - | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | - | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | - | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | - | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | + | node_network_receive_bytes{job="node",instance="127.0.0.1", device="eth1"} | 10 100 | + | node_network_receive_bytes{job="node",instance="127.0.0.1", device="eth2"} | 10 100 | + | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="eth1"} | 10 100 | + | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="eth2"} | 10 100 | When variable 
`osd_hosts` is `127.0.0.1` Then Grafana panel `Network Load` with legend `EMPTY` shows: | metrics | values | @@ -15,13 +15,13 @@ Scenario: "Test network load succeeds" Scenario: "Test network load with bonding succeeds" Given the following series: | metrics | values | - | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 | - | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 | - | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 | - | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 | - | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 | - | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 | - | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | + | node_network_receive_bytes{job="node",instance="127.0.0.1", device="eth1"} | 10 100 200 | + | node_network_receive_bytes{job="node",instance="127.0.0.1", device="eth2"} | 10 100 200 | + | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="eth1"} | 10 100 200 | + | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="eth2"} | 10 100 200 | + | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="bond0"} | 20 200 300 | + | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="bond0"} | 20 200 300 | + | node_bonding_slaves{job="node",instance="127.0.0.1", master="bond0"} | 2 | When variable `osd_hosts` is `127.0.0.1` Then Grafana panel `Network Load` with legend `EMPTY` shows: | metrics | values | @@ -30,11 +30,11 @@ Scenario: "Test network load with bonding succeeds" Scenario: "Test AVG Disk Utilization" Given the following series: | metrics | values | - | node_disk_io_time_seconds_total{device="sda",instance="localhost:9100"} | 10+60x1 | - | node_disk_io_time_seconds_total{device="sdb",instance="localhost:9100"} | 10+60x1 | - | node_disk_io_time_seconds_total{device="sdc",instance="localhost:9100"} | 10 2000 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | node_disk_io_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_io_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 | + | node_disk_io_time_seconds_total{job="node",device="sdc",instance="localhost:9100"} | 10 2000 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `osd_hosts` is `localhost` Then Grafana panel `AVG Disk Utilization` with legend `EMPTY` shows: | metrics | values | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature b/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature index 0d6ca8b1715..f25167aaf66 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature +++ b/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature @@ -7,8 +7,8 @@ Scenario: "Test Physical Device Latency for $osd - Reads" | node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 60 | | node_disk_read_time_seconds_total{device="sda",instance="localhost"} | 100 600 | | 
node_disk_read_time_seconds_total{device="sdb",instance="localhost"} | 100 600 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `osd` is `osd.0` Then Grafana panel `Physical Device Latency for $osd` with legend `{{instance}}/{{device}} Reads` shows: | metrics | values | @@ -21,8 +21,8 @@ Scenario: "Test Physical Device Latency for $osd - Writes" | node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 60 | | node_disk_write_time_seconds_total{device="sda",instance="localhost"} | 100 600 | | node_disk_write_time_seconds_total{device="sdb",instance="localhost"} | 100 600 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `osd` is `osd.0` Then Grafana panel `Physical Device Latency for $osd` with legend `{{instance}}/{{device}} Writes` shows: | metrics | values | @@ -33,8 +33,8 @@ Scenario: "Test Physical Device R/W IOPS for $osd - Writes" | metrics | values | | node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 100 | | node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 100 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `osd` is `osd.0` Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Writes` shows: | metrics | values | @@ -45,8 +45,8 @@ Scenario: "Test Physical Device R/W IOPS for $osd - Reads" | metrics | values | | node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 100 | | node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 100 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `osd` is `osd.0` Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Reads` shows: | metrics | 
values | @@ -57,8 +57,8 @@ Scenario: "Test Physical Device R/W Bytes for $osd - Reads" | metrics | values | | node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 100 | | node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 100 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `osd` is `osd.0` Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Reads` shows: | metrics | values | @@ -69,8 +69,8 @@ Scenario: "Test Physical Device R/W Bytes for $osd - Writes" | metrics | values | | node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 100 | | node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 100 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `osd` is `osd.0` Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Writes` shows: | metrics | values | @@ -80,8 +80,8 @@ Scenario: "Test Physical Device Util% for $osd" Given the following series: | metrics | values | | node_disk_io_time_seconds_total{device="sda",instance="localhost:9100"} | 10 100 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | - | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | When variable `osd` is `osd.0` Then Grafana panel `Physical Device Util% for $osd` with legend `{{device}} on {{instance}}` shows: | metrics | values | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature b/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature index 78d30641996..cb3bf876464 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature +++ b/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature @@ -3,12 +3,12 @@ Feature: OSD Overview Scenario: "Test OSD onode Hits Ratio" Given the following series: | metrics | values | - | ceph_bluestore_onode_hits{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"} | 5255 | - | ceph_bluestore_onode_hits{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"} | 5419 | - | ceph_bluestore_onode_hits{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"} | 5242 | - | ceph_bluestore_onode_misses{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"} | 202 | - | 
ceph_bluestore_onode_misses{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"} | 247 | - | ceph_bluestore_onode_misses{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"} | 234 | + | ceph_bluestore_onode_hits{ceph_daemon="osd.0",instance="ceph:9283",job="ceph",cluster="mycluster"} | 5255 | + | ceph_bluestore_onode_hits{ceph_daemon="osd.1",instance="ceph:9283",job="ceph",cluster="mycluster"} | 5419 | + | ceph_bluestore_onode_hits{ceph_daemon="osd.2",instance="ceph:9283",job="ceph",cluster="mycluster"} | 5242 | + | ceph_bluestore_onode_misses{ceph_daemon="osd.0",instance="ceph:9283",job="ceph",cluster="mycluster"} | 202 | + | ceph_bluestore_onode_misses{ceph_daemon="osd.1",instance="ceph:9283",job="ceph",cluster="mycluster"} | 247 | + | ceph_bluestore_onode_misses{ceph_daemon="osd.2",instance="ceph:9283",job="ceph",cluster="mycluster"} | 234 | Then Grafana panel `OSD onode Hits Ratio` with legend `EMPTY` shows: | metrics | values | | {} | 9.588529429483704E-01 | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature b/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature index f819fc6dc41..db5fb4e9017 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature +++ b/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature @@ -3,9 +3,9 @@ Feature: RGW Host Detail Dashboard Scenario: "Test $rgw_servers GET/PUT Latencies - GET" Given the following series: | metrics | values | - | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 | - | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 50 100 | + | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `$rgw_servers GET/PUT Latencies` with legend `GET {{ceph_daemon}}` shows: @@ -15,9 +15,9 @@ Scenario: "Test $rgw_servers GET/PUT Latencies - GET" Scenario: "Test $rgw_servers GET/PUT Latencies - PUT" Given the following series: | metrics | values | - | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 | - | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 15 35 55 | + | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 30 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `$rgw_servers GET/PUT Latencies` with legend `PUT {{ceph_daemon}}` shows: @@ -27,113 +27,113 @@ Scenario: "Test $rgw_servers GET/PUT Latencies - PUT" Scenario: 
"Test Bandwidth by HTTP Operation - GET" Given the following series: | metrics | values | - | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 | - | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 | + | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 | + | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.1` Then Grafana panel `Bandwidth by HTTP Operation` with legend `GETs {{ceph_daemon}}` shows: | metrics | values | - | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1.5 | + | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1.5 | Scenario: "Test Bandwidth by HTTP Operation - PUT" Given the following series: | metrics | values | - | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 | - | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 | + | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 5 20 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.1` Then Grafana panel `Bandwidth by HTTP Operation` with legend `PUTs {{ceph_daemon}}` shows: | metrics | values | - | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 7.5E-01 | + | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 7.5E-01 | Scenario: "Test HTTP Request Breakdown - Requests Failed" Given the following series: | metrics | values | - | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 5 7 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 5 7 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `HTTP Request Breakdown` with legend `Requests Failed {{ceph_daemon}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1E-01 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1E-01 | Scenario: "Test HTTP Request Breakdown - GET" Given the following series: | metrics | values | - | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 100 150 170 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 
1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `HTTP Request Breakdown` with legend `GETs {{ceph_daemon}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.1666666666666667 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1.1666666666666667 | Scenario: "Test HTTP Request Breakdown - PUT" Given the following series: | metrics | values | - | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 70 90 160 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `HTTP Request Breakdown` with legend `PUTs {{ceph_daemon}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.5 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1.5 | Scenario: "Test HTTP Request Breakdown - Other" Given the following series: | metrics | values | - | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 175 250 345 | - | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 | - | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 175 250 345 | + | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 100 150 170 | + | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 70 90 160 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `HTTP Request Breakdown` with legend `Other {{ceph_daemon}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | .16666666666666652 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | .16666666666666652 | Scenario: "Test Workload Breakdown - Failures" Given the following series: | metrics | values | - | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 5 7 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 5 7 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `Workload Breakdown` with legend `Failures {{ceph_daemon}}` shows: | metrics | values | - | 
{ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1E-01 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1E-01 | Scenario: "Test Workload Breakdown - GETs" Given the following series: | metrics | values | - | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 100 150 170 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `Workload Breakdown` with legend `GETs {{ceph_daemon}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.1666666666666667 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1.1666666666666667 | Scenario: "Test Workload Breakdown - PUTs" Given the following series: | metrics | values | - | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 70 90 160 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `Workload Breakdown` with legend `PUTs {{ceph_daemon}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.5 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1.5 | Scenario: "Test Workload Breakdown - Other" Given the following series: | metrics | values | - | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 175 250 345 | - | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 | - | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 175 250 345 | + | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 100 150 170 | + | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 70 90 160 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` And variable `rgw_servers` is `rgw.foo` Then Grafana panel `Workload Breakdown` with legend `Other (DELETE,LIST) {{ceph_daemon}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | .16666666666666652 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 
.16666666666666652 | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature b/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature index 14a6e5543a4..8d96dcdd610 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature +++ b/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature @@ -3,30 +3,30 @@ Feature: RGW Overview Dashboard Scenario: "Test Average GET Latencies" Given the following series: | metrics | values | - | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 | - | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 50 100 | + | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `GET {{rgw_host}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1.5 | Scenario: "Test Average PUT Latencies" Given the following series: | metrics | values | - | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 | - | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 15 35 55 | + | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 30 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `PUT {{rgw_host}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1 | Scenario: "Test Total Requests/sec by RGW Instance" Given the following series: | metrics | values | - | ceph_rgw_req{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 | - | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 | + | ceph_rgw_req{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 | + | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` Then Grafana panel `Total Requests/sec by RGW Instance` with legend 
`{{rgw_host}}` shows: | metrics | values | @@ -35,18 +35,18 @@ Scenario: "Test Total Requests/sec by RGW Instance" Scenario: "Test GET Latencies by RGW Instance" Given the following series: | metrics | values | - | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 | - | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 50 100 | + | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When interval is `30s` Then Grafana panel `GET Latencies by RGW Instance` with legend `{{rgw_host}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1.5 | Scenario: "Test Bandwidth Consumed by Type- GET" Given the following series: | metrics | values | - | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 | + | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 | When evaluation time is `1m` And interval is `30s` Then Grafana panel `Bandwidth Consumed by Type` with legend `GETs` shows: @@ -56,7 +56,7 @@ Scenario: "Test Bandwidth Consumed by Type- GET" Scenario: "Test Bandwidth Consumed by Type- PUT" Given the following series: | metrics | values | - | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 | + | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 5 20 50 | When evaluation time is `1m` And interval is `30s` Then Grafana panel `Bandwidth Consumed by Type` with legend `PUTs` shows: @@ -66,9 +66,9 @@ Scenario: "Test Bandwidth Consumed by Type- PUT" Scenario: "Test Bandwidth by RGW Instance" Given the following series: | metrics | values | - | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 | - | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 | - | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 | + | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 | + | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 5 20 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 | When evaluation time is `1m` And interval is `30s` Then Grafana panel `Bandwidth by RGW Instance` with legend `{{rgw_host}}` shows: @@ -78,14 +78,14 @@ Scenario: "Test Bandwidth by RGW Instance" Scenario: "Test PUT Latencies by RGW Instance" Given the following series: | metrics | values | - | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", 
job="ceph"} | 15 35 55 | - | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 | - | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 15 35 55 | + | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 30 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 | When evaluation time is `1m` And interval is `30s` Then Grafana panel `PUT Latencies by RGW Instance` with legend `{{rgw_host}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1 | Scenario: "Test Total backend responses by HTTP code" Given the following series: diff --git a/monitoring/ceph-mixin/tests_dashboards/features/self.feature b/monitoring/ceph-mixin/tests_dashboards/features/self.feature index 2b44ce0dcec..c45abf65f3b 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/self.feature +++ b/monitoring/ceph-mixin/tests_dashboards/features/self.feature @@ -9,7 +9,7 @@ Scenario: "Simple query works" | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 | | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 | - | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | + | node_bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces: | metrics | values | | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 100 | @@ -24,7 +24,7 @@ Scenario: "Query with evaluation time" | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 | | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 | - | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | + | node_bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | When evaluation time is `0m` Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces: | metrics | values | @@ -40,7 +40,7 @@ Scenario: "Query with evaluation time and variable value" | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 | | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 | - | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | + | node_bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | When evaluation time is `0m` And variable `osd_hosts` is `127.0.0.1` Then query `node_network_transmit_bytes{instance="$osd_hosts"} > 0` produces: @@ -57,7 +57,7 @@ Scenario: "Query with interval time" | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 | | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 300 | | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 300 | - | 
bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+      | node_bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
       When evaluation time is `2h`
       And evaluation interval is `1h`
       And interval is `1h`
diff --git a/monitoring/ceph-mixin/tests_dashboards/util.py b/monitoring/ceph-mixin/tests_dashboards/util.py
index c6369c5f834..734216eb94c 100644
--- a/monitoring/ceph-mixin/tests_dashboards/util.py
+++ b/monitoring/ceph-mixin/tests_dashboards/util.py
@@ -75,6 +75,7 @@ def add_dashboard_variables(data: Dict[str, Any], dashboard_data: Dict[str, Any]
 def add_default_dashboards_variables(data: Dict[str, Any]) -> None:
     data['variables']['job'] = 'ceph'
+    data['variables']['cluster'] = 'mycluster'
     data['variables']['job_haproxy'] = 'haproxy'
     data['variables']['__rate_interval'] = '1m'
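For context on how the new `cluster` default is exercised by the feature tests above, here is a minimal illustrative sketch, not the mixin's actual test harness: it assumes the Behave steps substitute Grafana-style `$variable` placeholders, including the `cluster`/`mycluster` default added in util.py, into a panel's PromQL expression before evaluating it against the synthetic series. The `render_promql` helper and the sample query are hypothetical.

# Minimal sketch under the assumption described above; render_promql() and the
# sample query are illustrative, not code from tests_dashboards/.
from typing import Dict


def render_promql(expr: str, variables: Dict[str, str]) -> str:
    """Replace $name placeholders with test values (longest names first, so
    e.g. $job_haproxy is not clobbered by $job)."""
    for name in sorted(variables, key=len, reverse=True):
        expr = expr.replace(f'${name}', variables[name])
    return expr


defaults = {
    'job': 'ceph',
    'cluster': 'mycluster',    # new default introduced by this change
    'job_haproxy': 'haproxy',
    '__rate_interval': '1m',
    'rgw_servers': 'rgw.foo',  # scenario-level variable, as in the scenarios above
}

# A made-up expression in the same style as the RGW panels tested above.
query = ('rate(ceph_rgw_failed_req{job=~"$job", cluster=~"$cluster"}[$__rate_interval]) '
         '* on (instance_id) group_left (ceph_daemon) '
         'ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", job=~"$job", cluster=~"$cluster"}')

print(render_promql(query, defaults))
# rate(ceph_rgw_failed_req{job=~"ceph", cluster=~"mycluster"}[1m]) * on (instance_id) ...

Under that assumption, a synthetic series without the cluster label would simply not match a query that carries a cluster matcher, which is why every series and every expected result in these scenarios gains cluster="mycluster".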