diff options
Diffstat (limited to 'monitoring')
4 files changed, 3092 insertions, 2 deletions
diff --git a/monitoring/ceph-mixin/dashboards.libsonnet b/monitoring/ceph-mixin/dashboards.libsonnet index 8cb0bcceaf7..82e1888e036 100644 --- a/monitoring/ceph-mixin/dashboards.libsonnet +++ b/monitoring/ceph-mixin/dashboards.libsonnet @@ -8,5 +8,6 @@ (import 'dashboards/rgw.libsonnet') + (import 'dashboards/ceph-cluster.libsonnet') + (import 'dashboards/rgw-s3-analytics.libsonnet') + + (import 'dashboards/multi-cluster.libsonnet') + { _config:: $._config }, } diff --git a/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet b/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet new file mode 100644 index 00000000000..ec725f4dfa2 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet @@ -0,0 +1,964 @@ +local g = import 'grafonnet/grafana.libsonnet'; + +(import 'utils.libsonnet') { + 'multi-cluster-overview.json': + $.dashboardSchema( + 'Ceph - Multi-cluster', + '', + 'BnxelG7Sz', + 'now-1h', + '30s', + 22, + $._config.dashboardTags, + '' + ) + .addAnnotation( + $.addAnnotationSchema( + 1, + '-- Grafana --', + true, + true, + 'rgba(0, 211, 255, 1)', + 'Annotations & Alerts', + 'dashboard' + ) + ) + .addTemplate( + g.template.datasource('DS_PROMETHEUS', 'prometheus', 'default', label='Data Source') + ) + + .addTemplate( + $.addTemplateSchema('Cluster', + '$DS_PROMETHEUS', + 'label_values(ceph_health_status, cluster)', + 2, + true, + 0, + null, + '', + current='All') + ) + .addPanels([ + $.addRowSchema(false, true, 'Clusters') + { gridPos: { x: 0, y: 1, w: 24, h: 1 } }, + $.addStatPanel( + title='Status', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 0, y: 2, w: 5, h: 7 }, + graphMode='none', + colorMode='value', + orientation='auto', + justifyMode='center', + thresholdsMode='absolute', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'text', value: null }, + ]) + .addOverrides( + [ + { + matcher: { id: 'byName', options: 'Warning' }, + properties: [ + { + id: 'thresholds', + value: { mode: 'absolute', steps: [{ color: 
'text', value: null }, { color: 'semi-dark-yellow', value: 1 }] }, + }, + ], + }, + { + matcher: { id: 'byName', options: 'Error' }, + properties: [ + { + id: 'thresholds', + value: { mode: 'absolute', steps: [{ color: 'text', value: null }, { color: 'semi-dark-red', value: 1 }] }, + }, + ], + }, + { + matcher: { id: 'byName', options: 'Healthy' }, + properties: [ + { + id: 'thresholds', + value: { mode: 'absolute', steps: [{ color: 'text', value: null }, { color: 'semi-dark-green', value: 1 }] }, + }, + ], + }, + ] + ) + .addTargets([ + $.addTargetSchema( + expr='count(ceph_health_status==0) or vector(0)', + datasource='${DS_PROMETHEUS}', + legendFormat='Healthy', + ), + $.addTargetSchema( + expr='count(ceph_health_status==1)', + datasource='${DS_PROMETHEUS}', + legendFormat='Warning' + ), + $.addTargetSchema( + expr='count(ceph_health_status==2)', + datasource='${DS_PROMETHEUS}', + legendFormat='Error' + ), + ]), + + $.addTableExtended( + datasource='${DS_PROMETHEUS}', + title='Details', + gridPosition={ h: 7, w: 19, x: 5, y: 2 }, + options={ + footer: { + fields: '', + reducer: ['sum'], + countRows: false, + enablePagination: false, + show: false, + }, + frameIndex: 1, + showHeader: true, + }, + custom={ align: 'left', cellOptions: { type: 'color-text' }, filterable: false, inspect: false }, + thresholds={ + mode: 'absolute', + steps: [ + { color: 'text' }, + ], + }, + overrides=[ + { + matcher: { id: 'byName', options: 'Value #A' }, + properties: [ + { id: 'mappings', value: [{ options: { '0': { color: 'semi-dark-green', index: 2, text: 'Healthy' }, '1': { color: 'semi-dark-yellow', index: 0, text: 'Warning' }, '2': { color: 'semi-dark-red', index: 1, text: 'Error' } }, type: 'value' }] }, + ], + }, + { + matcher: { id: 'byName', options: 'IOPS' }, + properties: [ + { id: 'unit', value: 'ops' }, + ], + }, + { + matcher: { id: 'byName', options: 'Value #E' }, + properties: [ + { id: 'unit', value: 'bytes' }, + ], + }, + { + matcher: { id: 'byName', options: 
'Capacity Used' }, + properties: [ + { id: 'unit', value: 'bytes' }, + ], + }, + { + matcher: { id: 'byName', options: 'Cluster' }, + properties: [ + { id: 'links', value: [{ title: '', url: '/d/GQ3MHvnIz/ceph-cluster-new?var-cluster=${__data.fields.Cluster}&${DS_PROMETHEUS:queryparam}' }] }, + ], + }, + { + matcher: { id: 'byName', options: 'Alerts' }, + properties: [ + { id: 'mappings', value: [{ options: { match: null, result: { index: 0, text: '0' } }, type: 'special' }] }, + ], + }, + ], + pluginVersion='9.4.7' + ) + .addTransformations([ + { + id: 'joinByField', + options: { byField: 'cluster', mode: 'outer' }, + }, + { + id: 'organize', + options: { + excludeByName: { + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + 'Time 6': true, + 'Value #B': true, + '__name__ 1': true, + '__name__ 2': true, + '__name__ 3': true, + ceph_daemon: true, + device_class: true, + hostname: true, + 'instance 1': true, + 'instance 2': true, + 'instance 3': true, + 'job 1': true, + 'job 2': true, + 'job 3': true, + 'replica 1': true, + 'replica 2': true, + 'replica 3': true, + }, + indexByName: { + 'Time 1': 8, + 'Time 2': 13, + 'Time 3': 21, + 'Time 4': 7, + 'Time 5': 22, + 'Time 6': 23, + 'Value #A': 1, + 'Value #B': 20, + 'Value #C': 3, + 'Value #D': 4, + 'Value #E': 5, + 'Value #F': 6, + '__name__ 1': 9, + '__name__ 2': 14, + '__name__ 3': 24, + ceph_daemon: 15, + ceph_version: 2, + cluster: 0, + device_class: 25, + hostname: 16, + 'instance 1': 10, + 'instance 2': 17, + 'instance 3': 26, + 'job 1': 11, + 'job 2': 18, + 'job 3': 27, + 'replica 1': 12, + 'replica 2': 19, + 'replica 3': 28, + }, + renameByName: { + 'Value #A': 'Status', + 'Value #C': 'Alerts', + 'Value #D': 'IOPS', + 'Value #E': 'Throughput', + 'Value #F': 'Capacity Used', + ceph_version: 'Version', + cluster: 'Cluster', + }, + }, + }, + ]).addTargets([ + $.addTargetSchema( + expr='ceph_health_status', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + 
format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='__auto', + range=false, + ), + $.addTargetSchema( + expr='ceph_mgr_metadata', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='__auto', + range=false, + ), + $.addTargetSchema( + expr='count(ALERTS{alertstate="firing", cluster=~"$Cluster"})', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='__auto', + range=false, + ), + $.addTargetSchema( + expr='sum by (cluster) (irate(ceph_pool_wr[$__interval])) \n+ sum by (cluster) (irate(ceph_pool_rd[$__interval])) ', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='__auto', + range=false, + ), + $.addTargetSchema( + expr='sum by (cluster) (irate(ceph_pool_rd_bytes[$__interval]))\n+ sum by (cluster) (irate(ceph_pool_wr_bytes[$__interval])) ', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='__auto', + range=false, + ), + $.addTargetSchema( + expr='ceph_cluster_by_class_total_used_bytes', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='__auto', + range=false, + ), + ]), + + + $.addRowSchema(false, true, 'Overview') + { gridPos: { x: 0, y: 9, w: 24, h: 1 } }, + $.addStatPanel( + title='Cluster Count', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 0, y: 10, w: 3, h: 4 }, + graphMode='none', + colorMode='value', + orientation='auto', + justifyMode='center', + thresholdsMode='absolute', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'text', value: null }, + { color: 'red', value: 80 
}, + ]) + .addTargets([ + $.addTargetSchema( + expr='count(ceph_health_status{cluster=~"$Cluster"}) or vector(0)', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='__auto', + range=false, + ), + ]), + + $.addGaugePanel( + title='Capacity Used', + gridPosition={ h: 8, w: 4, x: 3, y: 10 }, + unit='percentunit', + max=1, + min=0, + interval='1m', + pluginVersion='9.4.7' + ) + .addThresholds([ + { color: 'green', value: null }, + { color: 'semi-dark-yellow', value: 0.75 }, + { color: 'red', value: 0.85 }, + ]) + .addTarget($.addTargetSchema( + expr='sum(ceph_cluster_total_used_bytes{cluster=~"$Cluster"}) / sum(ceph_cluster_total_bytes{cluster=~"$Cluster"})', + instant=true, + legendFormat='Used', + datasource='${DS_PROMETHEUS}', + )), + + $.addStatPanel( + title='Total Capacity', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 7, y: 10, w: 3, h: 4 }, + graphMode='area', + colorMode='none', + orientation='auto', + justifyMode='auto', + thresholdsMode='absolute', + unit='bytes', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ + $.addTargetSchema( + expr='sum(ceph_cluster_total_bytes{cluster=~"$Cluster"})', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=false, + interval='', + legendFormat='__auto', + range=true, + ), + ]), + + $.addStatPanel( + title='OSDs', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 10, y: 10, w: 3, h: 4 }, + graphMode='area', + colorMode='none', + orientation='auto', + justifyMode='auto', + thresholdsMode='absolute', + unit='none', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ + $.addTargetSchema( + expr='count(ceph_osd_metadata{cluster=~"$Cluster"})', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + 
exemplar=false, + instant=false, + interval='', + legendFormat='__auto', + range=true, + ), + ]), + + $.addStatPanel( + title='Hosts', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 13, y: 10, w: 3, h: 4 }, + graphMode='area', + colorMode='none', + orientation='auto', + justifyMode='auto', + thresholdsMode='absolute', + unit='none', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ + $.addTargetSchema( + expr='count(sum by (hostname) (ceph_osd_metadata{cluster=~"$Cluster"}))', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=false, + interval='', + legendFormat='__auto', + range=true, + ), + ]), + + $.addStatPanel( + title='Client IOPS', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 16, y: 10, w: 4, h: 4 }, + graphMode='area', + colorMode='none', + orientation='auto', + justifyMode='center', + thresholdsMode='absolute', + unit='ops', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ + $.addTargetSchema( + expr='sum(irate(ceph_pool_wr{cluster=~"$Cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + hide=false, + exemplar=false, + instant=false, + legendFormat='Write', + range=true, + ), + $.addTargetSchema( + expr='sum(irate(ceph_pool_rd{cluster=~"$Cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + hide=false, + exemplar=false, + legendFormat='Read', + range=true, + ), + ]), + + $.addStatPanel( + title='OSD Latencies', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 20, y: 10, w: 4, h: 4 }, + graphMode='area', + colorMode='none', + orientation='auto', + justifyMode='center', + thresholdsMode='absolute', + unit='ms', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ + $.addTargetSchema( + expr='avg(ceph_osd_apply_latency_ms{cluster=~"$Cluster"})', + 
datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + hide=false, + exemplar=false, + instant=false, + legendFormat='Apply', + range=true, + ), + $.addTargetSchema( + expr='avg(ceph_osd_commit_latency_ms{cluster=~"$Cluster"})', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + hide=false, + exemplar=false, + legendFormat='Commit', + range=true, + ), + ]), + + $.addStatPanel( + title='Alert Count', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 0, y: 14, w: 3, h: 4 }, + graphMode='none', + colorMode='value', + orientation='auto', + justifyMode='center', + thresholdsMode='absolute', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'text', value: null }, + { color: 'red', value: 80 }, + ]) + .addTargets([ + $.addTargetSchema( + expr='count(ALERTS{alertstate="firing", cluster=~"$Cluster"}) or vector(0)', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='__auto', + range=false, + ), + ]), + + $.addStatPanel( + title='Total Used', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 7, y: 14, w: 3, h: 4 }, + graphMode='area', + colorMode='none', + orientation='auto', + justifyMode='auto', + thresholdsMode='absolute', + unit='bytes', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ + $.addTargetSchema( + expr='sum(ceph_cluster_total_used_bytes{cluster=~"$Cluster"})', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=false, + interval='', + legendFormat='__auto', + range=true, + ), + ]), + + $.addStatPanel( + title='Capacity Prediction', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 10, y: 14, w: 3, h: 4 }, + graphMode='none', + colorMode='none', + orientation='auto', + justifyMode='auto', + unit='s', + thresholdsMode='absolute', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: 
null }, + ]) + .addTargets([ + $.addTargetSchema( + expr='predict_linear(avg(increase(ceph_cluster_total_used_bytes{cluster=~"${Cluster}"}[1d]))[7d:1h],120)', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + hide=false, + exemplar=false, + legendFormat='__auto', + range=true, + ), + ]), + + $.addStatPanel( + title='Pools', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 13, y: 14, w: 3, h: 4 }, + graphMode='area', + colorMode='none', + orientation='auto', + justifyMode='auto', + thresholdsMode='absolute', + unit='none', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ + $.addTargetSchema( + expr='count(ceph_pool_metadata{cluster=~"$Cluster"})', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=false, + interval='', + legendFormat='__auto', + range=true, + ), + ]), + + $.addStatPanel( + title='Client Bandwidth', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 16, y: 14, w: 4, h: 4 }, + graphMode='area', + colorMode='none', + orientation='auto', + justifyMode='center', + thresholdsMode='absolute', + unit='binBps', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ /* Write series = ceph_pool_wr_bytes, Read series = ceph_pool_rd_bytes; same Write-then-Read order as the Client IOPS panel. */ + $.addTargetSchema( + expr='sum(irate(ceph_pool_wr_bytes{cluster=~"$Cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + hide=false, + exemplar=false, + instant=false, + legendFormat='Write', + range=true, + ), + $.addTargetSchema( + expr='sum(irate(ceph_pool_rd_bytes{cluster=~"$Cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + hide=false, + exemplar=false, + legendFormat='Read', + range=true, + ), + ]), + + $.addStatPanel( + title='Recovery Rate', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 20, y: 14, w: 4, h: 4 }, + graphMode='area', + colorMode='none', + orientation='auto', + justifyMode='center', + 
thresholdsMode='absolute', + unit='binBps', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ + $.addTargetSchema( + expr='sum(irate(ceph_osd_recovery_ops{cluster=~"$Cluster"}[$__interval]))', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + hide=false, + exemplar=false, + instant=false, + legendFormat='Write', + range=true, + ), + ]), + + + $.addRowSchema(false, true, 'Alerts', collapsed=true) + .addPanels([ + $.addStatPanel( + title='Status', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 0, y: 19, w: 5, h: 7 }, + graphMode='area', + colorMode='value', + orientation='auto', + justifyMode='center', + thresholdsMode='absolute', + pluginVersion='9.4.7', + ).addThresholds([ + { color: 'text', value: null }, + ]) + .addOverrides( + [ + { + matcher: { id: 'byName', options: 'Critical' }, + properties: [ + { + id: 'thresholds', + value: { mode: 'absolute', steps: [{ color: 'text', value: null }, { color: 'semi-dark-red', value: 1 }] }, + }, + ], + }, + { + matcher: { id: 'byName', options: 'Warning' }, + properties: [ + { + id: 'thresholds', + value: { mode: 'absolute', steps: [{ color: 'text', value: null }, { color: 'semi-dark-yellow', value: 1 }] }, + }, + ], + }, + ] + ) + .addTargets([ + $.addTargetSchema( + expr='count(ALERTS{alertstate="firing",severity="critical", cluster=~"$Cluster"}) OR vector(0)', + datasource='${DS_PROMETHEUS}', + legendFormat='Critical', + instant=true, + range=false + ), + $.addTargetSchema( + expr='count(ALERTS{alertstate="firing",severity="warning", cluster=~"$Cluster"}) OR vector(0)', + datasource='${DS_PROMETHEUS}', + legendFormat='Warning', + instant=true, + range=false + ), + ]), + + + $.addTableExtended( + datasource='${DS_PROMETHEUS}', + title='Alerts', + gridPosition={ h: 7, w: 19, x: 5, y: 19 }, + options={ + footer: { + fields: '', + reducer: ['sum'], + countRows: false, + enablePagination: false, + show: false, + }, + frameIndex: 1, + showHeader: true, + 
sortBy: [{ desc: false, displayName: 'Severity' }], + }, + custom={ align: 'auto', cellOptions: { type: 'auto' }, filterable: true, inspect: false }, + thresholds={ + mode: 'absolute', + steps: [ + { color: 'green' }, + { color: 'red', value: 80 }, + ], + }, + pluginVersion='9.4.7' + ) + .addTransformations([ + { + id: 'joinByField', + options: { byField: 'cluster', mode: 'outer' }, + }, + { + id: 'organize', + options: { + excludeByName: { + Time: true, + Value: true, + __name__: true, + instance: true, + job: true, + oid: true, + replica: true, + type: true, + }, + indexByName: { + Time: 0, + Value: 9, + __name__: 1, + alertname: 2, + alertstate: 4, + cluster: 3, + instance: 6, + job: 7, + severity: 5, + type: 8, + }, + renameByName: { + alertname: 'Name', + alertstate: 'State', + cluster: 'Cluster', + severity: 'Severity', + }, + }, + }, + ]).addTargets([ + $.addTargetSchema( + expr='ALERTS{alertstate="firing", cluster=~"$Cluster"}', + datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' }, + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='__auto', + range=false, + ), + ]), + + $.addAlertListPanel( + title='Alerts(Grouped)', + datasource={ + type: 'datasource', + uid: 'grafana', + }, + gridPosition={ h: 8, w: 24, x: 0, y: 26 }, + alertName='', + dashboardAlerts=false, + groupBy=[], + groupMode='default', + maxItems=20, + sortOrder=1, + stateFilter={ + 'error': true, + firing: true, + noData: false, + normal: false, + pending: true, + }, + ), + ]) + { gridPos: { x: 0, y: 18, w: 24, h: 1 } }, + + $.addRowSchema(false, true, 'Cluster Stats', collapsed=true) + .addPanels([ + $.timeSeriesPanel( + lineInterpolation='linear', + lineWidth=1, + drawStyle='line', + axisPlacement='auto', + title='Top 5 - Capacity Utilization(%)', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 7, w: 8, x: 0, y: 30 }, + fillOpacity=0, + pointSize=5, + showPoints='auto', + unit='percentunit', + displayMode='table', + showLegend=true, + 
placement='bottom', + tooltip={ mode: 'multi', sort: 'desc' }, + stackingMode='none', + spanNulls=false, + decimals=2, + thresholdsMode='percentage', + sortBy='Last', + sortDesc=true + ) + .addCalcs(['last']) + .addThresholds([ + { color: 'green' }, + ]) + .addTargets( + [ + $.addTargetSchema( + expr='topk(5, ceph_cluster_total_used_bytes/ceph_cluster_total_bytes)', + datasource='${DS_PROMETHEUS}', + instant=false, + legendFormat='{{cluster}}', + step=300, + range=true, + ), + ] + ), + + + $.timeSeriesPanel( + lineInterpolation='linear', + lineWidth=1, + drawStyle='line', + axisPlacement='auto', + title='Top 5 - Cluster IOPS', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 7, w: 8, x: 8, y: 30 }, + fillOpacity=0, + pointSize=5, + showPoints='auto', + unit='ops', + displayMode='table', + showLegend=true, + placement='bottom', + tooltip={ mode: 'multi', sort: 'desc' }, + stackingMode='none', + spanNulls=false, + decimals=2, + thresholdsMode='percentage', + sortBy='Last', + sortDesc=true + ) + .addCalcs(['last']) + .addThresholds([ + { color: 'green' }, + ]) + .addTargets( + [ /* topk limit is 5 so the query agrees with the panel title (write + read ops summed per cluster). */ + $.addTargetSchema( + expr='topk(5, sum by (cluster) (irate(ceph_osd_op_w[$__interval])) \n+ sum by (cluster) (irate(ceph_osd_op_r[$__interval])) )', + datasource='${DS_PROMETHEUS}', + instant=false, + legendFormat='{{cluster}}', + step=300, + range=true, + ), + ] + ), + + + $.timeSeriesPanel( + lineInterpolation='linear', + lineWidth=1, + drawStyle='line', + axisPlacement='auto', + title='Top 10 - Capacity Utilization(%) by Pool', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 7, w: 8, x: 16, y: 30 }, + fillOpacity=0, + pointSize=5, + showPoints='auto', + unit='percentunit', + displayMode='table', + showLegend=true, + placement='bottom', + tooltip={ mode: 'multi', sort: 'desc' }, + stackingMode='none', + spanNulls=false, + decimals=2, + thresholdsMode='absolute', + sortBy='Last', + sortDesc=true + ) + .addCalcs(['last']) + .addThresholds([ + { color: 'green' }, + ]) + .addTargets( 
+ [ + $.addTargetSchema( + expr='topk(10, ceph_pool_bytes_used{cluster=~"$Cluster"}/ceph_pool_max_avail{cluster=~"$Cluster"} * on(pool_id, cluster) group_left(instance, name) ceph_pool_metadata{cluster=~"$Cluster"})', + datasource='${DS_PROMETHEUS}', + instant=false, + legendFormat='{{cluster}} - {{name}}', + step=300, + range=true, + ), + ] + ), + ]) + { gridPos: { x: 0, y: 29, w: 24, h: 1 } }, + ]), +} diff --git a/monitoring/ceph-mixin/dashboards/utils.libsonnet b/monitoring/ceph-mixin/dashboards/utils.libsonnet index 397c00fe53a..a1c2fe94214 100644 --- a/monitoring/ceph-mixin/dashboards/utils.libsonnet +++ b/monitoring/ceph-mixin/dashboards/utils.libsonnet @@ -117,7 +117,8 @@ local timeSeries = import 'timeseries_panel.libsonnet'; regex, hide='', multi=false, - allValues=null):: + allValues=null, + current=null):: g.template.new(name=name, datasource=datasource, query=query, @@ -128,7 +129,8 @@ local timeSeries = import 'timeseries_panel.libsonnet'; regex=regex, hide=hide, multi=multi, - allValues=allValues), + allValues=allValues, + current=current), addAnnotationSchema(builtIn, datasource, diff --git a/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json b/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json new file mode 100644 index 00000000000..91b2934f065 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json @@ -0,0 +1,2123 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 2, + "panels": [ ], + 
"repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Clusters", + "titleSize": "h6", + "type": "row" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Warning" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "semi-dark-yellow", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Error" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "semi-dark-red", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Healthy" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "semi-dark-green", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 2 + }, + "id": 3, + "links": [ ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "count(ceph_health_status==0) or vector(0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Healthy", + "refId": "A" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "count(ceph_health_status==1)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": 
"Warning", + "refId": "B" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "count(ceph_health_status==2)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Error", + "refId": "C" + } + ], + "title": "Status", + "transparent": false, + "type": "stat" + }, + { + "columns": [ ], + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "cellOptions": { + "type": "color-text" + }, + "filterable": false, + "inspect": false + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "0": { + "color": "semi-dark-green", + "index": 2, + "text": "Healthy" + }, + "1": { + "color": "semi-dark-yellow", + "index": 0, + "text": "Warning" + }, + "2": { + "color": "semi-dark-red", + "index": 1, + "text": "Error" + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "IOPS" + }, + "properties": [ + { + "id": "unit", + "value": "ops" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Capacity Used" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cluster" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "", + "url": "/d/GQ3MHvnIz/ceph-cluster-new?var-cluster=${__data.fields.Cluster}&${DS_PROMETHEUS:queryparam}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Alerts" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "match": null, + "result": { + "index": 0, + "text": "0" + } + }, + "type": "special" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 19, + "x": 5, + 
"y": 2 + }, + "id": 4, + "links": [ ], + "options": { + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true + }, + "pluginVersion": "9.4.7", + "styles": "", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "ceph_health_status", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "ceph_mgr_metadata", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(ALERTS{alertstate=\"firing\", cluster=~\"$Cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum by (cluster) (irate(ceph_pool_wr[$__interval])) \n+ sum by (cluster) (irate(ceph_pool_rd[$__interval])) ", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum by (cluster) (irate(ceph_pool_rd_bytes[$__interval]))\n+ sum by (cluster) (irate(ceph_pool_wr_bytes[$__interval])) ", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": false, + "refId": "E" + }, 
+ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "ceph_cluster_by_class_total_used_bytes", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": false, + "refId": "F" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Details", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "cluster", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Value #B": true, + "__name__ 1": true, + "__name__ 2": true, + "__name__ 3": true, + "ceph_daemon": true, + "device_class": true, + "hostname": true, + "instance 1": true, + "instance 2": true, + "instance 3": true, + "job 1": true, + "job 2": true, + "job 3": true, + "replica 1": true, + "replica 2": true, + "replica 3": true + }, + "indexByName": { + "Time 1": 8, + "Time 2": 13, + "Time 3": 21, + "Time 4": 7, + "Time 5": 22, + "Time 6": 23, + "Value #A": 1, + "Value #B": 20, + "Value #C": 3, + "Value #D": 4, + "Value #E": 5, + "Value #F": 6, + "__name__ 1": 9, + "__name__ 2": 14, + "__name__ 3": 24, + "ceph_daemon": 15, + "ceph_version": 2, + "cluster": 0, + "device_class": 25, + "hostname": 16, + "instance 1": 10, + "instance 2": 17, + "instance 3": 26, + "job 1": 11, + "job 2": 18, + "job 3": 27, + "replica 1": 12, + "replica 2": 19, + "replica 3": 28 + }, + "renameByName": { + "Value #A": "Status", + "Value #C": "Alerts", + "Value #D": "IOPS", + "Value #E": "Throughput", + "Value #F": "Capacity Used", + "ceph_version": "Version", + "cluster": "Cluster" + } + } + } + ], + "type": "table" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 5, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + 
"showTitle": true, + "title": "Overview", + "titleSize": "h6", + "type": "row" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 10 + }, + "id": 6, + "links": [ ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(ceph_health_status{cluster=~\"$Cluster\"}) or vector(0)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Cluster Count", + "transparent": false, + "type": "stat" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "links": [ ], + "mappings": [ ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "semi-dark-yellow", + "value": 0.75 + }, + { + "color": "red", + "value": 0.84999999999999998 + } + ] + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 3, + "y": 10 + }, + "id": 7, + "interval": "1m", + "links": [ ], + "maxDataPoints": 100, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": 
"${DS_PROMETHEUS}", + "expr": "sum(ceph_cluster_total_used_bytes{cluster=~\"$Cluster\"}) / sum(ceph_cluster_total_bytes{cluster=~\"$Cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "A" + } + ], + "title": "Capacity Used", + "transparent": false, + "type": "gauge" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 7, + "y": 10 + }, + "id": 8, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(ceph_cluster_total_bytes{cluster=~\"$Cluster\"})", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Total Capacity", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 10, + "y": 10 + }, + "id": 9, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, 
+ "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(ceph_osd_metadata{cluster=~\"$Cluster\"})", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "OSDs", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 13, + "y": 10 + }, + "id": 10, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(sum by (hostname) (ceph_osd_metadata{cluster=~\"$Cluster\"}))", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Hosts", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 10 + }, + "id": 11, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + 
"justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(irate(ceph_pool_wr{cluster=~\"$Cluster\"}[$__interval]))", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Write", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(irate(ceph_pool_rd{cluster=~\"$Cluster\"}[$__interval]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Read", + "range": true, + "refId": "B" + } + ], + "title": "Client IOPS", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 10 + }, + "id": 12, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "avg(ceph_osd_apply_latency_ms{cluster=~\"$Cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Apply", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + 
"expr": "avg(ceph_osd_commit_latency_ms{cluster=~\"$Cluster\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Commit", + "range": true, + "refId": "B" + } + ], + "title": "OSD Latencies", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 14 + }, + "id": 13, + "links": [ ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(ALERTS{alertstate=\"firing\", cluster=~\"$Cluster\"}) or vector(0)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Alert Count", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 7, + "y": 14 + }, + "id": 14, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + 
"textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(ceph_cluster_total_used_bytes{cluster=~\"$Cluster\"})", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Total Used", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + } + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 10, + "y": 14 + }, + "id": 15, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "predict_linear(avg(increase(ceph_cluster_total_used_bytes{cluster=~\"${Cluster}\"}[1d]))[7d:1h],120)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Capacity Prediction", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 13, + "y": 14 + }, + "id": 16, + "links": [ ], + "options": { + "colorMode": "none", + 
"graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(ceph_pool_metadata{cluster=~\"$Cluster\"})", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Pools", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 14 + }, + "id": 17, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(irate(ceph_pool_rd_bytes{cluster=~\"$Cluster\"}[$__interval]))", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Write", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(irate(ceph_pool_wr_bytes{cluster=~\"$Cluster\"}[$__interval]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Read", + "range": true, + "refId": "B" + } + ], + "title": "Client Bandwidth", + "transparent": false, + 
"type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 14 + }, + "id": 18, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(irate(ceph_osd_recovery_ops{cluster=~\"$Cluster\"}[$__interval]))", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Write", + "range": true, + "refId": "A" + } + ], + "title": "Recovery Rate", + "transparent": false, + "type": "stat" + }, + { + "collapse": false, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 19, + "panels": [ + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Critical" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "semi-dark-red", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Warning" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "text", + 
"value": null + }, + { + "color": "semi-dark-yellow", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 19 + }, + "id": 20, + "links": [ ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "count(ALERTS{alertstate=\"firing\",severity=\"critical\", cluster=~\"$Cluster\"}) OR vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Critical", + "range": false, + "refId": "A" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "count(ALERTS{alertstate=\"firing\",severity=\"warning\", cluster=~\"$Cluster\"}) OR vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Warning", + "range": false, + "refId": "B" + } + ], + "title": "Status", + "transparent": false, + "type": "stat" + }, + { + "columns": [ ], + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 19, + "x": 5, + "y": 19 + }, + "id": 21, + "links": [ ], + "options": { + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Severity" + } + ] + }, + "pluginVersion": "9.4.7", + "styles": "", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": 
"ALERTS{alertstate=\"firing\", cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Alerts", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "cluster", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "instance": true, + "job": true, + "oid": true, + "replica": true, + "type": true + }, + "indexByName": { + "Time": 0, + "Value": 9, + "__name__": 1, + "alertname": 2, + "alertstate": 4, + "cluster": 3, + "instance": 6, + "job": 7, + "severity": 5, + "type": 8 + }, + "renameByName": { + "alertname": "Name", + "alertstate": "State", + "cluster": "Cluster", + "severity": "Severity" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 22, + "limit": 10, + "onlyAlertsOnDashboard": true, + "options": { + "alertName": "", + "dashboardAlerts": false, + "groupBy": [ ], + "groupMode": "default", + "maxItems": 20, + "sortOrder": 1, + "stateFilter": { + "error": true, + "firing": true, + "noData": false, + "normal": false, + "pending": true + }, + "viewMode": "list" + }, + "show": "current", + "sortOrder": 1, + "stateFilter": [ ], + "title": "Alerts(Grouped)", + "type": "alertlist" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 23, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 30 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "topk(5, ceph_cluster_total_used_bytes/ceph_cluster_total_bytes)", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{cluster}}", + "range": true, + "refId": "A", + "step": 300 + } + ], + "title": "Top 5 - Capacity Utilization(%)", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 30 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "topk(10, sum by (cluster) (irate(ceph_osd_op_w[$__interval])) \n+ sum by (cluster) (irate(ceph_osd_op_r[$__interval])) )", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{cluster}}", + "range": true, + "refId": "A", + "step": 300 + } + ], + "title": "Top 5 - Cluster IOPS", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 30 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + 
"sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "topk(10, ceph_pool_bytes_used{cluster=~\"$Cluster\"}/ceph_pool_max_avail{cluster=~\"$Cluster\"} * on(pool_id, cluster) group_left(instance, name) ceph_pool_metadata{cluster=~\"$Cluster\"})", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{cluster}} - {{name}}", + "range": true, + "refId": "A", + "step": 300 + } + ], + "title": "Top 10 - Capacity Utilization(%) by Pool", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Stats", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 22, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "DS_PROMETHEUS", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "All" + }, + "datasource": "$DS_PROMETHEUS", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "Cluster", + "options": [ ], + "query": "label_values(ceph_health_status, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph - Multi-cluster", + "uid": "BnxelG7Sz", + "version": 0 +} |