summaryrefslogtreecommitdiffstats
path: root/monitoring
diff options
context:
space:
mode:
authorAashish Sharma <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com>2024-01-25 08:14:01 +0100
committerNizamudeen A <nia@redhat.com>2024-02-22 06:12:01 +0100
commit495f669faf39cacbd1cbca562e97340338ef5cd1 (patch)
tree4d7888a1127a3111a8ba4f4f2c709b335bddc3a7 /monitoring
parentmgr/dashboard: multi-cluster management in ceph dashboard (diff)
downloadceph-495f669faf39cacbd1cbca562e97340338ef5cd1.tar.xz
ceph-495f669faf39cacbd1cbca562e97340338ef5cd1.zip
mgr/dashboard: Add a manage clusters page to the multi-cluster nav to
list/connect/disconnect/edit clusters in multi-cluster setup Fixes: https://tracker.ceph.com/issues/64530 Signed-off-by: Aashish Sharma <aasharma@redhat.com>
Diffstat (limited to 'monitoring')
-rw-r--r--monitoring/ceph-mixin/dashboards.libsonnet1
-rw-r--r--monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet964
-rw-r--r--monitoring/ceph-mixin/dashboards/utils.libsonnet6
-rw-r--r--monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json2123
4 files changed, 3092 insertions, 2 deletions
diff --git a/monitoring/ceph-mixin/dashboards.libsonnet b/monitoring/ceph-mixin/dashboards.libsonnet
index 8cb0bcceaf7..82e1888e036 100644
--- a/monitoring/ceph-mixin/dashboards.libsonnet
+++ b/monitoring/ceph-mixin/dashboards.libsonnet
@@ -8,5 +8,6 @@
(import 'dashboards/rgw.libsonnet') +
(import 'dashboards/ceph-cluster.libsonnet') +
(import 'dashboards/rgw-s3-analytics.libsonnet') +
+ (import 'dashboards/multi-cluster.libsonnet') +
{ _config:: $._config },
}
diff --git a/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet b/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet
new file mode 100644
index 00000000000..ec725f4dfa2
--- /dev/null
+++ b/monitoring/ceph-mixin/dashboards/multi-cluster.libsonnet
@@ -0,0 +1,964 @@
+local g = import 'grafonnet/grafana.libsonnet';
+
+(import 'utils.libsonnet') {
+ 'multi-cluster-overview.json':
+ $.dashboardSchema(
+ 'Ceph - Multi-cluster',
+ '',
+ 'BnxelG7Sz',
+ 'now-1h',
+ '30s',
+ 22,
+ $._config.dashboardTags,
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addTemplate(
+ g.template.datasource('DS_PROMETHEUS', 'prometheus', 'default', label='Data Source')
+ )
+
+ .addTemplate(
+ $.addTemplateSchema('Cluster',
+ '$DS_PROMETHEUS',
+ 'label_values(ceph_health_status, cluster)',
+ 2,
+ true,
+ 0,
+ null,
+ '',
+ current='All')
+ )
+ .addPanels([
+ $.addRowSchema(false, true, 'Clusters') + { gridPos: { x: 0, y: 1, w: 24, h: 1 } },
+ $.addStatPanel(
+ title='Status',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 0, y: 2, w: 5, h: 7 },
+ graphMode='none',
+ colorMode='value',
+ orientation='auto',
+ justifyMode='center',
+ thresholdsMode='absolute',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'text', value: null },
+ ])
+ .addOverrides(
+ [
+ {
+ matcher: { id: 'byName', options: 'Warning' },
+ properties: [
+ {
+ id: 'thresholds',
+ value: { mode: 'absolute', steps: [{ color: 'text', value: null }, { color: 'semi-dark-yellow', value: 1 }] },
+ },
+ ],
+ },
+ {
+ matcher: { id: 'byName', options: 'Error' },
+ properties: [
+ {
+ id: 'thresholds',
+ value: { mode: 'absolute', steps: [{ color: 'text', value: null }, { color: 'semi-dark-red', value: 1 }] },
+ },
+ ],
+ },
+ {
+ matcher: { id: 'byName', options: 'Healthy' },
+ properties: [
+ {
+ id: 'thresholds',
+ value: { mode: 'absolute', steps: [{ color: 'text', value: null }, { color: 'semi-dark-green', value: 1 }] },
+ },
+ ],
+ },
+ ]
+ )
+ .addTargets([
+ $.addTargetSchema(
+ expr='count(ceph_health_status==0) or vector(0)',
+ datasource='${DS_PROMETHEUS}',
+ legendFormat='Healthy',
+ ),
+ $.addTargetSchema(
+ expr='count(ceph_health_status==1)',
+ datasource='${DS_PROMETHEUS}',
+ legendFormat='Warning'
+ ),
+ $.addTargetSchema(
+ expr='count(ceph_health_status==2)',
+ datasource='${DS_PROMETHEUS}',
+ legendFormat='Error'
+ ),
+ ]),
+
+ $.addTableExtended(
+ datasource='${DS_PROMETHEUS}',
+ title='Details',
+ gridPosition={ h: 7, w: 19, x: 5, y: 2 },
+ options={
+ footer: {
+ fields: '',
+ reducer: ['sum'],
+ countRows: false,
+ enablePagination: false,
+ show: false,
+ },
+ frameIndex: 1,
+ showHeader: true,
+ },
+ custom={ align: 'left', cellOptions: { type: 'color-text' }, filterable: false, inspect: false },
+ thresholds={
+ mode: 'absolute',
+ steps: [
+ { color: 'text' },
+ ],
+ },
+ overrides=[
+ {
+ matcher: { id: 'byName', options: 'Value #A' },
+ properties: [
+ { id: 'mappings', value: [{ options: { '0': { color: 'semi-dark-green', index: 2, text: 'Healthy' }, '1': { color: 'semi-dark-yellow', index: 0, text: 'Warning' }, '2': { color: 'semi-dark-red', index: 1, text: 'Error' } }, type: 'value' }] },
+ ],
+ },
+ {
+ matcher: { id: 'byName', options: 'IOPS' },
+ properties: [
+ { id: 'unit', value: 'ops' },
+ ],
+ },
+ {
+ matcher: { id: 'byName', options: 'Value #E' },
+ properties: [
+ { id: 'unit', value: 'bytes' },
+ ],
+ },
+ {
+ matcher: { id: 'byName', options: 'Capacity Used' },
+ properties: [
+ { id: 'unit', value: 'bytes' },
+ ],
+ },
+ {
+ matcher: { id: 'byName', options: 'Cluster' },
+ properties: [
+ { id: 'links', value: [{ title: '', url: '/d/GQ3MHvnIz/ceph-cluster-new?var-cluster=${__data.fields.Cluster}&${DS_PROMETHEUS:queryparam}' }] },
+ ],
+ },
+ {
+ matcher: { id: 'byName', options: 'Alerts' },
+ properties: [
+ { id: 'mappings', value: [{ options: { match: null, result: { index: 0, text: '0' } }, type: 'special' }] },
+ ],
+ },
+ ],
+ pluginVersion='9.4.7'
+ )
+ .addTransformations([
+ {
+ id: 'joinByField',
+ options: { byField: 'cluster', mode: 'outer' },
+ },
+ {
+ id: 'organize',
+ options: {
+ excludeByName: {
+ 'Time 1': true,
+ 'Time 2': true,
+ 'Time 3': true,
+ 'Time 4': true,
+ 'Time 5': true,
+ 'Time 6': true,
+ 'Value #B': true,
+ '__name__ 1': true,
+ '__name__ 2': true,
+ '__name__ 3': true,
+ ceph_daemon: true,
+ device_class: true,
+ hostname: true,
+ 'instance 1': true,
+ 'instance 2': true,
+ 'instance 3': true,
+ 'job 1': true,
+ 'job 2': true,
+ 'job 3': true,
+ 'replica 1': true,
+ 'replica 2': true,
+ 'replica 3': true,
+ },
+ indexByName: {
+ 'Time 1': 8,
+ 'Time 2': 13,
+ 'Time 3': 21,
+ 'Time 4': 7,
+ 'Time 5': 22,
+ 'Time 6': 23,
+ 'Value #A': 1,
+ 'Value #B': 20,
+ 'Value #C': 3,
+ 'Value #D': 4,
+ 'Value #E': 5,
+ 'Value #F': 6,
+ '__name__ 1': 9,
+ '__name__ 2': 14,
+ '__name__ 3': 24,
+ ceph_daemon: 15,
+ ceph_version: 2,
+ cluster: 0,
+ device_class: 25,
+ hostname: 16,
+ 'instance 1': 10,
+ 'instance 2': 17,
+ 'instance 3': 26,
+ 'job 1': 11,
+ 'job 2': 18,
+ 'job 3': 27,
+ 'replica 1': 12,
+ 'replica 2': 19,
+ 'replica 3': 28,
+ },
+ renameByName: {
+ 'Value #A': 'Status',
+ 'Value #C': 'Alerts',
+ 'Value #D': 'IOPS',
+ 'Value #E': 'Throughput',
+ 'Value #F': 'Capacity Used',
+ ceph_version: 'Version',
+ cluster: 'Cluster',
+ },
+ },
+ },
+ ]).addTargets([
+ $.addTargetSchema(
+ expr='ceph_health_status',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=true,
+ interval='',
+ legendFormat='__auto',
+ range=false,
+ ),
+ $.addTargetSchema(
+ expr='ceph_mgr_metadata',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=true,
+ interval='',
+ legendFormat='__auto',
+ range=false,
+ ),
+ $.addTargetSchema(
+ expr='count(ALERTS{alertstate="firing", cluster=~"$Cluster"})',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=true,
+ interval='',
+ legendFormat='__auto',
+ range=false,
+ ),
+ $.addTargetSchema(
+ expr='sum by (cluster) (irate(ceph_pool_wr[$__interval])) \n+ sum by (cluster) (irate(ceph_pool_rd[$__interval])) ',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=true,
+ interval='',
+ legendFormat='__auto',
+ range=false,
+ ),
+ $.addTargetSchema(
+ expr='sum by (cluster) (irate(ceph_pool_rd_bytes[$__interval]))\n+ sum by (cluster) (irate(ceph_pool_wr_bytes[$__interval])) ',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=true,
+ interval='',
+ legendFormat='__auto',
+ range=false,
+ ),
+ $.addTargetSchema(
+ expr='ceph_cluster_by_class_total_used_bytes',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=true,
+ interval='',
+ legendFormat='__auto',
+ range=false,
+ ),
+ ]),
+
+
+ $.addRowSchema(false, true, 'Overview') + { gridPos: { x: 0, y: 9, w: 24, h: 1 } },
+ $.addStatPanel(
+ title='Cluster Count',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 0, y: 10, w: 3, h: 4 },
+ graphMode='none',
+ colorMode='value',
+ orientation='auto',
+ justifyMode='center',
+ thresholdsMode='absolute',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'text', value: null },
+ { color: 'red', value: 80 },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='count(ceph_health_status{cluster=~"$Cluster"}) or vector(0)',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=true,
+ interval='',
+ legendFormat='__auto',
+ range=false,
+ ),
+ ]),
+
+ $.addGaugePanel(
+ title='Capacity Used',
+ gridPosition={ h: 8, w: 4, x: 3, y: 10 },
+ unit='percentunit',
+ max=1,
+ min=0,
+ interval='1m',
+ pluginVersion='9.4.7'
+ )
+ .addThresholds([
+ { color: 'green', value: null },
+ { color: 'semi-dark-yellow', value: 0.75 },
+ { color: 'red', value: 0.85 },
+ ])
+ .addTarget($.addTargetSchema(
+ expr='sum(ceph_cluster_total_used_bytes{cluster=~"$Cluster"}) / sum(ceph_cluster_total_bytes{cluster=~"$Cluster"})',
+ instant=true,
+ legendFormat='Used',
+ datasource='${DS_PROMETHEUS}',
+ )),
+
+ $.addStatPanel(
+ title='Total Capacity',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 7, y: 10, w: 3, h: 4 },
+ graphMode='area',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='auto',
+ thresholdsMode='absolute',
+ unit='bytes',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='sum(ceph_cluster_total_bytes{cluster=~"$Cluster"})',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=false,
+ interval='',
+ legendFormat='__auto',
+ range=true,
+ ),
+ ]),
+
+ $.addStatPanel(
+ title='OSDs',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 10, y: 10, w: 3, h: 4 },
+ graphMode='area',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='auto',
+ thresholdsMode='absolute',
+ unit='none',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='count(ceph_osd_metadata{cluster=~"$Cluster"})',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=false,
+ interval='',
+ legendFormat='__auto',
+ range=true,
+ ),
+ ]),
+
+ $.addStatPanel(
+ title='Hosts',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 13, y: 10, w: 3, h: 4 },
+ graphMode='area',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='auto',
+ thresholdsMode='absolute',
+ unit='none',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='count(sum by (hostname) (ceph_osd_metadata{cluster=~"$Cluster"}))',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=false,
+ interval='',
+ legendFormat='__auto',
+ range=true,
+ ),
+ ]),
+
+ $.addStatPanel(
+ title='Client IOPS',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 16, y: 10, w: 4, h: 4 },
+ graphMode='area',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='center',
+ thresholdsMode='absolute',
+ unit='ops',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='sum(irate(ceph_pool_wr{cluster=~"$Cluster"}[$__interval]))',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ hide=false,
+ exemplar=false,
+ instant=false,
+ legendFormat='Write',
+ range=true,
+ ),
+ $.addTargetSchema(
+ expr='sum(irate(ceph_pool_rd{cluster=~"$Cluster"}[$__interval]))',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ hide=false,
+ exemplar=false,
+ legendFormat='Read',
+ range=true,
+ ),
+ ]),
+
+ $.addStatPanel(
+ title='OSD Latencies',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 20, y: 10, w: 4, h: 4 },
+ graphMode='area',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='center',
+ thresholdsMode='absolute',
+ unit='ms',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='avg(ceph_osd_apply_latency_ms{cluster=~"$Cluster"})',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ hide=false,
+ exemplar=false,
+ instant=false,
+ legendFormat='Apply',
+ range=true,
+ ),
+ $.addTargetSchema(
+ expr='avg(ceph_osd_commit_latency_ms{cluster=~"$Cluster"})',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ hide=false,
+ exemplar=false,
+ legendFormat='Commit',
+ range=true,
+ ),
+ ]),
+
+ $.addStatPanel(
+ title='Alert Count',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 0, y: 14, w: 3, h: 4 },
+ graphMode='none',
+ colorMode='value',
+ orientation='auto',
+ justifyMode='center',
+ thresholdsMode='absolute',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'text', value: null },
+ { color: 'red', value: 80 },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='count(ALERTS{alertstate="firing", cluster=~"$Cluster"}) or vector(0)',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=true,
+ interval='',
+ legendFormat='__auto',
+ range=false,
+ ),
+ ]),
+
+ $.addStatPanel(
+ title='Total Used',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 7, y: 14, w: 3, h: 4 },
+ graphMode='area',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='auto',
+ thresholdsMode='absolute',
+ unit='bytes',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='sum(ceph_cluster_total_used_bytes{cluster=~"$Cluster"})',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=false,
+ interval='',
+ legendFormat='__auto',
+ range=true,
+ ),
+ ]),
+
+ $.addStatPanel(  // Overview stat panel built from a predict_linear() extrapolation of used-capacity growth for the clusters matched by $Cluster
+ title='Capacity Prediction',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 10, y: 14, w: 3, h: 4 },
+ graphMode='none',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='auto',
+ unit='s',  // NOTE(review): rendered as seconds, but the query below extrapolates a value derived from *_used_bytes - confirm the intended unit
+ thresholdsMode='absolute',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='predict_linear(avg(increase(ceph_cluster_total_used_bytes{cluster=~"${Cluster}"}[1d]))[7d:1h],120)',  // NOTE(review): predict_linear's 2nd argument is seconds ahead, so this looks only 120s into the future - confirm the horizon
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ hide=false,
+ exemplar=false,
+ legendFormat='__auto',
+ range=true,
+ ),
+ ]),
+
+ $.addStatPanel(
+ title='Pools',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 13, y: 14, w: 3, h: 4 },
+ graphMode='area',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='auto',
+ thresholdsMode='absolute',
+ unit='none',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='count(ceph_pool_metadata{cluster=~"$Cluster"})',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=false,
+ interval='',
+ legendFormat='__auto',
+ range=true,
+ ),
+ ]),
+
+ $.addStatPanel(  // Overview stat panel: summed client read and write byte rates for the clusters matched by $Cluster
+ title='Client Bandwidth',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 16, y: 14, w: 4, h: 4 },
+ graphMode='area',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='center',
+ thresholdsMode='absolute',
+ unit='binBps',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='sum(irate(ceph_pool_rd_bytes{cluster=~"$Cluster"}[$__interval]))',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ hide=false,
+ exemplar=false,
+ instant=false,
+ legendFormat='Read',  // rd_* series are reads, matching the Client IOPS panel's rd -> 'Read' labelling
+ range=true,
+ ),
+ $.addTargetSchema(
+ expr='sum(irate(ceph_pool_wr_bytes{cluster=~"$Cluster"}[$__interval]))',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ hide=false,
+ exemplar=false,
+ legendFormat='Write',  // wr_* series are writes, matching the Client IOPS panel's wr -> 'Write' labelling
+ range=true,
+ ),
+ ]),
+
+ $.addStatPanel(  // Overview stat panel: summed rate of OSD recovery operations for the clusters matched by $Cluster
+ title='Recovery Rate',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 20, y: 14, w: 4, h: 4 },
+ graphMode='area',
+ colorMode='none',
+ orientation='auto',
+ justifyMode='center',
+ thresholdsMode='absolute',
+ unit='ops',  // the query rates an operations counter, so show ops/sec rather than a bytes/sec unit
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'green', value: null },
+ ])
+ .addTargets([
+ $.addTargetSchema(
+ expr='sum(irate(ceph_osd_recovery_ops{cluster=~"$Cluster"}[$__interval]))',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ hide=false,
+ exemplar=false,
+ instant=false,
+ legendFormat='Op/s',  // series is recovery operations, not client writes
+ range=true,
+ ),
+ ]),
+
+
+ $.addRowSchema(false, true, 'Alerts', collapsed=true)
+ .addPanels([
+ $.addStatPanel(
+ title='Status',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ x: 0, y: 19, w: 5, h: 7 },
+ graphMode='area',
+ colorMode='value',
+ orientation='auto',
+ justifyMode='center',
+ thresholdsMode='absolute',
+ pluginVersion='9.4.7',
+ ).addThresholds([
+ { color: 'text', value: null },
+ ])
+ .addOverrides(
+ [
+ {
+ matcher: { id: 'byName', options: 'Critical' },
+ properties: [
+ {
+ id: 'thresholds',
+ value: { mode: 'absolute', steps: [{ color: 'text', value: null }, { color: 'semi-dark-red', value: 1 }] },
+ },
+ ],
+ },
+ {
+ matcher: { id: 'byName', options: 'Warning' },
+ properties: [
+ {
+ id: 'thresholds',
+ value: { mode: 'absolute', steps: [{ color: 'text', value: null }, { color: 'semi-dark-yellow', value: 1 }] },
+ },
+ ],
+ },
+ ]
+ )
+ .addTargets([
+ $.addTargetSchema(
+ expr='count(ALERTS{alertstate="firing",severity="critical", cluster=~"$Cluster"}) OR vector(0)',
+ datasource='${DS_PROMETHEUS}',
+ legendFormat='Critical',
+ instant=true,
+ range=false
+ ),
+ $.addTargetSchema(
+ expr='count(ALERTS{alertstate="firing",severity="warning", cluster=~"$Cluster"}) OR vector(0)',
+ datasource='${DS_PROMETHEUS}',
+ legendFormat='Warning',
+ instant=true,
+ range=false
+ ),
+ ]),
+
+
+ $.addTableExtended(
+ datasource='${DS_PROMETHEUS}',
+ title='Alerts',
+ gridPosition={ h: 7, w: 19, x: 5, y: 19 },
+ options={
+ footer: {
+ fields: '',
+ reducer: ['sum'],
+ countRows: false,
+ enablePagination: false,
+ show: false,
+ },
+ frameIndex: 1,
+ showHeader: true,
+ sortBy: [{ desc: false, displayName: 'Severity' }],
+ },
+ custom={ align: 'auto', cellOptions: { type: 'auto' }, filterable: true, inspect: false },
+ thresholds={
+ mode: 'absolute',
+ steps: [
+ { color: 'green' },
+ { color: 'red', value: 80 },
+ ],
+ },
+ pluginVersion='9.4.7'
+ )
+ .addTransformations([
+ {
+ id: 'joinByField',
+ options: { byField: 'cluster', mode: 'outer' },
+ },
+ {
+ id: 'organize',
+ options: {
+ excludeByName: {
+ Time: true,
+ Value: true,
+ __name__: true,
+ instance: true,
+ job: true,
+ oid: true,
+ replica: true,
+ type: true,
+ },
+ indexByName: {
+ Time: 0,
+ Value: 9,
+ __name__: 1,
+ alertname: 2,
+ alertstate: 4,
+ cluster: 3,
+ instance: 6,
+ job: 7,
+ severity: 5,
+ type: 8,
+ },
+ renameByName: {
+ alertname: 'Name',
+ alertstate: 'State',
+ cluster: 'Cluster',
+ severity: 'Severity',
+ },
+ },
+ },
+ ]).addTargets([
+ $.addTargetSchema(
+ expr='ALERTS{alertstate="firing", cluster=~"$Cluster"}',
+ datasource={ type: 'prometheus', uid: '${DS_PROMETHEUS}' },
+ format='table',
+ hide=false,
+ exemplar=false,
+ instant=true,
+ interval='',
+ legendFormat='__auto',
+ range=false,
+ ),
+ ]),
+
+ $.addAlertListPanel(
+ title='Alerts(Grouped)',
+ datasource={
+ type: 'datasource',
+ uid: 'grafana',
+ },
+ gridPosition={ h: 8, w: 24, x: 0, y: 26 },
+ alertName='',
+ dashboardAlerts=false,
+ groupBy=[],
+ groupMode='default',
+ maxItems=20,
+ sortOrder=1,
+ stateFilter={
+ 'error': true,
+ firing: true,
+ noData: false,
+ normal: false,
+ pending: true,
+ },
+ ),
+ ]) + { gridPos: { x: 0, y: 18, w: 24, h: 1 } },
+
+ $.addRowSchema(false, true, 'Cluster Stats', collapsed=true)
+ .addPanels([
+ $.timeSeriesPanel(
+ lineInterpolation='linear',
+ lineWidth=1,
+ drawStyle='line',
+ axisPlacement='auto',
+ title='Top 5 - Capacity Utilization(%)',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ h: 7, w: 8, x: 0, y: 30 },
+ fillOpacity=0,
+ pointSize=5,
+ showPoints='auto',
+ unit='percentunit',
+ displayMode='table',
+ showLegend=true,
+ placement='bottom',
+ tooltip={ mode: 'multi', sort: 'desc' },
+ stackingMode='none',
+ spanNulls=false,
+ decimals=2,
+ thresholdsMode='percentage',
+ sortBy='Last',
+ sortDesc=true
+ )
+ .addCalcs(['last'])
+ .addThresholds([
+ { color: 'green' },
+ ])
+ .addTargets(
+ [
+ $.addTargetSchema(
+ expr='topk(5, ceph_cluster_total_used_bytes/ceph_cluster_total_bytes)',
+ datasource='${DS_PROMETHEUS}',
+ instant=false,
+ legendFormat='{{cluster}}',
+ step=300,
+ range=true,
+ ),
+ ]
+ ),
+
+
+ $.timeSeriesPanel(  // Cluster Stats time series: the five clusters with the highest combined OSD read+write op rate
+ lineInterpolation='linear',
+ lineWidth=1,
+ drawStyle='line',
+ axisPlacement='auto',
+ title='Top 5 - Cluster IOPS',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ h: 7, w: 8, x: 8, y: 30 },
+ fillOpacity=0,
+ pointSize=5,
+ showPoints='auto',
+ unit='ops',
+ displayMode='table',
+ showLegend=true,
+ placement='bottom',
+ tooltip={ mode: 'multi', sort: 'desc' },
+ stackingMode='none',
+ spanNulls=false,
+ decimals=2,
+ thresholdsMode='percentage',
+ sortBy='Last',
+ sortDesc=true
+ )
+ .addCalcs(['last'])
+ .addThresholds([
+ { color: 'green' },
+ ])
+ .addTargets(
+ [
+ $.addTargetSchema(
+ expr='topk(5, sum by (cluster) (irate(ceph_osd_op_w[$__interval])) \n+ sum by (cluster) (irate(ceph_osd_op_r[$__interval])) )',  // k=5 so the query agrees with the "Top 5" title
+ datasource='${DS_PROMETHEUS}',
+ instant=false,
+ legendFormat='{{cluster}}',
+ step=300,
+ range=true,
+ ),
+ ]
+ ),
+
+
+ $.timeSeriesPanel(
+ lineInterpolation='linear',
+ lineWidth=1,
+ drawStyle='line',
+ axisPlacement='auto',
+ title='Top 10 - Capacity Utilization(%) by Pool',
+ datasource='${DS_PROMETHEUS}',
+ gridPosition={ h: 7, w: 8, x: 16, y: 30 },
+ fillOpacity=0,
+ pointSize=5,
+ showPoints='auto',
+ unit='percentunit',
+ displayMode='table',
+ showLegend=true,
+ placement='bottom',
+ tooltip={ mode: 'multi', sort: 'desc' },
+ stackingMode='none',
+ spanNulls=false,
+ decimals=2,
+ thresholdsMode='absolute',
+ sortBy='Last',
+ sortDesc=true
+ )
+ .addCalcs(['last'])
+ .addThresholds([
+ { color: 'green' },
+ ])
+ .addTargets(
+ [
+ $.addTargetSchema(
+ expr='topk(10, ceph_pool_bytes_used{cluster=~"$Cluster"}/ceph_pool_max_avail{cluster=~"$Cluster"} * on(pool_id, cluster) group_left(instance, name) ceph_pool_metadata{cluster=~"$Cluster"})',
+ datasource='${DS_PROMETHEUS}',
+ instant=false,
+ legendFormat='{{cluster}} - {{name}}',
+ step=300,
+ range=true,
+ ),
+ ]
+ ),
+ ]) + { gridPos: { x: 0, y: 29, w: 24, h: 1 } },
+ ]),
+}
diff --git a/monitoring/ceph-mixin/dashboards/utils.libsonnet b/monitoring/ceph-mixin/dashboards/utils.libsonnet
index 397c00fe53a..a1c2fe94214 100644
--- a/monitoring/ceph-mixin/dashboards/utils.libsonnet
+++ b/monitoring/ceph-mixin/dashboards/utils.libsonnet
@@ -117,7 +117,8 @@ local timeSeries = import 'timeseries_panel.libsonnet';
regex,
hide='',
multi=false,
- allValues=null)::
+ allValues=null,
+ current=null)::
g.template.new(name=name,
datasource=datasource,
query=query,
@@ -128,7 +129,8 @@ local timeSeries = import 'timeseries_panel.libsonnet';
regex=regex,
hide=hide,
multi=multi,
- allValues=allValues),
+ allValues=allValues,
+ current=current),
addAnnotationSchema(builtIn,
datasource,
diff --git a/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json b/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json
new file mode 100644
index 00000000000..91b2934f065
--- /dev/null
+++ b/monitoring/ceph-mixin/dashboards_out/multi-cluster-overview.json
@@ -0,0 +1,2123 @@
+{
+ "__inputs": [ ],
+ "__requires": [ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 1
+ },
+ "id": 2,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Clusters",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Warning"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "semi-dark-yellow",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Error"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "semi-dark-red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Healthy"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "semi-dark-green",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 0,
+ "y": 2
+ },
+ "id": 3,
+ "links": [ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "expr": "count(ceph_health_status==0) or vector(0)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Healthy",
+ "refId": "A"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "expr": "count(ceph_health_status==1)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Warning",
+ "refId": "B"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "expr": "count(ceph_health_status==2)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Error",
+ "refId": "C"
+ }
+ ],
+ "title": "Status",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "columns": [ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": "left",
+ "cellOptions": {
+ "type": "color-text"
+ },
+ "filterable": false,
+ "inspect": false
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text"
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Value #A"
+ },
+ "properties": [
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "0": {
+ "color": "semi-dark-green",
+ "index": 2,
+ "text": "Healthy"
+ },
+ "1": {
+ "color": "semi-dark-yellow",
+ "index": 0,
+ "text": "Warning"
+ },
+ "2": {
+ "color": "semi-dark-red",
+ "index": 1,
+ "text": "Error"
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "IOPS"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "ops"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Value #E"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Capacity Used"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Cluster"
+ },
+ "properties": [
+ {
+ "id": "links",
+ "value": [
+ {
+ "title": "",
+ "url": "/d/GQ3MHvnIz/ceph-cluster-new?var-cluster=${__data.fields.Cluster}&${DS_PROMETHEUS:queryparam}"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Alerts"
+ },
+ "properties": [
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "match": null,
+ "result": {
+ "index": 0,
+ "text": "0"
+ }
+ },
+ "type": "special"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 19,
+ "x": 5,
+ "y": 2
+ },
+ "id": 4,
+ "links": [ ],
+ "options": {
+ "footer": {
+ "countRows": false,
+ "enablePagination": false,
+ "fields": "",
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "frameIndex": 1,
+ "showHeader": true
+ },
+ "pluginVersion": "9.4.7",
+ "styles": "",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "ceph_health_status",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "ceph_mgr_metadata",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "count(ALERTS{alertstate=\"firing\", cluster=~\"$Cluster\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "sum by (cluster) (irate(ceph_pool_wr[$__interval])) \n+ sum by (cluster) (irate(ceph_pool_rd[$__interval])) ",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "D"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "sum by (cluster) (irate(ceph_pool_rd_bytes[$__interval]))\n+ sum by (cluster) (irate(ceph_pool_wr_bytes[$__interval])) ",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "E"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "ceph_cluster_by_class_total_used_bytes",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "F"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Details",
+ "transformations": [
+ {
+ "id": "joinByField",
+ "options": {
+ "byField": "cluster",
+ "mode": "outer"
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time 1": true,
+ "Time 2": true,
+ "Time 3": true,
+ "Time 4": true,
+ "Time 5": true,
+ "Time 6": true,
+ "Value #B": true,
+ "__name__ 1": true,
+ "__name__ 2": true,
+ "__name__ 3": true,
+ "ceph_daemon": true,
+ "device_class": true,
+ "hostname": true,
+ "instance 1": true,
+ "instance 2": true,
+ "instance 3": true,
+ "job 1": true,
+ "job 2": true,
+ "job 3": true,
+ "replica 1": true,
+ "replica 2": true,
+ "replica 3": true
+ },
+ "indexByName": {
+ "Time 1": 8,
+ "Time 2": 13,
+ "Time 3": 21,
+ "Time 4": 7,
+ "Time 5": 22,
+ "Time 6": 23,
+ "Value #A": 1,
+ "Value #B": 20,
+ "Value #C": 3,
+ "Value #D": 4,
+ "Value #E": 5,
+ "Value #F": 6,
+ "__name__ 1": 9,
+ "__name__ 2": 14,
+ "__name__ 3": 24,
+ "ceph_daemon": 15,
+ "ceph_version": 2,
+ "cluster": 0,
+ "device_class": 25,
+ "hostname": 16,
+ "instance 1": 10,
+ "instance 2": 17,
+ "instance 3": 26,
+ "job 1": 11,
+ "job 2": 18,
+ "job 3": 27,
+ "replica 1": 12,
+ "replica 2": 19,
+ "replica 3": 28
+ },
+ "renameByName": {
+ "Value #A": "Status",
+ "Value #C": "Alerts",
+ "Value #D": "IOPS",
+ "Value #E": "Throughput",
+ "Value #F": "Capacity Used",
+ "ceph_version": "Version",
+ "cluster": "Cluster"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 9
+ },
+ "id": 5,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Overview",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 0,
+ "y": 10
+ },
+ "id": 6,
+ "links": [ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "count(ceph_health_status{cluster=~\"$Cluster\"}) or vector(0)",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Cluster Count",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "links": [ ],
+ "mappings": [ ],
+ "max": 1,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "semi-dark-yellow",
+ "value": 0.75
+ },
+ {
+ "color": "red",
+ "value": 0.84999999999999998
+ }
+ ]
+ },
+ "unit": "percentunit"
+ }
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 3,
+ "y": 10
+ },
+ "id": 7,
+ "interval": "1m",
+ "links": [ ],
+ "maxDataPoints": 100,
+ "options": {
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "expr": "sum(ceph_cluster_total_used_bytes{cluster=~\"$Cluster\"}) / sum(ceph_cluster_total_bytes{cluster=~\"$Cluster\"})",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "Used",
+ "refId": "A"
+ }
+ ],
+ "title": "Capacity Used",
+ "transparent": false,
+ "type": "gauge"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "bytes"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 7,
+ "y": 10
+ },
+ "id": 8,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "sum(ceph_cluster_total_bytes{cluster=~\"$Cluster\"})",
+ "format": "table",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Total Capacity",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 10,
+ "y": 10
+ },
+ "id": 9,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "count(ceph_osd_metadata{cluster=~\"$Cluster\"})",
+ "format": "table",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "OSDs",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 13,
+ "y": 10
+ },
+ "id": 10,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "count(sum by (hostname) (ceph_osd_metadata{cluster=~\"$Cluster\"}))",
+ "format": "table",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Hosts",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "ops"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 16,
+ "y": 10
+ },
+ "id": 11,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "sum(irate(ceph_pool_wr{cluster=~\"$Cluster\"}[$__interval]))",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "Write",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "sum(irate(ceph_pool_rd{cluster=~\"$Cluster\"}[$__interval]))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "Read",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Client IOPS",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "ms"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 20,
+ "y": 10
+ },
+ "id": 12,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "avg(ceph_osd_apply_latency_ms{cluster=~\"$Cluster\"})",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "Apply",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "avg(ceph_osd_commit_latency_ms{cluster=~\"$Cluster\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "Commit",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "OSD Latencies",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 0,
+ "y": 14
+ },
+ "id": 13,
+ "links": [ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "count(ALERTS{alertstate=\"firing\", cluster=~\"$Cluster\"}) or vector(0)",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Alert Count",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "bytes"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 7,
+ "y": 14
+ },
+ "id": 14,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "sum(ceph_cluster_total_used_bytes{cluster=~\"$Cluster\"})",
+ "format": "table",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Total Used",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 10,
+ "y": 14
+ },
+ "id": 15,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "predict_linear(avg(increase(ceph_cluster_total_used_bytes{cluster=~\"${Cluster}\"}[1d]))[7d:1h],120)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Capacity Prediction",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 13,
+ "y": 14
+ },
+ "id": 16,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "count(ceph_pool_metadata{cluster=~\"$Cluster\"})",
+ "format": "table",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Pools",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "binBps"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 16,
+ "y": 14
+ },
+ "id": 17,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "sum(irate(ceph_pool_rd_bytes{cluster=~\"$Cluster\"}[$__interval]))",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "Read",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "sum(irate(ceph_pool_wr_bytes{cluster=~\"$Cluster\"}[$__interval]))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "Write",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Client Bandwidth",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "binBps"
+ }
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 20,
+ "y": 14
+ },
+ "id": 18,
+ "links": [ ],
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "sum(irate(ceph_osd_recovery_ops{cluster=~\"$Cluster\"}[$__interval]))",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "Recovery",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Recovery Rate",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "collapse": false,
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 18
+ },
+ "id": 19,
+ "panels": [
+ {
+ "colors": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "decimals": 0,
+ "links": [ ],
+ "mappings": [ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Critical"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "semi-dark-red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Warning"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "semi-dark-yellow",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 0,
+ "y": 19
+ },
+ "id": 20,
+ "links": [ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "9.4.7",
+ "targets": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "expr": "count(ALERTS{alertstate=\"firing\",severity=\"critical\", cluster=~\"$Cluster\"}) OR vector(0)",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "Critical",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "expr": "count(ALERTS{alertstate=\"firing\",severity=\"warning\", cluster=~\"$Cluster\"}) OR vector(0)",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "Warning",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Status",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "columns": [ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": "auto",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "filterable": true,
+ "inspect": false
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 19,
+ "x": 5,
+ "y": 19
+ },
+ "id": 21,
+ "links": [ ],
+ "options": {
+ "footer": {
+ "countRows": false,
+ "enablePagination": false,
+ "fields": "",
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "frameIndex": 1,
+ "showHeader": true,
+ "sortBy": [
+ {
+ "desc": false,
+ "displayName": "Severity"
+ }
+ ]
+ },
+ "pluginVersion": "9.4.7",
+ "styles": "",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "ALERTS{alertstate=\"firing\", cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Alerts",
+ "transformations": [
+ {
+ "id": "joinByField",
+ "options": {
+ "byField": "cluster",
+ "mode": "outer"
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "__name__": true,
+ "instance": true,
+ "job": true,
+ "oid": true,
+ "replica": true,
+ "type": true
+ },
+ "indexByName": {
+ "Time": 0,
+ "Value": 9,
+ "__name__": 1,
+ "alertname": 2,
+ "alertstate": 4,
+ "cluster": 3,
+ "instance": 6,
+ "job": 7,
+ "severity": 5,
+ "type": 8
+ },
+ "renameByName": {
+ "alertname": "Name",
+ "alertstate": "State",
+ "cluster": "Cluster",
+ "severity": "Severity"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "datasource": {
+ "type": "datasource",
+ "uid": "grafana"
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 26
+ },
+ "id": 22,
+ "limit": 10,
+ "onlyAlertsOnDashboard": true,
+ "options": {
+ "alertName": "",
+ "dashboardAlerts": false,
+ "groupBy": [ ],
+ "groupMode": "default",
+ "maxItems": 20,
+ "sortOrder": 1,
+ "stateFilter": {
+ "error": true,
+ "firing": true,
+ "noData": false,
+ "normal": false,
+ "pending": true
+ },
+ "viewMode": "list"
+ },
+ "show": "current",
+ "sortOrder": 1,
+ "stateFilter": [ ],
+ "title": "Alerts(Grouped)",
+ "type": "alertlist"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Alerts",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 29
+ },
+ "id": 23,
+ "panels": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 2,
+ "thresholds": {
+ "mode": "percentage",
+ "steps": [
+ {
+ "color": "green"
+ }
+ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 30
+ },
+ "id": 24,
+ "options": {
+ "legend": {
+ "calcs": [
+ "last"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Last",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "pluginVersion": "9.1.3",
+ "targets": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "expr": "topk(5, ceph_cluster_total_used_bytes/ceph_cluster_total_bytes)",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "{{cluster}}",
+ "range": true,
+ "refId": "A",
+ "step": 300
+ }
+ ],
+ "title": "Top 5 - Capacity Utilization(%)",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 2,
+ "thresholds": {
+ "mode": "percentage",
+ "steps": [
+ {
+ "color": "green"
+ }
+ ]
+ },
+ "unit": "ops"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 30
+ },
+ "id": 25,
+ "options": {
+ "legend": {
+ "calcs": [
+ "last"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Last",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "pluginVersion": "9.1.3",
+ "targets": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "expr": "topk(10, sum by (cluster) (irate(ceph_osd_op_w[$__interval])) \n+ sum by (cluster) (irate(ceph_osd_op_r[$__interval])) )",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "{{cluster}}",
+ "range": true,
+ "refId": "A",
+ "step": 300
+ }
+ ],
+ "title": "Top 10 - Cluster IOPS",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 2,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ }
+ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 30
+ },
+ "id": 26,
+ "options": {
+ "legend": {
+ "calcs": [
+ "last"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Last",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "pluginVersion": "9.1.3",
+ "targets": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "expr": "topk(10, ceph_pool_bytes_used{cluster=~\"$Cluster\"}/ceph_pool_max_avail{cluster=~\"$Cluster\"} * on(pool_id, cluster) group_left(instance, name) ceph_pool_metadata{cluster=~\"$Cluster\"})",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "{{cluster}} - {{name}}",
+ "range": true,
+ "refId": "A",
+ "step": 300
+ }
+ ],
+ "title": "Top 10 - Capacity Utilization(%) by Pool",
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Cluster Stats",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 22,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "DS_PROMETHEUS",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "All",
+ "value": "All"
+ },
+ "datasource": "$DS_PROMETHEUS",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "Cluster",
+ "options": [ ],
+ "query": "label_values(ceph_health_status, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Ceph - Multi-cluster",
+ "uid": "BnxelG7Sz",
+ "version": 0
+}