summaryrefslogtreecommitdiffstats
path: root/monitoring
diff options
context:
space:
mode:
authorPere Diaz Bou <pdiazbou@redhat.com>2021-10-26 14:42:17 +0200
committerPere Diaz Bou <pdiazbou@redhat.com>2021-11-16 10:30:49 +0100
commit44d3e4c264506154373ffaeb13d6c924c580e6b5 (patch)
tree9414f7999e457f0a4238c9cc0cb53525c8473b60 /monitoring
parentMerge pull request #43938 from neha-ojha/wip-quick-fix (diff)
downloadceph-44d3e4c264506154373ffaeb13d6c924c580e6b5.tar.xz
ceph-44d3e4c264506154373ffaeb13d6c924c580e6b5.zip
monitoring/grafana: Grafana query tester
Signed-off-by: Pere Diaz Bou <pdiazbou@redhat.com>
Diffstat (limited to 'monitoring')
l---------monitoring/grafana/dashboards/.pylintrc1
-rw-r--r--monitoring/grafana/dashboards/CMakeLists.txt6
-rw-r--r--monitoring/grafana/dashboards/requirements-lint.txt18
-rw-r--r--monitoring/grafana/dashboards/tests/__init__.py187
-rw-r--r--monitoring/grafana/dashboards/tests/features/__init__.py0
-rw-r--r--monitoring/grafana/dashboards/tests/features/ceph-cluster.feature10
-rw-r--r--monitoring/grafana/dashboards/tests/features/environment.py135
-rw-r--r--monitoring/grafana/dashboards/tests/features/hosts_overview.feature28
-rw-r--r--monitoring/grafana/dashboards/tests/features/self.feature68
-rw-r--r--monitoring/grafana/dashboards/tests/features/steps/__init__.py1
-rw-r--r--monitoring/grafana/dashboards/tests/requirements.txt12
-rw-r--r--monitoring/grafana/dashboards/tests/util.py68
-rw-r--r--monitoring/grafana/dashboards/tox.ini24
13 files changed, 555 insertions, 3 deletions
diff --git a/monitoring/grafana/dashboards/.pylintrc b/monitoring/grafana/dashboards/.pylintrc
new file mode 120000
index 00000000000..aa04b020cb4
--- /dev/null
+++ b/monitoring/grafana/dashboards/.pylintrc
@@ -0,0 +1 @@
+../../../src/pybind/mgr/dashboard/.pylintrc \ No newline at end of file
diff --git a/monitoring/grafana/dashboards/CMakeLists.txt b/monitoring/grafana/dashboards/CMakeLists.txt
index b8f59fc04b8..5711957923c 100644
--- a/monitoring/grafana/dashboards/CMakeLists.txt
+++ b/monitoring/grafana/dashboards/CMakeLists.txt
@@ -12,7 +12,9 @@ endif()
if(WITH_GRAFANA)
include(AddCephTest)
- add_tox_test(grafana TOX_ENVS grafonnet-check)
+ add_tox_test(grafana-check TOX_ENVS grafonnet-check)
+ add_tox_test(grafana-query-test TOX_ENVS promql-query-test)
+ add_tox_test(grafana-lint TOX_ENVS lint)
set(ver 0.1.0)
set(name grafonnet-lib)
include(ExternalProject)
@@ -30,7 +32,7 @@ if(WITH_GRAFANA)
${name})
ExternalProject_Get_Property(${name} SOURCE_DIR)
set_property(
- TEST run-tox-grafana
+ TEST run-tox-grafana-check run-tox-grafana-query-test run-tox-grafana-lint
APPEND
PROPERTY ENVIRONMENT
GRAFONNET_PATH=${SOURCE_DIR}/grafonnet)
diff --git a/monitoring/grafana/dashboards/requirements-lint.txt b/monitoring/grafana/dashboards/requirements-lint.txt
new file mode 100644
index 00000000000..f9a3c772f79
--- /dev/null
+++ b/monitoring/grafana/dashboards/requirements-lint.txt
@@ -0,0 +1,18 @@
+attrs==21.2.0
+behave==1.2.6
+py==1.10.0
+pyparsing==2.4.7
+PyYAML==6.0
+types-PyYAML==6.0.0
+typing-extensions==3.10.0.2
+termcolor==1.1.0
+types-termcolor==1.1.2
+dataclasses==0.6
+types-dataclasses==0.6.1
+six==1.16.0
+toml==0.10.2
+pylint==2.6.0
+isort==5.10.0
+mypy==0.910
+mypy-extensions==0.4.3
+prettytable==2.4.0
diff --git a/monitoring/grafana/dashboards/tests/__init__.py b/monitoring/grafana/dashboards/tests/__init__.py
new file mode 100644
index 00000000000..204a5f0d721
--- /dev/null
+++ b/monitoring/grafana/dashboards/tests/__init__.py
@@ -0,0 +1,187 @@
+import re
+import subprocess
+import sys
+import tempfile
+from dataclasses import asdict, dataclass, field
+from typing import Any, List
+
+import yaml
+
+
+# One input time series for a promtool test. PromqlTest.add_series builds
+# these; PromqlTest.run_promtool serializes them with asdict()/yaml.dump, so
+# the field names below end up as the YAML keys.
+@dataclass
+class InputSeries:
+ # Series selector, e.g. 'bonding_slaves{master="bond0"}'.
+ series: str = ''
+ # Sample values as one string, e.g. '10 100 230 22'.
+ values: str = ''
+
+# One expected output sample of the tested query. PromqlTest.add_exp_samples
+# builds these; the field names become YAML keys when the test is dumped.
+@dataclass
+class ExprSample:
+ # Expected series with its labels, e.g. 'bonding_slaves{master="bond0"}'.
+ labels: str = ''
+ # Expected value of that series; -1 is only the placeholder default.
+ value: float = -1
+
+# A query, the time at which it is evaluated, and the samples it must yield.
+# Filled in through PromqlTest.set_expression / set_eval_time /
+# add_exp_samples; the field names become YAML keys when the test is dumped.
+@dataclass
+class PromqlExprTest:
+ expr: str = ''
+ # Number plus unit, e.g. '1m' (see PromqlTest.set_eval_time).
+ eval_time: str = '1m'
+ exp_samples: List[ExprSample] = field(default_factory=list)
+
+# One test group: the input series plus the expression tests run on them.
+# The field names become YAML keys when PromqlTest.run_promtool dumps it.
+@dataclass
+class Test:
+ # Number plus unit, e.g. '1m' (see PromqlTest.set_interval).
+ interval: str = '1m'
+ input_series: List[InputSeries] = field(default_factory=list)
+ promql_expr_test: List[PromqlExprTest] = field(default_factory=list)
+
+
+# Root object that PromqlTest.run_promtool converts with asdict() and dumps
+# as the YAML file handed to `promtool test rules`.
+@dataclass
+class TestFile:
+ # Number plus unit, e.g. '1m' (see PromqlTest.set_evaluation_interval).
+ evaluation_interval: str = '1m'
+ tests: List[Test] = field(default_factory=list)
+
+
+class PromqlTest:
+    """
+    Base class to provide prometheus query test capabilities. After setting up
+    the query test with its input and expected output it's expected to run promtool.
+
+    The test is collected in a ``TestFile`` structure that ``run_promtool``
+    dumps as YAML into a temporary file and passes to ``promtool test rules``.
+
+    https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/#test-yml
+
+    The workflow of testing would be something like:
+
+        # add prometheus query to test
+        self.set_expression('bonding_slaves > 0')
+
+        # add some prometheus input series
+        self.add_series('bonding_slaves{master="bond0"}', '2')
+        self.add_series('bonding_slaves{master="bond1"}', '3')
+        self.add_series('node_network_receive_bytes{instance="127.0.0.1", '
+                        'device="eth1"}', "10 100 230 22")
+
+        # expected output of the query
+        self.add_exp_samples('bonding_slaves{master="bond0"}', 2)
+        self.add_exp_samples('bonding_slaves{master="bond1"}', 3)
+
+        # at last, always call promtool with:
+        self.assertTrue(self.run_promtool())
+        # assertTrue means it expects promtool to succeed
+    """
+
+    def __init__(self):
+        # promtool reads the test by file name, hence a *named* temporary file.
+        self.test_output_file = tempfile.NamedTemporaryFile('w+')
+
+        self.test_file = TestFile()
+        self.test = Test()
+        self.promql_expr_test = PromqlExprTest()
+        self.test.promql_expr_test.append(self.promql_expr_test)
+        self.test_file.tests.append(self.test)
+
+        # Grafana variable name -> value substituted by run_promtool().
+        self.variables = {}
+
+    def __del__(self):
+        # __init__ may have failed before the temporary file was created.
+        output_file = getattr(self, 'test_output_file', None)
+        if output_file is not None:
+            output_file.close()
+
+    def set_evaluation_interval(self, interval: int, unit: str = 'm') -> None:
+        """
+        Set the evaluation interval of the time series
+
+        Args:
+            interval (int): number of units.
+            unit (str): unit type: 'ms', 's', 'm', etc...
+        """
+        self.test_file.evaluation_interval = f'{interval}{unit}'
+
+    def set_interval(self, interval: int, unit: str = 'm') -> None:
+        """
+        Set the duration of the time series
+
+        Args:
+            interval (int): number of units.
+            unit (str): unit type: 'ms', 's', 'm', etc...
+        """
+        self.test.interval = f'{interval}{unit}'
+
+    def set_expression(self, expr: str) -> None:
+        """
+        Set the prometheus expression/query used to filter data.
+
+        Args:
+            expr(str): expression/query.
+        """
+        self.promql_expr_test.expr = expr
+
+    def add_series(self, series: str, values: str) -> None:
+        """
+        Add a series to the input.
+
+        Args:
+            series(str): Prometheus series.
+                Notation: '<metric name>{<label name>=<label value>, ...}'
+            values(str): Value of the series.
+        """
+        input_series = InputSeries(series=series, values=values)
+        self.test.input_series.append(input_series)
+
+    def set_eval_time(self, eval_time: int, unit: str = 'm') -> None:
+        """
+        Set the time when the expression will be evaluated
+
+        Args:
+            eval_time (int): number of units.
+            unit (str): unit type: 'ms', 's', 'm', etc...
+        """
+        self.promql_expr_test.eval_time = f'{eval_time}{unit}'
+
+    def add_exp_samples(self, sample: str, values: Any) -> None:
+        """
+        Add an expected sample/output of the query given the series/input
+
+        Args:
+            sample(str): Expected sample.
+                Notation: '<metric name>{<label name>=<label value>, ...}'
+            values(Any): Value of the sample.
+        """
+        expr_sample = ExprSample(labels=sample, value=values)
+        self.promql_expr_test.exp_samples.append(expr_sample)
+
+    def set_variable(self, variable: str, value: str) -> None:
+        """
+        If a query makes use of grafonnet variables, for example
+        '$osd_hosts', you should change this to a real value. The value is
+        substituted into the expression when run_promtool() is called.
+        Example:
+
+        > self.set_expression('bonding_slaves{master="$osd_hosts"} > 0')
+        > self.set_variable('osd_hosts', '127.0.0.1')
+        > self.run_promtool()
+        > print(self.promql_expr_test.expr)
+        > bonding_slaves{master="127.0.0.1"} > 0
+
+        Args:
+            variable(str): Variable name
+            value(str): Value to replace variable with
+
+        """
+        self.variables[variable] = value
+
+    def _substitute_variables(self) -> None:
+        """Replace every '$<variable>' in the expression with its value."""
+        expr = self.promql_expr_test.expr
+        for variable, value in self.variables.items():
+            # Escape the name and refuse to match the prefix of a longer
+            # name, so '$osd' never rewrites part of '$osd_hosts'.
+            pattern = r'\$' + re.escape(variable) + r'(?![A-Za-z0-9_])'
+            # A callable replacement inserts the value literally; a plain
+            # string would have its backslashes interpreted by re.sub.
+            expr = re.sub(pattern, lambda _match, text=str(value): text, expr)
+        self.set_expression(expr)
+
+    def run_promtool(self) -> bool:
+        """
+        Run promtool to test the query after setting up the input, output
+        and extra parameters.
+
+        On failure the generated test file and promtool's captured output
+        are printed.
+
+        Returns:
+            bool: True if successful, False otherwise.
+
+        Raises:
+            FileNotFoundError: if the promtool executable cannot be found.
+        """
+        self._substitute_variables()
+
+        test_as_dict = asdict(self.test_file)
+        # Start from an empty file so that a repeated run does not append a
+        # second YAML document after the first one.
+        self.test_output_file.seek(0)
+        self.test_output_file.truncate()
+        yaml.dump(test_as_dict, self.test_output_file)
+        # promtool opens the file by name: buffered data must be on disk.
+        self.test_output_file.flush()
+
+        args = ['promtool', 'test', 'rules', self.test_output_file.name]
+        try:
+            # Capture the output; otherwise CalledProcessError.stdout/stderr
+            # are always None and nothing useful can be reported.
+            subprocess.run(args, check=True, stdout=subprocess.PIPE,
+                           stderr=subprocess.PIPE, universal_newlines=True)
+        except subprocess.CalledProcessError as process_error:
+            print(yaml.dump(test_as_dict))
+            print(process_error.stdout)
+            print(process_error.stderr)
+            return False
+        return True
diff --git a/monitoring/grafana/dashboards/tests/features/__init__.py b/monitoring/grafana/dashboards/tests/features/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/monitoring/grafana/dashboards/tests/features/__init__.py
diff --git a/monitoring/grafana/dashboards/tests/features/ceph-cluster.feature b/monitoring/grafana/dashboards/tests/features/ceph-cluster.feature
new file mode 100644
index 00000000000..f8b580566f4
--- /dev/null
+++ b/monitoring/grafana/dashboards/tests/features/ceph-cluster.feature
@@ -0,0 +1,10 @@
+Feature: Ceph Cluster Dashboard
+
+Scenario: "Test total PG States"
+ Given the following series:
+ | metrics | values |
+ | ceph_pg_total{foo="var"} | 10 100 |
+ | ceph_pg_total{foo="bar"} | 20 200 |
+ Then Grafana panel `PG States` with legend `Total` shows:
+ | metrics | values |
+ | {} | 300 | \ No newline at end of file
diff --git a/monitoring/grafana/dashboards/tests/features/environment.py b/monitoring/grafana/dashboards/tests/features/environment.py
new file mode 100644
index 00000000000..8509b9d97e8
--- /dev/null
+++ b/monitoring/grafana/dashboards/tests/features/environment.py
@@ -0,0 +1,135 @@
+# type: ignore[no-redef]
+# pylint: disable=E0611,W0613,E0102
+import copy
+
+from behave import given, then, when
+from prettytable import PrettyTable
+
+from tests import PromqlTest
+from tests.util import get_dashboards_data, resolve_time_and_unit
+
+
+class GlobalContext:
+    """
+    State shared by all behave hooks and steps: the data extracted from the
+    dashboards and the PromqlTest belonging to the scenario being run.
+    """
+
+    def __init__(self):
+        dashboards = get_dashboards_data()
+        self.tested_queries_count = 0
+        self.promql_expr_test = None
+        self.data = dashboards
+        self.query_map = dashboards['queries']
+
+    def reset_promql_test(self):
+        """Create a fresh PromqlTest with its own copy of the variables."""
+        fresh_test = PromqlTest()
+        fresh_test.variables = copy.copy(self.data['variables'])
+        self.promql_expr_test = fresh_test
+
+    def print_query_stats(self):
+        """Print a table with the tested/total query count of each dashboard."""
+
+        def as_percent(done, out_of):
+            return f'{round((done / out_of) * 100, 2)}%'
+
+        report = PrettyTable()
+        report.field_names = ['Name', 'Queries', 'Tested', 'Cover']
+
+        all_queries = 0
+        all_tested = 0
+        for dashboard_path, counters in self.data['stats'].items():
+            assert counters['total']
+            n_queries = counters['total']
+            n_tested = counters['tested']
+            # Only the file name is shown, not the whole path.
+            short_name = dashboard_path.split('/')[-1]
+            report.add_row([short_name, n_queries, n_tested,
+                            as_percent(n_tested, n_queries)])
+            all_queries += n_queries
+            all_tested += n_tested
+
+        assert all_queries
+        report.add_row(['Total', all_queries, all_tested,
+                        as_percent(all_tested, all_queries)])
+        print(report)
+
+
+global_context = GlobalContext()
+
+# Behave function overloading
+# ===========================
+
+
+# behave hook: before each scenario, replace the shared PromqlTest with a
+# fresh one (GlobalContext.reset_promql_test) so scenarios do not share state.
+def before_scenario(context, scenario):
+ global_context.reset_promql_test()
+
+
+def after_scenario(context, scenario):
+    """
+    behave hook: run promtool on the test built up by the scenario's steps.
+
+    Raises:
+        AssertionError: if promtool reports a failure.
+    """
+    # Keep the promtool call out of an `assert` statement: asserts are
+    # removed under `python -O`, which would silently skip every query test.
+    if not global_context.promql_expr_test.run_promtool():
+        raise AssertionError(f'promtool test failed for scenario "{scenario.name}"')
+
+
+# behave hook: once every feature has run, print the per-dashboard table of
+# tested queries (GlobalContext.print_query_stats).
+def after_all(context):
+ global_context.print_query_stats()
+
+
+@given("the following series")
+def step_impl(context):
+    """Register every row of the step table as a promtool input series."""
+    promql_test = global_context.promql_expr_test
+    for row in context.table:
+        series, samples = row['metrics'], row['values']
+        promql_test.add_series(series, samples)
+
+
+@when('evaluation interval is `{interval}`')
+def step_impl(context, interval):
+    """
+    Set the evaluation interval of the test from a text such as `1m`.
+
+    Raises:
+        ValueError: if `interval` is not a number followed by a unit.
+    """
+    interval_without_unit, unit = resolve_time_and_unit(interval)
+    if interval_without_unit is None:
+        # Report the text the user wrote; the parsed value is always None here.
+        raise ValueError(f'Invalid interval time: {interval}. '
+                         'A valid time looks like "1m" where you have a number plus a unit')
+    global_context.promql_expr_test.set_evaluation_interval(interval_without_unit, unit)
+
+
+@when('interval is `{interval}`')
+def step_impl(context, interval):
+    """
+    Set the interval of the input series from a text such as `1m`.
+
+    Raises:
+        ValueError: if `interval` is not a number followed by a unit.
+    """
+    interval_without_unit, unit = resolve_time_and_unit(interval)
+    if interval_without_unit is None:
+        # Report the text the user wrote; the parsed value is always None here.
+        raise ValueError(f'Invalid interval time: {interval}. '
+                         'A valid time looks like "1m" where you have a number plus a unit')
+    global_context.promql_expr_test.set_interval(interval_without_unit, unit)
+
+
+@when('evaluation time is `{eval_time}`')
+def step_impl(context, eval_time):
+    """
+    Set the time at which the query is evaluated from a text such as `1m`.
+
+    Raises:
+        ValueError: if `eval_time` is not a number followed by a unit.
+    """
+    eval_time_without_unit, unit = resolve_time_and_unit(eval_time)
+    if eval_time_without_unit is None:
+        raise ValueError(f'Invalid evaluation time: {eval_time}. '
+                         'A valid time looks like "1m" where you have a number plus a unit')
+    global_context.promql_expr_test.set_eval_time(eval_time_without_unit, unit)
+
+
+# Step: give a Grafana variable (written `$<variable>` in a query) the value
+# used for this scenario; forwarded unchanged to PromqlTest.set_variable.
+@when('variable `{variable}` is `{value}`')
+def step_impl(context, variable, value):
+ global_context.promql_expr_test.set_variable(variable, value)
+
+
+@then('Grafana panel `{panel_name}` with legend `{legend}` shows')
+def step_impl(context, panel_name, legend):
+    """
+    Test the query of a dashboard panel against the expected samples listed
+    in the step table.
+
+    This step can have an empty legend. As 'behave' doesn't provide a way
+    to say it's empty we use EMPTY to mark as empty.
+
+    Raises:
+        KeyError: if no query is known for this panel and legend.
+    """
+    if legend == "EMPTY":
+        legend = ''
+    query_id = panel_name + '-' + legend
+    if query_id not in global_context.query_map:
+        raise KeyError(f'Query with legend {legend} in panel "{panel_name}" '
+                       'couldn\'t be found')
+
+    query = global_context.query_map[query_id]
+    promql_test = global_context.promql_expr_test
+    promql_test.set_expression(query['query'])
+    for row in context.table:
+        metric = row['metrics']
+        value = row['values']
+        promql_test.add_exp_samples(metric, float(value))
+
+    # Count each query once, however many scenarios exercise it; otherwise
+    # the 'tested' counter can exceed 'total' and coverage goes above 100%.
+    if not query.get('tested', False):
+        query['tested'] = True
+        global_context.data['stats'][query['path']]['tested'] += 1
+
+
+@then('query `{query}` produces')
+def step_impl(context, query):
+    """Test a literal query against the expected samples of the step table."""
+    promql_test = global_context.promql_expr_test
+    promql_test.set_expression(query)
+    for row in context.table:
+        labels, raw_value = row['metrics'], row['values']
+        promql_test.add_exp_samples(labels, float(raw_value))
diff --git a/monitoring/grafana/dashboards/tests/features/hosts_overview.feature b/monitoring/grafana/dashboards/tests/features/hosts_overview.feature
new file mode 100644
index 00000000000..e3ddf57bb2b
--- /dev/null
+++ b/monitoring/grafana/dashboards/tests/features/hosts_overview.feature
@@ -0,0 +1,28 @@
+Feature: Hosts Overview Dashboard
+
+Scenario: "Test network load succeeds"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ When variable `osd_hosts` is `127.0.0.1`
+ Then Grafana panel `Network Load` with legend `EMPTY` shows:
+ | metrics | values |
+ | {} | 6 |
+
+Scenario: "Test network load with bonding succeeds"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ When variable `osd_hosts` is `127.0.0.1`
+ Then Grafana panel `Network Load` with legend `EMPTY` shows:
+ | metrics | values |
+ | {} | 6 |
diff --git a/monitoring/grafana/dashboards/tests/features/self.feature b/monitoring/grafana/dashboards/tests/features/self.feature
new file mode 100644
index 00000000000..2b44ce0dcec
--- /dev/null
+++ b/monitoring/grafana/dashboards/tests/features/self.feature
@@ -0,0 +1,68 @@
+Feature: Test tester
+
+Scenario: "Simple query works"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 |
+ | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces:
+ | metrics | values |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 100 |
+
+Scenario: "Query with evaluation time"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 |
+ | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ When evaluation time is `0m`
+ Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces:
+ | metrics | values |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 |
+
+Scenario: "Query with evaluation time and variable value"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 |
+ | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ When evaluation time is `0m`
+ And variable `osd_hosts` is `127.0.0.1`
+ Then query `node_network_transmit_bytes{instance="$osd_hosts"} > 0` produces:
+ | metrics | values |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 |
+
+Scenario: "Query with interval time"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 300 |
+ | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 300 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ When evaluation time is `2h`
+ And evaluation interval is `1h`
+ And interval is `1h`
+ And variable `osd_hosts` is `127.0.0.1`
+ Then query `node_network_transmit_bytes{instance="$osd_hosts"} > 0` produces:
+ | metrics | values |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 200 | \ No newline at end of file
diff --git a/monitoring/grafana/dashboards/tests/features/steps/__init__.py b/monitoring/grafana/dashboards/tests/features/steps/__init__.py
new file mode 100644
index 00000000000..0b90f46f2c3
--- /dev/null
+++ b/monitoring/grafana/dashboards/tests/features/steps/__init__.py
@@ -0,0 +1 @@
+# This file and the steps files are needed, even if empty, because 'behave' requires them :(
diff --git a/monitoring/grafana/dashboards/tests/requirements.txt b/monitoring/grafana/dashboards/tests/requirements.txt
new file mode 100644
index 00000000000..8ad130e5bbe
--- /dev/null
+++ b/monitoring/grafana/dashboards/tests/requirements.txt
@@ -0,0 +1,12 @@
+attrs==21.2.0
+behave==1.2.6
+py==1.10.0
+pyparsing==2.4.7
+PyYAML==6.0
+types-PyYAML==6.0.0
+typing-extensions==3.10.0.2
+termcolor==1.1.0
+types-termcolor==1.1.2
+dataclasses==0.6
+types-dataclasses==0.6.1
+prettytable==2.4.0
diff --git a/monitoring/grafana/dashboards/tests/util.py b/monitoring/grafana/dashboards/tests/util.py
new file mode 100644
index 00000000000..4f09e9edd3b
--- /dev/null
+++ b/monitoring/grafana/dashboards/tests/util.py
@@ -0,0 +1,68 @@
+import json
+from pathlib import Path
+from typing import Any, Dict, Tuple, Union
+
+from termcolor import cprint
+
+UNITS = ['ms', 's', 'm', 'h', 'd', 'w', 'y']
+
+
+def resolve_time_and_unit(time: str) -> Union[Tuple[int, str], Tuple[None, None]]:
+    """
+    Divide time with its unit and return a tuple like (10, 'm')
+    Return (None, None) if it's an invalid prometheus time, i.e. the unit is
+    not in UNITS or the part before the unit is not an integer.
+    """
+    # Try the longest units first: '10ms' also ends with 's', and matching
+    # that first would leave the non-numeric prefix '10m'.
+    for unit in sorted(UNITS, key=len, reverse=True):
+        if time.endswith(unit):
+            try:
+                return int(time[:-len(unit)]), unit
+            except ValueError:
+                # Missing or non-numeric amount, e.g. 'm' or 'abcm'.
+                return None, None
+    return None, None
+
+
+def get_dashboards_data() -> Dict[str, Any]:
+    """
+    Load every '*.json' dashboard found two directory levels above this file
+    and collect its queries, template variables and per-file statistics.
+
+    Returns:
+        dict with the keys 'queries', 'variables' and 'stats'.
+    """
+    collected: Dict[str, Any] = {'queries': {}, 'variables': {}, 'stats': {}}
+    dashboards_dir = Path(__file__).parent.parent
+    for json_path in dashboards_dir.glob('*.json'):
+        path_key = str(json_path)
+        with open(json_path, 'r') as handle:
+            dashboard = json.load(handle)
+            # The counters must exist before the queries are added, as
+            # add_dashboard_queries increments 'total' for this path.
+            collected['stats'][path_key] = {'total': 0, 'tested': 0}
+            add_dashboard_queries(collected, dashboard, path_key)
+            add_dashboard_variables(collected, dashboard)
+    return collected
+
+
+def add_dashboard_queries(data: Dict[str, Any], dashboard_data: Dict[str, Any], path: str) -> None:
+    """
+    Register the queries of every panel of a dashboard in data['queries']
+    and count them in data['stats'][path]['total'].
+
+    Grafana panels can have more than one target/query, in order to identify each
+    query in the panel we append the "legendFormat" of the target to the panel name.
+    format: panel_name-legendFormat
+
+    Args:
+        data: result structure; data['stats'][path] must already exist.
+        dashboard_data: parsed dashboard JSON.
+        path: path of the dashboard file, stored with each query.
+    """
+    for panel in dashboard_data.get('panels', []):
+        if 'title' not in panel:
+            continue
+        title = panel['title']
+        for target in panel.get('targets', []):
+            # Check every target, not only the first one: a target without
+            # an expression carries no query and is skipped.
+            if 'expr' not in target:
+                continue
+            legend_format = target.get('legendFormat', '')
+            query_id = title + '-' + legend_format
+            if query_id in data['queries']:
+                # NOTE: If two or more panels have the same name and legend it
+                # might suggest a refactoring is needed or add something else
+                # to identify each query.
+                cprint((f'WARNING: Query in panel "{title}" with legend "{legend_format}"'
+                        ' already exists'), 'yellow')
+            data['queries'][query_id] = {'query': target['expr'], 'path': path}
+            data['stats'][path]['total'] += 1
+
+
+def add_dashboard_variables(data: Dict[str, Any], dashboard_data: Dict[str, Any]) -> None:
+    """
+    Record every named template variable of a dashboard in data['variables']
+    with the placeholder value 'UNSET VARIABLE'.
+    """
+    has_variable_list = ('templating' in dashboard_data
+                         and 'list' in dashboard_data['templating'])
+    if has_variable_list:
+        named = [entry['name'] for entry in dashboard_data['templating']['list']
+                 if 'name' in entry]
+        data['variables'].update(dict.fromkeys(named, 'UNSET VARIABLE'))
diff --git a/monitoring/grafana/dashboards/tox.ini b/monitoring/grafana/dashboards/tox.ini
index 10aeb9f38aa..382952c5b1b 100644
--- a/monitoring/grafana/dashboards/tox.ini
+++ b/monitoring/grafana/dashboards/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = grafonnet-{check,fix}
+envlist = grafonnet-{check,fix},lint,promql-query-test
skipsdist = true
[grafonnet]
@@ -20,3 +20,25 @@ passenv = GRAFONNET_PATH
commands =
check: bash test-jsonnet.sh
fix: jsonnet -m . jsonnet/grafana_dashboards.jsonnet
+
+
+[testenv:lint]
+description =
+ Run linters
+deps =
+ -rrequirements-lint.txt
+setenv =
+commands =
+ pylint --rcfile=.pylintrc tests
+ mypy tests
+ isort tests
+
+[testenv:promql-query-test]
+description =
+ Run promtool unit testing on grafana queries.
+deps =
+ -rtests/requirements.txt
+depends = grafonnet-check
+setenv =
+commands =
+ behave tests/features