summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Aashish Sharma <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com> 2024-03-05 05:27:42 +0100
committer Aashish Sharma <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com> 2024-09-11 07:24:16 +0200
commit 5f31be9df255b851251bc1affaf049f88cdebd68 (patch)
tree be0ffa6cc92f56a72c4b2b5f71d42bfbe6961438
parent Merge pull request #59485 from mcv21/cephadm_image_warn (diff)
download ceph-5f31be9df255b851251bc1affaf049f88cdebd68.tar.xz
ceph-5f31be9df255b851251bc1affaf049f88cdebd68.zip
mgr/dashboard: add ssl to prometheus federation
Fixes: https://tracker.ceph.com/issues/65057
Signed-off-by: Aashish Sharma <aasharma@redhat.com>
-rw-r--r-- src/pybind/mgr/cephadm/module.py 13
-rw-r--r-- src/pybind/mgr/cephadm/services/monitoring.py 32
-rw-r--r-- src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 60
-rw-r--r-- src/pybind/mgr/dashboard/controllers/multi_cluster.py 50
-rw-r--r-- src/pybind/mgr/orchestrator/_interface.py 4
-rw-r--r-- src/pybind/mgr/orchestrator/module.py 6
6 files changed, 130 insertions, 35 deletions
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 0bca599961e..021ec23f135 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -3184,6 +3184,16 @@ Then run the following:
self.set_store(PrometheusService.USER_CFG_KEY, user)
self.set_store(PrometheusService.PASS_CFG_KEY, password)
return 'prometheus credentials updated correctly'
+
+ @handle_orch_error
+ def set_prometheus_cert(self, cert: str) -> str:  # Persist `cert` under PrometheusService.PROMETHEUS_CERT_CFG_KEY and return a fixed confirmation message.
+ self.set_store(PrometheusService.PROMETHEUS_CERT_CFG_KEY, cert)  # stored exactly as given; the certificate text is not validated here
+ return 'prometheus cert stored correctly'
+
+ @handle_orch_error
+ def get_prometheus_cert(self) -> str:  # Return the value stored under PrometheusService.PROMETHEUS_CERT_CFG_KEY.
+ prometheus_cert = self.get_store(PrometheusService.PROMETHEUS_CERT_CFG_KEY)  # NOTE(review): may be None when no cert was ever stored, despite the `-> str` annotation -- confirm get_store's contract
+ return prometheus_cert
@handle_orch_error
def set_custom_prometheus_alerts(self, alerts_file: str) -> str:
@@ -3195,6 +3205,9 @@ Then run the following:
@handle_orch_error
def set_prometheus_target(self, url: str) -> str:
+ valid_url_pattern = r"^(?!http:\/\/)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5})$"
+ if re.match(valid_url_pattern, url) is None:
+ return f"Invalid URL '{url}'. It should be in the format host_ip:port"
prometheus_spec = cast(PrometheusSpec, self.spec_store['prometheus'].spec)
if url not in prometheus_spec.targets:
prometheus_spec.targets.append(url)
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
index 5d5d26ecad1..7127406dd59 100644
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -1,5 +1,6 @@
import errno
import logging
+import json
import os
import socket
from typing import List, Any, Tuple, Dict, Optional, cast
@@ -435,6 +436,7 @@ class PrometheusService(CephadmService):
DEFAULT_MGR_PROMETHEUS_PORT = 9283
USER_CFG_KEY = 'prometheus/web_user'
PASS_CFG_KEY = 'prometheus/web_password'
+ PROMETHEUS_CERT_CFG_KEY = 'prometheus/cert'
def config(self, spec: ServiceSpec) -> None:
# make sure module is enabled
@@ -509,6 +511,18 @@ class PrometheusService(CephadmService):
prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
FSID = self.mgr._cluster_fsid
+ clusters_credentials = {}
+ multi_cluster_config_raw = str(self.mgr.get_module_option_ex('dashboard', 'MULTICLUSTER_CONFIG'))
+ multi_cluster_config_str = multi_cluster_config_raw.replace("'", '"')
+ valid_multi_cluster_config_str = multi_cluster_config_str.replace('True', '"True"').replace('False', '"False"')
+ multi_cluster_config = json.loads(valid_multi_cluster_config_str)
+ if multi_cluster_config:
+ for url in targets:
+ credentials = self.find_prometheus_credentials(multi_cluster_config, url)
+ if credentials:
+ clusters_credentials[url] = credentials
+ clusters_credentials[url]['cert_file_name'] = ''
+
# generate the prometheus configuration
context = {
'alertmanager_url_prefix': '/alertmanager' if mgmt_gw_enabled else '/',
@@ -526,7 +540,8 @@ class PrometheusService(CephadmService):
'external_prometheus_targets': targets,
'cluster_fsid': FSID,
'nfs_sd_url': nfs_sd_url,
- 'smb_sd_url': smb_sd_url
+ 'smb_sd_url': smb_sd_url,
+ 'clusters_credentials': clusters_credentials
}
ip_to_bind_to = ''
@@ -544,6 +559,11 @@ class PrometheusService(CephadmService):
}
if security_enabled:
+ r2: Dict[str, Any] = {'files': {}}
+ for url, credentials in clusters_credentials.items():
+ r2['files'][f'prometheus_{url}_cert.crt'] = credentials['certificate']
+ credentials['cert_file_name'] = f'prometheus_{url}_cert.crt'
+ context['clusters_credentials'] = clusters_credentials
# Following key/cert are needed for:
# 1- run the prometheus server (web.yml config)
# 2- use mTLS to scrape node-exporter (prometheus acts as client)
@@ -674,6 +694,16 @@ class PrometheusService(CephadmService):
return HandleCommandResult(-errno.EBUSY, '', warn_message)
return HandleCommandResult(0, warn_message, '')
+    def find_prometheus_credentials(self,
+                                    multicluster_config: Dict[str, Any],
+                                    url: str) -> Optional[Dict[str, Any]]:
+        """Return the stored prometheus access info for the cluster behind ``url``.
+
+        :param multicluster_config: parsed dashboard multi-cluster config; its
+            ``'config'`` entry maps a cluster key to a list of cluster dicts.
+        :param url: federation target written as ``host:port`` (no scheme).
+        :return: the ``'prometheus_access_info'`` value of the first cluster
+            whose ``'prometheus_url'`` equals ``url`` once the scheme and any
+            trailing slash are dropped, or ``None`` when nothing matches.
+        """
+        # A config without a (non-empty) 'config' entry has no clusters to search.
+        for clusters in (multicluster_config.get('config') or {}).values():
+            for cluster in clusters:
+                prometheus_url = cluster.get('prometheus_url')
+                if not prometheus_url:
+                    continue
+                # Targets carry no scheme, so compare on the bare host:port.
+                host_port = prometheus_url.replace("https://", "").replace("http://", "").rstrip('/')
+                if host_port == url:
+                    return cluster.get('prometheus_access_info')
+        return None
+
class NodeExporterService(CephadmService):
TYPE = 'node-exporter'
diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
index e7694867755..3170e4ea2e9 100644
--- a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
+++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
@@ -2,10 +2,8 @@
global:
scrape_interval: 10s
evaluation_interval: 10s
-{% if not security_enabled %}
external_labels:
cluster: {{ cluster_fsid }}
-{% endif %}
rule_files:
- /etc/prometheus/alerting/*
@@ -39,15 +37,18 @@ alerting:
scrape_configs:
- job_name: 'ceph'
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: {{ cluster_fsid }}
+ - source_labels: [instance]
+ target_label: instance
+ replacement: 'ceph_cluster'
{% if security_enabled %}
scheme: https
tls_config:
ca_file: root_cert.pem
honor_labels: true
- relabel_configs:
- - source_labels: [instance]
- target_label: instance
- replacement: 'ceph_cluster'
http_sd_configs:
- url: {{ mgr_prometheus_sd_url }}
basic_auth:
@@ -57,19 +58,16 @@ scrape_configs:
ca_file: root_cert.pem
{% else %}
honor_labels: true
- relabel_configs:
- - source_labels: [__address__]
- target_label: cluster
- replacement: {{ cluster_fsid }}
- - source_labels: [instance]
- target_label: instance
- replacement: 'ceph_cluster'
http_sd_configs:
- url: {{ mgr_prometheus_sd_url }}
{% endif %}
{% if node_exporter_sd_url %}
- job_name: 'node'
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: {{ cluster_fsid }}
{% if security_enabled %}
scheme: https
tls_config:
@@ -86,15 +84,15 @@ scrape_configs:
{% else %}
http_sd_configs:
- url: {{ node_exporter_sd_url }}
- relabel_configs:
- - source_labels: [__address__]
- target_label: cluster
- replacement: {{ cluster_fsid }}
{% endif %}
{% endif %}
{% if haproxy_sd_url %}
- job_name: 'haproxy'
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: {{ cluster_fsid }}
{% if security_enabled %}
scheme: https
tls_config:
@@ -109,15 +107,15 @@ scrape_configs:
{% else %}
http_sd_configs:
- url: {{ haproxy_sd_url }}
- relabel_configs:
- - source_labels: [__address__]
- target_label: cluster
- replacement: {{ cluster_fsid }}
{% endif %}
{% endif %}
{% if ceph_exporter_sd_url %}
- job_name: 'ceph-exporter'
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: {{ cluster_fsid }}
{% if security_enabled %}
honor_labels: true
scheme: https
@@ -132,10 +130,6 @@ scrape_configs:
ca_file: root_cert.pem
{% else %}
honor_labels: true
- relabel_configs:
- - source_labels: [__address__]
- target_label: cluster
- replacement: {{ cluster_fsid }}
http_sd_configs:
- url: {{ ceph_exporter_sd_url }}
{% endif %}
@@ -201,17 +195,27 @@ scrape_configs:
{% endif %}
{% endif %}
-{% if not security_enabled %}
+{# One federate job per external cluster. Prometheus requires unique job names, so the loop index is appended. TLS and basic auth are keyed on security_enabled, the flag the other jobs in this template test. #}
+{% for url, details in clusters_credentials.items() %}
- - job_name: 'federate'
+ - job_name: 'federate_{{ loop.index }}'
scrape_interval: 15s
honor_labels: true
metrics_path: '/federate'
+{% if security_enabled %}
+ scheme: https
+ tls_config:
+ ca_file: {{ details['cert_file_name'] }}
+ basic_auth:
+ username: {{ details['user'] }}
+ password: {{ details['password'] }}
+{% endif %}
params:
'match[]':
- '{job="ceph"}'
- '{job="node"}'
- '{job="haproxy"}'
- '{job="ceph-exporter"}'
+ - '{job="nvmeof"}'
static_configs:
- - targets: {{ external_prometheus_targets }}
-{% endif %}
+ - targets: ['{{ url }}']
+{% endfor %}
+
diff --git a/src/pybind/mgr/dashboard/controllers/multi_cluster.py b/src/pybind/mgr/dashboard/controllers/multi_cluster.py
index f7e6d516b49..1551f0969ff 100644
--- a/src/pybind/mgr/dashboard/controllers/multi_cluster.py
+++ b/src/pybind/mgr/dashboard/controllers/multi_cluster.py
@@ -107,14 +107,18 @@ class MultiCluster(RESTController):
prometheus_url = self._proxy('GET', url, 'api/multi-cluster/get_prometheus_api_url',
token=cluster_token, verify=ssl_verify,
cert=ssl_certificate)
+
+ prometheus_access_info = self._proxy('GET', url,
+ 'ui-api/multi-cluster/get_prometheus_access_info', # noqa E501 #pylint: disable=line-too-long
+ token=cluster_token, verify=ssl_verify,
+ cert=ssl_certificate)
_set_prometheus_targets(prometheus_url)
self.set_multi_cluster_config(fsid, username, url, cluster_alias,
- cluster_token, prometheus_url,
- ssl_verify, ssl_certificate)
+ cluster_token, prometheus_url, ssl_verify,
+ ssl_certificate, prometheus_access_info)
return True
-
return False
def get_cors_endpoints_string(self, hub_url):
@@ -188,7 +192,8 @@ class MultiCluster(RESTController):
return cluster_token
def set_multi_cluster_config(self, fsid, username, url, cluster_alias, token,
- prometheus_url=None, ssl_verify=False, ssl_certificate=None):
+ prometheus_url=None, ssl_verify=False, ssl_certificate=None,
+ prometheus_access_info=None):
multi_cluster_config = self.load_multi_cluster_config()
if fsid in multi_cluster_config['config']:
existing_entries = multi_cluster_config['config'][fsid]
@@ -201,7 +206,8 @@ class MultiCluster(RESTController):
"token": token,
"prometheus_url": prometheus_url if prometheus_url else '',
"ssl_verify": ssl_verify,
- "ssl_certificate": ssl_certificate if ssl_certificate else ''
+ "ssl_certificate": ssl_certificate if ssl_certificate else '',
+ "prometheus_access_info": prometheus_access_info
})
else:
multi_cluster_config['current_user'] = username
@@ -213,7 +219,8 @@ class MultiCluster(RESTController):
"token": token,
"prometheus_url": prometheus_url if prometheus_url else '',
"ssl_verify": ssl_verify,
- "ssl_certificate": ssl_certificate if ssl_certificate else ''
+ "ssl_certificate": ssl_certificate if ssl_certificate else '',
+ "prometheus_access_info": prometheus_access_info
}]
Settings.MULTICLUSTER_CONFIG = multi_cluster_config
@@ -401,6 +408,37 @@ class MultiClusterUi(RESTController):
@UpdatePermission
def set_cors_endpoint(self, url: str):
configure_cors(url)
+
+ @Endpoint('GET')
+ @ReadPermission  # NOTE(review): the response includes the prometheus password -- confirm read permission is the intended access level
+ def get_prometheus_access_info(self):  # Collect the prometheus user, password and certificate via `orch prometheus` mon commands.
+ user = ''  # each of these three stays '' unless the matching command below succeeds
+ password = ''
+ prometheus_cert = ''
+ orch_backend = mgr.get_module_option_ex('orchestrator', 'orchestrator')
+ if orch_backend == 'cephadm':  # credentials are looked up for the cephadm backend; indentation is lost in this view, so the exact extent of this branch cannot be confirmed here
+ cmd = {
+ 'prefix': 'orch prometheus get-credentials',
+ }
+ ret, out, _ = mgr.mon_command(cmd)
+ if ret == 0 and out is not None:
+ access_info = json.loads(out)  # output is parsed as JSON and must hold 'user' and 'password' keys
+ user = access_info['user']
+ password = access_info['password']
+
+ cert_cmd = {
+ 'prefix': 'orch prometheus get-prometheus-cert',
+ }
+ ret, out, _ = mgr.mon_command(cert_cmd)  # ret/out are reused for the second command
+ if ret == 0 and out is not None:
+ cert = json.loads(out)  # the decoded JSON value is used as the certificate as-is
+ prometheus_cert = cert
+
+ return {
+ 'user': user,
+ 'password': password,
+ 'certificate': prometheus_cert
+ }
def _set_prometheus_targets(prometheus_url: str):
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py
index c33f38cfdd4..7839e1e8386 100644
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -791,6 +791,10 @@ class Orchestrator(object):
"""set alertmanager access information"""
raise NotImplementedError()
+    # Takes no arguments: the CLI handler calls it without any and the
+    # cephadm implementation defines it without any.
+    def get_prometheus_cert(self) -> OrchResult[str]:
+        """get the stored prometheus certificate"""
+        raise NotImplementedError()
+
def set_prometheus_access_info(self, user: str, password: str) -> OrchResult[str]:
"""set prometheus access information"""
raise NotImplementedError()
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index d0f3286177c..d3ef1e59817 100644
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -1232,6 +1232,12 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
completion = self.set_prometheus_target(url)
result = raise_if_exception(completion)
return HandleCommandResult(stdout=json.dumps(result))
+
+ @_cli_write_command('orch prometheus get-prometheus-cert')  # NOTE(review): registered with the write-command decorator although the handler only reads -- confirm this is intended
+ def _get_prometheus_cert(self) -> HandleCommandResult:  # CLI handler: fetch the prometheus cert and emit it JSON-encoded on stdout.
+ completion = self.get_prometheus_cert()
+ result = raise_if_exception(completion)  # presumably re-raises an error carried by the completion; verify against raise_if_exception
+ return HandleCommandResult(stdout=json.dumps(result))
@_cli_write_command('orch prometheus remove-target')
def _remove_prometheus_target(self, url: str) -> HandleCommandResult: