diff options
author | Aashish Sharma <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com> | 2024-03-05 05:27:42 +0100 |
---|---|---|
committer | Aashish Sharma <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com> | 2024-09-11 07:24:16 +0200 |
commit | 5f31be9df255b851251bc1affaf049f88cdebd68 (patch) | |
tree | be0ffa6cc92f56a72c4b2b5f71d42bfbe6961438 | |
parent | Merge pull request #59485 from mcv21/cephadm_image_warn (diff) | |
download | ceph-5f31be9df255b851251bc1affaf049f88cdebd68.tar.xz ceph-5f31be9df255b851251bc1affaf049f88cdebd68.zip |
mgr/dashboard: add ssl to prometheus federation
Fixes: https://tracker.ceph.com/issues/65057
Signed-off-by: Aashish Sharma <aasharma@redhat.com>
-rw-r--r-- | src/pybind/mgr/cephadm/module.py | 13 | ||||
-rw-r--r-- | src/pybind/mgr/cephadm/services/monitoring.py | 32 | ||||
-rw-r--r-- | src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 | 60 | ||||
-rw-r--r-- | src/pybind/mgr/dashboard/controllers/multi_cluster.py | 50 | ||||
-rw-r--r-- | src/pybind/mgr/orchestrator/_interface.py | 4 | ||||
-rw-r--r-- | src/pybind/mgr/orchestrator/module.py | 6 |
6 files changed, 130 insertions, 35 deletions
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 0bca599961e..021ec23f135 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -3184,6 +3184,16 @@ Then run the following: self.set_store(PrometheusService.USER_CFG_KEY, user) self.set_store(PrometheusService.PASS_CFG_KEY, password) return 'prometheus credentials updated correctly' + + @handle_orch_error + def set_prometheus_cert(self, cert: str) -> str: + self.set_store(PrometheusService.PROMETHEUS_CERT_CFG_KEY, cert) + return 'prometheus cert stored correctly' + + @handle_orch_error + def get_prometheus_cert(self) -> str: + prometheus_cert = self.get_store(PrometheusService.PROMETHEUS_CERT_CFG_KEY) + return prometheus_cert @handle_orch_error def set_custom_prometheus_alerts(self, alerts_file: str) -> str: @@ -3195,6 +3205,9 @@ Then run the following: @handle_orch_error def set_prometheus_target(self, url: str) -> str: + valid_url_pattern = r"^(?!http:\/\/)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5})$" + if re.match(valid_url_pattern, url) is None: + return f"Invalid URL '{url}'. It should be in the format host_ip:port" prometheus_spec = cast(PrometheusSpec, self.spec_store['prometheus'].spec) if url not in prometheus_spec.targets: prometheus_spec.targets.append(url) diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 5d5d26ecad1..7127406dd59 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -1,5 +1,6 @@ import errno import logging +import json import os import socket from typing import List, Any, Tuple, Dict, Optional, cast @@ -435,6 +436,7 @@ class PrometheusService(CephadmService): DEFAULT_MGR_PROMETHEUS_PORT = 9283 USER_CFG_KEY = 'prometheus/web_user' PASS_CFG_KEY = 'prometheus/web_password' + PROMETHEUS_CERT_CFG_KEY = 'prometheus/cert' def config(self, spec: ServiceSpec) -> None: # make sure module is enabled @@ -509,6 +511,18 @@ class PrometheusService(CephadmService): prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() FSID = self.mgr._cluster_fsid + clusters_credentials = {} + multi_cluster_config_raw = str(self.mgr.get_module_option_ex('dashboard', 'MULTICLUSTER_CONFIG')) + multi_cluster_config_str = multi_cluster_config_raw.replace("'", '"') + valid_multi_cluster_config_str = multi_cluster_config_str.replace('True', '"True"').replace('False', '"False"') + multi_cluster_config = json.loads(valid_multi_cluster_config_str) + if multi_cluster_config: + for url in targets: + credentials = self.find_prometheus_credentials(multi_cluster_config, url) + if credentials: + clusters_credentials[url] = credentials + clusters_credentials[url]['cert_file_name'] = '' + # generate the prometheus configuration context = { 'alertmanager_url_prefix': '/alertmanager' if mgmt_gw_enabled else '/', @@ -526,7 +540,8 @@ class PrometheusService(CephadmService): 'external_prometheus_targets': targets, 'cluster_fsid': FSID, 'nfs_sd_url': nfs_sd_url, - 'smb_sd_url': smb_sd_url + 'smb_sd_url': smb_sd_url, + 'clusters_credentials': clusters_credentials } ip_to_bind_to = '' @@ -544,6 +559,11 @@ class PrometheusService(CephadmService): } if security_enabled: + r2: Dict[str, Any] = {'files': {}} + for url, credentials in clusters_credentials.items(): + r2['files'][f'prometheus_{url}_cert.crt'] = credentials['certificate'] + credentials['cert_file_name'] = f'prometheus_{url}_cert.crt' + context['clusters_credentials'] = clusters_credentials # Following key/cert are needed for: # 1- run the prometheus server (web.yml config) # 2- use mTLS to scrape node-exporter (prometheus acts as client) @@ -674,6 +694,16 @@ class PrometheusService(CephadmService): return HandleCommandResult(-errno.EBUSY, '', warn_message) return HandleCommandResult(0, warn_message, '') + def find_prometheus_credentials(self, multicluster_config, url): + for cluster_id, clusters in multicluster_config['config'].items(): + for cluster in clusters: + prometheus_url = cluster.get('prometheus_url') + if prometheus_url: + valid_url = prometheus_url.replace("https://", "").replace("http://", "") + if valid_url == url: + return cluster.get('prometheus_access_info') + return None + class NodeExporterService(CephadmService): TYPE = 'node-exporter' diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 index e7694867755..3170e4ea2e9 100644 --- a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 @@ -2,10 +2,8 @@ global: scrape_interval: 10s evaluation_interval: 10s -{% if not security_enabled %} external_labels: cluster: {{ cluster_fsid }} -{% endif %} rule_files: - /etc/prometheus/alerting/* @@ -39,15 +37,18 @@ alerting: scrape_configs: - job_name: 'ceph' + relabel_configs: + - source_labels: [__address__] + target_label: cluster + replacement: {{ cluster_fsid }} + - source_labels: [instance] + target_label: instance + replacement: 'ceph_cluster' {% if security_enabled %} scheme: https tls_config: ca_file: root_cert.pem honor_labels: true - relabel_configs: - - source_labels: [instance] - target_label: instance - replacement: 'ceph_cluster' http_sd_configs: - url: {{ mgr_prometheus_sd_url }} basic_auth: @@ -57,19 +58,16 @@ scrape_configs: ca_file: root_cert.pem {% else %} honor_labels: true - relabel_configs: - - source_labels: [__address__] - target_label: cluster - replacement: {{ cluster_fsid }} - - source_labels: [instance] - target_label: instance - replacement: 'ceph_cluster' http_sd_configs: - url: {{ mgr_prometheus_sd_url }} {% endif %} {% if node_exporter_sd_url %} - job_name: 'node' + relabel_configs: + - source_labels: [__address__] + target_label: cluster + replacement: {{ cluster_fsid }} {% if security_enabled %} scheme: https tls_config: @@ -86,15 +84,15 @@ scrape_configs: {% else %} http_sd_configs: - url: {{ node_exporter_sd_url }} - relabel_configs: - - source_labels: [__address__] - target_label: cluster - replacement: {{ cluster_fsid }} {% endif %} {% endif %} {% if haproxy_sd_url %} - job_name: 'haproxy' + relabel_configs: + - source_labels: [__address__] + target_label: cluster + replacement: {{ cluster_fsid }} {% if security_enabled %} scheme: https tls_config: @@ -109,15 +107,15 @@ scrape_configs: {% else %} http_sd_configs: - url: {{ haproxy_sd_url }} - relabel_configs: - - source_labels: [__address__] - target_label: cluster - replacement: {{ cluster_fsid }} {% endif %} {% endif %} {% if ceph_exporter_sd_url %} - job_name: 'ceph-exporter' + relabel_configs: + - source_labels: [__address__] + target_label: cluster + replacement: {{ cluster_fsid }} {% if security_enabled %} honor_labels: true scheme: https @@ -132,10 +130,6 @@ scrape_configs: ca_file: root_cert.pem {% else %} honor_labels: true - relabel_configs: - - source_labels: [__address__] - target_label: cluster - replacement: {{ cluster_fsid }} http_sd_configs: - url: {{ ceph_exporter_sd_url }} {% endif %} @@ -201,17 +195,27 @@ scrape_configs: {% endif %} {% endif %} -{% if not security_enabled %} +{% for url, details in clusters_credentials.items() %} - job_name: 'federate' scrape_interval: 15s honor_labels: true metrics_path: '/federate' +{% if secure_monitoring_stack %} + scheme: https + tls_config: + ca_file: {{ details['cert_file_name'] }} + basic_auth: + username: {{ details['user'] }} + password: {{ details['password'] }} +{% endif %} params: 'match[]': - '{job="ceph"}' - '{job="node"}' - '{job="haproxy"}' - '{job="ceph-exporter"}' + - '{job="nvmeof"}' static_configs: - - targets: {{ external_prometheus_targets }} -{% endif %} + - targets: ['{{ url }}'] +{% endfor %} + diff --git a/src/pybind/mgr/dashboard/controllers/multi_cluster.py b/src/pybind/mgr/dashboard/controllers/multi_cluster.py index f7e6d516b49..1551f0969ff 100644 --- a/src/pybind/mgr/dashboard/controllers/multi_cluster.py +++ b/src/pybind/mgr/dashboard/controllers/multi_cluster.py @@ -107,14 +107,18 @@ class MultiCluster(RESTController): prometheus_url = self._proxy('GET', url, 'api/multi-cluster/get_prometheus_api_url', token=cluster_token, verify=ssl_verify, cert=ssl_certificate) + + prometheus_access_info = self._proxy('GET', url, + 'ui-api/multi-cluster/get_prometheus_access_info', # noqa E501 #pylint: disable=line-too-long + token=cluster_token, verify=ssl_verify, + cert=ssl_certificate) _set_prometheus_targets(prometheus_url) self.set_multi_cluster_config(fsid, username, url, cluster_alias, - cluster_token, prometheus_url, - ssl_verify, ssl_certificate) + cluster_token, prometheus_url, ssl_verify, + ssl_certificate, prometheus_access_info) return True - return False def get_cors_endpoints_string(self, hub_url): @@ -188,7 +192,8 @@ class MultiCluster(RESTController): return cluster_token def set_multi_cluster_config(self, fsid, username, url, cluster_alias, token, - prometheus_url=None, ssl_verify=False, ssl_certificate=None): + prometheus_url=None, ssl_verify=False, ssl_certificate=None, + prometheus_access_info=None): multi_cluster_config = self.load_multi_cluster_config() if fsid in multi_cluster_config['config']: existing_entries = multi_cluster_config['config'][fsid] @@ -201,7 +206,8 @@ class MultiCluster(RESTController): "token": token, "prometheus_url": prometheus_url if prometheus_url else '', "ssl_verify": ssl_verify, - "ssl_certificate": ssl_certificate if ssl_certificate else '' + "ssl_certificate": ssl_certificate if ssl_certificate else '', + "prometheus_access_info": prometheus_access_info }) else: multi_cluster_config['current_user'] = username @@ -213,7 +219,8 @@ class MultiCluster(RESTController): "token": token, "prometheus_url": prometheus_url if prometheus_url else '', "ssl_verify": ssl_verify, - "ssl_certificate": ssl_certificate if ssl_certificate else '' + "ssl_certificate": ssl_certificate if ssl_certificate else '', + "prometheus_access_info": prometheus_access_info }] Settings.MULTICLUSTER_CONFIG = multi_cluster_config @@ -401,6 +408,37 @@ class MultiClusterUi(RESTController): @UpdatePermission def set_cors_endpoint(self, url: str): configure_cors(url) + + @Endpoint('GET') + @ReadPermission + def get_prometheus_access_info(self): + user = '' + password = '' + prometheus_cert = '' + orch_backend = mgr.get_module_option_ex('orchestrator', 'orchestrator') + if orch_backend == 'cephadm': + cmd = { + 'prefix': 'orch prometheus get-credentials', + } + ret, out, _ = mgr.mon_command(cmd) + if ret == 0 and out is not None: + access_info = json.loads(out) + user = access_info['user'] + password = access_info['password'] + + cert_cmd = { + 'prefix': 'orch prometheus get-prometheus-cert', + } + ret, out, _ = mgr.mon_command(cert_cmd) + if ret == 0 and out is not None: + cert = json.loads(out) + prometheus_cert = cert + + return { + 'user': user, + 'password': password, + 'certificate': prometheus_cert + } def _set_prometheus_targets(prometheus_url: str): diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index c33f38cfdd4..7839e1e8386 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -791,6 +791,10 @@ class Orchestrator(object): """set alertmanager access information""" raise NotImplementedError() + def get_prometheus_cert(self, url: str) -> OrchResult[str]: + """set prometheus target for multi-cluster""" + raise NotImplementedError() + def set_prometheus_access_info(self, user: str, password: str) -> OrchResult[str]: """set prometheus access information""" raise NotImplementedError() diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index d0f3286177c..d3ef1e59817 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -1232,6 +1232,12 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, completion = self.set_prometheus_target(url) result = raise_if_exception(completion) return HandleCommandResult(stdout=json.dumps(result)) + + @_cli_write_command('orch prometheus get-prometheus-cert') + def _get_prometheus_cert(self) -> HandleCommandResult: + completion = self.get_prometheus_cert() + result = raise_if_exception(completion) + return HandleCommandResult(stdout=json.dumps(result)) @_cli_write_command('orch prometheus remove-target') def _remove_prometheus_target(self, url: str) -> HandleCommandResult: |