diff options
author | Adam King <47704447+adk3798@users.noreply.github.com> | 2025-01-13 19:07:50 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-01-13 19:07:50 +0100 |
commit | 868c90b548360fe2ea9167cb324c800322db3a1f (patch) | |
tree | f2941fccd548d7ff2f9eaee9e54e0b9be850ed3e | |
parent | Merge pull request #60828 from rkachach/fix_prometheus_targets_discovery_mtls (diff) | |
parent | mgr/cephadm: Add only_bind_ports_on_network spec parameter for Alertmanager (diff) | |
download | ceph-868c90b548360fe2ea9167cb324c800322db3a1f.tar.xz ceph-868c90b548360fe2ea9167cb324c800322db3a1f.zip |
Merge pull request #60888 from ShwetaBhosale1/fix_issue_69070_only_bind_ports_on_network_parameter_spec_for_alertmanager
mgr/cephadm: Add only_bind_ports_on_network spec parameter for Alertmanager
Reviewed-by: Adam King <adking@redhat.com>
-rw-r--r-- | src/cephadm/cephadmlib/daemons/monitoring.py | 28 | ||||
-rw-r--r-- | src/cephadm/cephadmlib/net_utils.py | 14 | ||||
-rw-r--r-- | src/cephadm/tests/test_deploy.py | 2 | ||||
-rw-r--r-- | src/pybind/mgr/cephadm/schedule.py | 2 | ||||
-rw-r--r-- | src/pybind/mgr/cephadm/services/monitoring.py | 16 | ||||
-rw-r--r-- | src/pybind/mgr/cephadm/tests/test_services.py | 28 | ||||
-rw-r--r-- | src/python-common/ceph/deployment/service_spec.py | 4 |
7 files changed, 72 insertions, 22 deletions
diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py index 9a9402632b0..4ba00daaefb 100644 --- a/src/cephadm/cephadmlib/daemons/monitoring.py +++ b/src/cephadm/cephadmlib/daemons/monitoring.py @@ -16,7 +16,13 @@ from ..daemon_form import register as register_daemon_form from ..daemon_identity import DaemonIdentity from ..deployment_utils import to_deployment_container from ..exceptions import Error -from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6 +from ..net_utils import ( + get_fqdn, + get_hostname, + get_ip_addresses, + wrap_ipv6, + EndPoint, +) @register_daemon_form @@ -89,11 +95,6 @@ class Monitoring(ContainerDaemonForm): 'image': DefaultImages.ALERTMANAGER.image_ref, 'cpus': '2', 'memory': '2GB', - 'args': [ - '--cluster.listen-address=:{}'.format( - port_map['alertmanager'][1] - ), - ], 'config-json-files': [ 'alertmanager.yml', ], @@ -248,11 +249,14 @@ class Monitoring(ContainerDaemonForm): ip = meta['ip'] if 'ports' in meta and meta['ports']: port = meta['ports'][0] - if daemon_type == 'prometheus': - config = fetch_configs(ctx) + config = fetch_configs(ctx) + if daemon_type in ['prometheus', 'alertmanager']: ip_to_bind_to = config.get('ip_to_bind_to', '') if ip_to_bind_to: ip = ip_to_bind_to + web_listen_addr = str(EndPoint(ip, port)) + r += [f'--web.listen-address={web_listen_addr}'] + if daemon_type == 'prometheus': retention_time = config.get('retention_time', '15d') retention_size = config.get( 'retention_size', '0' @@ -276,9 +280,11 @@ class Monitoring(ContainerDaemonForm): r += ['--web.route-prefix=/prometheus/'] else: r += [f'--web.external-url={scheme}://{host}:{port}'] - r += [f'--web.listen-address={ip}:{port}'] if daemon_type == 'alertmanager': - config = fetch_configs(ctx) + clus_listen_addr = str( + EndPoint(ip, self.port_map[daemon_type][1]) + ) + r += [f'--cluster.listen-address={clus_listen_addr}'] use_url_prefix = config.get('use_url_prefix', False) peers = config.get('peers', list()) # type: ignore for peer in peers: @@ -294,13 +300,11 @@ class Monitoring(ContainerDaemonForm): if daemon_type == 'promtail': r += ['--config.expand-env'] if daemon_type == 'prometheus': - config = fetch_configs(ctx) try: r += [f'--web.config.file={config["web_config"]}'] except KeyError: pass if daemon_type == 'node-exporter': - config = fetch_configs(ctx) try: r += [f'--web.config.file={config["web_config"]}'] except KeyError: diff --git a/src/cephadm/cephadmlib/net_utils.py b/src/cephadm/cephadmlib/net_utils.py index 9a7f138b1c6..bfa61d933ef 100644 --- a/src/cephadm/cephadmlib/net_utils.py +++ b/src/cephadm/cephadmlib/net_utils.py @@ -24,12 +24,22 @@ class EndPoint: def __init__(self, ip: str, port: int) -> None: self.ip = ip self.port = port + self.is_ipv4 = True + try: + if ip and ipaddress.ip_network(ip).version == 6: + self.is_ipv4 = False + except Exception: + logger.exception('Failed to check ip address version') def __str__(self) -> str: - return f'{self.ip}:{self.port}' + if self.is_ipv4: + return f'{self.ip}:{self.port}' + return f'[{self.ip}]:{self.port}' def __repr__(self) -> str: - return f'{self.ip}:{self.port}' + if self.is_ipv4: + return f'{self.ip}:{self.port}' + return f'[{self.ip}]:{self.port}' def attempt_bind(ctx, s, address, port): diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py index c5094db335f..1736639ed55 100644 --- a/src/cephadm/tests/test_deploy.py +++ b/src/cephadm/tests/test_deploy.py @@ -316,7 +316,7 @@ def test_deploy_a_monitoring_container(cephadm_fs, funkypatch): runfile_lines = f.read().splitlines() assert 'podman' in runfile_lines[-1] assert runfile_lines[-1].endswith( - 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095 --web.listen-address=1.2.3.4:9095' + 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=1.2.3.4:9095 --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095' ) assert '--user 8765' in runfile_lines[-1] assert f'-v /var/lib/ceph/{fsid}/prometheus.fire/etc/prometheus:/etc/prometheus:Z' in runfile_lines[-1] diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py index 98d2fe99897..04d3712c50a 100644 --- a/src/pybind/mgr/cephadm/schedule.py +++ b/src/pybind/mgr/cephadm/schedule.py @@ -385,6 +385,8 @@ class HostAssignment(object): def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]: for subnet in subnets: + # to normalize subnet + subnet = str(ipaddress.ip_network(subnet)) ips: List[str] = [] # following is to allow loopback interfaces for both ipv4 and ipv6. Since we # only have the subnet (and no IP) we assume default loopback IP address. diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 1b9cf618570..9c5b5a112f3 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -3,6 +3,7 @@ import logging import os import socket from typing import List, Any, Tuple, Dict, Optional, cast +import ipaddress from mgr_module import HandleCommandResult @@ -57,6 +58,8 @@ class GrafanaService(CephadmService): if ip_to_bind_to: daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to} grafana_ip = ip_to_bind_to + if ipaddress.ip_network(grafana_ip).version == 6: + grafana_ip = f"[{grafana_ip}]" domain = self.mgr.get_fqdn(daemon_spec.host) mgmt_gw_ips = [] @@ -354,6 +357,13 @@ class AlertmanagerService(CephadmService): addr = self.mgr.get_fqdn(dd.hostname) peers.append(build_url(host=addr, port=port).lstrip('/')) + ip_to_bind_to = '' + if spec.only_bind_port_on_networks and spec.networks: + assert daemon_spec.host is not None + ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or '' + if ip_to_bind_to: + daemon_spec.port_ips = {str(port): ip_to_bind_to} + deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') if security_enabled: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() @@ -376,7 +386,8 @@ class AlertmanagerService(CephadmService): }, 'peers': peers, 'web_config': '/etc/alertmanager/web.yml', - 'use_url_prefix': mgmt_gw_enabled + 'use_url_prefix': mgmt_gw_enabled, + 'ip_to_bind_to': ip_to_bind_to }, sorted(deps) else: return { @@ -384,7 +395,8 @@ class AlertmanagerService(CephadmService): "alertmanager.yml": yml }, "peers": peers, - 'use_url_prefix': mgmt_gw_enabled + 'use_url_prefix': mgmt_gw_enabled, + 'ip_to_bind_to': ip_to_bind_to }, sorted(deps) def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index e12185e9999..d872219df80 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -581,7 +581,14 @@ class TestMonitoring: mock_getfqdn.return_value = purl.hostname with with_host(cephadm_module, "test"): - with with_service(cephadm_module, AlertManagerSpec()): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + }) + with with_service(cephadm_module, AlertManagerSpec('alertmanager', + networks=['1.2.3.0/24'], + only_bind_port_on_networks=True)): y = dedent(self._get_config(expected_yaml_url)).lstrip() _run_cephadm.assert_called_with( 'test', @@ -595,11 +602,12 @@ class TestMonitoring: "deploy_arguments": [], "params": { 'tcp_ports': [9093, 9094], + 'port_ips': {"9094": "1.2.3.1"}, }, "meta": { 'service_name': 'alertmanager', 'ports': [9093, 9094], - 'ip': None, + 'ip': '1.2.3.1', 'deployed_by': [], 'rank': None, 'rank_generation': None, @@ -612,6 +620,7 @@ class TestMonitoring: }, "peers": [], "use_url_prefix": False, + "ip_to_bind_to": "1.2.3.1", } }), error_ok=True, @@ -634,8 +643,16 @@ class TestMonitoring: cephadm_module.secure_monitoring_stack = True cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + + cephadm_module.cache.update_host_networks('test', { + 'fd12:3456:789a::/64': { + 'if0': ['fd12:3456:789a::10'] + }, + }) with with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \ - with_service(cephadm_module, AlertManagerSpec()): + with_service(cephadm_module, AlertManagerSpec('alertmanager', + networks=['fd12:3456:789a::/64'], + only_bind_port_on_networks=True)): y = dedent(""" # This file is generated by cephadm. @@ -688,11 +705,12 @@ class TestMonitoring: "deploy_arguments": [], "params": { 'tcp_ports': [9093, 9094], + 'port_ips': {"9094": "fd12:3456:789a::10"} }, "meta": { 'service_name': 'alertmanager', 'ports': [9093, 9094], - 'ip': None, + 'ip': 'fd12:3456:789a::10', 'deployed_by': [], 'rank': None, 'rank_generation': None, @@ -710,6 +728,7 @@ class TestMonitoring: 'peers': [], 'web_config': '/etc/alertmanager/web.yml', "use_url_prefix": True, + "ip_to_bind_to": "fd12:3456:789a::10", } }), error_ok=True, @@ -805,6 +824,7 @@ class TestMonitoring: 'peers': [], 'web_config': '/etc/alertmanager/web.yml', "use_url_prefix": False, + "ip_to_bind_to": "", } }), error_ok=True, diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index f8a0e41baf2..1ac9fa49e32 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -2331,6 +2331,7 @@ class AlertManagerSpec(MonitoringSpec): user_data: Optional[Dict[str, Any]] = None, config: Optional[Dict[str, str]] = None, networks: Optional[List[str]] = None, + only_bind_port_on_networks: bool = False, port: Optional[int] = None, secure: bool = False, extra_container_args: Optional[GeneralArgList] = None, @@ -2361,6 +2362,7 @@ class AlertManagerSpec(MonitoringSpec): # <webhook_configs> configuration. self.user_data = user_data or {} self.secure = secure + self.only_bind_port_on_networks = only_bind_port_on_networks def get_port_start(self) -> List[int]: return [self.get_port(), 9094] @@ -2407,7 +2409,7 @@ class GrafanaSpec(MonitoringSpec): self.protocol = protocol # whether ports daemons for this service bind to should - # bind to only hte networks listed in networks param, or + # bind to only the networks listed in networks param, or # to all networks. Defaults to false which is saying to bind # on all networks. self.only_bind_port_on_networks = only_bind_port_on_networks |