summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdam King <47704447+adk3798@users.noreply.github.com>2025-01-13 19:07:50 +0100
committerGitHub <noreply@github.com>2025-01-13 19:07:50 +0100
commit868c90b548360fe2ea9167cb324c800322db3a1f (patch)
treef2941fccd548d7ff2f9eaee9e54e0b9be850ed3e
parentMerge pull request #60828 from rkachach/fix_prometheus_targets_discovery_mtls (diff)
parentmgr/cephadm: Add only_bind_ports_on_network spec parameter for Alertmanager (diff)
downloadceph-868c90b548360fe2ea9167cb324c800322db3a1f.tar.xz
ceph-868c90b548360fe2ea9167cb324c800322db3a1f.zip
Merge pull request #60888 from ShwetaBhosale1/fix_issue_69070_only_bind_ports_on_network_parameter_spec_for_alertmanager
mgr/cephadm: Add only_bind_ports_on_network spec parameter for Alertmanager Reviewed-by: Adam King <adking@redhat.com>
-rw-r--r--src/cephadm/cephadmlib/daemons/monitoring.py28
-rw-r--r--src/cephadm/cephadmlib/net_utils.py14
-rw-r--r--src/cephadm/tests/test_deploy.py2
-rw-r--r--src/pybind/mgr/cephadm/schedule.py2
-rw-r--r--src/pybind/mgr/cephadm/services/monitoring.py16
-rw-r--r--src/pybind/mgr/cephadm/tests/test_services.py28
-rw-r--r--src/python-common/ceph/deployment/service_spec.py4
7 files changed, 72 insertions, 22 deletions
diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py
index 9a9402632b0..4ba00daaefb 100644
--- a/src/cephadm/cephadmlib/daemons/monitoring.py
+++ b/src/cephadm/cephadmlib/daemons/monitoring.py
@@ -16,7 +16,13 @@ from ..daemon_form import register as register_daemon_form
from ..daemon_identity import DaemonIdentity
from ..deployment_utils import to_deployment_container
from ..exceptions import Error
-from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6
+from ..net_utils import (
+ get_fqdn,
+ get_hostname,
+ get_ip_addresses,
+ wrap_ipv6,
+ EndPoint,
+)
@register_daemon_form
@@ -89,11 +95,6 @@ class Monitoring(ContainerDaemonForm):
'image': DefaultImages.ALERTMANAGER.image_ref,
'cpus': '2',
'memory': '2GB',
- 'args': [
- '--cluster.listen-address=:{}'.format(
- port_map['alertmanager'][1]
- ),
- ],
'config-json-files': [
'alertmanager.yml',
],
@@ -248,11 +249,14 @@ class Monitoring(ContainerDaemonForm):
ip = meta['ip']
if 'ports' in meta and meta['ports']:
port = meta['ports'][0]
- if daemon_type == 'prometheus':
- config = fetch_configs(ctx)
+ config = fetch_configs(ctx)
+ if daemon_type in ['prometheus', 'alertmanager']:
ip_to_bind_to = config.get('ip_to_bind_to', '')
if ip_to_bind_to:
ip = ip_to_bind_to
+ web_listen_addr = str(EndPoint(ip, port))
+ r += [f'--web.listen-address={web_listen_addr}']
+ if daemon_type == 'prometheus':
retention_time = config.get('retention_time', '15d')
retention_size = config.get(
'retention_size', '0'
@@ -276,9 +280,11 @@ class Monitoring(ContainerDaemonForm):
r += ['--web.route-prefix=/prometheus/']
else:
r += [f'--web.external-url={scheme}://{host}:{port}']
- r += [f'--web.listen-address={ip}:{port}']
if daemon_type == 'alertmanager':
- config = fetch_configs(ctx)
+ clus_listen_addr = str(
+ EndPoint(ip, self.port_map[daemon_type][1])
+ )
+ r += [f'--cluster.listen-address={clus_listen_addr}']
use_url_prefix = config.get('use_url_prefix', False)
peers = config.get('peers', list()) # type: ignore
for peer in peers:
@@ -294,13 +300,11 @@ class Monitoring(ContainerDaemonForm):
if daemon_type == 'promtail':
r += ['--config.expand-env']
if daemon_type == 'prometheus':
- config = fetch_configs(ctx)
try:
r += [f'--web.config.file={config["web_config"]}']
except KeyError:
pass
if daemon_type == 'node-exporter':
- config = fetch_configs(ctx)
try:
r += [f'--web.config.file={config["web_config"]}']
except KeyError:
diff --git a/src/cephadm/cephadmlib/net_utils.py b/src/cephadm/cephadmlib/net_utils.py
index 9a7f138b1c6..bfa61d933ef 100644
--- a/src/cephadm/cephadmlib/net_utils.py
+++ b/src/cephadm/cephadmlib/net_utils.py
@@ -24,12 +24,22 @@ class EndPoint:
def __init__(self, ip: str, port: int) -> None:
self.ip = ip
self.port = port
+ self.is_ipv4 = True
+ try:
+ if ip and ipaddress.ip_network(ip).version == 6:
+ self.is_ipv4 = False
+ except Exception:
+ logger.exception('Failed to check ip address version')
def __str__(self) -> str:
- return f'{self.ip}:{self.port}'
+ if self.is_ipv4:
+ return f'{self.ip}:{self.port}'
+ return f'[{self.ip}]:{self.port}'
def __repr__(self) -> str:
- return f'{self.ip}:{self.port}'
+ if self.is_ipv4:
+ return f'{self.ip}:{self.port}'
+ return f'[{self.ip}]:{self.port}'
def attempt_bind(ctx, s, address, port):
diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py
index c5094db335f..1736639ed55 100644
--- a/src/cephadm/tests/test_deploy.py
+++ b/src/cephadm/tests/test_deploy.py
@@ -316,7 +316,7 @@ def test_deploy_a_monitoring_container(cephadm_fs, funkypatch):
runfile_lines = f.read().splitlines()
assert 'podman' in runfile_lines[-1]
assert runfile_lines[-1].endswith(
- 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095 --web.listen-address=1.2.3.4:9095'
+ 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=1.2.3.4:9095 --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095'
)
assert '--user 8765' in runfile_lines[-1]
assert f'-v /var/lib/ceph/{fsid}/prometheus.fire/etc/prometheus:/etc/prometheus:Z' in runfile_lines[-1]
diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py
index 98d2fe99897..04d3712c50a 100644
--- a/src/pybind/mgr/cephadm/schedule.py
+++ b/src/pybind/mgr/cephadm/schedule.py
@@ -385,6 +385,8 @@ class HostAssignment(object):
def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]:
for subnet in subnets:
+ # to normalize subnet
+ subnet = str(ipaddress.ip_network(subnet))
ips: List[str] = []
# following is to allow loopback interfaces for both ipv4 and ipv6. Since we
# only have the subnet (and no IP) we assume default loopback IP address.
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
index 1b9cf618570..9c5b5a112f3 100644
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -3,6 +3,7 @@ import logging
import os
import socket
from typing import List, Any, Tuple, Dict, Optional, cast
+import ipaddress
from mgr_module import HandleCommandResult
@@ -57,6 +58,8 @@ class GrafanaService(CephadmService):
if ip_to_bind_to:
daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to}
grafana_ip = ip_to_bind_to
+ if ipaddress.ip_network(grafana_ip).version == 6:
+ grafana_ip = f"[{grafana_ip}]"
domain = self.mgr.get_fqdn(daemon_spec.host)
mgmt_gw_ips = []
@@ -354,6 +357,13 @@ class AlertmanagerService(CephadmService):
addr = self.mgr.get_fqdn(dd.hostname)
peers.append(build_url(host=addr, port=port).lstrip('/'))
+ ip_to_bind_to = ''
+ if spec.only_bind_port_on_networks and spec.networks:
+ assert daemon_spec.host is not None
+ ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or ''
+ if ip_to_bind_to:
+ daemon_spec.port_ips = {str(port): ip_to_bind_to}
+
deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
if security_enabled:
alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
@@ -376,7 +386,8 @@ class AlertmanagerService(CephadmService):
},
'peers': peers,
'web_config': '/etc/alertmanager/web.yml',
- 'use_url_prefix': mgmt_gw_enabled
+ 'use_url_prefix': mgmt_gw_enabled,
+ 'ip_to_bind_to': ip_to_bind_to
}, sorted(deps)
else:
return {
@@ -384,7 +395,8 @@ class AlertmanagerService(CephadmService):
"alertmanager.yml": yml
},
"peers": peers,
- 'use_url_prefix': mgmt_gw_enabled
+ 'use_url_prefix': mgmt_gw_enabled,
+ 'ip_to_bind_to': ip_to_bind_to
}, sorted(deps)
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index e12185e9999..d872219df80 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -581,7 +581,14 @@ class TestMonitoring:
mock_getfqdn.return_value = purl.hostname
with with_host(cephadm_module, "test"):
- with with_service(cephadm_module, AlertManagerSpec()):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ })
+ with with_service(cephadm_module, AlertManagerSpec('alertmanager',
+ networks=['1.2.3.0/24'],
+ only_bind_port_on_networks=True)):
y = dedent(self._get_config(expected_yaml_url)).lstrip()
_run_cephadm.assert_called_with(
'test',
@@ -595,11 +602,12 @@ class TestMonitoring:
"deploy_arguments": [],
"params": {
'tcp_ports': [9093, 9094],
+ 'port_ips': {"9094": "1.2.3.1"},
},
"meta": {
'service_name': 'alertmanager',
'ports': [9093, 9094],
- 'ip': None,
+ 'ip': '1.2.3.1',
'deployed_by': [],
'rank': None,
'rank_generation': None,
@@ -612,6 +620,7 @@ class TestMonitoring:
},
"peers": [],
"use_url_prefix": False,
+ "ip_to_bind_to": "1.2.3.1",
}
}),
error_ok=True,
@@ -634,8 +643,16 @@ class TestMonitoring:
cephadm_module.secure_monitoring_stack = True
cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password')
+
+ cephadm_module.cache.update_host_networks('test', {
+ 'fd12:3456:789a::/64': {
+ 'if0': ['fd12:3456:789a::10']
+ },
+ })
with with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \
- with_service(cephadm_module, AlertManagerSpec()):
+ with_service(cephadm_module, AlertManagerSpec('alertmanager',
+ networks=['fd12:3456:789a::/64'],
+ only_bind_port_on_networks=True)):
y = dedent("""
# This file is generated by cephadm.
@@ -688,11 +705,12 @@ class TestMonitoring:
"deploy_arguments": [],
"params": {
'tcp_ports': [9093, 9094],
+ 'port_ips': {"9094": "fd12:3456:789a::10"}
},
"meta": {
'service_name': 'alertmanager',
'ports': [9093, 9094],
- 'ip': None,
+ 'ip': 'fd12:3456:789a::10',
'deployed_by': [],
'rank': None,
'rank_generation': None,
@@ -710,6 +728,7 @@ class TestMonitoring:
'peers': [],
'web_config': '/etc/alertmanager/web.yml',
"use_url_prefix": True,
+ "ip_to_bind_to": "fd12:3456:789a::10",
}
}),
error_ok=True,
@@ -805,6 +824,7 @@ class TestMonitoring:
'peers': [],
'web_config': '/etc/alertmanager/web.yml',
"use_url_prefix": False,
+ "ip_to_bind_to": "",
}
}),
error_ok=True,
diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py
index f8a0e41baf2..1ac9fa49e32 100644
--- a/src/python-common/ceph/deployment/service_spec.py
+++ b/src/python-common/ceph/deployment/service_spec.py
@@ -2331,6 +2331,7 @@ class AlertManagerSpec(MonitoringSpec):
user_data: Optional[Dict[str, Any]] = None,
config: Optional[Dict[str, str]] = None,
networks: Optional[List[str]] = None,
+ only_bind_port_on_networks: bool = False,
port: Optional[int] = None,
secure: bool = False,
extra_container_args: Optional[GeneralArgList] = None,
@@ -2361,6 +2362,7 @@ class AlertManagerSpec(MonitoringSpec):
# <webhook_configs> configuration.
self.user_data = user_data or {}
self.secure = secure
+ self.only_bind_port_on_networks = only_bind_port_on_networks
def get_port_start(self) -> List[int]:
return [self.get_port(), 9094]
@@ -2407,7 +2409,7 @@ class GrafanaSpec(MonitoringSpec):
self.protocol = protocol
# whether ports daemons for this service bind to should
- # bind to only hte networks listed in networks param, or
+ # bind to only the networks listed in networks param, or
# to all networks. Defaults to false which is saying to bind
# on all networks.
self.only_bind_port_on_networks = only_bind_port_on_networks