diff options
Diffstat (limited to 'src/cephadm')
23 files changed, 254 insertions, 147 deletions
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index f2172bf9083..a8616980e4d 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -57,6 +57,7 @@ from cephadmlib.constants import ( LOG_DIR_MODE, SYSCTL_DIR, UNIT_DIR, + DAEMON_FAILED_ERROR, ) from cephadmlib.context import CephadmContext from cephadmlib.context_getters import ( @@ -72,6 +73,7 @@ from cephadmlib.exceptions import ( ClusterAlreadyExists, Error, UnauthorizedRegistryError, + DaemonStartException, ) from cephadmlib.exe_utils import find_executable, find_program from cephadmlib.call_wrappers import ( @@ -109,6 +111,7 @@ from cephadmlib.file_utils import ( unlink_file, write_new, write_tmp, + update_meta_file, ) from cephadmlib.net_utils import ( build_addrv_params, @@ -1246,7 +1249,11 @@ def deploy_daemon_units( call_throws(ctx, ['systemctl', 'enable', unit_name]) if start: clean_cgroup(ctx, ident.fsid, unit_name) - call_throws(ctx, ['systemctl', 'start', unit_name]) + try: + call_throws(ctx, ['systemctl', 'start', unit_name]) + except Exception as e: + logger.error(f'systemctl start failed for {unit_name}: {str(e)}') + raise DaemonStartException() def _osd_unit_run_commands( @@ -3050,7 +3057,10 @@ def get_deployment_type( @deprecated_command def command_deploy(ctx): # type: (CephadmContext) -> None - _common_deploy(ctx) + try: + _common_deploy(ctx) + except DaemonStartException: + sys.exit(DAEMON_FAILED_ERROR) def apply_deploy_config_to_ctx( @@ -3093,7 +3103,10 @@ def command_deploy_from(ctx: CephadmContext) -> None: config_data = read_configuration_source(ctx) logger.debug('Loaded deploy configuration: %r', config_data) apply_deploy_config_to_ctx(config_data, ctx) - _common_deploy(ctx) + try: + _common_deploy(ctx) + except DaemonStartException: + sys.exit(DAEMON_FAILED_ERROR) def _common_deploy(ctx: CephadmContext) -> None: @@ -3441,6 +3454,7 @@ def list_daemons( detail: bool = True, legacy_dir: Optional[str] = None, daemon_name: Optional[str] = None, + type_of_daemon: Optional[str] = None, ) -> List[Dict[str, str]]: host_version: Optional[str] = None ls = [] @@ -3477,6 +3491,8 @@ def list_daemons( if os.path.exists(data_dir): for i in os.listdir(data_dir): if i in ['mon', 'osd', 'mds', 'mgr', 'rgw']: + if type_of_daemon and type_of_daemon != i: + continue daemon_type = i for j in os.listdir(os.path.join(data_dir, i)): if '-' not in j: @@ -3513,6 +3529,8 @@ def list_daemons( if daemon_name and name != daemon_name: continue (daemon_type, daemon_id) = j.split('.', 1) + if type_of_daemon and type_of_daemon != daemon_type: + continue unit_name = get_unit_name(fsid, daemon_type, daemon_id) @@ -4489,8 +4507,9 @@ def _rm_cluster(ctx: CephadmContext, keep_logs: bool, zap_osds: bool) -> None: ################################## -def check_time_sync(ctx, enabler=None): - # type: (CephadmContext, Optional[Packager]) -> bool +def check_time_sync( + ctx: CephadmContext, enabler: Optional[Packager] = None +) -> bool: units = [ 'chrony.service', # 18.04 (at least) 'chronyd.service', # el / opensuse @@ -4692,6 +4711,34 @@ def command_list_images(ctx: CephadmContext) -> None: # print default images cp_obj.write(sys.stdout) + +def update_service_for_daemon(ctx: CephadmContext, + available_daemons: list, + update_daemons: list) -> None: + """ Update the unit.meta file of daemon with required service name for valid daemons""" + + data = {'service_name': ctx.service_name} + # check if all the daemon names are valid + if not set(update_daemons).issubset(set(available_daemons)): + raise Error(f'Error EINVAL: one or more daemons of {update_daemons} does not exist on this host') + for name in update_daemons: + path = os.path.join(ctx.data_dir, ctx.fsid, name, 'unit.meta') + update_meta_file(path, data) + print(f'Successfully updated daemon {name} with service {ctx.service_name}') + + +@infer_fsid +def command_update_osd_service(ctx: CephadmContext) -> int: + """update service for provided daemon""" + update_daemons = [f'osd.{osd_id}' for osd_id in ctx.osd_ids.split(',')] + daemons = list_daemons(ctx, detail=False, type_of_daemon='osd') + if not daemons: + raise Error(f'Daemon {ctx.osd_ids} does not exists on this host') + available_daemons = [d['name'] for d in daemons] + update_service_for_daemon(ctx, available_daemons, update_daemons) + return 0 + + ################################## @@ -5558,6 +5605,14 @@ def _get_parser(): parser_list_images = subparsers.add_parser( 'list-images', help='list all the default images') parser_list_images.set_defaults(func=command_list_images) + + parser_update_service = subparsers.add_parser( + 'update-osd-service', help='update service for provided daemon') + parser_update_service.set_defaults(func=command_update_osd_service) + parser_update_service.add_argument('--fsid', help='cluster FSID') + parser_update_service.add_argument('--osd-ids', required=True, help='Comma-separated OSD IDs') + parser_update_service.add_argument('--service-name', required=True, help='OSD service name') + return parser diff --git a/src/cephadm/cephadmlib/call_wrappers.py b/src/cephadm/cephadmlib/call_wrappers.py index 3fe2171e99d..d3d327c218c 100644 --- a/src/cephadm/cephadmlib/call_wrappers.py +++ b/src/cephadm/cephadmlib/call_wrappers.py @@ -311,14 +311,14 @@ def call_throws( return out, err, ret -def call_timeout(ctx, command, timeout): - # type: (CephadmContext, List[str], int) -> int +def call_timeout( + ctx: CephadmContext, command: List[str], timeout: int +) -> int: logger.debug( 'Running command (timeout=%s): %s' % (timeout, ' '.join(command)) ) - def raise_timeout(command, timeout): - # type: (List[str], int) -> NoReturn + def raise_timeout(command: List[str], timeout: int) -> NoReturn: msg = 'Command `%s` timed out after %s seconds' % (command, timeout) logger.debug(msg) raise TimeoutExpired(msg) diff --git a/src/cephadm/cephadmlib/constants.py b/src/cephadm/cephadmlib/constants.py index 215d207729d..1df46353fb3 100644 --- a/src/cephadm/cephadmlib/constants.py +++ b/src/cephadm/cephadmlib/constants.py @@ -36,3 +36,4 @@ QUIET_LOG_LEVEL = 9 # DEBUG is 10, so using 9 to be lower level than DEBUG NO_DEPRECATED = False UID_NOBODY = 65534 GID_NOGROUP = 65534 +DAEMON_FAILED_ERROR = 17 diff --git a/src/cephadm/cephadmlib/container_types.py b/src/cephadm/cephadmlib/container_types.py index f1e829cbdf7..52a68888e78 100644 --- a/src/cephadm/cephadmlib/container_types.py +++ b/src/cephadm/cephadmlib/container_types.py @@ -9,7 +9,7 @@ from typing import Dict, List, Optional, Any, Union, Tuple, Iterable, cast from .call_wrappers import call, call_throws, CallVerbosity from .constants import DEFAULT_TIMEOUT -import ceph.cephadm.images as default_images +from ceph.cephadm.images import DefaultImages from .container_engines import Docker, Podman from .context import CephadmContext from .daemon_identity import DaemonIdentity, DaemonSubIdentity @@ -665,14 +665,8 @@ def enable_shared_namespaces( def get_mgr_images() -> dict: """Return dict of default mgr images""" - mgr_prefix = 'mgr/cephadm/container_image_' - mgr_images = {} - images = vars(default_images) - for key, value in images.items(): - if key.startswith('DEFAULT_') and key.endswith('_IMAGE'): - # flake8 and black disagree about spaces around ":" hence the noqa comment - suffix = key[ - len('DEFAULT_') : -len('_IMAGE') # noqa: E203 - ].lower() - mgr_images[mgr_prefix + suffix] = value + mgr_prefix = 'mgr/cephadm/' + mgr_images = { + f'{mgr_prefix}{image.key}': image.image_ref for image in DefaultImages + } return mgr_images diff --git a/src/cephadm/cephadmlib/daemon_identity.py b/src/cephadm/cephadmlib/daemon_identity.py index 52a18092bf0..bfe1a855186 100644 --- a/src/cephadm/cephadmlib/daemon_identity.py +++ b/src/cephadm/cephadmlib/daemon_identity.py @@ -157,7 +157,7 @@ class DaemonSubIdentity(DaemonIdentity): ) def sidecar_script(self, base_data_dir: Union[str, os.PathLike]) -> str: - sname = f'sidecar-{ self.subcomponent }.run' + sname = f'sidecar-{self.subcomponent}.run' return str(pathlib.Path(self.data_dir(base_data_dir)) / sname) @property diff --git a/src/cephadm/cephadmlib/daemons/ingress.py b/src/cephadm/cephadmlib/daemons/ingress.py index 8f4f6b08991..645654b59c8 100644 --- a/src/cephadm/cephadmlib/daemons/ingress.py +++ b/src/cephadm/cephadmlib/daemons/ingress.py @@ -2,10 +2,7 @@ import os from typing import Dict, List, Optional, Tuple, Union -from ceph.cephadm.images import ( - DEFAULT_HAPROXY_IMAGE, - DEFAULT_KEEPALIVED_IMAGE, -) +from ceph.cephadm.images import DefaultImages from ..constants import ( DATA_DIR_MODE, ) @@ -27,7 +24,7 @@ class HAproxy(ContainerDaemonForm): daemon_type = 'haproxy' required_files = ['haproxy.cfg'] - default_image = DEFAULT_HAPROXY_IMAGE + default_image = DefaultImages.HAPROXY.image_ref @classmethod def for_daemon_type(cls, daemon_type: str) -> bool: @@ -82,8 +79,7 @@ class HAproxy(ContainerDaemonForm): def get_daemon_args(self) -> List[str]: return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg'] - def validate(self): - # type: () -> None + def validate(self) -> None: if not is_fsid(self.fsid): raise Error('not an fsid: %s' % self.fsid) if not self.daemon_id: @@ -99,12 +95,10 @@ class HAproxy(ContainerDaemonForm): 'required file missing from config-json: %s' % fname ) - def get_daemon_name(self): - # type: () -> str + def get_daemon_name(self) -> str: return '%s.%s' % (self.daemon_type, self.daemon_id) - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str + def get_container_name(self, desc: Optional[str] = None) -> str: cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) if desc: cname = '%s-%s' % (cname, desc) @@ -158,7 +152,7 @@ class Keepalived(ContainerDaemonForm): daemon_type = 'keepalived' required_files = ['keepalived.conf'] - default_image = DEFAULT_KEEPALIVED_IMAGE + default_image = DefaultImages.KEEPALIVED.image_ref @classmethod def for_daemon_type(cls, daemon_type: str) -> bool: @@ -212,8 +206,7 @@ class Keepalived(ContainerDaemonForm): # populate files from the config-json populate_files(data_dir, self.files, uid, gid) - def validate(self): - # type: () -> None + def validate(self) -> None: if not is_fsid(self.fsid): raise Error('not an fsid: %s' % self.fsid) if not self.daemon_id: @@ -229,20 +222,17 @@ class Keepalived(ContainerDaemonForm): 'required file missing from config-json: %s' % fname ) - def get_daemon_name(self): - # type: () -> str + def get_daemon_name(self) -> str: return '%s.%s' % (self.daemon_type, self.daemon_id) - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str + def get_container_name(self, desc: Optional[str] = None) -> str: cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) if desc: cname = '%s-%s' % (cname, desc) return cname @staticmethod - def get_container_envs(): - # type: () -> List[str] + def get_container_envs() -> List[str]: envs = [ 'KEEPALIVED_AUTOCONF=false', 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf', diff --git a/src/cephadm/cephadmlib/daemons/mgmt_gateway.py b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py index 85f72495909..2be18809aa3 100644 --- a/src/cephadm/cephadmlib/daemons/mgmt_gateway.py +++ b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py @@ -11,7 +11,7 @@ from ..context_getters import fetch_configs from ..daemon_form import register as register_daemon_form from ..daemon_identity import DaemonIdentity from ..deployment_utils import to_deployment_container -from ceph.cephadm.images import DEFAULT_NGINX_IMAGE +from ceph.cephadm.images import DefaultImages from ..data_utils import dict_get, is_fsid from ..file_utils import populate_files, makedirs, recursive_chown from ..exceptions import Error @@ -32,7 +32,7 @@ class MgmtGateway(ContainerDaemonForm): 'nginx_internal.key', ] - default_image = DEFAULT_NGINX_IMAGE + default_image = DefaultImages.NGINX.image_ref @classmethod def for_daemon_type(cls, daemon_type: str) -> bool: @@ -44,7 +44,7 @@ class MgmtGateway(ContainerDaemonForm): fsid: str, daemon_id: str, config_json: Dict, - image: str = DEFAULT_NGINX_IMAGE, + image: str = DefaultImages.NGINX.image_ref, ): self.ctx = ctx self.fsid = fsid diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py index 710093f0f46..4ba00daaefb 100644 --- a/src/cephadm/cephadmlib/daemons/monitoring.py +++ b/src/cephadm/cephadmlib/daemons/monitoring.py @@ -3,14 +3,7 @@ import os from typing import Dict, List, Tuple from ..call_wrappers import call, CallVerbosity -from ceph.cephadm.images import ( - DEFAULT_ALERTMANAGER_IMAGE, - DEFAULT_GRAFANA_IMAGE, - DEFAULT_LOKI_IMAGE, - DEFAULT_NODE_EXPORTER_IMAGE, - DEFAULT_PROMETHEUS_IMAGE, - DEFAULT_PROMTAIL_IMAGE, -) +from ceph.cephadm.images import DefaultImages from ..constants import ( UID_NOBODY, GID_NOGROUP, @@ -23,7 +16,13 @@ from ..daemon_form import register as register_daemon_form from ..daemon_identity import DaemonIdentity from ..deployment_utils import to_deployment_container from ..exceptions import Error -from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6 +from ..net_utils import ( + get_fqdn, + get_hostname, + get_ip_addresses, + wrap_ipv6, + EndPoint, +) @register_daemon_form @@ -43,7 +42,7 @@ class Monitoring(ContainerDaemonForm): components = { 'prometheus': { - 'image': DEFAULT_PROMETHEUS_IMAGE, + 'image': DefaultImages.PROMETHEUS.image_ref, 'cpus': '2', 'memory': '4GB', 'args': [ @@ -55,7 +54,7 @@ class Monitoring(ContainerDaemonForm): ], }, 'loki': { - 'image': DEFAULT_LOKI_IMAGE, + 'image': DefaultImages.LOKI.image_ref, 'cpus': '1', 'memory': '1GB', 'args': [ @@ -64,7 +63,7 @@ class Monitoring(ContainerDaemonForm): 'config-json-files': ['loki.yml'], }, 'promtail': { - 'image': DEFAULT_PROMTAIL_IMAGE, + 'image': DefaultImages.PROMTAIL.image_ref, 'cpus': '1', 'memory': '1GB', 'args': [ @@ -75,13 +74,13 @@ class Monitoring(ContainerDaemonForm): ], }, 'node-exporter': { - 'image': DEFAULT_NODE_EXPORTER_IMAGE, + 'image': DefaultImages.NODE_EXPORTER.image_ref, 'cpus': '1', 'memory': '1GB', 'args': ['--no-collector.timex'], }, 'grafana': { - 'image': DEFAULT_GRAFANA_IMAGE, + 'image': DefaultImages.GRAFANA.image_ref, 'cpus': '2', 'memory': '4GB', 'args': [], @@ -93,14 +92,9 @@ class Monitoring(ContainerDaemonForm): ], }, 'alertmanager': { - 'image': DEFAULT_ALERTMANAGER_IMAGE, + 'image': DefaultImages.ALERTMANAGER.image_ref, 'cpus': '2', 'memory': '2GB', - 'args': [ - '--cluster.listen-address=:{}'.format( - port_map['alertmanager'][1] - ), - ], 'config-json-files': [ 'alertmanager.yml', ], @@ -255,11 +249,14 @@ class Monitoring(ContainerDaemonForm): ip = meta['ip'] if 'ports' in meta and meta['ports']: port = meta['ports'][0] - if daemon_type == 'prometheus': - config = fetch_configs(ctx) + config = fetch_configs(ctx) + if daemon_type in ['prometheus', 'alertmanager']: ip_to_bind_to = config.get('ip_to_bind_to', '') if ip_to_bind_to: ip = ip_to_bind_to + web_listen_addr = str(EndPoint(ip, port)) + r += [f'--web.listen-address={web_listen_addr}'] + if daemon_type == 'prometheus': retention_time = config.get('retention_time', '15d') retention_size = config.get( 'retention_size', '0' @@ -283,9 +280,11 @@ class Monitoring(ContainerDaemonForm): r += ['--web.route-prefix=/prometheus/'] else: r += [f'--web.external-url={scheme}://{host}:{port}'] - r += [f'--web.listen-address={ip}:{port}'] if daemon_type == 'alertmanager': - config = fetch_configs(ctx) + clus_listen_addr = str( + EndPoint(ip, self.port_map[daemon_type][1]) + ) + r += [f'--cluster.listen-address={clus_listen_addr}'] use_url_prefix = config.get('use_url_prefix', False) peers = config.get('peers', list()) # type: ignore for peer in peers: @@ -301,13 +300,11 @@ class Monitoring(ContainerDaemonForm): if daemon_type == 'promtail': r += ['--config.expand-env'] if daemon_type == 'prometheus': - config = fetch_configs(ctx) try: r += [f'--web.config.file={config["web_config"]}'] except KeyError: pass if daemon_type == 'node-exporter': - config = fetch_configs(ctx) try: r += [f'--web.config.file={config["web_config"]}'] except KeyError: diff --git a/src/cephadm/cephadmlib/daemons/nfs.py b/src/cephadm/cephadmlib/daemons/nfs.py index f09374d5f46..70ccea65b5b 100644 --- a/src/cephadm/cephadmlib/daemons/nfs.py +++ b/src/cephadm/cephadmlib/daemons/nfs.py @@ -42,9 +42,13 @@ class NFSGanesha(ContainerDaemonForm): return cls.daemon_type == daemon_type def __init__( - self, ctx, fsid, daemon_id, config_json, image=DEFAULT_IMAGE - ): - # type: (CephadmContext, str, Union[int, str], Dict, str) -> None + self, + ctx: CephadmContext, + fsid: str, + daemon_id: Union[int, str], + config_json: Dict, + image: str = DEFAULT_IMAGE, + ) -> None: self.ctx = ctx self.fsid = fsid self.daemon_id = daemon_id @@ -62,8 +66,9 @@ class NFSGanesha(ContainerDaemonForm): self.validate() @classmethod - def init(cls, ctx, fsid, daemon_id): - # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] + ) -> 'NFSGanesha': return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) @classmethod diff --git a/src/cephadm/cephadmlib/daemons/nvmeof.py b/src/cephadm/cephadmlib/daemons/nvmeof.py index 2c20a900f45..51b085df2a7 100644 --- a/src/cephadm/cephadmlib/daemons/nvmeof.py +++ b/src/cephadm/cephadmlib/daemons/nvmeof.py @@ -8,7 +8,7 @@ from ..container_types import CephContainer from ..context_getters import fetch_configs, get_config_and_keyring from ..daemon_form import register as register_daemon_form from ..daemon_identity import DaemonIdentity -from ceph.cephadm.images import DEFAULT_NVMEOF_IMAGE +from ceph.cephadm.images import DefaultImages from ..context import CephadmContext from ..data_utils import dict_get, is_fsid from ..deployment_utils import to_deployment_container @@ -26,16 +26,20 @@ class CephNvmeof(ContainerDaemonForm): daemon_type = 'nvmeof' required_files = ['ceph-nvmeof.conf'] - default_image = DEFAULT_NVMEOF_IMAGE + default_image = DefaultImages.NVMEOF.image_ref @classmethod def for_daemon_type(cls, daemon_type: str) -> bool: return cls.daemon_type == daemon_type def __init__( - self, ctx, fsid, daemon_id, config_json, image=DEFAULT_NVMEOF_IMAGE - ): - # type: (CephadmContext, str, Union[int, str], Dict, str) -> None + self, + ctx: CephadmContext, + fsid: str, + daemon_id: Union[int, str], + config_json: Dict, + image: str = DefaultImages.NVMEOF.image_ref, + ) -> None: self.ctx = ctx self.fsid = fsid self.daemon_id = daemon_id @@ -48,8 +52,9 @@ class CephNvmeof(ContainerDaemonForm): self.validate() @classmethod - def init(cls, ctx, fsid, daemon_id): - # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] + ) -> 'CephNvmeof': return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) @classmethod @@ -73,13 +78,18 @@ class CephNvmeof(ContainerDaemonForm): os.path.join(data_dir, 'ceph-nvmeof.conf') ] = '/src/ceph-nvmeof.conf:z' mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config' - mounts['/dev/hugepages'] = '/dev/hugepages' - mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio' mounts[log_dir] = '/var/log/ceph:z' if mtls_dir: mounts[mtls_dir] = '/src/mtls:z' return mounts + def _get_huge_pages_mounts(self, files: Dict[str, str]) -> Dict[str, str]: + mounts = dict() + if 'spdk_mem_size' not in files: + mounts['/dev/hugepages'] = '/dev/hugepages' + mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio' + return mounts + def _get_tls_cert_key_mounts( self, data_dir: str, files: Dict[str, str] ) -> Dict[str, str]: @@ -90,6 +100,7 @@ class CephNvmeof(ContainerDaemonForm): 'client_cert', 'client_key', 'root_ca_cert', + 'encryption_key', ]: if fn in files: mounts[ @@ -111,6 +122,7 @@ class CephNvmeof(ContainerDaemonForm): ) else: mounts.update(self._get_container_mounts(data_dir, log_dir)) + mounts.update(self._get_huge_pages_mounts(self.files)) mounts.update(self._get_tls_cert_key_mounts(data_dir, self.files)) def customize_container_binds( @@ -198,11 +210,13 @@ class CephNvmeof(ContainerDaemonForm): ) return cmd.split() - @staticmethod - def get_sysctl_settings() -> List[str]: - return [ - 'vm.nr_hugepages = 4096', - ] + def get_sysctl_settings(self) -> List[str]: + if 'spdk_mem_size' not in self.files: + return [ + 'vm.nr_hugepages = 4096', + ] + else: + return [] def container(self, ctx: CephadmContext) -> CephContainer: ctr = daemon_to_container(ctx, self) @@ -222,4 +236,6 @@ class CephNvmeof(ContainerDaemonForm): args.append(ctx.container_engine.unlimited_pids_option) args.extend(['--ulimit', 'memlock=-1:-1']) args.extend(['--ulimit', 'nofile=10240']) - args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE']) + args.extend(['--cap-add=CAP_SYS_NICE']) + if 'spdk_mem_size' not in self.files: + args.extend(['--cap-add=SYS_ADMIN']) diff --git a/src/cephadm/cephadmlib/daemons/oauth2_proxy.py b/src/cephadm/cephadmlib/daemons/oauth2_proxy.py index 14202111c14..c4f4ec5562f 100644 --- a/src/cephadm/cephadmlib/daemons/oauth2_proxy.py +++ b/src/cephadm/cephadmlib/daemons/oauth2_proxy.py @@ -11,7 +11,7 @@ from ..context_getters import fetch_configs from ..daemon_form import register as register_daemon_form from ..daemon_identity import DaemonIdentity from ..deployment_utils import to_deployment_container -from ceph.cephadm.images import DEFAULT_OAUTH2_PROXY_IMAGE +from ceph.cephadm.images import DefaultImages from ..constants import UID_NOBODY, GID_NOGROUP from ..data_utils import dict_get, is_fsid from ..file_utils import populate_files, makedirs, recursive_chown @@ -25,7 +25,7 @@ logger = logging.getLogger() class OAuth2Proxy(ContainerDaemonForm): """Define the configs for the jaeger tracing containers""" - default_image = DEFAULT_OAUTH2_PROXY_IMAGE + default_image = DefaultImages.OAUTH2_PROXY.image_ref daemon_type = 'oauth2-proxy' required_files = [ 'oauth2-proxy.conf', @@ -43,7 +43,7 @@ class OAuth2Proxy(ContainerDaemonForm): fsid: str, daemon_id: str, config_json: Dict, - image: str = DEFAULT_OAUTH2_PROXY_IMAGE, + image: str = DefaultImages.OAUTH2_PROXY.image_ref, ): self.ctx = ctx self.fsid = fsid diff --git a/src/cephadm/cephadmlib/daemons/smb.py b/src/cephadm/cephadmlib/daemons/smb.py index 33d43cbe6ce..0efde198812 100644 --- a/src/cephadm/cephadmlib/daemons/smb.py +++ b/src/cephadm/cephadmlib/daemons/smb.py @@ -14,7 +14,7 @@ from .. import data_utils from .. import deployment_utils from .. import file_utils from ..call_wrappers import call, CallVerbosity -from ceph.cephadm.images import DEFAULT_SAMBA_IMAGE +from ceph.cephadm.images import DefaultImages from ..container_daemon_form import ContainerDaemonForm, daemon_to_container from ..container_engines import Podman from ..container_types import ( @@ -368,7 +368,7 @@ class SMB(ContainerDaemonForm): daemon_type = 'smb' daemon_base = '/usr/sbin/smbd' - default_image = DEFAULT_SAMBA_IMAGE + default_image = DefaultImages.SAMBA.image_ref @classmethod def for_daemon_type(cls, daemon_type: str) -> bool: diff --git a/src/cephadm/cephadmlib/daemons/snmp.py b/src/cephadm/cephadmlib/daemons/snmp.py index ab84a302f2c..0557a2ef972 100644 --- a/src/cephadm/cephadmlib/daemons/snmp.py +++ b/src/cephadm/cephadmlib/daemons/snmp.py @@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union from urllib.error import HTTPError, URLError from urllib.request import urlopen -from ceph.cephadm.images import DEFAULT_SNMP_GATEWAY_IMAGE +from ceph.cephadm.images import DefaultImages from ..container_daemon_form import ContainerDaemonForm, daemon_to_container from ..container_types import CephContainer from ..context import CephadmContext @@ -24,7 +24,7 @@ class SNMPGateway(ContainerDaemonForm): daemon_type = 'snmp-gateway' SUPPORTED_VERSIONS = ['V2c', 'V3'] - default_image = DEFAULT_SNMP_GATEWAY_IMAGE + default_image = DefaultImages.SNMP_GATEWAY.image_ref DEFAULT_PORT = 9464 env_filename = 'snmp-gateway.conf' diff --git a/src/cephadm/cephadmlib/daemons/tracing.py b/src/cephadm/cephadmlib/daemons/tracing.py index 4cf74339455..44548a61d14 100644 --- a/src/cephadm/cephadmlib/daemons/tracing.py +++ b/src/cephadm/cephadmlib/daemons/tracing.py @@ -2,12 +2,7 @@ import logging from typing import Any, Dict, List, Tuple -from ceph.cephadm.images import ( - DEFAULT_ELASTICSEARCH_IMAGE, - DEFAULT_JAEGER_AGENT_IMAGE, - DEFAULT_JAEGER_COLLECTOR_IMAGE, - DEFAULT_JAEGER_QUERY_IMAGE, -) +from ceph.cephadm.images import DefaultImages from ..container_daemon_form import ContainerDaemonForm, daemon_to_container from ..container_types import CephContainer from ..context import CephadmContext @@ -27,17 +22,17 @@ class Tracing(ContainerDaemonForm): components: Dict[str, Dict[str, Any]] = { 'elasticsearch': { - 'image': DEFAULT_ELASTICSEARCH_IMAGE, + 'image': DefaultImages.ELASTICSEARCH.image_ref, 'envs': ['discovery.type=single-node'], }, 'jaeger-agent': { - 'image': DEFAULT_JAEGER_AGENT_IMAGE, + 'image': DefaultImages.JAEGER_AGENT.image_ref, }, 'jaeger-collector': { - 'image': DEFAULT_JAEGER_COLLECTOR_IMAGE, + 'image': DefaultImages.JAEGER_COLLECTOR.image_ref, }, 'jaeger-query': { - 'image': DEFAULT_JAEGER_QUERY_IMAGE, + 'image': DefaultImages.JAEGER_QUERY.image_ref, }, } # type: ignore diff --git a/src/cephadm/cephadmlib/data_utils.py b/src/cephadm/cephadmlib/data_utils.py index 0ab8b38d2b5..9caef3f72e5 100644 --- a/src/cephadm/cephadmlib/data_utils.py +++ b/src/cephadm/cephadmlib/data_utils.py @@ -189,8 +189,9 @@ def normalize_image_digest(digest: str) -> str: return digest -def get_legacy_config_fsid(cluster, legacy_dir=None): - # type: (str, Optional[str]) -> Optional[str] +def get_legacy_config_fsid( + cluster: str, legacy_dir: Optional[str] = None +) -> Optional[str]: config_file = '/etc/ceph/%s.conf' % cluster if legacy_dir is not None: config_file = os.path.abspath(legacy_dir + config_file) diff --git a/src/cephadm/cephadmlib/exceptions.py b/src/cephadm/cephadmlib/exceptions.py index 0d215fdd332..762ce782127 100644 --- a/src/cephadm/cephadmlib/exceptions.py +++ b/src/cephadm/cephadmlib/exceptions.py @@ -19,3 +19,16 @@ class UnauthorizedRegistryError(Error): class PortOccupiedError(Error): pass + + +class DaemonStartException(Exception): + """ + Special exception type we raise when the + systemctl start command fails during daemon + deployment. Necessary because the cephadm mgr module + needs to handle this case differently than a failure + earlier in the deploy process where no attempt was made + to actually start the daemon + """ + + pass diff --git a/src/cephadm/cephadmlib/file_utils.py b/src/cephadm/cephadmlib/file_utils.py index 399729f2dcc..4dd88cc3671 100644 --- a/src/cephadm/cephadmlib/file_utils.py +++ b/src/cephadm/cephadmlib/file_utils.py @@ -5,6 +5,7 @@ import datetime import logging import os import tempfile +import json from contextlib import contextmanager from pathlib import Path @@ -52,8 +53,9 @@ def write_new( os.rename(tempname, destination) -def populate_files(config_dir, config_files, uid, gid): - # type: (str, Dict, int, int) -> None +def populate_files( + config_dir: str, config_files: Dict, uid: int, gid: int +) -> None: """create config files for different services""" for fname in config_files: config_file = os.path.join(config_dir, fname) @@ -71,8 +73,7 @@ def touch( os.chown(file_path, uid, gid) -def write_tmp(s, uid, gid): - # type: (str, int, int) -> IO[str] +def write_tmp(s: str, uid: int, gid: int) -> IO[str]: tmp_f = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp') os.fchown(tmp_f.fileno(), uid, gid) tmp_f.write(s) @@ -97,8 +98,7 @@ def recursive_chown(path: str, uid: int, gid: int) -> None: os.chown(os.path.join(dirpath, filename), uid, gid) -def read_file(path_list, file_name=''): - # type: (List[str], str) -> str +def read_file(path_list: List[str], file_name: str = '') -> str: """Returns the content of the first file found within the `path_list` :param path_list: list of file paths to search @@ -123,14 +123,12 @@ def read_file(path_list, file_name=''): return 'Unknown' -def pathify(p): - # type: (str) -> str +def pathify(p: str) -> str: p = os.path.expanduser(p) return os.path.abspath(p) -def get_file_timestamp(fn): - # type: (str) -> Optional[str] +def get_file_timestamp(fn: str) -> Optional[str]: try: mt = os.path.getmtime(fn) return datetime.datetime.fromtimestamp( @@ -160,3 +158,26 @@ def unlink_file( except Exception: if not ignore_errors: raise + + +def update_meta_file(file_path: str, update_key_val: dict) -> None: + """Update key in the file with provided value""" + try: + with open(file_path, 'r') as fh: + data = json.load(fh) + file_stat = os.stat(file_path) + except FileNotFoundError: + raise + except Exception: + logger.exception(f'Failed to update {file_path}') + raise + data.update( + {key: value for key, value in update_key_val.items() if key in data} + ) + + with write_new( + file_path, + owner=(file_stat.st_uid, file_stat.st_gid), + perms=(file_stat.st_mode & 0o777), + ) as fh: + fh.write(json.dumps(data, indent=4) + '\n') diff --git a/src/cephadm/cephadmlib/net_utils.py b/src/cephadm/cephadmlib/net_utils.py index 9a7f138b1c6..bfa61d933ef 100644 --- a/src/cephadm/cephadmlib/net_utils.py +++ b/src/cephadm/cephadmlib/net_utils.py @@ -24,12 +24,22 @@ class EndPoint: def __init__(self, ip: str, port: int) -> None: self.ip = ip self.port = port + self.is_ipv4 = True + try: + if ip and ipaddress.ip_network(ip).version == 6: + self.is_ipv4 = False + except Exception: + logger.exception('Failed to check ip address version') def __str__(self) -> str: - return f'{self.ip}:{self.port}' + if self.is_ipv4: + return f'{self.ip}:{self.port}' + return f'[{self.ip}]:{self.port}' def __repr__(self) -> str: - return f'{self.ip}:{self.port}' + if self.is_ipv4: + return f'{self.ip}:{self.port}' + return f'[{self.ip}]:{self.port}' def attempt_bind(ctx, s, address, port): diff --git a/src/cephadm/cephadmlib/systemd.py b/src/cephadm/cephadmlib/systemd.py index a07757eccad..1956957d457 100644 --- a/src/cephadm/cephadmlib/systemd.py +++ b/src/cephadm/cephadmlib/systemd.py @@ -11,8 +11,7 @@ from .packagers import Packager logger = logging.getLogger() -def check_unit(ctx, unit_name): - # type: (CephadmContext, str) -> Tuple[bool, str, bool] +def check_unit(ctx: CephadmContext, unit_name: str) -> Tuple[bool, str, bool]: # NOTE: we ignore the exit code here because systemctl outputs # various exit codes based on the state of the service, but the # string result is more explicit (and sufficient). @@ -56,8 +55,9 @@ def check_unit(ctx, unit_name): return (enabled, state, installed) -def check_units(ctx, units, enabler=None): - # type: (CephadmContext, List[str], Optional[Packager]) -> bool +def check_units( + ctx: CephadmContext, units: List[str], enabler: Optional[Packager] = None +) -> bool: for u in units: (enabled, state, installed) = check_unit(ctx, u) if enabled and state == 'running': diff --git a/src/cephadm/tests/test_agent.py b/src/cephadm/tests/test_agent.py index 52cce74e1fb..8e453e3ac3c 100644 --- a/src/cephadm/tests/test_agent.py +++ b/src/cephadm/tests/test_agent.py @@ -668,7 +668,7 @@ def test_mgr_listener_run(_load_cert_chain, _load_verify_locations, _handle_json agent.mgr_listener.run() # verify payload was correctly extracted - assert _handle_json_payload.called_with(json.loads(payload)) + _handle_json_payload.assert_called_with(json.loads(payload)) FakeConn.send.assert_called_once_with(b'ACK') # second run, with bad json data received diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index f27b9bcd362..bbaaf2d39f8 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -1,5 +1,6 @@ # type: ignore +import contextlib import copy import errno import json @@ -38,6 +39,13 @@ def get_ceph_conf( mon_host = {mon_host} ''' +@contextlib.contextmanager +def bootstrap_test_ctx(*args, **kwargs): + with with_cephadm_ctx(*args, **kwargs) as ctx: + ctx.no_cleanup_on_failure = True + yield ctx + + class TestCephAdm(object): @mock.patch('cephadm.logger') @@ -1432,13 +1440,13 @@ class TestBootstrap(object): '--config', conf_file, ) - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: msg = r'No such file or directory' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) cephadm_fs.create_file(conf_file) - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1446,7 +1454,7 @@ class TestBootstrap(object): funkypatch.patch('cephadmlib.systemd.call') cmd = self._get_cmd() - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: msg = r'must specify --mon-ip or --mon-addrv' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) @@ -1455,13 +1463,13 @@ class TestBootstrap(object): funkypatch.patch('cephadmlib.systemd.call') cmd = self._get_cmd('--mon-ip', '192.168.1.1') - with with_cephadm_ctx(cmd, list_networks={}) as ctx: + with bootstrap_test_ctx(cmd, list_networks={}) as ctx: msg = r'--skip-mon-network' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) cmd += ['--skip-mon-network'] - with with_cephadm_ctx(cmd, list_networks={}) as ctx: + with bootstrap_test_ctx(cmd, list_networks={}) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1540,12 +1548,12 @@ class TestBootstrap(object): cmd = self._get_cmd('--mon-ip', mon_ip) if not result: - with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx: msg = r'--skip-mon-network' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) else: - with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1604,11 +1612,11 @@ class TestBootstrap(object): cmd = self._get_cmd('--mon-addrv', mon_addrv) if err: - with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx: with pytest.raises(_cephadm.Error, match=err): _cephadm.command_bootstrap(ctx) else: - with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1621,13 +1629,13 @@ class TestBootstrap(object): '--skip-mon-network', ) - with with_cephadm_ctx(cmd, hostname=hostname) as ctx: + with bootstrap_test_ctx(cmd, hostname=hostname) as ctx: msg = r'--allow-fqdn-hostname' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) cmd += ['--allow-fqdn-hostname'] - with with_cephadm_ctx(cmd, hostname=hostname) as ctx: + with bootstrap_test_ctx(cmd, hostname=hostname) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1646,7 +1654,7 @@ class TestBootstrap(object): '--fsid', fsid, ) - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: if err: with pytest.raises(_cephadm.Error, match=err): _cephadm.command_bootstrap(ctx) @@ -1661,7 +1669,7 @@ class TestShell(object): fsid = '00000000-0000-0000-0000-0000deadbeef' cmd = ['shell', '--fsid', fsid] - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: retval = _cephadm.command_shell(ctx) assert retval == 0 assert ctx.fsid == fsid diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py index 58f212beff6..1736639ed55 100644 --- a/src/cephadm/tests/test_deploy.py +++ b/src/cephadm/tests/test_deploy.py @@ -316,7 +316,7 @@ def test_deploy_a_monitoring_container(cephadm_fs, funkypatch): runfile_lines = f.read().splitlines() assert 'podman' in runfile_lines[-1] assert runfile_lines[-1].endswith( - 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095 --web.listen-address=1.2.3.4:9095' + 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=1.2.3.4:9095 --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095' ) assert '--user 8765' in runfile_lines[-1] assert f'-v /var/lib/ceph/{fsid}/prometheus.fire/etc/prometheus:/etc/prometheus:Z' in runfile_lines[-1] @@ -495,6 +495,7 @@ def test_deploy_ceph_exporter_container(cephadm_fs, funkypatch): def test_deploy_and_rm_iscsi(cephadm_fs, funkypatch): # Test that the deploy and remove paths for iscsi (which has sidecar container) # create and remove the correct unit files. + funkypatch.patch('shutil.rmtree') # fakefs + shutil.rmtree breaks on py3.12 mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' diff --git a/src/cephadm/tox.ini b/src/cephadm/tox.ini index b999a0f552b..d643b1ba74f 100644 --- a/src/cephadm/tox.ini +++ b/src/cephadm/tox.ini @@ -12,14 +12,14 @@ skipsdist = true max-line-length = 100 inline-quotes = ' ignore = - E501, \ + E501, W503, exclude = - .tox, \ - .vagrant, \ - __pycache__, \ - *.pyc, \ - templates, \ + .tox, + .vagrant, + __pycache__, + *.pyc, + templates, .eggs statistics = True @@ -53,7 +53,7 @@ commands = mypy --config-file ../mypy.ini {posargs:cephadm.py cephadmlib} [testenv:flake8] allowlist_externals = bash deps = - flake8 == 5.0.4 + flake8 flake8-quotes commands = flake8 --config=tox.ini {posargs:cephadm.py cephadmlib} |