Diffstat (limited to 'src/pybind/mgr/cephadm')
17 files changed, 493 insertions, 180 deletions
diff --git a/src/pybind/mgr/cephadm/cert_mgr.py b/src/pybind/mgr/cephadm/cert_mgr.py index 9b68e85ca44..0c56c704788 100644 --- a/src/pybind/mgr/cephadm/cert_mgr.py +++ b/src/pybind/mgr/cephadm/cert_mgr.py @@ -1,6 +1,6 @@ from cephadm.ssl_cert_utils import SSLCerts, SSLConfigException -from typing import TYPE_CHECKING, Tuple, Union, List +from typing import TYPE_CHECKING, Tuple, Union, List, Optional if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator @@ -28,5 +28,10 @@ class CertMgr: def get_root_ca(self) -> str: return self.ssl_certs.get_root_cert() - def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: Union[str, List[str]]) -> Tuple[str, str]: - return self.ssl_certs.generate_cert(host_fqdn, node_ip) + def generate_cert( + self, + host_fqdn: Union[str, List[str]], + node_ip: Union[str, List[str]], + custom_san_list: Optional[List[str]] = None, + ) -> Tuple[str, str]: + return self.ssl_certs.generate_cert(host_fqdn, node_ip, custom_san_list=custom_san_list) diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index 8a16ef8ae80..550604fc55b 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -187,11 +187,12 @@ class Inventory: def add_label(self, host: str, label: str) -> None: host = self._get_stored_name(host) - + labels = label.split(',') if ',' in label else [label] if 'labels' not in self._inventory[host]: self._inventory[host]['labels'] = list() - if label not in self._inventory[host]['labels']: - self._inventory[host]['labels'].append(label) + for label in labels: + if label not in self._inventory[host]['labels']: + self._inventory[host]['labels'].append(label) self.save() def rm_label(self, host: str, label: str) -> None: @@ -437,6 +438,7 @@ class SpecStore(): for key_attr in [ 'server_key', 'client_key', + 'encryption_key', ]: key = getattr(nvmeof_spec, key_attr, None) if key: @@ -489,6 +491,7 @@ class SpecStore(): self.mgr.cert_key_store.rm_cert('nvmeof_root_ca_cert', service_name=spec.service_name()) self.mgr.cert_key_store.rm_key('nvmeof_server_key', service_name=spec.service_name()) self.mgr.cert_key_store.rm_key('nvmeof_client_key', service_name=spec.service_name()) + self.mgr.cert_key_store.rm_key('nvmeof_encryption_key', service_name=spec.service_name()) def get_created(self, spec: ServiceSpec) -> Optional[datetime.datetime]: return self.spec_created.get(spec.service_name()) @@ -637,6 +640,9 @@ class TunedProfileStore(): logger.error( f'Attempted to set setting "{setting}" for nonexistent os tuning profile "{profile}"') + def add_settings(self, profile: str, settings: dict) -> None: + self.process_settings(profile, settings, action='add') + def rm_setting(self, profile: str, setting: str) -> None: if profile in self.profiles: if setting in self.profiles[profile].settings: @@ -650,6 +656,39 @@ class TunedProfileStore(): logger.error( f'Attempted to remove setting "{setting}" from nonexistent os tuning profile "{profile}"') + def rm_settings(self, profile: str, settings: List[str]) -> None: + self.process_settings(profile, settings, action='remove') + + def process_settings(self, profile: str, settings: Union[dict, list], action: str) -> None: + """ + Process settings by either adding or removing them based on the action specified. 
+ """ + if profile not in self.profiles: + logger.error(f'Attempted to {action} settings for nonexistent os tuning profile "{profile}"') + return + profile_settings = self.profiles[profile].settings + if action == 'remove' and isinstance(settings, list): + invalid_settings = [s for s in settings if '=' in s or s not in profile_settings] + if invalid_settings: + raise OrchestratorError( + f"Invalid settings: {', '.join(invalid_settings)}. " + "Ensure settings are specified without '=' and exist in the profile. Correct format: key1,key2" + ) + if action == 'add' and isinstance(settings, dict): + for setting, value in settings.items(): + self.profiles[profile].settings[setting] = value + elif action == 'remove' and isinstance(settings, list): + for setting in settings: + self.profiles[profile].settings.pop(setting, '') + else: + logger.error( + f'Invalid action "{action}" for settings modification for tuned profile ' + f'"{profile}". Valid actions are "add" and "remove"' + ) + return + self.profiles[profile]._last_updated = datetime_to_str(datetime_now()) + self.save() + def add_profile(self, spec: TunedProfileSpec) -> None: spec._last_updated = datetime_to_str(datetime_now()) self.profiles[spec.profile_name] = spec @@ -1932,6 +1971,7 @@ class CertKeyStore(): 'ingress_ssl_key', 'nvmeof_server_key', 'nvmeof_client_key', + 'nvmeof_encryption_key', ] known_certs: Dict[str, Any] = {} @@ -1968,6 +2008,7 @@ class CertKeyStore(): 'ingress_ssl_key': {}, # service-name -> key 'nvmeof_server_key': {}, # service-name -> key 'nvmeof_client_key': {}, # service-name -> key + 'nvmeof_encryption_key': {}, # service-name -> key } def get_cert(self, entity: str, service_name: str = '', host: str = '') -> str: @@ -1995,8 +2036,8 @@ class CertKeyStore(): var = service_name if entity in self.service_name_cert else host j = {} self.known_certs[entity][var] = cert_obj - for service_name in self.known_certs[entity].keys(): - j[var] = Cert.to_json(self.known_certs[entity][var]) + for cert_key in self.known_certs[entity]: + j[cert_key] = Cert.to_json(self.known_certs[entity][cert_key]) else: self.known_certs[entity] = cert_obj j = Cert.to_json(cert_obj) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 1acc2ad2f2d..6690153d435 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -30,6 +30,7 @@ import multiprocessing.pool import subprocess from prettytable import PrettyTable +from ceph.cephadm.images import DefaultImages from ceph.deployment import inventory from ceph.deployment.drive_group import DriveGroupSpec from ceph.deployment.service_spec import \ @@ -130,28 +131,7 @@ def os_exit_noop(status: int) -> None: os._exit = os_exit_noop # type: ignore - -# Default container images ----------------------------------------------------- DEFAULT_IMAGE = 'quay.io/ceph/ceph' -DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.51.0' -DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.7.0' -DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.2.17' -DEFAULT_LOKI_IMAGE = 'quay.io/ceph/loki:3.0.0' -DEFAULT_PROMTAIL_IMAGE = 'quay.io/ceph/promtail:3.0.0' -DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.27.0' -DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/grafana:10.4.8' -DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3' -DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4' -DEFAULT_SNMP_GATEWAY_IMAGE = 'quay.io/ceph/snmp-notifier:v1.2.1' -DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23' 
-DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29' -DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29' -DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:sclorg-nginx-126' -DEFAULT_OAUTH2_PROXY_IMAGE = 'quay.io/oauth2-proxy/oauth2-proxy:v7.6.0' -DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29' -DEFAULT_SAMBA_IMAGE = 'quay.io/samba.org/samba-server:devbuilds-centos-amd64' -DEFAULT_SAMBA_METRICS_IMAGE = 'quay.io/samba.org/samba-metrics:latest' -# ------------------------------------------------------------------------------ def host_exists(hostname_position: int = 1) -> Callable: @@ -238,96 +218,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, runtime=True, ), Option( - 'container_image_prometheus', - default=DEFAULT_PROMETHEUS_IMAGE, - desc='Prometheus container image', - ), - Option( - 'container_image_nvmeof', - default=DEFAULT_NVMEOF_IMAGE, - desc='Nvme-of container image', - ), - Option( - 'container_image_grafana', - default=DEFAULT_GRAFANA_IMAGE, - desc='Prometheus container image', - ), - Option( - 'container_image_alertmanager', - default=DEFAULT_ALERT_MANAGER_IMAGE, - desc='Prometheus container image', - ), - Option( - 'container_image_node_exporter', - default=DEFAULT_NODE_EXPORTER_IMAGE, - desc='Prometheus container image', - ), - Option( - 'container_image_loki', - default=DEFAULT_LOKI_IMAGE, - desc='Loki container image', - ), - Option( - 'container_image_promtail', - default=DEFAULT_PROMTAIL_IMAGE, - desc='Promtail container image', - ), - Option( - 'container_image_haproxy', - default=DEFAULT_HAPROXY_IMAGE, - desc='HAproxy container image', - ), - Option( - 'container_image_keepalived', - default=DEFAULT_KEEPALIVED_IMAGE, - desc='Keepalived container image', - ), - Option( - 'container_image_snmp_gateway', - default=DEFAULT_SNMP_GATEWAY_IMAGE, - desc='SNMP Gateway container image', - ), - Option( - 'container_image_nginx', - default=DEFAULT_NGINX_IMAGE, - desc='Nginx container image', - ), - Option( - 'container_image_oauth2_proxy', - default=DEFAULT_OAUTH2_PROXY_IMAGE, - desc='oauth2-proxy container image', - ), - Option( - 'container_image_elasticsearch', - default=DEFAULT_ELASTICSEARCH_IMAGE, - desc='elasticsearch container image', - ), - Option( - 'container_image_jaeger_agent', - default=DEFAULT_JAEGER_AGENT_IMAGE, - desc='Jaeger agent container image', - ), - Option( - 'container_image_jaeger_collector', - default=DEFAULT_JAEGER_COLLECTOR_IMAGE, - desc='Jaeger collector container image', - ), - Option( - 'container_image_jaeger_query', - default=DEFAULT_JAEGER_QUERY_IMAGE, - desc='Jaeger query container image', - ), - Option( - 'container_image_samba', - default=DEFAULT_SAMBA_IMAGE, - desc='Samba/SMB container image', - ), - Option( - 'container_image_samba_metrics', - default=DEFAULT_SAMBA_METRICS_IMAGE, - desc='Samba/SMB metrics exporter container image', - ), - Option( 'warn_on_stray_hosts', type='bool', default=True, @@ -563,6 +453,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, desc="Default address for RedFish API (oob management)." 
), ] + for image in DefaultImages: + MODULE_OPTIONS.append(Option(image.key, default=image.image_ref, desc=image.desc)) def __init__(self, *args: Any, **kwargs: Any): super(CephadmOrchestrator, self).__init__(*args, **kwargs) @@ -1903,7 +1795,7 @@ Then run the following: self.inventory.add_host(spec) self.offline_hosts_remove(spec.hostname) if spec.status == 'maintenance': - self._set_maintenance_healthcheck() + self.set_maintenance_healthcheck() self.event.set() # refresh stray health check self.log.info('Added host %s' % spec.hostname) return "Added host '{}' with addr '{}'".format(spec.hostname, spec.addr) @@ -2074,6 +1966,7 @@ Then run the following: self.ssh.reset_con(host) # if host was in offline host list, we should remove it now. self.offline_hosts_remove(host) + self.set_maintenance_healthcheck() self.event.set() # refresh stray health check self.log.info('Removed host %s' % host) return "Removed {} host '{}'".format('offline' if offline else '', host) @@ -2188,7 +2081,7 @@ Then run the following: self.log.info(msg) return msg - def _set_maintenance_healthcheck(self) -> None: + def set_maintenance_healthcheck(self) -> None: """Raise/update or clear the maintenance health check as needed""" in_maintenance = self.inventory.get_host_with_state("maintenance") @@ -2272,12 +2165,12 @@ Then run the following: self.inventory._inventory[hostname] = tgt_host self.inventory.save() - self._set_maintenance_healthcheck() + self.set_maintenance_healthcheck() return f'Daemons for Ceph cluster {self._cluster_fsid} stopped on host {hostname}. Host {hostname} moved to maintenance mode' @handle_orch_error @host_exists() - def exit_host_maintenance(self, hostname: str) -> str: + def exit_host_maintenance(self, hostname: str, force: bool = False, offline: bool = False) -> str: """Exit maintenance mode and return a host to an operational state Returning from maintenance will enable the clusters systemd target and @@ -2285,6 +2178,8 @@ Then run the following: host has osd daemons :param hostname: (str) host name + :param force: (bool) force removal of the host from maintenance mode + :param offline: (bool) to remove hosts that are offline from maintenance mode :raises OrchestratorError: Unable to return from maintenance, or unset the noout flag @@ -2293,37 +2188,74 @@ Then run the following: if tgt_host['status'] != "maintenance": raise OrchestratorError(f"Host {hostname} is not in maintenance mode") - with self.async_timeout_handler(hostname, 'cephadm host-maintenance exit'): - outs, errs, _code = self.wait_async( - CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, - 'host-maintenance', ['exit'], error_ok=True)) - returned_msg = errs[0].split('\n')[-1] - if returned_msg.startswith('failed') or returned_msg.startswith('ERROR'): - raise OrchestratorError( - f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}") - - if "osd" in self.cache.get_daemon_types(hostname): - crush_node = hostname if '.' not in hostname else hostname.split('.')[0] - rc, _out, _err = self.mon_command({ - 'prefix': 'osd unset-group', - 'flags': 'noout', - 'who': [crush_node], - 'format': 'json' - }) - if rc: + # Given we do not regularly check maintenance mode hosts for being offline, + # we have no idea at this point whether the host is online or not. + # Keep in mind this goes both ways, as users could have run + # "ceph cephadm check-host <hostname>" when the host was in maintenance + # mode and offline and the host could have since come online. 
This following + # "cephadm check-host" command is being run purely so we know if the host + # is online or offline, as those should be handled differently + try: + with self.async_timeout_handler(hostname, 'cephadm check-host'): + outs, errs, _code = self.wait_async( + CephadmServe(self)._run_cephadm( + hostname, cephadmNoImage, + 'check-host', [], error_ok=False + ) + ) + except OrchestratorError: + pass + + host_offline = hostname in self.offline_hosts + + if host_offline and not offline: + raise OrchestratorValidationError( + f'{hostname} is offline, please use --offline and --force to take this host out of maintenance mode') + + if not host_offline and offline: + raise OrchestratorValidationError( + f'{hostname} is online, please take host out of maintenance mode without --offline.') + + if offline and not force: + raise OrchestratorValidationError("Taking an offline host out of maintenance mode requires --force") + + # no point trying these parts if we know the host is offline + if not host_offline: + with self.async_timeout_handler(hostname, 'cephadm host-maintenance exit'): + outs, errs, _code = self.wait_async( + CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, + 'host-maintenance', ['exit'], error_ok=True)) + returned_msg = errs[0].split('\n')[-1] + if (returned_msg.startswith('failed') or returned_msg.startswith('ERROR')): self.log.warning( - f"exit maintenance request failed to UNSET the noout group for {hostname}, (rc={rc})") - raise OrchestratorError(f"Unable to set the osds on {hostname} to noout (rc={rc})") - else: - self.log.info( - f"exit maintenance request has UNSET for the noout group on host {hostname}") + f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}") + if not force: + raise OrchestratorError( + f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}") + + if "osd" in self.cache.get_daemon_types(hostname): + crush_node = hostname if '.' not in hostname else hostname.split('.')[0] + rc, _out, _err = self.mon_command({ + 'prefix': 'osd unset-group', + 'flags': 'noout', + 'who': [crush_node], + 'format': 'json' + }) + if rc: + self.log.warning( + f"exit maintenance request failed to UNSET the noout group for {hostname}, (rc={rc})") + if not force: + raise OrchestratorError(f"Unable to set the osds on {hostname} to noout (rc={rc})") + else: + self.log.info( + f"exit maintenance request has UNSET for the noout group on host {hostname}") # update the host record status tgt_host['status'] = "" self.inventory._inventory[hostname] = tgt_host self.inventory.save() - self._set_maintenance_healthcheck() + self.set_maintenance_healthcheck() return f"Ceph cluster {self._cluster_fsid} on {hostname} has exited maintenance mode" @@ -2528,7 +2460,7 @@ Then run the following: @handle_orch_error def service_action(self, action: str, service_name: str) -> List[str]: - if service_name not in self.spec_store.all_specs.keys(): + if service_name not in self.spec_store.all_specs.keys() and service_name != 'osd': raise OrchestratorError(f'Invalid service name "{service_name}".' 
+ ' View currently running services using "ceph orch ls"') dds: List[DaemonDescription] = self.cache.get_daemons_by_service(service_name) @@ -3479,6 +3411,33 @@ Then run the following: return f'Added setting {setting} with value {value} to tuned profile {profile_name}' @handle_orch_error + def tuned_profile_add_settings(self, profile_name: str, settings: dict) -> str: + if profile_name not in self.tuned_profiles: + raise OrchestratorError( + f"Tuned profile {profile_name} does not exist. Cannot add setting." + ) + self.tuned_profiles.add_settings(profile_name, settings) + results = [ + f"Added setting {key} with value {value} to tuned profile {profile_name}" + for key, value in settings.items() + ] + self._kick_serve_loop() + return "\n".join(results) + + @handle_orch_error + def tuned_profile_rm_settings(self, profile_name: str, settings: List[str]) -> str: + if profile_name not in self.tuned_profiles: + raise OrchestratorError( + f"Tuned profile {profile_name} does not exist. Cannot remove setting." + ) + self.tuned_profiles.rm_settings(profile_name, settings) + results = [ + f'Removed setting {settings} from tuned profile {profile_name}' + ] + self._kick_serve_loop() + return "\n".join(results) + + @handle_orch_error def tuned_profile_rm_setting(self, profile_name: str, setting: str) -> str: if profile_name not in self.tuned_profiles: raise OrchestratorError( @@ -3966,6 +3925,51 @@ Then run the following: return self.to_remove_osds.all_osds() @handle_orch_error + def set_osd_spec(self, service_name: str, osd_ids: List[str]) -> str: + """ + Update unit.meta file for osd with service name + """ + if service_name not in self.spec_store: + raise OrchestratorError(f"Cannot find service '{service_name}' in the inventory. " + "Please try again after applying an OSD service that matches " + "the service name to which you want to attach OSDs.") + + daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_type('osd') + update_osd = defaultdict(list) + for daemon in daemons: + if daemon.daemon_id in osd_ids and daemon.hostname: + update_osd[daemon.hostname].append(daemon.daemon_id) + + if not update_osd: + raise OrchestratorError(f"Unable to find OSDs: {osd_ids}") + + failed_osds = [] + success_osds = [] + for host in update_osd: + osds = ",".join(update_osd[host]) + # run cephadm command with all host osds on specific host, + # if it fails, continue with other hosts + try: + with self.async_timeout_handler(host): + outs, errs, _code = self.wait_async( + CephadmServe(self)._run_cephadm(host, + cephadmNoImage, + 'update-osd-service', + ['--service-name', service_name, '--osd-ids', osds])) + if _code: + self.log.error(f"Failed to update service for {osds} osd. Cephadm error: {errs}") + failed_osds.extend(update_osd[host]) + else: + success_osds.extend(update_osd[host]) + except Exception: + self.log.exception(f"Failed to set service name for {osds}") + failed_osds.extend(update_osd[host]) + self.cache.invalidate_host_daemons(host) + self._kick_serve_loop() + return f"Updated service for osd {','.join(success_osds)}" + (f" and failed for {','.join(failed_osds)}" if failed_osds else "") + + @handle_orch_error + @host_exists() def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> str: """ Drain all daemons from a host. 
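The new tuned-profile add-settings/rm-settings handlers above funnel both batch operations through a single TunedProfileStore.process_settings() helper. Below is a minimal standalone sketch of that add/remove logic; SimpleProfileStore and its plain-dict storage are hypothetical stand-ins for the real store, which additionally tracks update timestamps and persists profiles to the mon store.

from typing import Dict, List, Union

class SimpleProfileStore:
    """Hypothetical stand-in for cephadm's TunedProfileStore."""

    def __init__(self) -> None:
        self.profiles: Dict[str, Dict[str, str]] = {}

    def add_settings(self, profile: str, settings: Dict[str, str]) -> None:
        self._process(profile, settings, action='add')

    def rm_settings(self, profile: str, settings: List[str]) -> None:
        self._process(profile, settings, action='remove')

    def _process(self, profile: str,
                 settings: Union[Dict[str, str], List[str]], action: str) -> None:
        if profile not in self.profiles:
            raise KeyError(f'no tuned profile named {profile!r}')
        current = self.profiles[profile]
        if action == 'add' and isinstance(settings, dict):
            current.update(settings)          # new values overwrite existing keys
        elif action == 'remove' and isinstance(settings, list):
            # reject 'key=value' style entries and unknown keys up front
            bad = [s for s in settings if '=' in s or s not in current]
            if bad:
                raise ValueError(f"invalid settings: {', '.join(bad)}")
            for key in settings:
                current.pop(key, None)
        else:
            raise ValueError(f'unknown action {action!r}')

store = SimpleProfileStore()
store.profiles['latency'] = {'vm.swappiness': '10'}
store.add_settings('latency', {'vm.dirty_ratio': '20', 'vm.swappiness': '5'})
store.rm_settings('latency', ['vm.dirty_ratio'])
print(store.profiles['latency'])              # {'vm.swappiness': '5'}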
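The long block of hard-coded DEFAULT_*_IMAGE constants and per-image Option entries removed from module.py is replaced by a loop over ceph.cephadm.images.DefaultImages. The following is a rough sketch of the pattern only: the two enum members and the simplified Option class are illustrative stand-ins, not the real definitions from ceph.cephadm.images and mgr_module.

from dataclasses import dataclass
from enum import Enum

@dataclass
class Option:
    """Simplified stand-in for mgr_module.Option."""
    name: str
    default: str
    desc: str

class DefaultImages(Enum):
    # illustrative members only; the real enum presumably carries the full
    # list of monitoring and gateway images
    PROMETHEUS = ('container_image_prometheus',
                  'quay.io/prometheus/prometheus:v2.51.0', 'Prometheus container image')
    GRAFANA = ('container_image_grafana',
               'quay.io/ceph/grafana:10.4.8', 'Grafana container image')

    def __init__(self, key: str, image_ref: str, desc: str) -> None:
        self.key = key
        self.image_ref = image_ref
        self.desc = desc

MODULE_OPTIONS = []
for image in DefaultImages:
    # mirrors the loop appended to CephadmOrchestrator.MODULE_OPTIONS above
    MODULE_OPTIONS.append(Option(image.key, image.image_ref, image.desc))

print([o.name for o in MODULE_OPTIONS])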
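exit_host_maintenance() now probes the host with "cephadm check-host" and accepts --force and --offline flags; the new validation reduces to a few guard clauses. A condensed sketch of just those checks (the real method also runs host-maintenance exit, unsets the noout group, and clears the host status):

def validate_exit_maintenance(host_offline: bool, offline: bool, force: bool) -> None:
    # --offline must match the host's observed state, and pulling an offline
    # host out of maintenance additionally requires --force
    if host_offline and not offline:
        raise ValueError('host is offline; use --offline and --force')
    if not host_offline and offline:
        raise ValueError('host is online; retry without --offline')
    if offline and not force:
        raise ValueError('taking an offline host out of maintenance requires --force')

validate_exit_maintenance(host_offline=False, offline=False, force=False)  # normal path, no error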
diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py index 98d2fe99897..04d3712c50a 100644 --- a/src/pybind/mgr/cephadm/schedule.py +++ b/src/pybind/mgr/cephadm/schedule.py @@ -385,6 +385,8 @@ class HostAssignment(object): def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]: for subnet in subnets: + # to normalize subnet + subnet = str(ipaddress.ip_network(subnet)) ips: List[str] = [] # following is to allow loopback interfaces for both ipv4 and ipv6. Since we # only have the subnet (and no IP) we assume default loopback IP address. diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 4a7959ae045..8e9cd00fa81 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1436,8 +1436,24 @@ class CephadmServe: config_blobs=daemon_spec.final_config, ).dump_json_str(), use_current_daemon_image=reconfig, + error_ok=True ) + # return number corresponding to DAEMON_FAILED_ERROR + # in src/cephadm/cephadmlib/constants. + # TODO: link these together so one cannot be changed without the other + if code == 17: + # daemon failed on systemctl start command, meaning while + # deployment failed the daemon is present and we should handle + # this as if the deploy command "succeeded" and mark the daemon + # as failed later when we fetch its status + self.mgr.log.error(f'Deployment of {daemon_spec.name()} failed during "systemctl start" command') + elif code: + # some other failure earlier in the deploy process. Just raise an exception + # the same as we would in _run_cephadm on a nonzero rc + raise OrchestratorError( + f'cephadm exited with an error code: {code}, stderr: {err}') + if daemon_spec.daemon_type == 'agent': self.mgr.agent_cache.agent_timestamp[daemon_spec.host] = datetime_now() self.mgr.agent_cache.agent_counter[daemon_spec.host] = 1 diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 9043577bc5a..4f83d7bb0fb 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1015,12 +1015,6 @@ class RgwService(CephService): # set rgw_realm rgw_zonegroup and rgw_zone, if present self.set_realm_zg_zone(spec) - if spec.generate_cert and not spec.rgw_frontend_ssl_certificate: - # generate a self-signed cert for the rgw service - cert, key = self.mgr.cert_mgr.ssl_certs.generate_root_cert(custom_san_list=spec.zonegroup_hostnames) - spec.rgw_frontend_ssl_certificate = ''.join([key, cert]) - self.mgr.spec_store.save(spec) - if spec.rgw_frontend_ssl_certificate: if isinstance(spec.rgw_frontend_ssl_certificate, list): cert_data = '\n'.join(spec.rgw_frontend_ssl_certificate) @@ -1068,6 +1062,19 @@ class RgwService(CephService): # and it matches the spec. 
port = spec.get_port() + if spec.generate_cert: + cert, key = self.mgr.cert_mgr.generate_cert( + daemon_spec.host, + self.mgr.inventory.get_addr(daemon_spec.host), + custom_san_list=spec.zonegroup_hostnames + ) + pem = ''.join([key, cert]) + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config-key set', + 'key': f'rgw/cert/{daemon_spec.name()}', + 'val': pem, + }) + # configure frontend args = [] ftype = spec.rgw_frontend_type or "beast" @@ -1078,7 +1085,10 @@ class RgwService(CephService): f"ssl_endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") else: args.append(f"ssl_port={port}") - args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") + if spec.generate_cert: + args.append(f"ssl_certificate=config://rgw/cert/{daemon_spec.name()}") + else: + args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") else: if daemon_spec.ip: args.append(f"endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") @@ -1091,7 +1101,10 @@ class RgwService(CephService): args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}s") else: args.append(f"port={port}s") # note the 's' suffix on port - args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") + if spec.generate_cert: + args.append(f"ssl_certificate=config://rgw/cert/{daemon_spec.name()}") + else: + args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") else: if daemon_spec.ip: args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") @@ -1144,6 +1157,14 @@ class RgwService(CephService): 'value': str(spec.rgw_bucket_counters_cache_size), }) + if getattr(spec, 'disable_multisite_sync_traffic', None) is not None: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': daemon_name, + 'name': 'rgw_run_sync_thread', + 'value': 'false' if spec.disable_multisite_sync_traffic else 'true', + }) + daemon_spec.keyring = keyring daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) @@ -1180,6 +1201,10 @@ class RgwService(CephService): 'who': utils.name_to_config_section(daemon.name()), 'name': 'rgw_frontends', }) + self.mgr.check_mon_command({ + 'prefix': 'config-key rm', + 'key': f'rgw/cert/{daemon.name()}', + }) def ok_to_stop( self, diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 1b9cf618570..9c5b5a112f3 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -3,6 +3,7 @@ import logging import os import socket from typing import List, Any, Tuple, Dict, Optional, cast +import ipaddress from mgr_module import HandleCommandResult @@ -57,6 +58,8 @@ class GrafanaService(CephadmService): if ip_to_bind_to: daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to} grafana_ip = ip_to_bind_to + if ipaddress.ip_network(grafana_ip).version == 6: + grafana_ip = f"[{grafana_ip}]" domain = self.mgr.get_fqdn(daemon_spec.host) mgmt_gw_ips = [] @@ -354,6 +357,13 @@ class AlertmanagerService(CephadmService): addr = self.mgr.get_fqdn(dd.hostname) peers.append(build_url(host=addr, port=port).lstrip('/')) + ip_to_bind_to = '' + if spec.only_bind_port_on_networks and spec.networks: + assert daemon_spec.host is not None + ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or '' + if ip_to_bind_to: + daemon_spec.port_ips = {str(port): ip_to_bind_to} + deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') if security_enabled: 
alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() @@ -376,7 +386,8 @@ class AlertmanagerService(CephadmService): }, 'peers': peers, 'web_config': '/etc/alertmanager/web.yml', - 'use_url_prefix': mgmt_gw_enabled + 'use_url_prefix': mgmt_gw_enabled, + 'ip_to_bind_to': ip_to_bind_to }, sorted(deps) else: return { @@ -384,7 +395,8 @@ class AlertmanagerService(CephadmService): "alertmanager.yml": yml }, "peers": peers, - 'use_url_prefix': mgmt_gw_enabled + 'use_url_prefix': mgmt_gw_enabled, + 'ip_to_bind_to': ip_to_bind_to }, sorted(deps) def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py index 4451e29878d..8acec94f382 100644 --- a/src/pybind/mgr/cephadm/services/nvmeof.py +++ b/src/pybind/mgr/cephadm/services/nvmeof.py @@ -38,6 +38,8 @@ class NvmeofService(CephService): spec = cast(NvmeofServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec) nvmeof_gw_id = daemon_spec.daemon_id host_ip = self.mgr.inventory.get_addr(daemon_spec.host) + map_addr = spec.addr_map.get(daemon_spec.host) if spec.addr_map else None + map_discovery_addr = spec.discovery_addr_map.get(daemon_spec.host) if spec.discovery_addr_map else None keyring = self.get_keyring_with_caps(self.get_auth_entity(nvmeof_gw_id), ['mon', 'profile rbd', @@ -45,10 +47,17 @@ class NvmeofService(CephService): # TODO: check if we can force jinja2 to generate dicts with double quotes instead of using json.dumps transport_tcp_options = json.dumps(spec.transport_tcp_options) if spec.transport_tcp_options else None + iobuf_options = json.dumps(spec.iobuf_options) if spec.iobuf_options else None name = '{}.{}'.format(utils.name_to_config_section('nvmeof'), nvmeof_gw_id) rados_id = name[len('client.'):] if name.startswith('client.') else name - addr = spec.addr or host_ip - discovery_addr = spec.discovery_addr or host_ip + + # The address is first searched in the per node address map, + # then in the spec address configuration. + # If neither is defined, the host IP is used as a fallback. 
+ addr = map_addr or spec.addr or host_ip + self.mgr.log.info(f"gateway address: {addr} from {map_addr=} {spec.addr=} {host_ip=}") + discovery_addr = map_discovery_addr or spec.discovery_addr or host_ip + self.mgr.log.info(f"discovery address: {discovery_addr} from {map_discovery_addr=} {spec.discovery_addr=} {host_ip=}") context = { 'spec': spec, 'name': name, @@ -59,6 +68,7 @@ class NvmeofService(CephService): 'rpc_socket_dir': '/var/tmp/', 'rpc_socket_name': 'spdk.sock', 'transport_tcp_options': transport_tcp_options, + 'iobuf_options': iobuf_options, 'rados_id': rados_id } gw_conf = self.mgr.template.render('services/nvmeof/ceph-nvmeof.conf.j2', context) @@ -66,6 +76,10 @@ class NvmeofService(CephService): daemon_spec.keyring = keyring daemon_spec.extra_files = {'ceph-nvmeof.conf': gw_conf} + # Indicate to the daemon whether to utilize huge pages + if spec.spdk_mem_size: + daemon_spec.extra_files['spdk_mem_size'] = str(spec.spdk_mem_size) + if spec.enable_auth: if ( not spec.client_cert @@ -87,6 +101,9 @@ class NvmeofService(CephService): daemon_spec.extra_files['client_key'] = spec.client_key daemon_spec.extra_files['root_ca_cert'] = spec.root_ca_cert + if spec.encryption_key: + daemon_spec.extra_files['encryption_key'] = spec.encryption_key + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) daemon_spec.deps = [] return daemon_spec @@ -185,19 +202,21 @@ class NvmeofService(CephService): # to clean the keyring up super().post_remove(daemon, is_failed_deploy=is_failed_deploy) service_name = daemon.service_name() + daemon_name = daemon.name() # remove config for dashboard nvmeof gateways if any - ret, out, err = self.mgr.mon_command({ + ret, _, err = self.mgr.mon_command({ 'prefix': 'dashboard nvmeof-gateway-rm', 'name': service_name, + 'daemon_name': daemon_name }) if not ret: - logger.info(f'{daemon.hostname} removed from nvmeof gateways dashboard config') + logger.info(f'{daemon_name} removed from nvmeof gateways dashboard config') spec = cast(NvmeofServiceSpec, self.mgr.spec_store.all_specs.get(daemon.service_name(), None)) if not spec: - self.mgr.log.error(f'Failed to find spec for {daemon.name()}') + self.mgr.log.error(f'Failed to find spec for {daemon_name}') return pool = spec.pool group = spec.group diff --git a/src/pybind/mgr/cephadm/ssh.py b/src/pybind/mgr/cephadm/ssh.py index 1622cb001ab..acb5a77c51b 100644 --- a/src/pybind/mgr/cephadm/ssh.py +++ b/src/pybind/mgr/cephadm/ssh.py @@ -358,7 +358,7 @@ class SSHManager: await self._check_execute_command(host, chown, addr=addr) chmod = RemoteCommand(Executables.CHMOD, [oct(mode)[2:], tmp_path]) await self._check_execute_command(host, chmod, addr=addr) - mv = RemoteCommand(Executables.MV, [tmp_path, path]) + mv = RemoteCommand(Executables.MV, ['-Z', tmp_path, path]) await self._check_execute_command(host, mv, addr=addr) except Exception as e: msg = f"Unable to write {host}:{path}: {e}" diff --git a/src/pybind/mgr/cephadm/ssl_cert_utils.py b/src/pybind/mgr/cephadm/ssl_cert_utils.py index 930b276c8de..467b32a4df0 100644 --- a/src/pybind/mgr/cephadm/ssl_cert_utils.py +++ b/src/pybind/mgr/cephadm/ssl_cert_utils.py @@ -70,7 +70,12 @@ class SSLCerts: return (cert_str, key_str) - def generate_cert(self, _hosts: Union[str, List[str]], _addrs: Union[str, List[str]]) -> Tuple[str, str]: + def generate_cert( + self, + _hosts: Union[str, List[str]], + _addrs: Union[str, List[str]], + custom_san_list: Optional[List[str]] = None, + ) -> Tuple[str, str]: addrs = [_addrs] if isinstance(_addrs, str) else _addrs hosts = 
[_hosts] if isinstance(_hosts, str) else _hosts @@ -97,6 +102,8 @@ class SSLCerts: san_list: List[x509.GeneralName] = [x509.DNSName(host) for host in hosts] if valid_ips: san_list.extend(ips) + if custom_san_list: + san_list.extend([x509.DNSName(n) for n in custom_san_list]) builder = builder.add_extension( x509.SubjectAlternativeName( diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 index de993cb6ce3..b6955caf616 100644 --- a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 @@ -8,6 +8,8 @@ global: tls_config: {% if security_enabled %} ca_file: root_cert.pem + cert_file: alertmanager.crt + key_file: alertmanager.key {% else %} insecure_skip_verify: true {% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 index b9773ceeeb3..14af0fd48ca 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 @@ -9,6 +9,7 @@ events { http { #access_log /dev/stdout; + error_log /dev/stderr info; client_header_buffer_size 32K; large_client_header_buffers 4 32k; proxy_busy_buffers_size 512k; diff --git a/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 index 03ff8a32ca2..b85ccd7b7fb 100644 --- a/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 @@ -2,7 +2,9 @@ NFS_CORE_PARAM { Enable_NLM = {{ enable_nlm }}; Enable_RQUOTA = false; - Protocols = 4; + Protocols = 3, 4; + mount_path_pseudo = true; + Enable_UDP = false; NFS_Port = {{ port }}; allow_set_io_flusher_fail = true; {% if bind_addr %} diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 index 760bc97e515..2a9ab309568 100644 --- a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 @@ -8,10 +8,17 @@ enable_auth = {{ spec.enable_auth }} state_update_notify = {{ spec.state_update_notify }} state_update_interval_sec = {{ spec.state_update_interval_sec }} enable_spdk_discovery_controller = {{ spec.enable_spdk_discovery_controller }} +enable_key_encryption = {{ spec.enable_key_encryption }} +encryption_key = /encryption.key +rebalance_period_sec = {{ spec.rebalance_period_sec }} +max_gws_in_grp = {{ spec.max_gws_in_grp }} +max_ns_to_change_lb_grp = {{ spec.max_ns_to_change_lb_grp }} enable_prometheus_exporter = {{ spec.enable_prometheus_exporter }} prometheus_exporter_ssl = False -prometheus_port = 10008 +prometheus_port = {{ spec.prometheus_port }} +prometheus_stats_interval = {{ spec.prometheus_stats_interval }} verify_nqns = {{ spec.verify_nqns }} +verify_keys = {{ spec.verify_keys }} omap_file_lock_duration = {{ spec.omap_file_lock_duration }} omap_file_lock_retries = {{ spec.omap_file_lock_retries }} omap_file_lock_retry_sleep_interval = {{ spec.omap_file_lock_retry_sleep_interval }} @@ -22,6 +29,10 @@ ping_spdk_under_lock = {{ spec.ping_spdk_under_lock }} enable_monitor_client = {{ spec.enable_monitor_client }} max_hosts_per_namespace = {{ spec.max_hosts_per_namespace }} 
max_namespaces_with_netmask = {{ spec.max_namespaces_with_netmask }} +max_subsystems = {{ spec.max_subsystems }} +max_namespaces = {{ spec.max_namespaces }} +max_namespaces_per_subsystem = {{ spec.max_namespaces_per_subsystem }} +max_hosts_per_subsystem = {{ spec.max_hosts_per_subsystem }} [gateway-logs] log_level = {{ spec.log_level }} @@ -65,6 +76,9 @@ protocol_log_level = {{ spec.spdk_protocol_log_level }} log_file_dir = {{ spec.spdk_log_file_dir }} {% endif %} conn_retries = {{ spec.conn_retries }} +{% if spec.spdk_mem_size %} +mem_size = {{ spec.spdk_mem_size }} +{% endif %} transports = {{ spec.transports }} {% if transport_tcp_options %} transport_tcp_options = {{ transport_tcp_options }} @@ -72,6 +86,9 @@ transport_tcp_options = {{ transport_tcp_options }} {% if spec.tgt_cmd_extra_args %} tgt_cmd_extra_args = {{ spec.tgt_cmd_extra_args }} {% endif %} +{% if iobuf_options %} +iobuf_options = {{ iobuf_options }} +{% endif %} [monitor] timeout = {{ spec.monitor_timeout }} diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 index ecfd899af71..961da145dac 100644 --- a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 @@ -28,6 +28,8 @@ alerting: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} - scheme: http http_sd_configs: @@ -56,6 +58,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} honor_labels: true http_sd_configs: @@ -81,6 +85,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ node_exporter_sd_url }} @@ -104,6 +110,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ haproxy_sd_url }} @@ -128,6 +136,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} honor_labels: true http_sd_configs: @@ -149,6 +159,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ nvmeof_sd_url }} @@ -169,6 +181,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ nfs_sd_url }} @@ -189,6 +203,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ smb_sd_url }} diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 975c125225d..22bd26def91 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -136,7 +136,7 @@ def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str mock.call(host, 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, error_ok=False, 
image='', log_output=True, use_current_daemon_image=False), - mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY, use_current_daemon_image=False), + mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY, error_ok=True, use_current_daemon_image=False), mock.call(host, 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, error_ok=False, image='', log_output=True, use_current_daemon_image=False), @@ -499,7 +499,7 @@ class TestCephadm(object): CephadmServe(cephadm_module)._check_daemons() - assert _save_host.called_with('test') + _save_host.assert_called_with('test') assert cephadm_module.cache.get_scheduled_daemon_action('test', daemon_name) is None @mock.patch("cephadm.serve.CephadmServe._run_cephadm") @@ -563,6 +563,7 @@ class TestCephadm(object): }, }, }), + error_ok=True, use_current_daemon_image=True, ) @@ -618,6 +619,7 @@ class TestCephadm(object): "crush_location": "datacenter=a", }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -660,6 +662,7 @@ class TestCephadm(object): "keyring": "[client.crash.test]\nkey = None\n", }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -702,6 +705,7 @@ class TestCephadm(object): }, "config_blobs": {}, }), + error_ok=True, use_current_daemon_image=False, ) @@ -752,6 +756,7 @@ class TestCephadm(object): }, "config_blobs": {}, }), + error_ok=True, use_current_daemon_image=False, ) @@ -806,6 +811,7 @@ class TestCephadm(object): }, "config_blobs": {}, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1735,16 +1741,23 @@ class TestCephadm(object): nvmeof_client_cert = 'fake-nvmeof-client-cert' nvmeof_server_cert = 'fake-nvmeof-server-cert' nvmeof_root_ca_cert = 'fake-nvmeof-root-ca-cert' + grafana_cert_host_1 = 'grafana-cert-host-1' + grafana_cert_host_2 = 'grafana-cert-host-2' cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', rgw_frontend_rgw_foo_host2_cert, service_name='rgw.foo', user_made=True) cephadm_module.cert_key_store.save_cert('nvmeof_server_cert', nvmeof_server_cert, service_name='nvmeof.foo', user_made=True) cephadm_module.cert_key_store.save_cert('nvmeof_client_cert', nvmeof_client_cert, service_name='nvmeof.foo', user_made=True) cephadm_module.cert_key_store.save_cert('nvmeof_root_ca_cert', nvmeof_root_ca_cert, service_name='nvmeof.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('grafana_cert', grafana_cert_host_1, host='host-1', user_made=True) + cephadm_module.cert_key_store.save_cert('grafana_cert', grafana_cert_host_2, host='host-2', user_made=True) expected_calls = [ mock.call(f'{CERT_STORE_CERT_PREFIX}rgw_frontend_ssl_cert', json.dumps({'rgw.foo': Cert(rgw_frontend_rgw_foo_host2_cert, True).to_json()})), mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_server_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_server_cert, True).to_json()})), mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_client_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_client_cert, True).to_json()})), mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_root_ca_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_root_ca_cert, True).to_json()})), + mock.call(f'{CERT_STORE_CERT_PREFIX}grafana_cert', json.dumps({'host-1': Cert(grafana_cert_host_1, True).to_json()})), + mock.call(f'{CERT_STORE_CERT_PREFIX}grafana_cert', json.dumps({'host-1': Cert(grafana_cert_host_1, True).to_json(), + 'host-2': Cert(grafana_cert_host_2, True).to_json()})) ] _set_store.assert_has_calls(expected_calls) @@ -1789,17 +1802,23 @@ class TestCephadm(object): 
cephadm_module.cert_key_store._init_known_cert_key_dicts() grafana_host1_key = 'fake-grafana-host1-key' + grafana_host2_key = 'fake-grafana-host2-key' nvmeof_client_key = 'nvmeof-client-key' nvmeof_server_key = 'nvmeof-server-key' - grafana_host1_key = 'fake-grafana-host1-cert' + nvmeof_encryption_key = 'nvmeof-encryption-key' cephadm_module.cert_key_store.save_key('grafana_key', grafana_host1_key, host='host1') + cephadm_module.cert_key_store.save_key('grafana_key', grafana_host2_key, host='host2') cephadm_module.cert_key_store.save_key('nvmeof_client_key', nvmeof_client_key, service_name='nvmeof.foo') cephadm_module.cert_key_store.save_key('nvmeof_server_key', nvmeof_server_key, service_name='nvmeof.foo') + cephadm_module.cert_key_store.save_key('nvmeof_encryption_key', nvmeof_encryption_key, service_name='nvmeof.foo') expected_calls = [ mock.call(f'{CERT_STORE_KEY_PREFIX}grafana_key', json.dumps({'host1': PrivKey(grafana_host1_key).to_json()})), + mock.call(f'{CERT_STORE_KEY_PREFIX}grafana_key', json.dumps({'host1': PrivKey(grafana_host1_key).to_json(), + 'host2': PrivKey(grafana_host2_key).to_json()})), mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_client_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_client_key).to_json()})), mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_server_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_server_key).to_json()})), + mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_encryption_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_encryption_key).to_json()})), ] _set_store.assert_has_calls(expected_calls) @@ -1816,15 +1835,19 @@ class TestCephadm(object): 'ingress_ssl_key': False, 'nvmeof_client_key': False, 'nvmeof_server_key': False, + 'nvmeof_encryption_key': False, } assert cephadm_module.cert_key_store.key_ls() == expected_ls cephadm_module.cert_key_store.save_key('nvmeof_client_key', 'xxx', service_name='nvmeof.foo') cephadm_module.cert_key_store.save_key('nvmeof_server_key', 'xxx', service_name='nvmeof.foo') + cephadm_module.cert_key_store.save_key('nvmeof_encryption_key', 'xxx', service_name='nvmeof.foo') expected_ls['nvmeof_server_key'] = {} expected_ls['nvmeof_server_key']['nvmeof.foo'] = True expected_ls['nvmeof_client_key'] = {} expected_ls['nvmeof_client_key']['nvmeof.foo'] = True + expected_ls['nvmeof_encryption_key'] = {} + expected_ls['nvmeof_encryption_key']['nvmeof.foo'] = True assert cephadm_module.cert_key_store.key_ls() == expected_ls @mock.patch("cephadm.module.CephadmOrchestrator.get_store_prefix") @@ -1838,6 +1861,7 @@ class TestCephadm(object): nvmeof_root_ca_cert = 'nvmeof-root-ca-cert' nvmeof_server_key = 'nvmeof-server-key' nvmeof_client_key = 'nvmeof-client-key' + nvmeof_encryption_key = 'nvmeof-encryption-key' def _fake_prefix_store(key): if key == 'cert_store.cert.': @@ -1852,6 +1876,7 @@ class TestCephadm(object): f'{CERT_STORE_KEY_PREFIX}grafana_key': json.dumps({'host1': PrivKey(grafana_host1_key).to_json()}), f'{CERT_STORE_KEY_PREFIX}nvmeof_server_key': json.dumps({'nvmeof.foo': PrivKey(nvmeof_server_key).to_json()}), f'{CERT_STORE_KEY_PREFIX}nvmeof_client_key': json.dumps({'nvmeof.foo': PrivKey(nvmeof_client_key).to_json()}), + f'{CERT_STORE_KEY_PREFIX}nvmeof_encryption_key': json.dumps({'nvmeof.foo': PrivKey(nvmeof_encryption_key).to_json()}), } else: raise Exception(f'Get store with unexpected value {key}') @@ -1865,6 +1890,7 @@ class TestCephadm(object): assert cephadm_module.cert_key_store.known_keys['grafana_key']['host1'] == PrivKey(grafana_host1_key) assert cephadm_module.cert_key_store.known_keys['nvmeof_server_key']['nvmeof.foo'] 
== PrivKey(nvmeof_server_key) assert cephadm_module.cert_key_store.known_keys['nvmeof_client_key']['nvmeof.foo'] == PrivKey(nvmeof_client_key) + assert cephadm_module.cert_key_store.known_keys['nvmeof_encryption_key']['nvmeof.foo'] == PrivKey(nvmeof_encryption_key) def test_cert_store_get_cert_key(self, cephadm_module: CephadmOrchestrator): cephadm_module.cert_key_store._init_known_cert_key_dicts() @@ -1892,13 +1918,16 @@ class TestCephadm(object): grafana_host1_key = 'fake-grafana-host1-cert' nvmeof_server_key = 'nvmeof-server-key' + nvmeof_encryption_key = 'nvmeof-encryption-key' cephadm_module.cert_key_store.save_key('grafana_key', grafana_host1_key, host='host1') cephadm_module.cert_key_store.save_key('grafana_key', grafana_host1_key, host='host1') cephadm_module.cert_key_store.save_key('nvmeof_server_key', nvmeof_server_key, service_name='nvmeof.foo') + cephadm_module.cert_key_store.save_key('nvmeof_encryption_key', nvmeof_encryption_key, service_name='nvmeof.foo') assert cephadm_module.cert_key_store.get_key('grafana_key', host='host1') == grafana_host1_key assert cephadm_module.cert_key_store.get_key('nvmeof_server_key', service_name='nvmeof.foo') == nvmeof_server_key assert cephadm_module.cert_key_store.get_key('nvmeof_client_key', service_name='nvmeof.foo') == '' + assert cephadm_module.cert_key_store.get_key('nvmeof_encryption_key', service_name='nvmeof.foo') == nvmeof_encryption_key with pytest.raises(OrchestratorError, match='Attempted to access priv key for unknown entity'): cephadm_module.cert_key_store.get_key('unknown_entity') @@ -2720,6 +2749,7 @@ Traceback (most recent call last): cephadm_module.cache.facts = facts assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value + @mock.patch("cephadm.CephadmOrchestrator.set_maintenance_healthcheck", lambda _: None) @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) def test_tuned_profiles_validation(self, cephadm_module): with with_host(cephadm_module, 'test'): @@ -2840,16 +2870,23 @@ Traceback (most recent call last): with cephadm_module.async_timeout_handler('hostC', 'very slow', 999): cephadm_module.wait_async(_timeout()) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) @mock.patch("cephadm.CephadmOrchestrator.remove_osds") @mock.patch("cephadm.CephadmOrchestrator.add_host_label", lambda *a, **kw: None) @mock.patch("cephadm.inventory.HostCache.get_daemons_by_host", lambda *a, **kw: []) def test_host_drain_zap(self, _rm_osds, cephadm_module): # pass force=true in these tests to bypass _admin label check - cephadm_module.drain_host('host1', force=True, zap_osd_devices=False) - assert _rm_osds.called_with([], zap=False) + with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=True): + cephadm_module.drain_host('test', force=True, zap_osd_devices=False) + _rm_osds.assert_called_with([], zap=False) + + with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=True): + cephadm_module.drain_host('test', force=True, zap_osd_devices=True) + _rm_osds.assert_called_with([], zap=True) - cephadm_module.drain_host('host1', force=True, zap_osd_devices=True) - assert _rm_osds.called_with([], zap=True) + with pytest.raises(OrchestratorError, match=r"Cannot find host 'host1' in the inventory."): + cephadm_module.drain_host('host1', force=True, zap_osd_devices=True) + _rm_osds.assert_called_with([], zap=True) def test_process_ls_output(self, cephadm_module): sample_ls_output = """[ diff --git 
a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 84d7c8f5b13..d872219df80 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -349,6 +349,7 @@ log_to_file = False""" }, } }), + error_ok=True, use_current_daemon_image=False, ) @@ -397,10 +398,17 @@ enable_auth = False state_update_notify = True state_update_interval_sec = 5 enable_spdk_discovery_controller = False +enable_key_encryption = True +encryption_key = /encryption.key +rebalance_period_sec = 7 +max_gws_in_grp = 16 +max_ns_to_change_lb_grp = 8 enable_prometheus_exporter = True prometheus_exporter_ssl = False prometheus_port = 10008 +prometheus_stats_interval = 10 verify_nqns = True +verify_keys = True omap_file_lock_duration = 20 omap_file_lock_retries = 30 omap_file_lock_retry_sleep_interval = 1.0 @@ -409,8 +417,12 @@ allowed_consecutive_spdk_ping_failures = 1 spdk_ping_interval_in_seconds = 2.0 ping_spdk_under_lock = False enable_monitor_client = True -max_hosts_per_namespace = 1 +max_hosts_per_namespace = 8 max_namespaces_with_netmask = 1000 +max_subsystems = 128 +max_namespaces = 1024 +max_namespaces_per_subsystem = 256 +max_hosts_per_subsystem = 32 [gateway-logs] log_level = INFO @@ -489,6 +501,7 @@ timeout = 1.0\n""" } } }), + error_ok=True, use_current_daemon_image=False, ) @@ -568,7 +581,14 @@ class TestMonitoring: mock_getfqdn.return_value = purl.hostname with with_host(cephadm_module, "test"): - with with_service(cephadm_module, AlertManagerSpec()): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + }) + with with_service(cephadm_module, AlertManagerSpec('alertmanager', + networks=['1.2.3.0/24'], + only_bind_port_on_networks=True)): y = dedent(self._get_config(expected_yaml_url)).lstrip() _run_cephadm.assert_called_with( 'test', @@ -582,11 +602,12 @@ class TestMonitoring: "deploy_arguments": [], "params": { 'tcp_ports': [9093, 9094], + 'port_ips': {"9094": "1.2.3.1"}, }, "meta": { 'service_name': 'alertmanager', 'ports': [9093, 9094], - 'ip': None, + 'ip': '1.2.3.1', 'deployed_by': [], 'rank': None, 'rank_generation': None, @@ -599,8 +620,10 @@ class TestMonitoring: }, "peers": [], "use_url_prefix": False, + "ip_to_bind_to": "1.2.3.1", } }), + error_ok=True, use_current_daemon_image=False, ) @@ -620,8 +643,16 @@ class TestMonitoring: cephadm_module.secure_monitoring_stack = True cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + + cephadm_module.cache.update_host_networks('test', { + 'fd12:3456:789a::/64': { + 'if0': ['fd12:3456:789a::10'] + }, + }) with with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \ - with_service(cephadm_module, AlertManagerSpec()): + with_service(cephadm_module, AlertManagerSpec('alertmanager', + networks=['fd12:3456:789a::/64'], + only_bind_port_on_networks=True)): y = dedent(""" # This file is generated by cephadm. 
@@ -632,6 +663,8 @@ class TestMonitoring: http_config: tls_config: ca_file: root_cert.pem + cert_file: alertmanager.crt + key_file: alertmanager.key route: receiver: 'default' @@ -672,11 +705,12 @@ class TestMonitoring: "deploy_arguments": [], "params": { 'tcp_ports': [9093, 9094], + 'port_ips': {"9094": "fd12:3456:789a::10"} }, "meta": { 'service_name': 'alertmanager', 'ports': [9093, 9094], - 'ip': None, + 'ip': 'fd12:3456:789a::10', 'deployed_by': [], 'rank': None, 'rank_generation': None, @@ -694,8 +728,10 @@ class TestMonitoring: 'peers': [], 'web_config': '/etc/alertmanager/web.yml', "use_url_prefix": True, + "ip_to_bind_to": "fd12:3456:789a::10", } }), + error_ok=True, use_current_daemon_image=False, ) @@ -726,6 +762,8 @@ class TestMonitoring: http_config: tls_config: ca_file: root_cert.pem + cert_file: alertmanager.crt + key_file: alertmanager.key route: receiver: 'default' @@ -786,8 +824,10 @@ class TestMonitoring: 'peers': [], 'web_config': '/etc/alertmanager/web.yml', "use_url_prefix": False, + "ip_to_bind_to": "", } }), + error_ok=True, use_current_daemon_image=False, ) @@ -833,6 +873,7 @@ class TestMonitoring: "files": { "ceph-exporter.crt": "mycert", "ceph-exporter.key": "mykey"}}}), + error_ok=True, use_current_daemon_image=False) @patch("cephadm.serve.CephadmServe._run_cephadm") @@ -876,6 +917,7 @@ class TestMonitoring: }, "config_blobs": {} }), + error_ok=True, use_current_daemon_image=False, ) @@ -936,6 +978,7 @@ class TestMonitoring: 'web_config': '/etc/node-exporter/web.yml', } }), + error_ok=True, use_current_daemon_image=False, ) @@ -1071,6 +1114,7 @@ class TestMonitoring: "use_url_prefix": False }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1150,6 +1194,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key scrape_configs: - job_name: 'ceph' @@ -1171,6 +1217,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'node' relabel_configs: @@ -1189,6 +1237,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'haproxy' relabel_configs: @@ -1205,6 +1255,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'ceph-exporter' relabel_configs: @@ -1222,6 +1274,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'nvmeof' honor_labels: true @@ -1235,6 +1289,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'nfs' honor_labels: true @@ -1248,6 +1304,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'smb' honor_labels: true @@ -1261,6 +1319,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key """).lstrip() @@ -1303,6 +1363,7 @@ class TestMonitoring: "use_url_prefix": False }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1379,6 +1440,7 @@ class TestMonitoring: }, }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1438,6 +1500,7 @@ class TestMonitoring: }, }, }), + error_ok=True, 
use_current_daemon_image=False, ) @@ -1588,6 +1651,7 @@ class TestMonitoring: "files": files, }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1718,6 +1782,7 @@ class TestMonitoring: "files": files, }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1831,6 +1896,7 @@ class TestMonitoring: "files": files, }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2005,6 +2071,7 @@ spec: }, "config_blobs": {}, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2044,6 +2111,26 @@ class TestRGWService: }) assert f == expected + @pytest.mark.parametrize( + "disable_sync_traffic", + [ + (True), + (False), + ] + ) + @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_rgw_disable_sync_traffic(self, disable_sync_traffic, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + s = RGWSpec(service_id="foo", + disable_multisite_sync_traffic=disable_sync_traffic) + with with_service(cephadm_module, s) as dds: + _, f, _ = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': f'client.{dds[0]}', + 'key': 'rgw_run_sync_thread', + }) + assert f == ('false' if disable_sync_traffic else 'true') + class TestMonService: @@ -2112,6 +2199,7 @@ class TestSNMPGateway: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2160,6 +2248,7 @@ class TestSNMPGateway: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2212,6 +2301,7 @@ class TestSNMPGateway: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2269,6 +2359,7 @@ class TestSNMPGateway: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -3189,7 +3280,9 @@ class TestIngressService: 'NFS_CORE_PARAM {\n' ' Enable_NLM = true;\n' ' Enable_RQUOTA = false;\n' - ' Protocols = 4;\n' + ' Protocols = 3, 4;\n' + ' mount_path_pseudo = true;\n' + ' Enable_UDP = false;\n' ' NFS_Port = 2049;\n' ' allow_set_io_flusher_fail = true;\n' ' HAProxy_Hosts = 192.168.122.111, 10.10.2.20, 192.168.122.222;\n' @@ -3359,6 +3452,7 @@ class TestJaeger: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -3399,6 +3493,7 @@ class TestJaeger: }, "config_blobs": es_config, }), + error_ok=True, use_current_daemon_image=False, ) with with_service(cephadm_module, collector_spec): @@ -3427,6 +3522,7 @@ class TestJaeger: }, "config_blobs": collector_config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -3467,6 +3563,7 @@ class TestJaeger: }, "config_blobs": collector_config, }), + error_ok=True, use_current_daemon_image=False, ) with with_service(cephadm_module, agent_spec): @@ -3495,6 +3592,7 @@ class TestJaeger: }, "config_blobs": agent_config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -3552,6 +3650,7 @@ class TestCustomContainer: }, } ), + error_ok=True, use_current_daemon_image=False, ) @@ -3639,6 +3738,7 @@ class TestCustomContainer: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -3692,6 +3792,7 @@ class TestSMB: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -3764,6 +3865,7 @@ class TestSMB: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -3832,6 +3934,7 @@ class TestMgmtGateway: http { #access_log /dev/stdout; + error_log /dev/stderr info; client_header_buffer_size 32K; large_client_header_buffers 4 32k; 
proxy_busy_buffers_size 512k; @@ -4009,6 +4112,7 @@ class TestMgmtGateway: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -4078,6 +4182,7 @@ class TestMgmtGateway: http { #access_log /dev/stdout; + error_log /dev/stderr info; client_header_buffer_size 32K; large_client_header_buffers 4 32k; proxy_busy_buffers_size 512k; @@ -4350,6 +4455,7 @@ class TestMgmtGateway: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -4473,5 +4579,6 @@ class TestMgmtGateway: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) |
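The schedule.py hunk above normalizes each configured subnet through ipaddress.ip_network() before matching host IPs against it, so that differently written but equivalent subnet strings (most visibly IPv6 spellings) compare equal. A small illustration:

import ipaddress

# '2001:DB8:0:0::/64' and '2001:db8::/64' name the same network; converting
# through ip_network() yields one canonical spelling for string comparisons
subnet = str(ipaddress.ip_network('2001:DB8:0:0::/64'))
print(subnet)                                                                 # 2001:db8::/64
print(ipaddress.ip_address('2001:db8::10') in ipaddress.ip_network(subnet))  # True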
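The nvmeof.py change resolves the gateway (and discovery) listen address per host: an entry in the spec's addr_map wins, then the spec-wide addr, then the host's inventory IP. A minimal sketch of that precedence; the spec object here is a SimpleNamespace standing in for NvmeofServiceSpec and the hostnames/IPs are made up.

from types import SimpleNamespace
from typing import Optional

def resolve_gateway_addr(spec, host: str, host_ip: str) -> str:
    # per-host entry in addr_map wins, then the spec-wide addr, then the
    # host's inventory IP (discovery_addr_map follows the same precedence)
    map_addr: Optional[str] = spec.addr_map.get(host) if spec.addr_map else None
    return map_addr or spec.addr or host_ip

spec = SimpleNamespace(addr=None, addr_map={'node-1': '10.0.0.11'})
print(resolve_gateway_addr(spec, 'node-1', '192.168.1.5'))   # 10.0.0.11   (from addr_map)
print(resolve_gateway_addr(spec, 'node-2', '192.168.1.6'))   # 192.168.1.6 (host IP fallback)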
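cert_mgr.py and ssl_cert_utils.py now thread an optional custom_san_list into generate_cert(), which the RGW service uses to add zonegroup hostnames to the generated certificate. A minimal illustration of how extra DNS names extend a SubjectAlternativeName, using the cryptography package the module already imports; the hostnames are invented examples.

from cryptography import x509

hosts = ['gw1.example.com']
custom_san_list = ['s3.example.com', 's3.zone.example.com']   # e.g. zonegroup_hostnames

san_list = [x509.DNSName(h) for h in hosts]
if custom_san_list:
    # same extension step as the added ssl_cert_utils.py hunk
    san_list.extend(x509.DNSName(n) for n in custom_san_list)

san = x509.SubjectAlternativeName(san_list)
print(san.get_values_for_type(x509.DNSName))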