diff options
Diffstat (limited to 'src/pybind/mgr/dashboard/services/service.py')
-rw-r--r-- | src/pybind/mgr/dashboard/services/service.py | 61 |
1 files changed, 43 insertions, 18 deletions
diff --git a/src/pybind/mgr/dashboard/services/service.py b/src/pybind/mgr/dashboard/services/service.py
index 792604dcc59..9b789c0c859 100644
--- a/src/pybind/mgr/dashboard/services/service.py
+++ b/src/pybind/mgr/dashboard/services/service.py
@@ -28,68 +28,94 @@ def verify_service_restart(service_type: str, service_id: str):
     orch = OrchClient.instance()
     service_name = f'{service_type}.{service_id}'
 
-    logger.info("Getting initial service info for: %s", service_name)
     info = orch.services.get(service_name)[0].to_dict()
     last_refreshed = info['status']['last_refresh']
 
-    logger.info("Reloading service: %s", service_name)
     orch.services.reload(service_type, service_id)
 
-    logger.info("Waiting for service refresh: %s", service_name)
     wait_for_refresh(orch, service_name, last_refreshed)
 
-    logger.info("Checking daemon status for: %s", service_name)
-    daemon_status = wait_for_daemon_to_start(orch, service_name)
+    daemon_status = wait_for_daemon_to_start(service_name)
     return daemon_status
 
 
 def wait_for_refresh(orch, service_name, last_refreshed):
     orch = OrchClient.instance()
-    logger.info("Waiting for service %s to refresh", service_name)
-
     while True:
         updated_info = orch.services.get(service_name)[0].to_dict()
         if updated_info['status']['last_refresh'] != last_refreshed:
-            logger.info("Service %s refreshed", service_name)
             break
 
 
-def wait_for_daemon_to_start(orch, service_name):
+def wait_for_daemon_to_start(service_name, timeout=30):
     orch = OrchClient.instance()
     start_time = time.time()
-    logger.info("Waiting for daemon %s to start", service_name)
 
     while True:
         daemons = [d.to_dict() for d in orch.services.list_daemons(service_name=service_name)]
+        logger.info("Daemon list for service %s: %s", service_name, daemons)
+
+        if not daemons:
+            logger.info("No daemons found for service %s. Retrying...", service_name)
+            # Check if timeout has been reached
+            daemon_start_time = time.time()
+            if time.time() - daemon_start_time > timeout:
+                logger.error("Timeout reached while waiting for daemon list for service %s", service_name)  # noqa E501  # pylint: disable=line-too-long
+                raise DashboardException(
+                    code='daemon_list_timeout',
+                    msg="Timeout reached while waiting for daemon list for service %s." % service_name  # noqa E501  # pylint: disable=line-too-long
+                )
+            time.sleep(1)
+            continue  # Retry getting daemon list
+
         all_running = True
 
         for daemon in daemons:
             daemon_state = daemon['status_desc']
-            logger.debug("Daemon %s state: %s", daemon['daemon_id'], daemon_state)
             if daemon_state in ('unknown', 'error', 'stopped'):
                 logger.error("Failed to restart daemon %s for service %s. State is %s", daemon['daemon_id'], service_name, daemon_state)  # noqa E501  # pylint: disable=line-too-long
                 raise DashboardException(
                     code='daemon_restart_failed',
-                    msg="Failed to restart the daemon %s. Daemon state is %s." % (service_name, daemon_state)  # noqa E501  # pylint: disable=line-too-long
+                    msg="Failed to restart the daemon %s. Daemon state is %s." % (daemon['daemon_id'], daemon_state)  # noqa E501  # pylint: disable=line-too-long
                 )
-            if daemon_state != 'running':
+
+            if daemon_state == 'starting':
+                all_running = False
+
+            elif daemon_state != 'running':
                 all_running = False
 
         if all_running:
             logger.info("All daemons for service %s are running", service_name)
             return True
 
-        if time.time() - start_time > 10:
+        if time.time() - start_time > timeout:
             logger.error("Timeout reached while waiting for daemon %s to start", service_name)
             raise DashboardException(
                 code='daemon_restart_timeout',
                 msg="Timeout reached while waiting for daemon %s to start." % service_name
             )
-        return False
+
+        time.sleep(1)  # Adding a short delay before retrying
 
 
 class RgwServiceManager:
+    user = 'dashboard'
+
+    def find_available_port(self, starting_port=80):
+        orch = OrchClient.instance()
+        daemons = [d.to_dict() for d in orch.services.list_daemons(daemon_type='rgw')]
+        used_ports = set()
+        for daemon in daemons:
+            ports = daemon.get('ports', [])
+            if ports:
+                used_ports.update(ports)
+        port = starting_port
+        while port in used_ports:
+            port += 1
+        return port
+
     def restart_rgw_daemons_and_set_credentials(self):
         # Restart RGW daemons and set credentials.
         logger.info("Restarting RGW daemons and setting credentials")
@@ -148,7 +174,6 @@ class RgwServiceManager:
 
     def configure_rgw_credentials(self):
         logger.info('Configuring dashboard RGW credentials')
-        user = 'dashboard'
         realms = []
         access_key = ''
         secret_key = ''
@@ -162,7 +187,7 @@ class RgwServiceManager:
                 realm_access_keys = {}
                 realm_secret_keys = {}
                 for realm in realms:
-                    realm_access_key, realm_secret_key = self._get_user_keys(user, realm)
+                    realm_access_key, realm_secret_key = self._get_user_keys(self.user, realm)
                     if realm_access_key:
                         realm_access_keys[realm] = realm_access_key
                         realm_secret_keys[realm] = realm_secret_key
@@ -170,7 +195,7 @@ class RgwServiceManager:
                     access_key = json.dumps(realm_access_keys)
                     secret_key = json.dumps(realm_secret_keys)
             else:
-                access_key, secret_key = self._get_user_keys(user)
+                access_key, secret_key = self._get_user_keys(self.user)
 
             assert access_key and secret_key
             Settings.RGW_API_ACCESS_KEY = access_key