summaryrefslogtreecommitdiffstats
path: root/src/pybind/mgr/dashboard/services/service.py
blob: 9b789c0c85929b279fc5a91afca71532764fe02c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import json
import logging
import time
from subprocess import SubprocessError

try:
    from typing import Optional, Tuple
except ImportError:
    pass  # For typing only

from .. import mgr
from ..exceptions import DashboardException
from ..settings import Settings
from .orchestrator import OrchClient

logger = logging.getLogger('service')


class NoCredentialsException(Exception):
    """Raised when no RGW credentials are available for the dashboard."""

    def __init__(self):
        # The message is fixed; callers raise this exception without arguments.
        message = (
            'No RGW credentials found, '
            'please consult the documentation on how to enable RGW for '
            'the dashboard.'
        )
        super().__init__(message)


def verify_service_restart(service_type: str, service_id: str):
    """Reload a service and wait until its daemons are running again.

    The service's ``last_refresh`` value is read before the reload so that
    the following wait can detect when the orchestrator has reported a newer
    status.

    :param service_type: Service type, used as the prefix of the service name.
    :param service_id: Service id, used as the suffix of the service name.
    :return: The result of ``wait_for_daemon_to_start`` for the service.
    """
    client = OrchClient.instance()
    name = f'{service_type}.{service_id}'

    # Remember the refresh marker as it was *before* triggering the reload.
    service_info = client.services.get(name)[0].to_dict()
    refresh_marker = service_info['status']['last_refresh']

    client.services.reload(service_type, service_id)

    # First wait for the status to be refreshed, then for the daemons to start.
    wait_for_refresh(client, name, refresh_marker)
    return wait_for_daemon_to_start(name)


def wait_for_refresh(orch, service_name, last_refreshed, timeout=None, poll_interval=1):
    """Block until a service's ``last_refresh`` value changes.

    :param orch: Orchestrator client to poll. When ``None``, the shared
        ``OrchClient`` instance is used instead.
    :param service_name: Name of the service to look up.
    :param last_refreshed: The ``last_refresh`` value observed before the
        reload; the wait ends once the reported value differs from it.
    :param timeout: Optional maximum number of seconds to wait. ``None``
        (the default) waits indefinitely, as before.
    :param poll_interval: Seconds to sleep between two polls, so that the
        loop does not spin on the orchestrator.
    :raises DashboardException: If ``timeout`` is set and is exceeded before
        the refresh value changes.
    """
    # Use the client handed in by the caller; only fall back to the shared
    # instance when none was given.
    if orch is None:
        orch = OrchClient.instance()
    start_time = time.time()

    while True:
        updated_info = orch.services.get(service_name)[0].to_dict()
        if updated_info['status']['last_refresh'] != last_refreshed:
            return

        if timeout is not None and time.time() - start_time > timeout:
            logger.error("Timeout reached while waiting for service %s to refresh",
                         service_name)
            raise DashboardException(
                code='service_refresh_timeout',
                msg="Timeout reached while waiting for service %s to refresh." % service_name
            )

        # Avoid a busy loop while waiting for the next status refresh.
        time.sleep(poll_interval)


def wait_for_daemon_to_start(service_name, timeout=30):
    """Poll a service's daemons until all of them report ``running``.

    The daemon list is fetched once per second. The whole wait, including
    the time during which no daemon is listed yet, is bounded by ``timeout``
    measured from the moment this function is called.

    :param service_name: Name of the service whose daemons are polled.
    :param timeout: Maximum number of seconds to wait.
    :return: ``True`` once every listed daemon is in state ``running``.
    :raises DashboardException: With code ``daemon_list_timeout`` if no
        daemon is listed within ``timeout``; with code
        ``daemon_restart_failed`` if a daemon is in state ``unknown``,
        ``error`` or ``stopped``; with code ``daemon_restart_timeout`` if the
        daemons are not all running within ``timeout``.
    """
    orch = OrchClient.instance()
    start_time = time.time()

    while True:
        daemons = [d.to_dict() for d in orch.services.list_daemons(service_name=service_name)]
        logger.info("Daemon list for service %s: %s", service_name, daemons)

        if not daemons:
            logger.info("No daemons found for service %s. Retrying...", service_name)
            # Measure against the start of the whole wait; a timestamp taken
            # here would make the elapsed time always ~0 and never time out.
            if time.time() - start_time > timeout:
                logger.error("Timeout reached while waiting for daemon list for service %s",
                             service_name)
                raise DashboardException(
                    code='daemon_list_timeout',
                    msg="Timeout reached while waiting for daemon list for service %s."
                    % service_name
                )
            time.sleep(1)
            continue  # Retry getting daemon list

        # Any daemon in a failed state aborts the wait immediately.
        for daemon in daemons:
            daemon_state = daemon['status_desc']
            if daemon_state in ('unknown', 'error', 'stopped'):
                logger.error("Failed to restart daemon %s for service %s. State is %s",
                             daemon['daemon_id'], service_name, daemon_state)
                raise DashboardException(
                    code='daemon_restart_failed',
                    msg="Failed to restart the daemon %s. Daemon state is %s."
                    % (daemon['daemon_id'], daemon_state)
                )

        # Every other state than 'running' (e.g. 'starting') means "keep waiting".
        if all(daemon['status_desc'] == 'running' for daemon in daemons):
            logger.info("All daemons for service %s are running", service_name)
            return True

        if time.time() - start_time > timeout:
            logger.error("Timeout reached while waiting for daemon %s to start", service_name)
            raise DashboardException(
                code='daemon_restart_timeout',
                msg="Timeout reached while waiting for daemon %s to start." % service_name
            )

        time.sleep(1)  # Adding a short delay before retrying


class RgwServiceManager:
    """Helpers for RGW daemons and the dashboard's RGW API credentials."""

    # uid of the RGW user whose keys are looked up / created for the dashboard.
    user = 'dashboard'

    def find_available_port(self, starting_port=80):
        """Return the first port >= ``starting_port`` not used by an RGW daemon.

        :param starting_port: First port number to try.
        :return: The lowest port number, counting up from ``starting_port``,
            that is not in the ``ports`` list of any RGW daemon reported by
            the orchestrator.
        """
        orch = OrchClient.instance()
        daemons = [d.to_dict() for d in orch.services.list_daemons(daemon_type='rgw')]
        used_ports = set()
        for daemon in daemons:
            # 'ports' may be missing or empty/None for a daemon.
            used_ports.update(daemon.get('ports') or [])
        port = starting_port
        while port in used_ports:
            port += 1
        return port

    def restart_rgw_daemons_and_set_credentials(self):
        """Restart every RGW service and, if all came up, set the credentials.

        Each RGW service listed by the orchestrator is reloaded and verified
        via ``verify_service_restart``. The credentials are only configured
        when the verification succeeded for all services.
        """
        logger.info("Restarting RGW daemons and setting credentials")
        orch = OrchClient.instance()
        services, _ = orch.services.list(service_type='rgw', offset=0)

        all_daemons_up = True
        for service in services:
            logger.info("Verifying service restart for: %s", service['service_id'])
            daemons_up = verify_service_restart('rgw', service['service_id'])
            if not daemons_up:
                logger.error("Service %s restart verification failed", service['service_id'])
                all_daemons_up = False

        if all_daemons_up:
            logger.info("All daemons are up, configuring RGW credentials")
            self.configure_rgw_credentials()
        else:
            logger.error("Not all daemons are up, skipping RGW credentials configuration")

    def _parse_secrets(self, user: str, data: dict) -> Tuple[str, str]:
        """Extract the key pair of ``user`` from RGW user-info output.

        :param user: User name to match against each entry of ``data['keys']``.
        :param data: Parsed user-info output; only its ``keys`` and ``system``
            entries are read.
        :return: ``(access_key, secret_key)`` of the first matching key entry
            if the user is flagged as a system user, otherwise ``('', '')``.
        """
        # The 'system' flag belongs to the user record, not to a single key,
        # so it is evaluated once. It may be the string 'true' or a boolean.
        if data.get('system') not in ['true', True]:
            return '', ''
        for key in data.get('keys', []):
            if key.get('user') == user:
                return key.get('access_key'), key.get('secret_key')
        return '', ''

    def _get_user_keys(self, user: str, realm: Optional[str] = None) -> Tuple[str, str]:
        """Fetch the key pair of an RGW user, creating the user if needed.

        The user is first looked up with ``user info``. If no access key is
        found, a system user with display name 'Ceph Dashboard' is created and
        the keys are read from the creation output.

        :param user: uid of the RGW user.
        :param realm: Optional realm name, passed as ``--rgw-realm``.
        :return: ``(access_key, secret_key)``; both are empty strings if the
            keys could not be obtained or the command failed with a
            ``SubprocessError`` (which is logged, not raised).
        """
        access_key = ''
        secret_key = ''
        # Empty when no realm is given, so it can be appended unconditionally.
        cmd_realm_option = ['--rgw-realm', realm] if realm else []
        rgw_user_info_cmd = ['user', 'info', '--uid', user] + cmd_realm_option
        try:
            _, out, err = mgr.send_rgwadmin_command(rgw_user_info_cmd)
            if out:
                access_key, secret_key = self._parse_secrets(user, out)
            if not access_key:
                rgw_create_user_cmd = [
                    'user', 'create',
                    '--uid', user,
                    '--display-name', 'Ceph Dashboard',
                    '--system',
                ] + cmd_realm_option
                _, out, err = mgr.send_rgwadmin_command(rgw_create_user_cmd)
                if out:
                    access_key, secret_key = self._parse_secrets(user, out)
            if not access_key:
                logger.error('Unable to create rgw user "%s": %s', user, err)
        except SubprocessError as error:
            logger.exception(error)

        return access_key, secret_key

    def configure_rgw_credentials(self):
        """Look up the dashboard user's RGW keys and store them in the settings.

        If realms are listed, the keys are fetched per realm and stored as
        JSON-encoded ``{realm: key}`` mappings; otherwise the plain key pair
        of the user is stored.

        :raises NoCredentialsException: If no access/secret key could be
            obtained, or if an RGW admin command failed with a
            ``SubprocessError``.
        """
        logger.info('Configuring dashboard RGW credentials')
        realms = []
        access_key = ''
        secret_key = ''
        try:
            _, out, err = mgr.send_rgwadmin_command(['realm', 'list'])
            if out:
                realms = out.get('realms', [])
            if err:
                logger.error('Unable to list RGW realms: %s', err)
            if realms:
                realm_access_keys = {}
                realm_secret_keys = {}
                for realm in realms:
                    realm_access_key, realm_secret_key = self._get_user_keys(self.user, realm)
                    if realm_access_key:
                        realm_access_keys[realm] = realm_access_key
                        realm_secret_keys[realm] = realm_secret_key
                if realm_access_keys:
                    access_key = json.dumps(realm_access_keys)
                    secret_key = json.dumps(realm_secret_keys)
            else:
                access_key, secret_key = self._get_user_keys(self.user)
        except (AssertionError, SubprocessError) as error:
            logger.exception(error)
            raise NoCredentialsException from error

        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let empty credentials be stored.
        if not (access_key and secret_key):
            logger.error('No RGW credentials could be obtained for user "%s"', self.user)
            raise NoCredentialsException

        Settings.RGW_API_ACCESS_KEY = access_key
        Settings.RGW_API_SECRET_KEY = secret_key