diff options
Diffstat (limited to 'src/pybind/mgr/cephadm/services/osd.py')
-rw-r--r-- | src/pybind/mgr/cephadm/services/osd.py | 50 |
1 files changed, 43 insertions, 7 deletions
diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py index 75b3fc58c76..80bf92772c4 100644 --- a/src/pybind/mgr/cephadm/services/osd.py +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -551,6 +551,12 @@ class RemoveUtil(object): "Zaps all devices that are associated with an OSD" if osd.hostname is not None: cmd = ['--', 'lvm', 'zap', '--osd-id', str(osd.osd_id)] + if osd.replace_block: + cmd.append('--replace-block') + if osd.replace_db: + cmd.append('--replace-db') + if osd.replace_wal: + cmd.append('--replace-wal') if not osd.no_destroy: cmd.append('--destroy') with self.mgr.async_timeout_handler(osd.hostname, f'cephadm ceph-volume {" ".join(cmd)}'): @@ -618,10 +624,14 @@ class OSD: started: bool = False, stopped: bool = False, replace: bool = False, + replace_block: bool = False, + replace_db: bool = False, + replace_wal: bool = False, force: bool = False, hostname: Optional[str] = None, zap: bool = False, - no_destroy: bool = False): + no_destroy: bool = False, + original_weight: Optional[float] = None): # the ID of the OSD self.osd_id = osd_id @@ -648,6 +658,12 @@ class OSD: # If this is a replace or remove operation self.replace = replace + # If this is a block device replacement + self.replace_block = replace_block + # If this is a db device replacement + self.replace_db = replace_db + # If this is a wal device replacement + self.replace_wal = replace_wal # If we wait for the osd to be drained self.force = force # The name of the node @@ -656,7 +672,7 @@ class OSD: # mgr obj to make mgr/mon calls self.rm_util: RemoveUtil = remove_util - self.original_weight: Optional[float] = None + self.original_weight: Optional[float] = original_weight # Whether devices associated with the OSD should be zapped (DATA ERASED) self.zap = zap @@ -675,7 +691,7 @@ class OSD: if self.stopped: logger.debug(f"Won't start draining {self}. OSD draining is stopped.") return False - if self.replace: + if self.any_replace_params: self.rm_util.set_osd_flag([self], 'out') else: self.rm_util.reweight_osd(self, 0.0) @@ -685,7 +701,7 @@ class OSD: return True def stop_draining(self) -> bool: - if self.replace: + if self.any_replace_params: self.rm_util.set_osd_flag([self], 'in') else: if self.original_weight: @@ -763,6 +779,9 @@ class OSD: out['draining'] = self.draining out['stopped'] = self.stopped out['replace'] = self.replace + out['replace_block'] = self.replace_block + out['replace_db'] = self.replace_db + out['replace_wal'] = self.replace_wal out['force'] = self.force out['zap'] = self.zap out['hostname'] = self.hostname # type: ignore @@ -788,6 +807,13 @@ class OSD: inp['hostname'] = hostname return cls(**inp) + @property + def any_replace_params(self) -> bool: + return any([self.replace, + self.replace_block, + self.replace_db, + self.replace_wal]) + def __hash__(self) -> int: return hash(self.osd_id) @@ -811,7 +837,7 @@ class OSDRemovalQueue(object): # network calls, like mon commands. self.lock = Lock() - def process_removal_queue(self) -> None: + def process_removal_queue(self) -> bool: """ Performs actions in the _serve() loop to remove an OSD when criteria is met. @@ -819,6 +845,8 @@ class OSDRemovalQueue(object): we can't hold self.lock, as we're calling _remove_daemon in the loop """ + result: bool = False + # make sure that we don't run on OSDs that are not in the cluster anymore. self.cleanup() @@ -862,16 +890,23 @@ class OSDRemovalQueue(object): if self.mgr.cache.has_daemon(f'osd.{osd.osd_id}'): CephadmServe(self.mgr)._remove_daemon(f'osd.{osd.osd_id}', osd.hostname) logger.info(f"Successfully removed {osd} on {osd.hostname}") + result = True else: logger.info(f"Daemon {osd} on {osd.hostname} was already removed") - if osd.replace: + any_replace_params: bool = any([osd.replace, + osd.replace_block, + osd.replace_db, + osd.replace_wal]) + if any_replace_params: # mark destroyed in osdmap if not osd.destroy(): raise orchestrator.OrchestratorError( f"Could not destroy {osd}") logger.info( f"Successfully destroyed old {osd} on {osd.hostname}; ready for replacement") + if any_replace_params: + osd.zap = True else: # purge from osdmap if not osd.purge(): @@ -883,7 +918,7 @@ class OSDRemovalQueue(object): logger.info(f"Zapping devices for {osd} on {osd.hostname}") osd.do_zap() logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}") - + self.mgr.cache.invalidate_host_devices(osd.hostname) logger.debug(f"Removing {osd} from the queue.") # self could change while this is processing (osds get added from the CLI) @@ -892,6 +927,7 @@ class OSDRemovalQueue(object): with self.lock: self.osds.intersection_update(new_queue) self._save_to_store() + return result def cleanup(self) -> None: # OSDs can always be cleaned up manually. This ensures that we run on existing OSDs |