From 575b7c9a98608c4f266d8e406118544916f1b8d6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 8 Nov 2021 12:04:27 -0500 Subject: [PATCH 1/2] mgr/orchestrator: pass 'force' flag down for remove_service Signed-off-by: Sage Weil --- src/pybind/mgr/cephadm/module.py | 2 +- src/pybind/mgr/orchestrator/_interface.py | 2 +- src/pybind/mgr/orchestrator/module.py | 2 +- src/pybind/mgr/rook/module.py | 2 +- src/pybind/mgr/test_orchestrator/module.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 27032cb873c..145bcd4175c 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1917,7 +1917,7 @@ Then run the following: return self._remove_daemons(args) @handle_orch_error - def remove_service(self, service_name: str) -> str: + def remove_service(self, service_name: str, force: bool = False) -> str: self.log.info('Remove service %s' % service_name) self._trigger_preview_refresh(service_name=service_name) if service_name in self.spec_store: diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 7fa3b5f3409..9f86abe8db0 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -490,7 +490,7 @@ class Orchestrator(object): """ raise NotImplementedError() - def remove_service(self, service_name: str) -> OrchResult[str]: + def remove_service(self, service_name: str, force: bool = False) -> OrchResult[str]: """ Remove a service (a collection of daemons). diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index a3d809a68cb..20bab4202a9 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -1019,7 +1019,7 @@ Usage: """Remove a service""" if service_name in ['mon', 'mgr'] and not force: raise OrchestratorError('The mon and mgr services cannot be removed') - completion = self.remove_service(service_name) + completion = self.remove_service(service_name, force=force) raise_if_exception(completion) return HandleCommandResult(stdout=completion.result_str()) diff --git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index c0c1c32c9f2..93021d0c2ef 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -463,7 +463,7 @@ class RookOrchestrator(MgrModule, orchestrator.Orchestrator): return num_replicas, leaf_type @handle_orch_error - def remove_service(self, service_name: str) -> str: + def remove_service(self, service_name: str, force: bool = False) -> str: if service_name == 'rbd-mirror': return self.rook_cluster.rm_service('cephrbdmirrors', 'default-rbd-mirror') service_type, service_name = service_name.split('.', 1) diff --git a/src/pybind/mgr/test_orchestrator/module.py b/src/pybind/mgr/test_orchestrator/module.py index 9d172737777..d89c23bf159 100644 --- a/src/pybind/mgr/test_orchestrator/module.py +++ b/src/pybind/mgr/test_orchestrator/module.py @@ -227,7 +227,7 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator): return 'done' @handle_orch_error - def remove_service(self, service_name): + def remove_service(self, service_name, force = False): assert isinstance(service_name, str) return 'done' From bda5c304f33cb7a401a289d92f35b5d15f2fce15 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 5 Nov 2021 11:39:07 -0400 Subject: [PATCH 2/2] mgr/cephadm: allow osd spec removal OSD specs/drivegroups are essentially templates for OSD creation but do not map to the full lifecycle of the OSDs that they create. When a spec is removed, remove it immediately. If no --force is provided, the error lists which OSDs will be left behind. If --force is passed, the service is removed. This leaves behind a few oddities: - When you list services, OSDs that were created by the drivegroup may still exist, causing the drivegroup to appear in the list as unmanaged services. - If you create a new drivegroup with the same name, the prior OSDs will appear to belong to the new spec instance, regardless of whether the spec/drivegroup parameters are the same. Signed-off-by: Sage Weil --- src/pybind/mgr/cephadm/module.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 145bcd4175c..5ebe44038c6 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1925,9 +1925,9 @@ Then run the following: return f'Unable to remove {service_name} service.\n' \ f'Note, you might want to mark the {service_name} service as "unmanaged"' - # Report list of affected OSDs - osds_msg = {} - if service_name.startswith('osd.'): + # Report list of affected OSDs? + if not force and service_name.startswith('osd.'): + osds_msg = {} for h, dm in self.cache.get_daemons_with_volatile_status(): osds_to_remove = [] for name, dd in dm.items(): @@ -1935,23 +1935,17 @@ Then run the following: osds_to_remove.append(str(dd.daemon_id)) if osds_to_remove: osds_msg[h] = osds_to_remove - - found = self.spec_store.rm(service_name) or osds_msg - if found: - self._kick_serve_loop() if osds_msg: - return f'The service {service_name} will be deleted once the following OSDs: {osds_msg} ' \ - f'will be deleted manually.' - else: - return f'Removed service {service_name}' - else: - # must be idempotent: still a success. - try: - self.cache.get_daemon(service_name) - return (f'Failed to remove service <{service_name}>. "{service_name}" is the name of a daemon, not a service. ' - + 'Running service names can be found with "ceph orch ls"') - except OrchestratorError: - return f'Failed to remove service. <{service_name}> was not found. Running service names can be found with "ceph orch ls"' + msg = '' + for h, ls in osds_msg.items(): + msg += f'\thost {h}: {" ".join([f"osd.{id}" for id in ls])}' + raise OrchestratorError(f'If {service_name} is removed then the following OSDs will remain, --force to proceed anyway\n{msg}') + + found = self.spec_store.rm(service_name) + if found and service_name.startswith('osd.'): + self.spec_store.finally_rm(service_name) + self._kick_serve_loop() + return f'Removed service {service_name}' @handle_orch_error def get_inventory(self, host_filter: Optional[orchestrator.InventoryFilter] = None, refresh: bool = False) -> List[orchestrator.InventoryHost]: