From 1fee255ee4ceab99684c34e3e64532b2eb555a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=83=A1=E7=8E=AE=E6=96=87?= Date: Sun, 13 Jun 2021 14:23:56 +0800 Subject: [PATCH] cephadm: workaround unit replace failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This appears to be a bug in systemd: it fails to clean up cgroups when stopping the unit. Then, if we start a new unit with the same name, the 'ExecStartPre' command fails with status=219/CGROUP (only when the systemd unified cgroup hierarchy is enabled), because cgroup v2 does not allow processes in non-leaf groups. This should be fixed in systemd commit e08dabfec7304dfa0d59997dc4219ffaf22af717. For now, we just remove these leftover cgroups before starting the new unit. Fixes: https://tracker.ceph.com/issues/50998 Signed-off-by: 胡玮文 --- src/cephadm/cephadm | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index ffbcbb567de..0686b15ac3e 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -2704,6 +2704,35 @@ def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, backgro + (' &' if background else '') + '\n') +def clean_cgroup(ctx: CephadmContext, unit_name: str): + # systemd may fail to cleanup cgroups from previous stopped unit, which will cause next "systemctl start" to fail. + # see https://tracker.ceph.com/issues/50998 + + # In bootstrap we set the context fsid at the end. 
+ if not ctx.fsid: + return + + CGROUPV2_PATH = Path('/sys/fs/cgroup') + if not (CGROUPV2_PATH / 'system.slice').exists(): + # Only unified cgroup is affected, skip if not the case + return + + slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d')) + cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service' + if not cg_path.exists(): + return + + def cg_trim(path: Path): + for p in path.iterdir(): + if p.is_dir(): + cg_trim(p) + path.rmdir() + try: + cg_trim(cg_path) + except OSError: + logger.warning(f'Failed to trim old cgroups {cg_path}') + + def deploy_daemon_units( ctx: CephadmContext, fsid: str, @@ -2840,6 +2869,7 @@ def deploy_daemon_units( if enable: call_throws(ctx, ['systemctl', 'enable', unit_name]) if start: + clean_cgroup(ctx, unit_name) call_throws(ctx, ['systemctl', 'start', unit_name]) @@ -5538,7 +5568,7 @@ def command_rm_cluster(ctx): call(ctx, ['systemctl', 'disable', unit_name], verbosity=CallVerbosity.DEBUG) - slice_name = 'system-%s.slice' % (('ceph-%s' % ctx.fsid).replace('-', '\\x2d')) + slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d')) call(ctx, ['systemctl', 'stop', slice_name], verbosity=CallVerbosity.DEBUG)