Merge pull request #41829 from SMIL-Infra/clean-cg

cephadm: work around unit replace failure

Reviewed-by: Juan Miguel Olmo Martínez <jolmomar@redhat.com>
This commit is contained in:
Kefu Chai 2021-07-02 23:15:38 +08:00 committed by GitHub
commit 01d3a10e22
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2709,6 +2709,35 @@ def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, backgro
+ (' &' if background else '') + '\n')
def clean_cgroup(ctx: CephadmContext, unit_name: str) -> None:
    """Remove a leftover cgroup directory tree for ``unit_name``, if present.

    systemd may fail to clean up the cgroups of a previously stopped unit,
    which causes the next "systemctl start" of that unit to fail.
    See https://tracker.ceph.com/issues/50998

    The cleanup is best effort: a failure to remove the directories is
    logged as a warning and never raised to the caller.

    :param ctx: cephadm context; only ``ctx.fsid`` is read here.
    :param unit_name: systemd unit name without the ``.service`` suffix
        (the suffix is appended when building the cgroup path).
    """
    # In bootstrap we set the context fsid at the end.
    if not ctx.fsid:
        return

    CGROUPV2_PATH = Path('/sys/fs/cgroup')
    if not (CGROUPV2_PATH / 'system.slice').exists():
        # Only unified cgroup is affected, skip if not the case
        return

    # Both the "ceph-" separator and the dashes inside the fsid appear as
    # the literal text "\x2d" in the slice directory name.
    slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
    cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service'
    if not cg_path.exists():
        return

    def cg_trim(path: Path) -> None:
        # Depth-first: child directories are removed before their parent.
        for p in path.iterdir():
            if p.is_dir():
                cg_trim(p)
        path.rmdir()

    try:
        cg_trim(cg_path)
    except OSError as e:
        # Include the underlying error so the log shows *why* the trim
        # failed instead of only that it failed.
        logger.warning('Failed to trim old cgroups %s: %s', cg_path, e)
def deploy_daemon_units(
ctx: CephadmContext,
fsid: str,
@ -2845,6 +2874,7 @@ def deploy_daemon_units(
if enable:
call_throws(ctx, ['systemctl', 'enable', unit_name])
if start:
clean_cgroup(ctx, unit_name)
call_throws(ctx, ['systemctl', 'start', unit_name])
@ -5548,7 +5578,7 @@ def command_rm_cluster(ctx):
call(ctx, ['systemctl', 'disable', unit_name],
verbosity=CallVerbosity.DEBUG)
slice_name = 'system-%s.slice' % (('ceph-%s' % ctx.fsid).replace('-', '\\x2d'))
slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
call(ctx, ['systemctl', 'stop', slice_name],
verbosity=CallVerbosity.DEBUG)