Merge PR #33189 into master

* refs/pull/33189/head:
	mgr/cephadm: fix redeploy (etc) of crash containers
	cephadm: do not use special unit, naming for crash agent
	cephadm: 'crash' is a ceph daemon

Reviewed-by: Michael Fritch <mfritch@suse.com>
Reviewed-by: Sebastian Wagner <swagner@suse.com>
This commit is contained in:
Sage Weil 2020-02-11 10:42:46 -06:00
commit 6dcb514a57
2 changed files with 32 additions and 77 deletions

View File

@@ -89,7 +89,8 @@ class TimeoutExpired(Error):
class Ceph(object):
daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror')
daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
'crash')
class Monitoring(object):
@@ -876,7 +877,7 @@ def get_daemon_args(fsid, daemon_type, daemon_id):
# type: (str, str, Union[int, str]) -> List[str]
r = list() # type: List[str]
if daemon_type in Ceph.daemons:
if daemon_type in Ceph.daemons and daemon_type != 'crash':
r += [
'--setuser', 'ceph',
'--setgroup', 'ceph',
@@ -1055,12 +1056,13 @@ def get_container_mounts(fsid, daemon_type, daemon_id,
cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
else:
cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
mounts[data_dir] = cdata_dir + ':z'
if daemon_type != 'crash':
mounts[data_dir] = cdata_dir + ':z'
if not no_config:
mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
if daemon_type == 'rbd-mirror':
# rbd-mirror does not search for its keyring in a data directory
mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.rbd-mirror.%s.keyring' % daemon_id
if daemon_type == 'rbd-mirror' or daemon_type == 'crash':
# these do not search for their keyrings in a data directory
mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)
if daemon_type in ['mon', 'osd']:
mounts['/dev'] = '/dev' # FIXME: narrow this down?
@@ -1098,6 +1100,9 @@ def get_container(fsid, daemon_type, daemon_id, privileged=False,
elif daemon_type == 'rbd-mirror':
entrypoint = '/usr/bin/rbd-mirror'
name = 'client.rbd-mirror.%s' % daemon_id
elif daemon_type == 'crash':
entrypoint = '/usr/bin/ceph-crash'
name = 'client.crash.%s' % daemon_id
elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
entrypoint = '/usr/bin/ceph-' + daemon_type
name = '%s.%s' % (daemon_type, daemon_id)
@@ -1105,10 +1110,13 @@ def get_container(fsid, daemon_type, daemon_id, privileged=False,
entrypoint = ''
name = ''
ceph_args = ['-n', name, '-f']
if daemon_type in Monitoring.components:
ceph_args = []
elif daemon_type == 'crash':
ceph_args = ['-n', name]
else:
ceph_args = ['-n', name, '-f']
return CephContainer(
image=args.image,
@@ -1383,59 +1391,9 @@ def deploy_crash(fsid, uid, gid, config, keyring):
# type: (str, int, int, str, str) -> None
crash_dir = os.path.join(args.data_dir, fsid, 'crash')
makedirs(crash_dir, uid, gid, DATA_DIR_MODE)
with open(os.path.join(crash_dir, 'keyring'), 'w') as f:
os.fchmod(f.fileno(), 0o600)
os.fchown(f.fileno(), uid, gid)
f.write(keyring)
with open(os.path.join(crash_dir, 'config'), 'w') as f:
os.fchmod(f.fileno(), 0o600)
os.fchown(f.fileno(), uid, gid)
f.write(config)
# ceph-crash unit
mounts = {
crash_dir: '/var/lib/ceph/crash:z',
os.path.join(crash_dir, 'config'): '/etc/ceph/ceph.conf:z',
os.path.join(crash_dir, 'keyring'): '/etc/ceph/ceph.keyring:z',
}
c = CephContainer(
image=args.image,
entrypoint='/usr/bin/ceph-crash',
args=['-n', 'client.crash.%s' % get_hostname()],
volume_mounts=mounts,
cname='ceph-%s-crash' % (fsid),
)
unit_name = 'ceph-%s-crash.service' % fsid
with open(os.path.join(args.unit_dir, unit_name + '.new'), 'w') as f:
f.write("""# generated by cephadm
[Unit]
Description=Ceph crash collector for {fsid}
PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target
[Service]
Type=simple
ExecStartPre=-{container_path} rm ceph-{fsid}-crash
ExecStart={cmd}
ExecStop=-{container_path} stop ceph-{fsid}-crash
KillMode=none
Restart=always
RestartSec=10
StartLimitInterval=10min
StartLimitBurst=10
[Install]
WantedBy=ceph-{fsid}.target
""".format(
container_path=container_path,
fsid=fsid,
cmd=' '.join(c.run_cmd()))
)
os.rename(os.path.join(args.unit_dir, unit_name + '.new'),
os.path.join(args.unit_dir, unit_name))
subprocess.check_output(['systemctl', 'enable', unit_name])
subprocess.check_output(['systemctl', 'start', unit_name])
c = get_container(args.fsid, 'crash', get_hostname())
deploy_daemon(args.fsid, 'crash', get_hostname(), c, uid, gid,
config, keyring)
def get_unit_file(fsid, uid, gid):
# type: (str, int, int) -> str
@@ -2243,10 +2201,7 @@ def list_daemons(detail=True, legacy_dir=None):
elif is_fsid(i):
fsid = i
for j in os.listdir(os.path.join(data_dir, i)):
if j == 'crash':
name = 'crash'
unit_name = 'ceph-%s-crash.service' % fsid
elif '.' in j:
if '.' in j:
name = j
(daemon_type, daemon_id) = j.split('.', 1)
unit_name = get_unit_name(fsid,
@@ -2493,8 +2448,7 @@ def command_rm_cluster():
verbose_on_failure=False)
# cluster units
for unit_name in ['ceph-%s.target' % args.fsid,
'ceph-%s-crash.service' % args.fsid]:
for unit_name in ['ceph-%s.target' % args.fsid]:
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
call(['systemctl', 'reset-failed', unit_name],
@@ -2510,8 +2464,6 @@ def command_rm_cluster():
# rm units
call_throws(['rm', '-f', args.unit_dir +
'/ceph-%s@.service' % args.fsid])
call_throws(['rm', '-f', args.unit_dir +
'/ceph-%s-crash.service' % args.fsid])
call_throws(['rm', '-f', args.unit_dir +
'/ceph-%s.target' % args.fsid])
call_throws(['rm', '-rf',

View File

@@ -1320,7 +1320,7 @@ class CephadmOrchestrator(MgrModule, orchestrator.OrchestratorClientMixin):
if '.' in d['name']:
sd.service_instance = '.'.join(d['name'].split('.')[1:])
elif d['name'] != '*':
sd.service_instance = host # e.g., crash
sd.service_instance = host
if service_id and service_id != sd.service_instance:
continue
if service_name and not sd.service_instance.startswith(service_name + '.'):
@@ -1632,13 +1632,16 @@ class CephadmOrchestrator(MgrModule, orchestrator.OrchestratorClientMixin):
if extra_config:
config += extra_config
# crash_keyring
ret, crash_keyring, err = self.mon_command({
'prefix': 'auth get-or-create',
'entity': 'client.crash.%s' % host,
'caps': ['mon', 'profile crash',
'mgr', 'profile crash'],
})
if daemon_type != 'crash':
# crash_keyring
ret, crash_keyring, err = self.mon_command({
'prefix': 'auth get-or-create',
'entity': 'client.crash.%s' % host,
'caps': ['mon', 'profile crash',
'mgr', 'profile crash'],
})
else:
crash_keyring = None
j = json.dumps({
'config': config,