mgr/cephadm: add support for proper osd daemon tracking

Signed-off-by: Joshua Schmid <jschmid@suse.de>
Joshua Schmid 2020-04-30 11:47:22 +02:00
parent ea5c668fea
commit 1c0a0a6f61
4 changed files with 47 additions and 9 deletions

View File

@@ -1746,6 +1746,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
             sd.container_image_name = d.get('container_image_name')
             sd.container_image_id = d.get('container_image_id')
             sd.version = d.get('version')
+            if sd.daemon_type == 'osd':
+                sd.osdspec_affinity = self.get_osdspec_affinity(sd.daemon_id)
             if 'state' in d:
                 sd.status_desc = d['state']
                 sd.status = {
@@ -1815,6 +1817,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                 self._refresh_host_daemons(host)
         # <service_map>
         sm = {}  # type: Dict[str, orchestrator.ServiceDescription]
+        osd_count = 0
         for h, dm in self.cache.get_daemons_with_volatile_status():
             for name, dd in dm.items():
                 if service_type and service_type != dd.daemon_type:
@@ -1823,9 +1826,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                 if service_name and service_name != n:
                     continue
                 if dd.daemon_type == 'osd':
-                    continue  # ignore OSDs for now
-                if dd.service_name() in self.spec_store.specs:
-                    spec = self.spec_store.specs[dd.service_name()]
+                    """
+                    OSDs do not know the affinity to their spec out of the box.
+                    """
+                    n = f"osd.{dd.osdspec_affinity}"
+                if n in self.spec_store.specs:
+                    spec = self.spec_store.specs[n]
                 else:
                     spec = ServiceSpec(
                         unmanaged=True,
@@ -1842,9 +1848,19 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                         container_image_name=dd.container_image_name,
                         spec=spec,
                     )
-                if dd.service_name() in self.spec_store.specs:
-                    sm[n].size = self._get_spec_size(spec)
-                    sm[n].created = self.spec_store.spec_created[dd.service_name()]
+                if n in self.spec_store.specs:
+                    if dd.daemon_type == 'osd':
+                        """
+                        The osd count can't be determined by the Placement spec.
+                        It's rather pointless to show an actual/expected representation
+                        here. So we're setting running = size for now.
+                        """
+                        osd_count += 1
+                        sm[n].size = osd_count
+                    else:
+                        sm[n].size = self._get_spec_size(spec)
+                    sm[n].created = self.spec_store.spec_created[n]
                     if service_type == 'nfs':
                         spec = cast(NFSServiceSpec, spec)
                         sm[n].rados_config_location = spec.rados_config_location()
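
Taken together, the hunks above change describe_service so that OSD daemons are no longer skipped: every OSD is grouped under a service name derived from its osdspec_affinity, and the reported size is simply the count of daemons that landed in that group. A minimal standalone sketch of the grouping, where DummyDaemon and the sample daemons are illustrative stand-ins rather than part of the patch:

    from collections import defaultdict

    class DummyDaemon:
        """Illustrative stand-in for orchestrator.DaemonDescription."""
        def __init__(self, daemon_type, daemon_id, osdspec_affinity=None):
            self.daemon_type = daemon_type
            self.daemon_id = daemon_id
            self.osdspec_affinity = osdspec_affinity

    daemons = [
        DummyDaemon('osd', '0', osdspec_affinity='default_drive_group'),
        DummyDaemon('osd', '1', osdspec_affinity='default_drive_group'),
        DummyDaemon('mon', 'a'),
    ]

    # Group OSDs by the spec they came from, mirroring n = f"osd.{dd.osdspec_affinity}"
    # in the hunk above; non-OSD daemons keep using dd.service_name() instead.
    sizes = defaultdict(int)
    for dd in daemons:
        if dd.daemon_type == 'osd':
            sizes[f"osd.{dd.osdspec_affinity}"] += 1

    print(dict(sizes))  # {'osd.default_drive_group': 2}
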
@@ -2056,6 +2072,22 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
     def apply_drivegroups(self, specs: List[DriveGroupSpec]):
         return [self._apply(spec) for spec in specs]
 
+    def get_osdspec_affinity(self, osd_id: str) -> str:
+        ret, out, err = self.mon_command({
+            'prefix': 'osd metadata',
+            'id': int(osd_id),
+            'format': 'json'
+        })
+        if ret != 0:
+            self.log.warning(f"Caught error on calling 'osd metadata {osd_id}' -> {err}")
+            return ''
+        try:
+            metadata = json.loads(out)
+        except json.decoder.JSONDecodeError:
+            self.log.error(f"Could not decode json -> {out}")
+            return ''
+        return metadata.get('osdspec_affinity', '')
+
     def find_destroyed_osds(self) -> Dict[str, List[str]]:
         osd_host_map: Dict[str, List[str]] = dict()
         ret, out, err = self.mon_command({

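The new get_osdspec_affinity helper depends on the OSD exposing an 'osdspec_affinity' key in its 'osd metadata' output. A rough sketch of the parsing and fallback behaviour, using a made-up metadata payload (the other keys are illustrative only):

    import json

    # Illustrative 'osd metadata' payload; only 'osdspec_affinity' matters here,
    # the other keys are made up for the example.
    out = '{"id": 0, "hostname": "node1", "osdspec_affinity": "default_drive_group"}'

    try:
        metadata = json.loads(out)
    except json.decoder.JSONDecodeError:
        metadata = {}

    # An OSD that was not deployed from a spec, or unparsable output, simply
    # yields an empty affinity, matching the helper's fallback above.
    print(metadata.get('osdspec_affinity', ''))  # default_drive_group
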
View File

@@ -297,17 +297,17 @@ class TestCephadm(object):
             # no preview and only one disk, prepare is used due to the hack that is in place.
             (['/dev/sda'], False, "lvm prepare --bluestore --data /dev/sda --no-systemd"),
             # no preview and multiple disks, uses batch
-            (['/dev/sda', '/dev/sdb'], False, "lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"),
+            (['/dev/sda', '/dev/sdb'], False, "CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"),
             # preview and only one disk needs to use batch again to generate the preview
             (['/dev/sda'], True, "lvm batch --no-auto /dev/sda --report --format json"),
             # preview and multiple disks work the same
-            (['/dev/sda', '/dev/sdb'], True, "lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"),
+            (['/dev/sda', '/dev/sdb'], True, "CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"),
         ]
     )
     @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}'))
     def test_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_command):
         with self._with_host(cephadm_module, 'test'):
-            dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), data_devices=DeviceSelection(paths=devices))
+            dg = DriveGroupSpec(service_id='test.spec', placement=PlacementSpec(host_pattern='test'), data_devices=DeviceSelection(paths=devices))
             ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
             preview = preview
             out = cephadm_module.driveselection_to_ceph_volume(dg, ds, [], preview)

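The parametrized cases above encode which ceph-volume subcommand is expected for each devices/preview combination. A rough restatement of that table as code, which is not the real to_ceph_volume logic, only a reading of the expectations:

    def expected_subcommand(devices, preview):
        # A single disk without preview goes through 'lvm prepare'; multiple
        # disks, or any preview run, go through 'lvm batch'.
        if len(devices) == 1 and not preview:
            return 'lvm prepare'
        return 'lvm batch'

    assert expected_subcommand(['/dev/sda'], False) == 'lvm prepare'
    assert expected_subcommand(['/dev/sda', '/dev/sdb'], False) == 'lvm batch'
    assert expected_subcommand(['/dev/sda'], True) == 'lvm batch'
    assert expected_subcommand(['/dev/sda', '/dev/sdb'], True) == 'lvm batch'
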
View File

@@ -1245,6 +1245,7 @@ class DaemonDescription(object):
                  created=None,
                  started=None,
                  last_configured=None,
+                 osdspec_affinity=None,
                  last_deployed=None):
         # Host is at the same granularity as InventoryHost
         self.hostname = hostname
@@ -1282,6 +1283,9 @@ class DaemonDescription(object):
         self.last_configured = last_configured  # type: Optional[datetime.datetime]
         self.last_deployed = last_deployed  # type: Optional[datetime.datetime]
 
+        # Affinity to a certain OSDSpec
+        self.osdspec_affinity = osdspec_affinity  # type: Optional[str]
+
     def name(self):
         return '%s.%s' % (self.daemon_type, self.daemon_id)

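With the new keyword argument, callers can attach the spec name while constructing a DaemonDescription. A hedged usage sketch, assuming the orchestrator module is importable as it is inside the mgr environment and using made-up values:

    import orchestrator  # assumes the ceph-mgr python path

    # Values are made up for illustration.
    dd = orchestrator.DaemonDescription(
        daemon_type='osd',
        daemon_id='3',
        hostname='node1',
        osdspec_affinity='default_drive_group',
    )

    assert dd.name() == 'osd.3'
    # describe_service() above derives the OSD service name from the affinity:
    assert f"osd.{dd.osdspec_affinity}" == 'osd.default_drive_group'
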
View File

@@ -101,4 +101,6 @@ class to_ceph_volume(object):
             cmd += " --report"
             cmd += " --format json"
 
+        cmd = f"CEPH_VOLUME_OSDSPEC_AFFINITY={self.spec.service_id} " + cmd
+
         return cmd
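
The prefix added here is exactly what the updated test expectations check for. A small sketch of the resulting command string, with _Spec standing in for the real DriveGroupSpec:

    # _Spec stands in for the real DriveGroupSpec; only service_id is needed here.
    class _Spec:
        service_id = 'test.spec'

    spec = _Spec()
    cmd = "lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"
    cmd = f"CEPH_VOLUME_OSDSPEC_AFFINITY={spec.service_id} " + cmd
    print(cmd)
    # CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd

Exporting CEPH_VOLUME_OSDSPEC_AFFINITY on the ceph-volume invocation is presumably how the spec name ends up in the OSD's metadata, which get_osdspec_affinity in the first file reads back to populate DaemonDescription.osdspec_affinity.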