diff --git a/tasks/ceph.py b/tasks/ceph.py index 13022969402..595ac351591 100644 --- a/tasks/ceph.py +++ b/tasks/ceph.py @@ -1478,6 +1478,7 @@ def task(ctx, config): mon, ctx=ctx, logger=log.getChild('ceph_manager.' + config['cluster']), + cluster=config['cluster'], ) yield finally: diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py index a76f5769ae2..af2d963c8bf 100644 --- a/tasks/ceph_manager.py +++ b/tasks/ceph_manager.py @@ -847,12 +847,14 @@ class CephManager: REPLICATED_POOL = 1 ERASURE_CODED_POOL = 3 - def __init__(self, controller, ctx=None, config=None, logger=None): + def __init__(self, controller, ctx=None, config=None, logger=None, + cluster='ceph'): self.lock = threading.RLock() self.ctx = ctx self.config = config self.controller = controller self.next_pool_id = 0 + self.cluster = cluster if (logger): self.log = lambda x: logger.info(x) else: @@ -886,6 +888,8 @@ class CephManager: 'timeout', '120', 'ceph', + '--cluster', + self.cluster, ] ceph_args.extend(args) proc = self.controller.run( @@ -907,6 +911,8 @@ class CephManager: 'timeout', '120', 'ceph', + '--cluster', + self.cluster, ] ceph_args.extend(args) proc = self.controller.run( @@ -920,8 +926,15 @@ class CephManager: Execute "ceph -w" in the background with stdout connected to a StringIO, and return the RemoteProcess. """ - return self.controller.run(args=["sudo", "daemon-helper", "kill", "ceph", "-w"], - wait=False, stdout=StringIO(), stdin=run.PIPE) + return self.controller.run( + args=["sudo", + "daemon-helper", + "kill", + "ceph", + '--cluster', + self.cluster, + "-w"], + wait=False, stdout=StringIO(), stdin=run.PIPE) def do_rados(self, remote, cmd, check_status=True): """ @@ -933,6 +946,8 @@ class CephManager: 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'rados', + '--cluster', + self.cluster, ] pre.extend(cmd) proc = remote.run( @@ -1050,8 +1065,11 @@ class CephManager: 'timeout', str(timeout), 'ceph', + '--cluster', + self.cluster, '--admin-daemon', - '/var/run/ceph/ceph-{type}.{id}.asok'.format( + '/var/run/ceph/{cluster}-{type}.{id}.asok'.format( + cluster=self.cluster, type=service_type, id=service_id), ] @@ -1210,11 +1228,12 @@ class CephManager: for x in filter(lambda x: not x.running(), self.ctx.daemons. - iter_daemons_of_role('osd'))] + iter_daemons_of_role('osd', self.cluster))] live_osds = [int(x.id_) for x in filter(lambda x: x.running(), - self.ctx.daemons.iter_daemons_of_role('osd'))] + self.ctx.daemons.iter_daemons_of_role('osd', + self.cluster))] return {'in': in_osds, 'out': out_osds, 'up': up_osds, 'down': down_osds, 'dead': dead_osds, 'live': live_osds, 'raw': osd_lines} @@ -1855,7 +1874,7 @@ class CephManager: "Check ipmi config.") remote.console.power_off() else: - self.ctx.daemons.get_daemon('osd', osd).stop() + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() def blackhole_kill_osd(self, osd): """ @@ -1864,7 +1883,7 @@ class CephManager: self.raw_cluster_cmd('--', 'tell', 'osd.%d' % osd, 'injectargs', '--filestore-blackhole') time.sleep(2) - self.ctx.daemons.get_daemon('osd', osd).stop() + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() def revive_osd(self, osd, timeout=150, skip_admin_check=False): """ @@ -1887,8 +1906,8 @@ class CephManager: teuthology.reconnect(self.ctx, 60, [remote]) mount_osd_data(self.ctx, remote, str(osd)) self.make_admin_daemon_dir(remote) - self.ctx.daemons.get_daemon('osd', osd).reset() - self.ctx.daemons.get_daemon('osd', osd).restart() + self.ctx.daemons.get_daemon('osd', osd, self.cluster).reset() + self.ctx.daemons.get_daemon('osd', osd, self.cluster).restart() if not skip_admin_check: # wait for dump_ops_in_flight; this command doesn't appear @@ -1916,14 +1935,16 @@ class CephManager: Wrapper to local get_daemon call which sends the given signal to the given osd. """ - self.ctx.daemons.get_daemon('osd', osd).signal(sig, silent=silent) + self.ctx.daemons.get_daemon('osd', osd, + self.cluster).signal(sig, silent=silent) ## monitors def signal_mon(self, mon, sig, silent=False): """ Wrapper to local get_deamon call """ - self.ctx.daemons.get_daemon('mon', mon).signal(sig, silent=silent) + self.ctx.daemons.get_daemon('mon', mon, + self.cluster).signal(sig, silent=silent) def kill_mon(self, mon): """ @@ -1942,7 +1963,7 @@ class CephManager: remote.console.power_off() else: - self.ctx.daemons.get_daemon('mon', mon).stop() + self.ctx.daemons.get_daemon('mon', mon, self.cluster).stop() def revive_mon(self, mon): """ @@ -1961,13 +1982,13 @@ class CephManager: remote.console.power_on() self.make_admin_daemon_dir(remote) - self.ctx.daemons.get_daemon('mon', mon).restart() + self.ctx.daemons.get_daemon('mon', mon, self.cluster).restart() def get_mon_status(self, mon): """ Extract all the monitor status information from the cluster """ - addr = self.ctx.ceph.conf['mon.%s' % mon]['mon addr'] + addr = self.ctx.ceph[self.cluster].conf['mon.%s' % mon]['mon addr'] out = self.raw_cluster_cmd('-m', addr, 'mon_status') return json.loads(out) @@ -2019,7 +2040,7 @@ class CephManager: """ Return path to osd data with {id} needing to be replaced """ - return "/var/lib/ceph/osd/ceph-{id}" + return '/var/lib/ceph/osd/' + self.cluster + '-{id}' def make_admin_daemon_dir(self, remote): """