Merge PR #32411 into master

* refs/pull/32411/head:
	qa/tasks/ceph_manager: enable ceph-objectstore-tool via cephadm
	qa/tasks/cephadm: support crush_tunables config option

Reviewed-by: Josh Durgin <jdurgin@redhat.com>

commit 4b7f5de235
diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py

@@ -35,19 +35,24 @@ DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf'
 log = logging.getLogger(__name__)
 
 # this is for cephadm clusters
-def shell(ctx, cluster_name, remote, args, **kwargs):
+def shell(ctx, cluster_name, remote, args, name=None, **kwargs):
     testdir = teuthology.get_testdir(ctx)
+    extra_args = []
+    if name:
+        extra_args = ['-n', name]
+    else:
+        extra_args = ['-c', '{}/{}.conf'.format(testdir, cluster_name)]
     return remote.run(
         args=[
             'sudo',
             ctx.cephadm,
             '--image', ctx.ceph[cluster_name].image,
             'shell',
-            '-c', '{}/{}.conf'.format(testdir, cluster_name),
+        ] + extra_args + [
            '-k', '{}/{}.keyring'.format(testdir, cluster_name),
            '--fsid', ctx.ceph[cluster_name].fsid,
            '--',
        ] + args,
        **kwargs
    )
 
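The rewritten `shell()` above either enters a daemon-scoped container (`-n <name>`, so the daemon's own config and data are visible) or falls back to the per-test conf file (`-c`), as before. A minimal standalone sketch of that argument assembly; the testdir, image, and fsid values are illustrative, not from the commit:

```python
# Standalone sketch of the argument assembly in shell() above; testdir,
# image, and fsid below are made-up placeholder values.
def build_shell_cmd(args, cluster_name='ceph',
                    testdir='/home/ubuntu/cephtest',
                    image='ceph/ceph:v15',
                    fsid='00000000-0000-0000-0000-000000000000',
                    name=None):
    # with name: enter the daemon's container; without: pass the test conf
    extra_args = (['-n', name] if name
                  else ['-c', '{}/{}.conf'.format(testdir, cluster_name)])
    return (['sudo', 'cephadm', '--image', image, 'shell']
            + extra_args
            + ['-k', '{}/{}.keyring'.format(testdir, cluster_name),
               '--fsid', fsid, '--']
            + args)

print(build_shell_cmd(['ceph-objectstore-tool', '--op', 'list-pgs'],
                      name='osd.3'))
```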
@@ -200,6 +205,8 @@ class OSDThrasher(Thrasher):
         self.logger.info(msg, *args, **kwargs)
 
     def cmd_exists_on_osds(self, cmd):
+        if self.ceph_manager.cephadm:
+            return True
         allremotes = self.ceph_manager.ctx.cluster.only(\
             teuthology.is_type('osd', self.cluster)).remotes.keys()
         allremotes = list(set(allremotes))
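`cmd_exists_on_osds()` can return True unconditionally under cephadm because the container image bundles the Ceph tools; package-based installs still probe each OSD host. A rough local stand-in for that probe, using `shutil.which` in place of the remote `type` check:

```python
# Local stand-in for the per-host `type <cmd>` probe used on non-cephadm
# clusters; shutil.which is an assumption, not what teuthology actually runs.
import shutil

def cmd_exists_locally(cmd):
    return shutil.which(cmd) is not None

print(cmd_exists_locally('ceph-objectstore-tool'))  # False unless installed
```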
@@ -211,6 +218,21 @@ class OSDThrasher(Thrasher):
                 return False;
         return True;
 
+    def run_ceph_objectstore_tool(self, remote, osd, cmd):
+        if self.ceph_manager.cephadm:
+            return shell(
+                self.ceph_manager.ctx, self.ceph_manager.cluster, remote,
+                args=['ceph-objectstore-tool'] + cmd,
+                name=osd,
+                wait=True, check_status=False,
+                stdout=StringIO(),
+                stderr=StringIO())
+        else:
+            return remote.run(
+                args=['sudo', 'adjust-ulimits', 'ceph-objectstore-tool'] + cmd,
+                wait=True, check_status=False, stdout=StringIO(),
+                stderr=StringIO())
+
     def kill_osd(self, osd=None, mark_down=False, mark_out=False):
         """
         :param osd: Osd to be killed.
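Both branches of `run_ceph_objectstore_tool()` run the tool with `check_status=False` and `StringIO` capture, leaving exit-status handling to the caller. A local approximation of that run-and-capture pattern, using `subprocess` as a stand-in for teuthology's `remote.run`:

```python
# Local approximation of remote.run(..., check_status=False,
# stdout=StringIO(), stderr=StringIO()): run, capture, and let the
# caller inspect the exit status itself.
import subprocess
from io import StringIO

def run_capture(args):
    res = subprocess.run(args, capture_output=True, text=True)
    return res.returncode, StringIO(res.stdout), StringIO(res.stderr)

rc, out, err = run_capture(['ls', '/nonexistent'])
if rc != 0:  # the equivalent of testing proc.exitstatus above
    print('failed with', rc, err.getvalue().strip())
```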
@@ -240,40 +262,29 @@ class OSDThrasher(Thrasher):
                 random.random() < self.chance_move_pg):
             exp_osd = random.choice(self.dead_osds[:-1])
             exp_remote = self.ceph_manager.find_remote('osd', exp_osd)
-            if ('keyvaluestore_backend' in
-                    self.ceph_manager.ctx.ceph[self.cluster].conf['osd']):
-                prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
-                          "--data-path {fpath} --journal-path {jpath} "
-                          "--type keyvaluestore "
-                          "--log-file="
-                          "/var/log/ceph/objectstore_tool.\\$pid.log ".
-                          format(fpath=FSPATH, jpath=JPATH))
-            else:
-                prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
-                          "--data-path {fpath} --journal-path {jpath} "
-                          "--log-file="
-                          "/var/log/ceph/objectstore_tool.\\$pid.log ".
-                          format(fpath=FSPATH, jpath=JPATH))
-            cmd = (prefix + "--op list-pgs").format(id=exp_osd)
+            prefix = ['--data-path', FSPATH.format(id=osd),
+                      '--journal-path', JPATH.format(id=osd),
+                      '--log-file=/var/log/ceph/objectstore_tool.$pid.log']
+            cmd = prefix + ['--op', 'list-pgs']
 
-            # ceph-objectstore-tool might be temporarily absent during an
-            # upgrade - see http://tracker.ceph.com/issues/18014
-            with safe_while(sleep=15, tries=40, action="type ceph-objectstore-tool") as proceed:
-                while proceed():
-                    proc = exp_remote.run(args=['type', 'ceph-objectstore-tool'],
-                                          wait=True, check_status=False, stdout=StringIO(),
-                                          stderr=StringIO())
-                    if proc.exitstatus == 0:
-                        break
-                    log.debug("ceph-objectstore-tool binary not present, trying again")
+            if not self.ceph_manager.cephadm:
+                # ceph-objectstore-tool might be temporarily absent during an
+                # upgrade - see http://tracker.ceph.com/issues/18014
+                with safe_while(sleep=15, tries=40, action="type ceph-objectstore-tool") as proceed:
+                    while proceed():
+                        proc = exp_remote.run(args=['type', 'ceph-objectstore-tool'],
+                                              wait=True, check_status=False, stdout=StringIO(),
+                                              stderr=StringIO())
+                        if proc.exitstatus == 0:
+                            break
+                        log.debug("ceph-objectstore-tool binary not present, trying again")
 
             # ceph-objectstore-tool might bogusly fail with "OSD has the store locked"
             # see http://tracker.ceph.com/issues/19556
             with safe_while(sleep=15, tries=40, action="ceph-objectstore-tool --op list-pgs") as proceed:
                 while proceed():
-                    proc = exp_remote.run(args=cmd, wait=True,
-                                          check_status=False,
-                                          stdout=StringIO(), stderr=StringIO())
+                    proc = self.run_ceph_objectstore_tool(
+                        exp_remote, 'osd.%s' % osd, cmd)
                     if proc.exitstatus == 0:
                         break
                     elif proc.exitstatus == 1 and proc.stderr == "OSD has the store locked":
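Under cephadm the `type ceph-objectstore-tool` availability poll is skipped entirely, since the binary always exists inside the image; elsewhere the 15 s × 40-try `safe_while` budget still applies. A plain-loop approximation of that retry shape (teuthology's real helper raises `MaxWhileTries` on exhaustion):

```python
# Plain-loop approximation of safe_while(sleep=15, tries=40): poll a
# condition on a fixed budget and fail loudly when it never comes true.
import time

def wait_until(check, sleep=15, tries=40, action='check'):
    for _ in range(tries):
        if check():
            return
        time.sleep(sleep)
    raise RuntimeError('reached maximum tries for {!r}'.format(action))

# e.g. wait_until(lambda: shutil.which('ceph-objectstore-tool'),
#                 sleep=1, tries=3, action='type ceph-objectstore-tool')
```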
@@ -288,25 +299,35 @@ class OSDThrasher(Thrasher):
                 self.log("No PGs found for osd.{osd}".format(osd=exp_osd))
                 return
             pg = random.choice(pgs)
-            exp_path = teuthology.get_testdir(self.ceph_manager.ctx)
-            exp_path = os.path.join(exp_path, '{0}.data'.format(self.cluster))
-            exp_path = os.path.join(exp_path,
+            #exp_path = teuthology.get_testdir(self.ceph_manager.ctx)
+            #exp_path = os.path.join(exp_path, '{0}.data'.format(self.cluster))
+            exp_path = os.path.join('/var/log/ceph', # available inside 'shell' container
                                     "exp.{pg}.{id}".format(
                                         pg=pg,
                                         id=exp_osd))
+            if self.ceph_manager.cephadm:
+                exp_host_path = os.path.join(
+                    '/var/log/ceph',
+                    self.ceph_manager.ctx.ceph[self.ceph_manager.cluster].fsid,
+                    "exp.{pg}.{id}".format(
+                        pg=pg,
+                        id=exp_osd))
+            else:
+                exp_host_path = exp_path
 
             # export
             # Can't use new export-remove op since this is part of upgrade testing
-            cmd = prefix + "--op export --pgid {pg} --file {file}"
-            cmd = cmd.format(id=exp_osd, pg=pg, file=exp_path)
-            proc = exp_remote.run(args=cmd)
+            cmd = prefix + ['--op', 'export', '--pgid', pg, '--file', exp_path]
+            proc = self.run_ceph_objectstore_tool(exp_remote, 'osd.%s' % osd,
+                                                  cmd)
             if proc.exitstatus:
                 raise Exception("ceph-objectstore-tool: "
                                 "export failure with status {ret}".
                                 format(ret=proc.exitstatus))
             # remove
-            cmd = prefix + "--force --op remove --pgid {pg}"
-            cmd = cmd.format(id=exp_osd, pg=pg)
-            proc = exp_remote.run(args=cmd)
+            cmd = prefix + ['--force', '--op', 'remove', '--pgid', pg]
+            proc = self.run_ceph_objectstore_tool(exp_remote, 'osd.%s' % osd,
+                                                  cmd)
             if proc.exitstatus:
                 raise Exception("ceph-objectstore-tool: "
                                 "remove failure with status {ret}".
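The export file now lands in `/var/log/ceph`, which the diff's own comment notes is available inside the `cephadm shell` container; on the host the same file shows up under `/var/log/ceph/<fsid>/`, which is what `exp_host_path` records (on non-cephadm clusters the two paths coincide). A small sketch of that path pairing, with an illustrative fsid:

```python
# Sketch of the container-path / host-path pairing above; the fsid is a
# placeholder. Inside the shell container the file is /var/log/ceph/...,
# while the host sees it under /var/log/ceph/<fsid>/.
import os

pg, exp_osd, cephadm = '2.1f', 3, True
fsid = '00000000-0000-0000-0000-000000000000'
name = 'exp.{pg}.{id}'.format(pg=pg, id=exp_osd)
exp_path = os.path.join('/var/log/ceph', name)
exp_host_path = (os.path.join('/var/log/ceph', fsid, name)
                 if cephadm else exp_path)
print(exp_path, '->', exp_host_path)
```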
@@ -314,9 +335,10 @@ class OSDThrasher(Thrasher):
             # If there are at least 2 dead osds we might move the pg
             if exp_osd != imp_osd:
                 # If pg isn't already on this osd, then we will move it there
-                cmd = (prefix + "--op list-pgs").format(id=imp_osd)
-                proc = imp_remote.run(args=cmd, wait=True,
-                                      check_status=False, stdout=StringIO())
+                cmd = prefix + ['--op', 'list-pgs']
+                proc = self.run_ceph_objectstore_tool(imp_remote,
+                                                      'osd.%s' % osd,
+                                                      cmd)
                 if proc.exitstatus:
                     raise Exception("ceph-objectstore-tool: "
                                     "imp list-pgs failure with status {ret}".
@@ -329,18 +351,21 @@ class OSDThrasher(Thrasher):
                 # Copy export file to the other machine
                 self.log("Transfer export file from {srem} to {trem}".
                          format(srem=exp_remote, trem=imp_remote))
-                tmpexport = Remote.get_file(exp_remote, exp_path)
-                Remote.put_file(imp_remote, tmpexport, exp_path)
+                tmpexport = Remote.get_file(exp_remote, exp_host_path)
+                Remote.put_file(imp_remote, tmpexport, exp_host_path)
                 os.remove(tmpexport)
             else:
                 # Can't move the pg after all
                 imp_osd = exp_osd
                 imp_remote = exp_remote
             # import
-            cmd = (prefix + "--op import --file {file}")
-            cmd = cmd.format(id=imp_osd, file=exp_path)
-            proc = imp_remote.run(args=cmd, wait=True, check_status=False,
-                                  stderr=StringIO())
+            cmd = ['--data-path', FSPATH.format(id=imp_osd),
+                   '--journal-path', JPATH.format(id=imp_osd),
+                   '--log-file',
+                   "/var/log/ceph/objectstore_tool.\\$pid.log",
+                   '--op', 'import', '--file', exp_path]
+            proc = self.run_ceph_objectstore_tool(
+                imp_remote, 'osd.%s' % imp_osd, cmd)
             if proc.exitstatus == 1:
                 bogosity = "The OSD you are using is older than the exported PG"
                 if bogosity in proc.stderr.getvalue():
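Throughout these hunks, shell-formatted command strings become argv lists, which pass through `run_ceph_objectstore_tool()` without any shell quoting; the import branch also rebuilds the data/journal paths for `imp_osd` instead of reusing the export-side prefix. A sketch of that list construction; the `FSPATH`/`JPATH` templates below are assumptions modeled on the ones defined near the top of ceph_manager.py:

```python
# Sketch of the argv-list construction for the import step. FSPATH/JPATH
# are assumed templates (the real definitions live at the top of
# qa/tasks/ceph_manager.py); the paths are illustrative.
FSPATH = '/var/lib/ceph/osd/ceph-{id}'
JPATH = '/var/lib/ceph/osd/ceph-{id}/journal'

imp_osd = 4
exp_path = '/var/log/ceph/exp.2.1f.3'
cmd = ['--data-path', FSPATH.format(id=imp_osd),
       '--journal-path', JPATH.format(id=imp_osd),
       '--op', 'import', '--file', exp_path]
print(cmd)  # handed over as a list: no shell word-splitting to worry about
```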
@@ -362,24 +387,25 @@ class OSDThrasher(Thrasher):
                     raise Exception("ceph-objectstore-tool: "
                                     "import failure with status {ret}".
                                     format(ret=proc.exitstatus))
-            cmd = "rm -f {file}".format(file=exp_path)
+            cmd = "sudo rm -f {file}".format(file=exp_host_path)
             exp_remote.run(args=cmd)
             if imp_remote != exp_remote:
                 imp_remote.run(args=cmd)
 
             # apply low split settings to each pool
-            for pool in self.ceph_manager.list_pools():
-                no_sudo_prefix = prefix[5:]
-                cmd = ("CEPH_ARGS='--filestore-merge-threshold 1 "
-                       "--filestore-split-multiple 1' sudo -E "
-                       + no_sudo_prefix + "--op apply-layout-settings --pool " + pool).format(id=osd)
-                proc = remote.run(args=cmd, wait=True, check_status=False, stderr=StringIO())
-                output = proc.stderr.getvalue()
-                if 'Couldn\'t find pool' in output:
-                    continue
-                if proc.exitstatus:
-                    raise Exception("ceph-objectstore-tool apply-layout-settings"
-                                    " failed with {status}".format(status=proc.exitstatus))
+            if not self.ceph_manager.cephadm:
+                for pool in self.ceph_manager.list_pools():
+                    no_sudo_prefix = ' '.join(prefix[1:])
+                    cmd = ("CEPH_ARGS='--filestore-merge-threshold 1 "
+                           "--filestore-split-multiple 1' sudo -E "
+                           + no_sudo_prefix + "--op apply-layout-settings --pool " + pool).format(id=osd)
+                    proc = remote.run(args=cmd, wait=True, check_status=False, stderr=StringIO())
+                    output = proc.stderr.getvalue()
+                    if 'Couldn\'t find pool' in output:
+                        continue
+                    if proc.exitstatus:
+                        raise Exception("ceph-objectstore-tool apply-layout-settings"
+                                        " failed with {status}".format(status=proc.exitstatus))
 
 
     def blackhole_kill_osd(self, osd=None):
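The `apply-layout-settings` loop is skipped for cephadm clusters; on legacy installs it still runs through a shell string so the `CEPH_ARGS` prefix (preserved across `sudo -E`) can inject the filestore split/merge options without touching ceph.conf. A minimal demonstration of that environment-prefix idiom, without the sudo step:

```python
# Minimal demonstration of the CEPH_ARGS='...' command-prefix idiom used
# above: the variable is set only for the child process (sudo -E would
# carry it across the privilege boundary).
import subprocess

cmd = ("CEPH_ARGS='--filestore-merge-threshold 1 "
       "--filestore-split-multiple 1' "
       "python3 -c \"import os; print(os.environ['CEPH_ARGS'])\"")
subprocess.run(cmd, shell=True, check=True)
```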
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py

@@ -825,6 +825,18 @@ def distribute_config_and_admin_keyring(ctx, config):
             '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
         ])
 
+@contextlib.contextmanager
+def crush_setup(ctx, config):
+    cluster_name = config['cluster']
+    first_mon = teuthology.get_first_mon(ctx, config, cluster_name)
+    (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys()
+
+    profile = config.get('crush_tunables', 'default')
+    log.info('Setting crush tunables to %s', profile)
+    _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
+        args=['ceph', 'osd', 'crush', 'tunables', profile])
+    yield
+
 @contextlib.contextmanager
 def task(ctx, config):
     if config is None:
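The new `crush_setup()` reads `crush_tunables` from the task config, defaulting to `default`, and applies it with `ceph osd crush tunables <profile>` on the bootstrap remote. A sketch of how the option is consumed; the dict stands in for what a teuthology fragment such as `crush_tunables: jewel` would yield, and the profile name is only an example:

```python
# How the new config option is consumed; 'jewel' is just one of the
# standard profiles `ceph osd crush tunables` accepts.
config = {'cluster': 'ceph', 'crush_tunables': 'jewel'}
profile = config.get('crush_tunables', 'default')
print(['ceph', 'osd', 'crush', 'tunables', profile])
```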
@@ -908,6 +920,7 @@ def task(ctx, config):
         lambda: ceph_log(ctx=ctx, config=config),
         lambda: ceph_crash(ctx=ctx, config=config),
         lambda: ceph_bootstrap(ctx=ctx, config=config),
+        lambda: crush_setup(ctx=ctx, config=config),
         lambda: ceph_mons(ctx=ctx, config=config),
         lambda: ceph_mgrs(ctx=ctx, config=config),
         lambda: ceph_osds(ctx=ctx, config=config),
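`crush_setup` is slotted into the task's context-manager chain immediately after `ceph_bootstrap`, so the tunables profile is in place before the mon/mgr/osd stages run. A stand-in for that ordered setup/teardown chain, approximating teuthology's `contextutil.nested` with `ExitStack`:

```python
# Approximation of the ordered setup/teardown chain the lambdas above feed
# into: stages are entered top-to-bottom and cleaned up in reverse.
from contextlib import ExitStack, contextmanager

@contextmanager
def stage(name):
    print('setup', name)
    yield
    print('teardown', name)

with ExitStack() as stack:
    for name in ('bootstrap', 'crush_setup', 'mons', 'mgrs', 'osds'):
        stack.enter_context(stage(name))
```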