ceph/tasks/mds_scrub_checks.py
Greg Farnum 8d2715eb45 mds_scrub_checks: only direct ops at the active MDS
Change the config option from mds_id to mds_rank to reflect the
fact that it's the rank we want to make use of (and will continue
to want when we're doing stuff like force exporting from one rank
to another).
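
For example (the surrounding suite YAML here is illustrative), a task
invocation now names the rank rather than the daemon id:

    tasks:
    - mds_scrub_checks:
        mds_rank: 0
        path: path/to/test/dir
        client: 0
        run_seq: 0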

Fixes: #10361

Signed-off-by: Greg Farnum <gfarnum@redhat.com>
2015-01-27 11:17:04 -08:00

"""
MDS admin socket scrubbing-related tests.
"""
from cStringIO import StringIO
import json
import logging
from teuthology.orchestra import run
from teuthology import misc as teuthology
from tasks.cephfs.filesystem import Filesystem
log = logging.getLogger(__name__)
def run_test(ctx, config, filesystem):
"""
    Run flush and scrub commands on the specified files in the filesystem. This
    task runs through a sequence of operations, but it is not comprehensive on
    its own -- it does not manipulate the MDS cache state to exercise both the
    in-memory and out-of-memory parts of the hierarchy. It is therefore
    designed to be invoked multiple times within a single test run, so that the
    surrounding test can manipulate memory state between invocations.
Usage:
mds_scrub_checks:
mds_rank: 0
path: path/to/test/dir
client: 0
run_seq: [0-9]+
Increment the run_seq on subsequent invocations within a single test run;
it uses that value to generate unique folder and file names.
"""
mds_rank = config.get("mds_rank")
test_path = config.get("path")
run_seq = config.get("run_seq")
client_id = config.get("client")
    if (mds_rank is None or test_path is None or
            run_seq is None or client_id is None):
        raise ValueError("Must specify each of mds_rank, path, run_seq, "
                         "client in config!")
teuthdir = teuthology.get_testdir(ctx)
client_path = "{teuthdir}/mnt.{id_}/{test_path}".\
format(teuthdir=teuthdir,
id_=client_id,
test_path=test_path)
log.info("Cloning repo into place (if not present)")
repo_path = clone_repo(ctx, client_id, client_path)
log.info("Initiating mds_scrub_checks on mds.{id_}, "
"test_path {path}, run_seq {seq}".format(
id_=mds_rank, path=test_path, seq=run_seq))
    def json_validator(json_out, rc, element, expected_value):
        """Check that the asok command succeeded and that the named element
        of its JSON output has the expected value."""
        if rc != 0:
            return False, "asok command returned error {rc}".format(rc=rc)
        element_value = json_out.get(element)
        if element_value != expected_value:
            return False, "unexpectedly got {jv} instead of {ev}!".format(
                jv=element_value, ev=expected_value)
        return True, "Succeeded"
success_validator = lambda j, r: json_validator(j, r, "return_code", 0)
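    # Flushing or scrubbing a path that does not exist should fail with
    # -ENOENT (-2).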
nep = "{test_path}/i/dont/exist".format(test_path=test_path)
command = "flush_path {nep}".format(nep=nep)
asok_command(ctx, mds_rank, command,
lambda j, r: json_validator(j, r, "return_code", -2),
filesystem)
command = "scrub_path {nep}".format(nep=nep)
asok_command(ctx, mds_rank, command,
lambda j, r: json_validator(j, r, "return_code", -2),
filesystem)
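    # Paths handed to the MDS below are relative to the filesystem root,
    # unlike client_path above, which is a path on the client's mount.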
test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=test_path)
dirpath = "{repo_path}/suites".format(repo_path=test_repo_path)
    if run_seq == 0:
log.info("First run: flushing {dirpath}".format(dirpath=dirpath))
command = "flush_path {dirpath}".format(dirpath=dirpath)
asok_command(ctx, mds_rank, command, success_validator, filesystem)
command = "scrub_path {dirpath}".format(dirpath=dirpath)
asok_command(ctx, mds_rank, command, success_validator, filesystem)
filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format(
repo_path=test_repo_path)
    if run_seq == 0:
log.info("First run: flushing {filepath}".format(filepath=filepath))
command = "flush_path {filepath}".format(filepath=filepath)
asok_command(ctx, mds_rank, command, success_validator, filesystem)
command = "scrub_path {filepath}".format(filepath=filepath)
asok_command(ctx, mds_rank, command, success_validator, filesystem)
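    # Scrub a file we have not flushed; the MDS is expected to report
    # "performed_validation": false for it.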
filepath = "{repo_path}/suites/fs/basic/clusters/fixed-3-cephfs.yaml".\
format(repo_path=test_repo_path)
command = "scrub_path {filepath}".format(filepath=filepath)
asok_command(ctx, mds_rank, command,
lambda j, r: json_validator(j, r, "performed_validation",
False),
filesystem)
    if run_seq == 0:
log.info("First run: flushing base dir /")
command = "flush_path /"
asok_command(ctx, mds_rank, command, success_validator, filesystem)
command = "scrub_path /"
asok_command(ctx, mds_rank, command, success_validator, filesystem)
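    # Create a brand-new directory and file through the client and verify
    # that flush_path succeeds on each of them.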
client = ctx.manager.find_remote("client", client_id)
new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq)
test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path,
i=run_seq)
client.run(args=[
"mkdir", new_dir])
command = "flush_path {dir}".format(dir=test_new_dir)
asok_command(ctx, mds_rank, command, success_validator, filesystem)
new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path,
i=run_seq)
test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path,
i=run_seq)
client.run(args=[
"echo", "hello", run.Raw('>'), new_file])
command = "flush_path {file}".format(file=test_new_file)
asok_command(ctx, mds_rank, command, success_validator, filesystem)
# check that scrub fails on errors. First, get ino
    client = ctx.manager.find_remote("client", client_id)
proc = client.run(
args=[
"ls", "-li", new_file, run.Raw('|'),
"grep", "-o", run.Raw('"^[0-9]*"')
],
wait=False,
stdout=StringIO()
)
proc.wait()
ino = int(proc.stdout.getvalue().strip())
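    # The first data object backing an inode is named <ino in hex>.00000000.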
rados_obj_name = "{ino}.00000000".format(ino=hex(ino).split('x')[1])
client.run(
args=[
"rados", "-p", "data", "rmxattr",
rados_obj_name, "parent"
]
)
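    # With the backtrace ("parent" xattr) gone, the scrub should fail with
    # -ENODATA (-61).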
command = "scrub_path {file}".format(file=test_new_file)
asok_command(ctx, mds_rank, command,
lambda j, r: json_validator(j, r, "return_code", -61), filesystem)
client.run(
args=[
"rados", "-p", "data", "rm", rados_obj_name
]
)
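    # With the object removed entirely, the same scrub should now fail
    # with -ENOENT (-2).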
asok_command(ctx, mds_rank, command,
lambda j, r: json_validator(j, r, "return_code", -2), filesystem)
command = "flush_path /"
asok_command(ctx, mds_rank, command, success_validator, filesystem)
class AsokCommandFailedError(Exception):
"""
Exception thrown when we get an unexpected response
on an admin socket command
"""
def __init__(self, command, rc, json, errstring):
self.command = command
self.rc = rc
self.json = json
self.errstring = errstring
    def __str__(self):
        return ("Admin socket: {command} failed with rc={rc}, "
                "json output={json}, because '{es}'".format(
                    command=self.command, rc=self.rc,
                    json=self.json, es=self.errstring))
def asok_command(ctx, mds_rank, command, validator, filesystem):
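    """
    Run the given command against the admin socket of the MDS currently
    active at mds_rank, check the response with validator, and return the
    decoded JSON output (None if the command produced no output).
    """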
log.info("Running command '{command}'".format(command=command))
command_list = command.split()
# we just assume there's an active mds for every rank
mds_id = filesystem.get_active_names()[mds_rank]
proc = ctx.manager.admin_socket('mds', mds_id,
command_list, check_status=False)
rout = proc.exitstatus
sout = proc.stdout.getvalue()
if sout.strip():
jout = json.loads(sout)
else:
jout = None
log.info("command '{command}' got response code "
"'{rout}' and stdout '{sout}'".format(
command=command, rout=rout, sout=sout))
success, errstring = validator(jout, rout)
if not success:
raise AsokCommandFailedError(command, rout, jout, errstring)
return jout
def clone_repo(ctx, client_id, path):
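    """
    Clone the ceph-qa-suite repo (giant branch) into path on the given
    client if it is not already present, and return the resulting path.
    """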
repo = "ceph-qa-suite"
repo_path = "{path}/{repo}".format(path=path, repo=repo)
client = ctx.manager.find_remote("client", client_id)
client.run(
args=[
"mkdir", "-p", path
]
)
client.run(
args=[
"ls", repo_path, run.Raw('||'),
"git", "clone", '--branch', 'giant',
"http://github.com/ceph/{repo}".format(repo=repo),
"{path}/{repo}".format(path=path, repo=repo)
]
)
return repo_path
def task(ctx, config):
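    """
    Entry point: build a Filesystem handle for the cluster and run one
    sequence of scrub checks against it (see run_test for the config).
    """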
fs = Filesystem(ctx)
run_test(ctx, config, fs)