mirror of
https://github.com/ceph/ceph
synced 2025-03-25 11:48:05 +00:00
repair_test: add test for repairing read errs and truncations
Signed-off-by: Samuel Just <sam.just@inktank.com> Reviewed-by: Greg Farnum <greg@inktank.com>
This commit is contained in:
parent
2a1cdda90d
commit
d81babffe5
@ -314,6 +314,30 @@ class CephManager:
|
||||
)
|
||||
return proc
|
||||
|
||||
def do_put(self, pool, obj, fname):
    """Upload local file *fname* into object *obj* in *pool*.

    Thin wrapper that runs ``rados -p <pool> put <obj> <fname>`` on the
    controller via do_rados and returns its proc result.
    """
    put_args = ['-p', pool, 'put', obj, fname]
    return self.do_rados(self.controller, put_args)
|
||||
|
||||
def do_get(self, pool, obj, fname='/dev/null'):
    """Download object *obj* from *pool* into local file *fname*.

    Fix: the original invoked the ``stat`` subcommand, which neither
    downloads the object nor accepts an output filename; ``get`` is the
    correct counterpart to do_put (default sink /dev/null just verifies
    readability).
    """
    return self.do_rados(
        self.controller,
        [
            '-p',
            pool,
            'get',
            obj,
            fname
            ]
        )
|
||||
|
||||
def osd_admin_socket(self, osdnum, command, check_status=True):
|
||||
testdir = teuthology.get_testdir(self.ctx)
|
||||
remote = None
|
||||
@ -339,14 +363,32 @@ class CephManager:
|
||||
check_status=check_status
|
||||
)
|
||||
|
||||
def get_pgid(self, pool, pgnum):
    """Return the pgid string '<poolnum>.<pgnum>' for (pool, pgnum)."""
    return "{0}.{1}".format(self.get_pool_num(pool), pgnum)
|
||||
|
||||
def get_pg_replica(self, pool, pgnum):
    """
    get replica for pool, pgnum (e.g. (data, 0)->0

    Dumps the pg map as JSON (skipping the first, non-JSON, output line)
    and returns the last osd in the matching pg's acting set.  Asserts
    if the pgid is not found in the dump.
    """
    raw = self.raw_cluster_cmd("pg", "dump", '--format=json')
    # first line of the dump is a human-readable header; drop it
    dump = json.loads('\n'.join(raw.split('\n')[1:]))
    target = self.get_pgid(pool, pgnum)
    for entry in dump['pg_stats']:
        if entry['pgid'] == target:
            return int(entry['acting'][-1])
    assert False
|
||||
|
||||
def get_pg_primary(self, pool, pgnum):
|
||||
"""
|
||||
get primary for pool, pgnum (e.g. (data, 0)->0
|
||||
"""
|
||||
poolnum = self.get_pool_num(pool)
|
||||
output = self.raw_cluster_cmd("pg", "dump", '--format=json')
|
||||
j = json.loads('\n'.join(output.split('\n')[1:]))
|
||||
pg_str = "%d.%d" % (poolnum, pgnum)
|
||||
pg_str = self.get_pgid(pool, pgnum)
|
||||
for pg in j['pg_stats']:
|
||||
if pg['pgid'] == pg_str:
|
||||
return int(pg['acting'][0])
|
||||
@ -554,6 +596,32 @@ class CephManager:
|
||||
ret[status] += 1
|
||||
return ret
|
||||
|
||||
def pg_scrubbing(self, pool, pgnum):
    """True if the pg for (pool, pgnum) currently reports a scrub state."""
    pg_state = self.get_single_pg_stats(self.get_pgid(pool, pgnum))['state']
    return 'scrub' in pg_state
|
||||
|
||||
def pg_repairing(self, pool, pgnum):
    """True if the pg for (pool, pgnum) currently reports a repair state."""
    pg_state = self.get_single_pg_stats(self.get_pgid(pool, pgnum))['state']
    return 'repair' in pg_state
|
||||
|
||||
def pg_inconsistent(self, pool, pgnum):
    """True if the pg for (pool, pgnum) reports an inconsistent state."""
    pg_state = self.get_single_pg_stats(self.get_pgid(pool, pgnum))['state']
    return 'inconsistent' in pg_state
|
||||
|
||||
def get_last_scrub_stamp(self, pool, pgnum):
    """Return the last_scrub_stamp field of the pg for (pool, pgnum)."""
    pgid = self.get_pgid(pool, pgnum)
    return self.get_single_pg_stats(pgid)["last_scrub_stamp"]
|
||||
|
||||
def do_pg_scrub(self, pool, pgnum, stype):
    """Kick off a scrub of type *stype* ('scrub', 'deep-scrub' or
    'repair') on the pg for (pool, pgnum), then block until the pg's
    last_scrub_stamp advances past its pre-scrub value, polling every
    ten seconds.
    """
    stamp_before = self.get_last_scrub_stamp(pool, pgnum)
    self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum))
    while self.get_last_scrub_stamp(pool, pgnum) == stamp_before:
        self.log("waiting for scrub type %s" % (stype,))
        time.sleep(10)
|
||||
|
||||
def get_single_pg_stats(self, pgid):
|
||||
all_stats = self.get_pg_stats()
|
||||
|
||||
|
118
teuthology/task/repair_test.py
Normal file
118
teuthology/task/repair_test.py
Normal file
@ -0,0 +1,118 @@
|
||||
import logging
|
||||
|
||||
import ceph_manager
|
||||
from teuthology import misc as teuthology
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
def setup(ctx, config):
|
||||
ctx.manager.wait_for_clean()
|
||||
ctx.manager.create_pool("repair_test_pool", 1)
|
||||
return "repair_test_pool"
|
||||
|
||||
def teardown(ctx, config, pool):
|
||||
ctx.manager.remove_pool(pool)
|
||||
ctx.manager.wait_for_clean()
|
||||
|
||||
def run_test(ctx, config, test):
    """Create the scratch pool, run one repair *test* against it, then
    tear the pool down again."""
    pool = setup(ctx, config)
    test(ctx, config, pool)
    teardown(ctx, config, pool)
|
||||
|
||||
def choose_primary(ctx):
    """Return a chooser fn mapping (pool, pgnum) -> the pg's primary osd."""
    def chooser(pool, num):
        log.info("Choosing primary")
        return ctx.manager.get_pg_primary(pool, num)
    return chooser
|
||||
|
||||
def choose_replica(ctx):
    """Return a chooser fn mapping (pool, pgnum) -> a replica osd."""
    def chooser(pool, num):
        log.info("Choosing replica")
        return ctx.manager.get_pg_replica(pool, num)
    return chooser
|
||||
|
||||
def trunc(ctx):
    """Return a corrupter fn that truncates *obj* in *pool* to 1 byte on
    *osd* via the osd admin socket ('truncobj')."""
    def corrupter(osd, pool, obj):
        log.info("truncating object")
        cmd = ['truncobj', pool, obj, '1']
        return ctx.manager.osd_admin_socket(osd, cmd)
    return corrupter
|
||||
|
||||
def dataerr(ctx):
    """Return a corrupter fn that injects a data read error for *obj* on
    *osd* via the osd admin socket ('injectdataerr')."""
    def corrupter(osd, pool, obj):
        log.info("injecting data err on object")
        cmd = ['injectdataerr', pool, obj]
        return ctx.manager.osd_admin_socket(osd, cmd)
    return corrupter
|
||||
|
||||
def mdataerr(ctx):
    """Return a corrupter fn that injects a metadata read error for *obj*
    on *osd* via the osd admin socket ('injectmdataerr')."""
    def corrupter(osd, pool, obj):
        log.info("injecting mdata err on object")
        cmd = ['injectmdataerr', pool, obj]
        return ctx.manager.osd_admin_socket(osd, cmd)
    return corrupter
|
||||
|
||||
def gen_repair_test(corrupter, chooser, scrub_type):
    """Build one repair-test closure.

    chooser(pool, pgnum) picks the victim osd, corrupter(osd, pool, obj)
    injects the damage, and *scrub_type* ('scrub' or 'deep-scrub') is the
    scrub used to detect it.  The returned test writes an object,
    corrupts it, asserts the pg goes inconsistent, runs a repair, and
    asserts the pg is consistent afterwards.
    """
    def test(ctx, config, pool):
        log.info("starting repair test")
        victim_osd = chooser(pool, 0)

        # create object
        log.info("doing put")
        ctx.manager.do_put(pool, 'repair_test_obj', '/etc/hosts')

        # corrupt object
        log.info("corrupting object")
        corrupter(victim_osd, pool, 'repair_test_obj')

        # verify inconsistent
        log.info("scrubbing")
        ctx.manager.do_pg_scrub(pool, 0, scrub_type)
        assert ctx.manager.pg_inconsistent(pool, 0)

        # repair
        log.info("repairing")
        ctx.manager.do_pg_scrub(pool, 0, "repair")

        # a plain re-scrub refreshes the pg state after the repair
        log.info("re-scrubbing")
        ctx.manager.do_pg_scrub(pool, 0, scrub_type)

        # verify consistent
        assert not ctx.manager.pg_inconsistent(pool, 0)
        log.info("done")
    return test
|
||||
|
||||
def task(ctx, config):
    """
    Test [deep] repair in several situations:
    Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica]
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'repair_test task only accepts a dict for config'

    # Build a CephManager only if an earlier task has not already done so.
    if not hasattr(ctx, 'manager'):
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
        ctx.manager = ceph_manager.CephManager(
            mon,
            ctx=ctx,
            logger=log.getChild('ceph_manager')
            )

    # (corrupter factory, chooser factory, scrub type) for each scenario:
    # metadata errors are caught by a plain scrub, data errors need a
    # deep scrub, truncation is visible to a plain scrub.
    cases = [
        (mdataerr, choose_primary, "scrub"),
        (mdataerr, choose_replica, "scrub"),
        (dataerr, choose_primary, "deep-scrub"),
        (dataerr, choose_replica, "deep-scrub"),
        (trunc, choose_primary, "scrub"),
        (trunc, choose_replica, "scrub"),
        ]
    for corrupter, chooser, stype in cases:
        run_test(ctx, config,
                 gen_repair_test(corrupter(ctx), chooser(ctx), stype))
|
Loading…
Reference in New Issue
Block a user