"""
Thrash -- Simulate random osd failures.
"""
import contextlib
import logging

import ceph_manager
from teuthology import misc as teuthology

log = logging.getLogger(__name__)


@contextlib.contextmanager
def task(ctx, config):
    """
    "Thrash" the OSDs by randomly marking them out/down (and then back
    in) until the task is ended. This loops, and every op_delay seconds
    it randomly chooses to mark an OSD out or back in (even odds),
    subject to keeping at least min_in OSDs in the cluster and at least
    min_out OSDs out of the cluster.

    All commands are run on mon0, and thrashing stops when __exit__ is
    called.

    The config is optional, and is a dict containing some or all of:

    cluster: (default 'ceph') the name of the cluster to thrash

    min_in: (default 3) the minimum number of OSDs to keep in the
       cluster

    min_out: (default 0) the minimum number of OSDs to keep out of the
       cluster

    op_delay: (5) the length of time to sleep between changing an
       OSD's status

    min_dead: (0) minimum number of osds to leave down/dead.

    max_dead: (0) maximum number of osds to leave down/dead before waiting
       for clean. This should probably be num_replicas - 1.

    clean_interval: (60) the approximate length of time to loop before
       waiting until the cluster goes clean. (In reality this is used
       to probabilistically choose when to wait, and the method used
       makes it closer to -- but not identical to -- the half-life.)

    scrub_interval: (-1) the approximate length of time to loop before
       waiting until a scrub is performed while cleaning. (In reality
       this is used to probabilistically choose when to wait, and it
       only applies to the cases where cleaning is being performed.)
       -1 is used to indicate that no scrubbing will be done.

    chance_down: (0.4) the probability that the thrasher will mark an
       OSD down rather than marking it out. (The thrasher will not
       consider that OSD out of the cluster, since presently an OSD
       wrongly marked down will mark itself back up again.) This value
       can be either an integer (e.g. 75) or a float probability (e.g.
       0.75).

    chance_test_min_size: (0) chance to run test_pool_min_size, which:
       - kills all but one osd
       - waits
       - kills that osd
       - revives all other osds
       - verifies that the osds fully recover

    timeout: (360) the number of seconds to wait for the cluster
       to become clean after each cluster change. If this doesn't
       happen within the timeout, an exception will be raised.

    revive_timeout: (150) number of seconds to wait for an osd asok to
       appear after attempting to revive the osd

    thrash_primary_affinity: (true) randomly adjust primary-affinity

    chance_pgnum_grow: (0) chance to increase a pool's size
    chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool
    pool_grow_by: (10) amount to increase pgnum by
    max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd

    pause_short: (3) duration of short pause
    pause_long: (80) duration of long pause
    pause_check_after: (50) assert osd down after this long
    chance_inject_pause_short: (1) chance of injecting short stall
    chance_inject_pause_long: (0) chance of injecting long stall

    clean_wait: (0) duration to wait before resuming thrashing once clean

    sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
       random live osd

    powercycle: (false) whether to power cycle the node instead
       of just the osd process. Note that this assumes that a single
       osd is the only important process on the node.

    chance_test_backfill_full: (0) chance to simulate full disks stopping
       backfill

    chance_test_map_discontinuity: (0) chance to test map discontinuity
    map_discontinuity_sleep_time: (40) time to wait for map trims

    ceph_objectstore_tool: (true) whether to export/import a pg while an osd
       is down

    chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down
       (default 100%)

    optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
       enablement to all osds

    dump_ops_enable: (true) continuously dump ops on all live osds

    noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub

    example:

    tasks:
    - ceph:
    - thrashosds:
        cluster: ceph
        chance_down: 10
        op_delay: 3
        min_in: 1
        timeout: 600
    - interactive:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'thrashosds task only accepts a dict for configuration'
    # add default values for the continuously-running background behaviors
    config['sighup_delay'] = config.get('sighup_delay', 0.1)
    config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
    config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
    config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)
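    # fold any per-suite 'overrides: thrashosds:' settings from the job yaml
    # into this task's config; deep_merge lets the override values win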
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('thrashosds', {}))
    cluster = config.get('cluster', 'ceph')
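
    # powercycle mode needs extra preparation: flush dirty data everywhere
    # and make sure every osd host has a usable IPMI console before thrashing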
    if 'powercycle' in config:

        # sync everyone first to avoid collateral damage to / etc.
        log.info('Doing preliminary sync to avoid collateral damage...')
        ctx.cluster.run(args=['sync'])
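
        # attach an IPMI remote console to each target's remote so the
        # thrasher can power the osd nodes off and back on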
        if 'ipmi_user' in ctx.teuthology_config:
            for t, key in ctx.config['targets'].iteritems():
                host = t.split('@')[-1]
                shortname = host.split('.')[0]
                from teuthology.orchestra import remote as oremote
                console = oremote.getRemoteConsole(
                    name=host,
                    ipmiuser=ctx.teuthology_config['ipmi_user'],
                    ipmipass=ctx.teuthology_config['ipmi_password'],
                    ipmidomain=ctx.teuthology_config['ipmi_domain'])
                cname = '{host}.{domain}'.format(
                    host=shortname,
                    domain=ctx.teuthology_config['ipmi_domain'])
                log.debug('checking console status of %s' % cname)
                if not console.check_status():
                    log.info(
                        'Failed to get console status for '
                        '%s, disabling console...'
                        % cname)
                    console = None
                else:
                    # find the remote for this console and add it
                    remotes = [
                        r for r in ctx.cluster.remotes.keys() if r.name == t]
                    if len(remotes) != 1:
                        raise Exception(
                            'Too many (or too few) remotes '
                            'found for target {t}'.format(t=t))
                    remotes[0].console = console
                    log.debug('console ready on %s' % cname)

            # check that all osd remotes have a valid console
            osds = ctx.cluster.only(teuthology.is_type('osd', cluster))
            for remote, _ in osds.remotes.iteritems():
                if not remote.console:
                    raise Exception(
                        'IPMI console required for powercycling, '
                        'but not available on osd role: {r}'.format(
                            r=remote.name))

    log.info('Beginning thrashosds...')
    cluster_manager = ctx.managers[cluster]
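    # constructing the Thrasher kicks off osd thrashing in the background;
    # it keeps running while the nested tasks execute during the yield below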
    thrash_proc = ceph_manager.Thrasher(
        cluster_manager,
        config,
        logger=log.getChild('thrasher')
        )
    try:
        yield
    finally:
        log.info('joining thrashosds')
        thrash_proc.do_join()
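        # once the thrasher has stopped, wait for the cluster to finish
        # recovering before the task exits, reusing the configured 'timeout'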
        cluster_manager.wait_for_recovery(config.get('timeout', 360))