2013-10-12 08:28:27 +00:00
"""
Thrash mds by simulating failures
"""
2013-01-09 22:02:42 +00:00
import logging
import contextlib
import ceph_manager
2017-01-27 17:53:56 +00:00
import itertools
2013-01-09 22:02:42 +00:00
import random
import time
2016-01-08 10:23:20 +00:00
2017-02-02 00:20:35 +00:00
from gevent import sleep
2014-02-27 14:17:54 +00:00
from gevent . greenlet import Greenlet
from gevent . event import Event
2013-01-09 22:02:42 +00:00
from teuthology import misc as teuthology
2016-01-08 10:23:20 +00:00
from tasks . cephfs . filesystem import MDSCluster , Filesystem
2019-08-05 10:52:10 +00:00
from tasks . thrasher import Thrasher
2016-01-08 10:23:20 +00:00
2013-01-09 22:02:42 +00:00
log = logging . getLogger ( __name__ )
2019-08-23 09:59:27 +00:00
class MDSThrasher ( Greenlet , Thrasher ) :
2014-02-27 14:09:52 +00:00
"""
MDSThrasher : :
2013-01-09 22:02:42 +00:00
2014-02-27 14:09:52 +00:00
The MDSThrasher thrashes MDSs during execution of other tasks ( workunits , etc ) .
2013-01-09 22:02:42 +00:00
2014-02-27 14:09:52 +00:00
The config is optional . Many of the config parameters are a a maximum value
to use when selecting a random value from a range . To always use the maximum
value , set no_random to true . The config is a dict containing some or all of :
2013-01-09 22:02:42 +00:00
2016-08-30 21:23:46 +00:00
max_thrash : [ default : 1 ] the maximum number of active MDSs per FS that will be thrashed at
2014-02-27 14:09:52 +00:00
any given time .
2013-01-09 22:02:42 +00:00
2014-02-27 14:09:52 +00:00
max_thrash_delay : [ default : 30 ] maximum number of seconds to delay before
thrashing again .
2013-01-09 22:02:42 +00:00
2016-08-30 21:23:46 +00:00
max_replay_thrash_delay : [ default : 4 ] maximum number of seconds to delay while in
the replay state before thrashing .
2014-02-27 14:09:52 +00:00
max_revive_delay : [ default : 10 ] maximum number of seconds to delay before
2016-08-30 21:23:46 +00:00
bringing back a thrashed MDS .
2014-02-27 14:17:54 +00:00
2016-08-30 21:23:46 +00:00
randomize : [ default : true ] enables randomization and use the max / min values
2013-01-09 22:02:42 +00:00
2016-08-30 21:23:46 +00:00
seed : [ no default ] seed the random number generator
2013-01-09 22:02:42 +00:00
2016-08-30 21:23:46 +00:00
thrash_in_replay : [ default : 0.0 ] likelihood that the MDS will be thrashed
during replay . Value should be between 0.0 and 1.0 .
2017-07-07 05:25:48 +00:00
thrash_max_mds : [ default : 0.05 ] likelihood that the max_mds of the mds
2016-08-30 21:23:46 +00:00
cluster will be modified to a value [ 1 , current ) or ( current , starting
2018-04-14 00:22:29 +00:00
max_mds ] . Value should be between 0.0 and 1.0 .
2016-08-30 21:23:46 +00:00
2017-01-26 23:54:18 +00:00
thrash_while_stopping : [ default : false ] thrash an MDS while there
are MDS in up : stopping ( because max_mds was changed and some
MDS were deactivated ) .
2016-08-30 21:23:46 +00:00
thrash_weights : allows specific MDSs to be thrashed more / less frequently .
This option overrides anything specified by max_thrash . This option is a
dict containing mds . x : weight pairs . For example , [ mds . a : 0.7 , mds . b :
0.3 , mds . c : 0.0 ] . Each weight is a value from 0.0 to 1.0 . Any MDSs not
specified will be automatically given a weight of 0.0 ( not thrashed ) .
For a given MDS , by default the trasher delays for up to
max_thrash_delay , trashes , waits for the MDS to recover , and iterates .
If a non - zero weight is specified for an MDS , for each iteration the
thrasher chooses whether to thrash during that iteration based on a
random value [ 0 - 1 ] not exceeding the weight of that MDS .
2013-01-09 22:02:42 +00:00
2014-02-27 14:09:52 +00:00
Examples : :
2013-01-09 22:02:42 +00:00
2014-02-27 14:09:52 +00:00
The following example sets the likelihood that mds . a will be thrashed
to 80 % , mds . b to 20 % , and other MDSs will not be thrashed . It also sets the
likelihood that an MDS will be thrashed in replay to 40 % .
Thrash weights do not have to sum to 1.
2013-01-09 22:02:42 +00:00
2014-02-27 14:09:52 +00:00
tasks :
- ceph :
- mds_thrash :
thrash_weights :
- mds . a : 0.8
- mds . b : 0.2
thrash_in_replay : 0.4
- ceph - fuse :
- workunit :
clients :
all : [ suites / fsx . sh ]
2013-01-09 22:02:42 +00:00
2014-02-27 14:09:52 +00:00
The following example disables randomization , and uses the max delay values :
2013-01-09 22:02:42 +00:00
2014-02-27 14:09:52 +00:00
tasks :
- ceph :
- mds_thrash :
max_thrash_delay : 10
max_revive_delay : 1
max_replay_thrash_delay : 4
2013-01-09 22:02:42 +00:00
2013-10-12 08:28:27 +00:00
"""
2013-01-09 22:02:42 +00:00
2017-02-04 17:00:45 +00:00
def __init__ ( self , ctx , manager , config , fs , max_mds ) :
2019-08-05 10:52:10 +00:00
super ( MDSThrasher , self ) . __init__ ( )
2014-02-27 14:17:54 +00:00
2016-08-30 21:23:46 +00:00
self . config = config
2017-02-04 17:00:45 +00:00
self . ctx = ctx
self . logger = log . getChild ( ' fs.[ {f} ] ' . format ( f = fs . name ) )
2016-08-30 21:23:46 +00:00
self . fs = fs
2017-02-04 17:00:45 +00:00
self . manager = manager
2016-08-30 21:23:46 +00:00
self . max_mds = max_mds
2017-02-04 17:00:45 +00:00
self . name = ' thrasher.fs.[ {f} ] ' . format ( f = fs . name )
2014-02-27 14:17:54 +00:00
self . stopping = Event ( )
2014-02-27 14:09:52 +00:00
self . randomize = bool ( self . config . get ( ' randomize ' , True ) )
2017-07-07 05:25:48 +00:00
self . thrash_max_mds = float ( self . config . get ( ' thrash_max_mds ' , 0.05 ) )
2016-08-30 21:23:46 +00:00
self . max_thrash = int ( self . config . get ( ' max_thrash ' , 1 ) )
self . max_thrash_delay = float ( self . config . get ( ' thrash_delay ' , 120.0 ) )
2014-02-27 14:09:52 +00:00
self . thrash_in_replay = float ( self . config . get ( ' thrash_in_replay ' , False ) )
assert self . thrash_in_replay > = 0.0 and self . thrash_in_replay < = 1.0 , ' thrash_in_replay ( {v} ) must be between [0.0, 1.0] ' . format (
v = self . thrash_in_replay )
self . max_replay_thrash_delay = float ( self . config . get ( ' max_replay_thrash_delay ' , 4.0 ) )
self . max_revive_delay = float ( self . config . get ( ' max_revive_delay ' , 10.0 ) )
2014-02-27 14:17:54 +00:00
def _run ( self ) :
try :
self . do_thrash ( )
2017-02-04 17:00:45 +00:00
except Exception as e :
# Log exceptions here so we get the full backtrace (gevent loses them).
2018-03-10 14:36:31 +00:00
# Also allow successful completion as gevent exception handling is a broken mess:
2017-02-04 17:00:45 +00:00
#
# 2017-02-03T14:34:01.259 CRITICAL:root: File "gevent.libev.corecext.pyx", line 367, in gevent.libev.corecext.loop.handle_error (src/gevent/libev/gevent.corecext.c:5051)
# File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 558, in handle_error
# self.print_exception(context, type, value, tb)
# File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 605, in print_exception
# traceback.print_exception(type, value, tb, file=errstream)
# File "/usr/lib/python2.7/traceback.py", line 124, in print_exception
# _print(file, 'Traceback (most recent call last):')
# File "/usr/lib/python2.7/traceback.py", line 13, in _print
# file.write(str+terminator)
# 2017-02-03T14:34:01.261 CRITICAL:root:IOError
2019-08-05 10:52:10 +00:00
self . exception = e
2017-02-04 17:00:45 +00:00
self . logger . exception ( " exception: " )
# allow successful completion so gevent doesn't see an exception...
2014-02-27 14:17:54 +00:00
2014-02-27 14:09:52 +00:00
def log ( self , x ) :
2019-08-20 09:49:55 +00:00
""" Write data to the logger assigned to MDSThrasher """
2014-02-27 14:09:52 +00:00
self . logger . info ( x )
2014-02-27 14:17:54 +00:00
def stop ( self ) :
self . stopping . set ( )
2014-02-27 14:09:52 +00:00
2016-01-11 19:43:40 +00:00
def kill_mds ( self , mds ) :
if self . config . get ( ' powercycle ' ) :
( remote , ) = ( self . ctx . cluster . only ( ' mds. {m} ' . format ( m = mds ) ) .
remotes . iterkeys ( ) )
self . log ( ' kill_mds on mds. {m} doing powercycle of {s} ' .
format ( m = mds , s = remote . name ) )
2016-08-24 17:10:08 +00:00
self . _assert_ipmi ( remote )
2016-01-11 19:43:40 +00:00
remote . console . power_off ( )
else :
self . ctx . daemons . get_daemon ( ' mds ' , mds ) . stop ( )
2016-08-24 17:10:08 +00:00
@staticmethod
def _assert_ipmi ( remote ) :
assert remote . console . has_ipmi_credentials , (
" powercycling requested but RemoteConsole is not "
" initialized. Check ipmi config. " )
2019-02-25 19:10:01 +00:00
def revive_mds ( self , mds ) :
2016-01-11 19:43:40 +00:00
"""
Revive mds - - do an ipmpi powercycle ( if indicated by the config )
2019-02-25 19:10:01 +00:00
and then restart .
2016-01-11 19:43:40 +00:00
"""
if self . config . get ( ' powercycle ' ) :
( remote , ) = ( self . ctx . cluster . only ( ' mds. {m} ' . format ( m = mds ) ) .
remotes . iterkeys ( ) )
self . log ( ' revive_mds on mds. {m} doing powercycle of {s} ' .
format ( m = mds , s = remote . name ) )
2016-08-24 17:10:08 +00:00
self . _assert_ipmi ( remote )
2016-01-11 19:43:40 +00:00
remote . console . power_on ( )
self . manager . make_admin_daemon_dir ( self . ctx , remote )
args = [ ]
self . ctx . daemons . get_daemon ( ' mds ' , mds ) . restart ( * args )
2016-08-30 21:23:46 +00:00
def wait_for_stable ( self , rank = None , gid = None ) :
self . log ( ' waiting for mds cluster to stabilize... ' )
2017-01-27 17:53:56 +00:00
for itercount in itertools . count ( ) :
2017-01-26 05:57:40 +00:00
status = self . fs . status ( )
2016-08-30 21:23:46 +00:00
max_mds = status . get_fsmap ( self . fs . id ) [ ' mdsmap ' ] [ ' max_mds ' ]
2017-01-26 05:57:40 +00:00
ranks = list ( status . get_ranks ( self . fs . id ) )
2017-01-26 23:54:18 +00:00
stopping = filter ( lambda info : " up:stopping " == info [ ' state ' ] , ranks )
2017-01-26 05:57:40 +00:00
actives = filter ( lambda info : " up:active " == info [ ' state ' ] and " laggy_since " not in info , ranks )
2017-01-26 23:54:18 +00:00
if not bool ( self . config . get ( ' thrash_while_stopping ' , False ) ) and len ( stopping ) > 0 :
if itercount % 5 == 0 :
self . log ( ' cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping) ' )
2016-08-30 21:23:46 +00:00
else :
2017-01-26 23:54:18 +00:00
if rank is not None :
try :
info = status . get_rank ( self . fs . id , rank )
if info [ ' gid ' ] != gid and " up:active " == info [ ' state ' ] :
self . log ( ' mds. {name} has gained rank= {rank} , replacing gid= {gid} ' . format ( name = info [ ' name ' ] , rank = rank , gid = gid ) )
return status
except :
pass # no rank present
2017-02-04 17:00:45 +00:00
if len ( actives ) > = max_mds :
# no replacement can occur!
self . log ( " cluster has %d actives (max_mds is %d ), no MDS can replace rank %d " . format ( len ( actives ) , max_mds , rank ) )
return status
2017-01-26 23:54:18 +00:00
else :
2018-04-14 00:22:29 +00:00
if len ( actives ) == max_mds :
2017-01-26 23:54:18 +00:00
self . log ( ' mds cluster has {count} alive and active, now stable! ' . format ( count = len ( actives ) ) )
return status , None
2017-01-26 05:25:48 +00:00
if itercount > 300 / 2 : # 5 minutes
raise RuntimeError ( ' timeout waiting for cluster to stabilize ' )
2017-01-26 23:54:18 +00:00
elif itercount % 5 == 0 :
2017-02-04 17:00:45 +00:00
self . log ( ' mds map: {status} ' . format ( status = status ) )
else :
self . log ( ' no change ' )
2017-02-02 00:20:35 +00:00
sleep ( 2 )
2016-01-11 19:43:40 +00:00
2014-02-27 14:09:52 +00:00
def do_thrash ( self ) :
"""
Perform the random thrashing action
"""
2016-01-08 10:23:20 +00:00
2016-08-30 21:23:46 +00:00
self . log ( ' starting mds_do_thrash for fs {fs} ' . format ( fs = self . fs . name ) )
stats = {
" max_mds " : 0 ,
" deactivate " : 0 ,
" kill " : 0 ,
}
2014-02-27 14:17:54 +00:00
while not self . stopping . is_set ( ) :
2014-02-27 14:09:52 +00:00
delay = self . max_thrash_delay
if self . randomize :
delay = random . randrange ( 0.0 , self . max_thrash_delay )
if delay > 0.0 :
self . log ( ' waiting for {delay} secs before thrashing ' . format ( delay = delay ) )
2014-02-27 14:17:54 +00:00
self . stopping . wait ( delay )
if self . stopping . is_set ( ) :
continue
2014-02-27 14:09:52 +00:00
2016-08-30 21:23:46 +00:00
status = self . fs . status ( )
2017-02-04 16:58:56 +00:00
if random . random ( ) < = self . thrash_max_mds :
2016-08-30 21:23:46 +00:00
max_mds = status . get_fsmap ( self . fs . id ) [ ' mdsmap ' ] [ ' max_mds ' ]
options = range ( 1 , max_mds ) + range ( max_mds + 1 , self . max_mds + 1 )
if len ( options ) > 0 :
sample = random . sample ( options , 1 )
new_max_mds = sample [ 0 ]
self . log ( ' thrashing max_mds: %d -> %d ' % ( max_mds , new_max_mds ) )
self . fs . set_max_mds ( new_max_mds )
stats [ ' max_mds ' ] + = 1
2018-04-14 00:22:29 +00:00
self . wait_for_stable ( )
2016-08-30 21:23:46 +00:00
count = 0
for info in status . get_ranks ( self . fs . id ) :
name = info [ ' name ' ]
label = ' mds. ' + name
rank = info [ ' rank ' ]
gid = info [ ' gid ' ]
# if thrash_weights isn't specified and we've reached max_thrash,
# we're done
count = count + 1
if ' thrash_weights ' not in self . config and count > self . max_thrash :
2014-02-27 14:09:52 +00:00
break
2016-08-30 21:23:46 +00:00
weight = 1.0
if ' thrash_weights ' in self . config :
weight = self . config [ ' thrash_weights ' ] . get ( label , ' 0.0 ' )
skip = random . randrange ( 0.0 , 1.0 )
if weight < = skip :
self . log ( ' skipping thrash iteration with skip ( {skip} ) > weight ( {weight} ) ' . format ( skip = skip , weight = weight ) )
continue
2014-02-27 14:09:52 +00:00
2016-08-30 21:23:46 +00:00
self . log ( ' kill {label} (rank= {rank} ) ' . format ( label = label , rank = rank ) )
self . kill_mds ( name )
stats [ ' kill ' ] + = 1
# wait for mon to report killed mds as crashed
last_laggy_since = None
itercount = 0
while True :
status = self . fs . status ( )
info = status . get_mds ( name )
if not info :
break
if ' laggy_since ' in info :
last_laggy_since = info [ ' laggy_since ' ]
break
if any ( [ ( f == name ) for f in status . get_fsmap ( self . fs . id ) [ ' mdsmap ' ] [ ' failed ' ] ] ) :
break
self . log (
' waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap ' . format (
label = label ) )
itercount = itercount + 1
if itercount > 10 :
self . log ( ' mds map: {status} ' . format ( status = status ) )
2017-02-02 00:20:35 +00:00
sleep ( 2 )
2016-08-30 21:23:46 +00:00
if last_laggy_since :
self . log (
' {label} reported laggy/crashed since: {since} ' . format ( label = label , since = last_laggy_since ) )
else :
self . log ( ' {label} down, removed from mdsmap ' . format ( label = label , since = last_laggy_since ) )
# wait for a standby mds to takeover and become active
2017-01-26 05:57:40 +00:00
status = self . wait_for_stable ( rank , gid )
2016-08-30 21:23:46 +00:00
# wait for a while before restarting old active to become new
# standby
2014-02-27 14:09:52 +00:00
delay = self . max_revive_delay
if self . randomize :
delay = random . randrange ( 0.0 , self . max_revive_delay )
2016-08-30 21:23:46 +00:00
self . log ( ' waiting for {delay} secs before reviving {label} ' . format (
delay = delay , label = label ) )
2017-02-02 00:20:35 +00:00
sleep ( delay )
2014-02-27 14:09:52 +00:00
2016-08-30 21:23:46 +00:00
self . log ( ' reviving {label} ' . format ( label = label ) )
self . revive_mds ( name )
2017-01-27 17:53:56 +00:00
for itercount in itertools . count ( ) :
if itercount > 300 / 2 : # 5 minutes
raise RuntimeError ( ' timeout waiting for MDS to revive ' )
2016-08-30 21:23:46 +00:00
status = self . fs . status ( )
info = status . get_mds ( name )
2017-01-27 17:54:39 +00:00
if info and info [ ' state ' ] in ( ' up:standby ' , ' up:standby-replay ' , ' up:active ' ) :
2016-08-30 21:23:46 +00:00
self . log ( ' {label} reported in {state} state ' . format ( label = label , state = info [ ' state ' ] ) )
break
self . log (
2017-01-27 17:54:39 +00:00
' waiting till mds map indicates {label} is in active, standby or standby-replay ' . format ( label = label ) )
2017-02-02 00:20:35 +00:00
sleep ( 2 )
2016-08-30 21:23:46 +00:00
for stat in stats :
self . log ( " stat[ ' {key} ' ] = {value} " . format ( key = stat , value = stats [ stat ] ) )
# don't do replay thrashing right now
# for info in status.get_replays(self.fs.id):
# # this might race with replay -> active transition...
# if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay:
# delay = self.max_replay_thrash_delay
# if self.randomize:
# delay = random.randrange(0.0, self.max_replay_thrash_delay)
2017-02-02 00:20:35 +00:00
# sleep(delay)
2016-08-30 21:23:46 +00:00
# self.log('kill replaying mds.{id}'.format(id=self.to_kill))
# self.kill_mds(self.to_kill)
#
# delay = self.max_revive_delay
# if self.randomize:
# delay = random.randrange(0.0, self.max_revive_delay)
#
# self.log('waiting for {delay} secs before reviving mds.{id}'.format(
# delay=delay, id=self.to_kill))
2017-02-02 00:20:35 +00:00
# sleep(delay)
2016-08-30 21:23:46 +00:00
#
# self.log('revive mds.{id}'.format(id=self.to_kill))
# self.revive_mds(self.to_kill)
2013-01-09 22:02:42 +00:00
2014-01-27 21:10:54 +00:00
2013-01-09 22:02:42 +00:00
@contextlib.contextmanager
def task ( ctx , config ) :
2014-01-27 21:10:54 +00:00
"""
Stress test the mds by thrashing while another task / workunit
is running .
2013-01-09 22:02:42 +00:00
2014-01-27 21:10:54 +00:00
Please refer to MDSThrasher class for further information on the
available options .
"""
2016-01-08 10:23:20 +00:00
mds_cluster = MDSCluster ( ctx )
2014-01-27 21:10:54 +00:00
if config is None :
config = { }
assert isinstance ( config , dict ) , \
' mds_thrash task only accepts a dict for configuration '
mdslist = list ( teuthology . all_roles_of_type ( ctx . cluster , ' mds ' ) )
assert len ( mdslist ) > 1 , \
' mds_thrash task requires at least 2 metadata servers '
# choose random seed
if ' seed ' in config :
seed = int ( config [ ' seed ' ] )
else :
seed = int ( time . time ( ) )
log . info ( ' mds thrasher using random seed: {seed} ' . format ( seed = seed ) )
random . seed ( seed )
2014-03-27 16:35:28 +00:00
( first , ) = ctx . cluster . only ( ' mds. {_id} ' . format ( _id = mdslist [ 0 ] ) ) . remotes . iterkeys ( )
2014-01-27 21:10:54 +00:00
manager = ceph_manager . CephManager (
first , ctx = ctx , logger = log . getChild ( ' ceph_manager ' ) ,
2013-01-09 22:02:42 +00:00
)
2014-01-27 21:10:54 +00:00
# make sure everyone is in active, standby, or standby-replay
log . info ( ' Wait for all MDSs to reach steady state... ' )
2016-08-30 21:23:46 +00:00
status = mds_cluster . status ( )
2014-01-27 21:10:54 +00:00
while True :
2016-08-30 21:23:46 +00:00
steady = True
for info in status . get_all ( ) :
state = info [ ' state ' ]
if state not in ( ' up:active ' , ' up:standby ' , ' up:standby-replay ' ) :
steady = False
break
if steady :
2014-01-27 21:10:54 +00:00
break
2017-02-02 00:20:35 +00:00
sleep ( 2 )
2016-08-30 21:23:46 +00:00
status = mds_cluster . status ( )
2014-01-27 21:10:54 +00:00
log . info ( ' Ready to start thrashing ' )
2014-02-27 14:17:54 +00:00
manager . wait_for_clean ( )
2017-02-04 17:00:45 +00:00
assert manager . is_clean ( )
2019-06-06 11:20:18 +00:00
if ' cluster ' not in config :
config [ ' cluster ' ] = ' ceph '
2016-08-30 21:23:46 +00:00
for fs in status . get_filesystems ( ) :
2017-02-04 17:00:45 +00:00
thrasher = MDSThrasher ( ctx , manager , config , Filesystem ( ctx , fs [ ' id ' ] ) , fs [ ' mdsmap ' ] [ ' max_mds ' ] )
2014-02-27 14:17:54 +00:00
thrasher . start ( )
2019-06-06 11:20:18 +00:00
ctx . ceph [ config [ ' cluster ' ] ] . thrashers . append ( thrasher )
2014-01-27 21:10:54 +00:00
try :
log . debug ( ' Yielding ' )
yield
finally :
2019-06-07 11:55:54 +00:00
log . info ( ' joining mds_thrasher ' )
thrasher . stop ( )
2019-08-05 10:52:10 +00:00
if thrasher . exception is not None :
2019-06-07 11:55:54 +00:00
raise RuntimeError ( ' error during thrashing ' )
thrasher . join ( )
2014-01-27 21:10:54 +00:00
log . info ( ' done joining ' )