Merge PR #21857 into mimic

* refs/pull/21857/head:
	qa: move snap-hierarchy out of snaps workunits
	qa: try snap format upgrade with multimds cluster
	qa: add mds deactivation procedure for upgrades
This commit is contained in:
Patrick Donnelly 2018-05-07 07:40:24 -07:00
commit 574906d323
No known key found for this signature in database
GPG Key ID: 3A2A7E25BEA8AADB
8 changed files with 76 additions and 41 deletions

View File

@ -0,0 +1,3 @@
# Run the upgrade suite against a multimds cluster: two active MDS daemons.
overrides:
  ceph:
    max_mds: 2

View File

@ -9,5 +9,5 @@ tasks:
cleanup: false
clients:
client.0:
- fs/snaps/snap-hierarchy.sh
- fs/snap-hierarchy.sh
- print: "**** done snap hierarchy"

View File

@ -6,5 +6,5 @@ tasks:
VERIFY: verify
clients:
client.0:
- fs/snaps/snap-hierarchy.sh
- fs/snap-hierarchy.sh
- print: "**** done verify snap hierarchy"

View File

@ -9,6 +9,7 @@ import datetime
import re
import errno
import random
import traceback
from teuthology.exceptions import CommandFailedError
from teuthology import misc
@ -432,6 +433,42 @@ class Filesystem(MDSCluster):
raise RuntimeError("cannot specify fscid when configuring overlay")
self.metadata_overlay = overlay
def deactivate(self, rank):
    """
    Ask the mons to deactivate the MDS holding the given rank.

    Rank 0 can never be deactivated; negative ranks are invalid.
    :raises RuntimeError: on rank 0 or a negative rank.
    """
    if rank == 0:
        raise RuntimeError("cannot deactivate rank 0")
    if rank < 0:
        raise RuntimeError("invalid rank")
    target = "%d:%d" % (self.id, rank)
    self.mon_manager.raw_cluster_cmd("mds", "deactivate", target)
def reach_max_mds(self):
    """
    Converge the number of active ranks on max_mds, up or down
    (UPGRADE SENSITIVE!).

    If there are more active ranks than max_mds, deactivate the
    highest-numbered ranks one at a time.  On post-Mimic clusters the
    monitors deactivate ranks automatically and the explicit
    "mds deactivate" command may fail; that failure is logged and we
    simply wait for the cluster to settle on its own.
    """
    status = self.getinfo()
    mds_map = self.get_mds_map(status=status)
    max_mds = mds_map['max_mds']
    count = len(list(self.get_ranks(status=status)))
    if count > max_mds:
        try:
            # deactivate mds in descending order so rank 0 is last to remain
            status = self.wait_for_daemons(status=status, skip_max_mds_check=True)
            while count > max_mds:
                targets = sorted(self.get_ranks(status=status), key=lambda r: r['rank'], reverse=True)
                target = targets[0]
                log.info("deactivating rank %d" % target['rank'])
                self.deactivate(target['rank'])
                status = self.wait_for_daemons(skip_max_mds_check=True)
                count = len(list(self.get_ranks(status=status)))
        except Exception:
            # Deliberate best-effort: in Mimic, deactivation is done
            # automatically, so the manual command may fail.  Narrowed from a
            # bare "except:" so KeyboardInterrupt/SystemExit still propagate.
            log.info("Error:\n{}".format(traceback.format_exc()))
            status = self.wait_for_daemons()
    else:
        status = self.wait_for_daemons()
    # Sanity: the map must agree and ranks 0..max_mds-1 must all be "in".
    mds_map = self.get_mds_map(status=status)
    assert(mds_map['max_mds'] == max_mds)
    assert(mds_map['in'] == list(range(0, max_mds)))
def set_var(self, var, *args):
    """Run "ceph fs set <fs-name> <var> <args...>", stringifying each arg."""
    str_args = [str(arg) for arg in args]
    self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *str_args)
@ -631,7 +668,7 @@ class Filesystem(MDSCluster):
def get_usage(self):
    """Return total bytes used cluster-wide, as reported by the df stats."""
    stats = self._df()['stats']
    return stats['total_used_bytes']
def are_daemons_healthy(self, status=None):
def are_daemons_healthy(self, status=None, skip_max_mds_check=False):
"""
Return true if all daemons are in one of active, standby, standby-replay, and
at least max_mds daemons are in 'active'.
@ -671,30 +708,34 @@ class Filesystem(MDSCluster):
active_count, mds_map['max_mds']
))
if active_count > mds_map['max_mds']:
log.info("are_daemons_healthy: number of actives is grater than max_mds: {0}".format(mds_map))
return False
elif active_count == mds_map['max_mds']:
# The MDSMap says these guys are active, but let's check they really are
for mds_id, mds_status in mds_map['info'].items():
if mds_status['state'] == 'up:active':
try:
daemon_status = self.mds_asok(["status"], mds_id=mds_status['name'])
except CommandFailedError as cfe:
if cfe.exitstatus == errno.EINVAL:
# Old version, can't do this check
continue
else:
# MDS not even running
if not skip_max_mds_check:
if active_count > mds_map['max_mds']:
log.info("are_daemons_healthy: number of actives is greater than max_mds: {0}".format(mds_map))
return False
elif active_count == mds_map['max_mds']:
# The MDSMap says these guys are active, but let's check they really are
for mds_id, mds_status in mds_map['info'].items():
if mds_status['state'] == 'up:active':
try:
daemon_status = self.mds_asok(["status"], mds_id=mds_status['name'])
except CommandFailedError as cfe:
if cfe.exitstatus == errno.EINVAL:
# Old version, can't do this check
continue
else:
# MDS not even running
return False
if daemon_status['state'] != 'up:active':
# MDS hasn't taken the latest map yet
return False
if daemon_status['state'] != 'up:active':
# MDS hasn't taken the latest map yet
return False
return True
return True
else:
return False
else:
return False
log.info("are_daemons_healthy: skipping max_mds check")
return True
def get_daemon_names(self, state=None, status=None):
"""
@ -753,7 +794,7 @@ class Filesystem(MDSCluster):
return result
def wait_for_daemons(self, timeout=None):
def wait_for_daemons(self, timeout=None, skip_max_mds_check=False, status=None):
"""
Wait until all daemons are healthy
:return:
@ -762,10 +803,12 @@ class Filesystem(MDSCluster):
if timeout is None:
timeout = DAEMON_WAIT_TIMEOUT
if status is None:
status = self.status()
elapsed = 0
while True:
status = self.status()
if self.are_daemons_healthy(status=status):
if self.are_daemons_healthy(status=status, skip_max_mds_check=skip_max_mds_check):
return status
else:
time.sleep(1)
@ -775,6 +818,8 @@ class Filesystem(MDSCluster):
log.info("status = {0}".format(status))
raise RuntimeError("Timed out waiting for MDS daemons to become healthy")
status = self.status()
def get_lone_mds_id(self):
"""
Get a single MDS ID: the only one if there is only one

View File

@ -25,20 +25,7 @@ def task(ctx, config):
status = fs.getinfo()
fs.set_max_mds(1)
status = fs.getinfo()
targets = filter(lambda r: r['rank'] >= 1, fs.get_ranks(status=status))
if len(targets) > 0:
# deactivate mds in decending order
targets = sorted(targets, key=lambda r: r['rank'], reverse=True)
for target in targets:
self.log("deactivating rank %d" % target['rank'])
self.fs.deactivate(target['rank'])
status = self.wait_for_stable()[0]
else:
status = self.wait_for_stable()[0]
assert(fs.get_mds_map(status=status)['max_mds'] == 1)
assert(fs.get_mds_map(status=status)['in'] == [0])
fs.reach_max_mds()
# Stop standbys now to minimize time rank 0 is down in subsequent:
# tasks: