Merge PR #28825 into master

* refs/pull/28825/head:
	qa: wait for kernel client death
	qa: use hard_reset to reboot kclient

Reviewed-by: David Galloway <dgallowa@redhat.com>
This commit is contained in:
Patrick Donnelly 2019-07-29 13:08:29 -07:00
commit 2a82081f77
No known key found for this signature in database
GPG Key ID: 3A2A7E25BEA8AADB
2 changed files with 22 additions and 8 deletions

View File

@@ -1,6 +1,7 @@
from StringIO import StringIO from StringIO import StringIO
import json import json
import logging import logging
import time
from textwrap import dedent from textwrap import dedent
from teuthology.orchestra.run import CommandFailedError from teuthology.orchestra.run import CommandFailedError
from teuthology import misc from teuthology import misc
@@ -176,21 +177,31 @@ class KernelMount(CephFSMount):
self.ipmi_user, self.ipmi_user,
self.ipmi_password, self.ipmi_password,
self.ipmi_domain) self.ipmi_domain)
con.power_off() con.hard_reset(wait_for_login=False)
self.mounted = False self.mounted = False
def kill_cleanup(self): def kill_cleanup(self):
assert not self.mounted assert not self.mounted
con = orchestra_remote.getRemoteConsole(self.client_remote.hostname, # We need to do a sleep here because we don't know how long it will
self.ipmi_user, # take for a hard_reset to be effected.
self.ipmi_password, time.sleep(30)
self.ipmi_domain)
con.power_on()
# Wait for node to come back up after reboot try:
misc.reconnect(None, 300, [self.client_remote]) # Wait for node to come back up after reboot
misc.reconnect(None, 300, [self.client_remote])
except:
# attempt to get some useful debug output:
con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
self.ipmi_user,
self.ipmi_password,
self.ipmi_domain)
con.check_status(timeout=60)
raise
# Remove mount directory
self.client_remote.run(args=['uptime'], timeout=10)
# Remove mount directory # Remove mount directory
self.client_remote.run( self.client_remote.run(

View File

@@ -297,6 +297,9 @@ class TestClientRecovery(CephFSTestCase):
# Simulate client death # Simulate client death
self.mount_a.kill() self.mount_a.kill()
# wait for it to die so it doesn't voluntarily release buffer cap
time.sleep(5)
try: try:
# The waiter should get stuck waiting for the capability # The waiter should get stuck waiting for the capability
# held on the MDS by the now-dead client A # held on the MDS by the now-dead client A