Merge PR #21712 into master

* refs/pull/21712/head:
	qa/tasks/cephfs: add test for renewing stale session
	client: invalidate caps and leases when session becomes stale
	client: fix race in concurrent readdir

Reviewed-by: Patrick Donnelly <pdonnell@redhat.com>
This commit is contained in:
Patrick Donnelly 2018-05-07 15:53:32 -07:00
commit e510e29e01
No known key found for this signature in database
GPG Key ID: 3A2A7E25BEA8AADB
5 changed files with 52 additions and 1 deletions

View File

@@ -10,5 +10,6 @@ overrides:
tasks:
- cephfs_test_runner:
fail_on_skip: false
modules:
- tasks.cephfs.test_client_recovery

View File

@@ -10,5 +10,6 @@ overrides:
tasks:
- cephfs_test_runner:
fail_on_skip: false
modules:
- tasks.cephfs.test_client_recovery

View File

@@ -409,9 +409,15 @@ print find_socket("{client_name}")
"""
Look up the CephFS client ID for this mount
"""
return self.admin_socket(['mds_sessions'])['id']
def get_client_pid(self):
    """
    Return the PID of the ceph-fuse process backing this mount,
    as reported by the client's admin-socket 'status' command.
    """
    # 'status' exposes daemon metadata; the pid lives under 'metadata'.
    return self.admin_socket(['status'])['metadata']['pid']
def get_osd_epoch(self):
"""
Return 2-tuple of osd_epoch, osd_epoch_barrier

View File

@@ -11,8 +11,10 @@ import re
import os
from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from teuthology.packaging import get_package_version
from unittest import SkipTest
log = logging.getLogger(__name__)
@@ -483,3 +485,37 @@ class TestClientRecovery(CephFSTestCase):
self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
self.mount_a.umount_wait(require_clean=True, timeout=30)
def test_stale_renew(self):
# Verify that when an MDS marks a client session stale, the client
# invalidates its caps/leases on renewal, so changes made by other
# clients while it was stale become visible (no stale readdir cache).
# Strategy: freeze mount_b with SIGSTOP so it cannot renew its session,
# wait past session_timeout for the MDS to mark it stale, mutate the
# directory from mount_a, then resume mount_b and check it sees the change.
if not isinstance(self.mount_a, FuseMount):
raise SkipTest("Require FUSE client to handle signal STOP/CONT")
# MDS-side timeout after which a non-renewing session is marked stale.
session_timeout = self.fs.get_var("session_timeout")
self.mount_a.run_shell(["mkdir", "testdir"])
self.mount_a.run_shell(["touch", "testdir/file1"])
# populate readdir cache
self.mount_a.run_shell(["ls", "testdir"])
self.mount_b.run_shell(["ls", "testdir"])
# check if readdir cache is effective
# (a cached readdir on mount_b should NOT increase the MDS's
# req_readdir_latency perf counter)
initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
self.mount_b.run_shell(["ls", "testdir"])
current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
self.assertEqual(current_readdirs, initial_readdirs);
mount_b_gid = self.mount_b.get_global_id()
mount_b_pid = self.mount_b.get_client_pid()
# stop ceph-fuse process of mount_b
# (a stopped client cannot send session renewals to the MDS)
self.mount_b.client_remote.run(args=["sudo", "kill", "-STOP", mount_b_pid])
self.assert_session_state(mount_b_gid, "open")
time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale
self.assert_session_state(mount_b_gid, "stale")
# mutate the directory while mount_b's session is stale
self.mount_a.run_shell(["touch", "testdir/file2"])
# resume ceph-fuse process of mount_b
self.mount_b.client_remote.run(args=["sudo", "kill", "-CONT", mount_b_pid])
# Is the new file visible from mount_b? (caps become invalid after session stale)
self.mount_b.run_shell(["ls", "testdir/file2"])

View File

@@ -2075,6 +2075,10 @@ void Client::handle_client_session(MClientSession *m)
break;
case CEPH_SESSION_STALE:
// invalidate session caps/leases
session->cap_gen++;
session->cap_ttl = ceph_clock_now();
session->cap_ttl -= 1;
renew_caps(session);
break;
@@ -3893,6 +3897,7 @@ void Client::add_update_cap(Inode *in, MetaSession *mds_session, uint64_t cap_id
cap.seq = seq;
cap.issue_seq = seq;
cap.mseq = mseq;
cap.gen = mds_session->cap_gen;
cap.latest_perms = cap_perms;
ldout(cct, 10) << __func__ << " issued " << ccap_string(old_caps) << " -> " << ccap_string(cap.issued)
<< " from mds." << mds
@@ -5028,6 +5033,7 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient
<< " caps now " << ccap_string(new_caps)
<< " was " << ccap_string(old_caps) << dendl;
cap->seq = m->get_seq();
cap->gen = session->cap_gen;
in->layout = m->get_layout();
@@ -7723,6 +7729,7 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
else
dirp->next_offset = dirp->offset_low();
dirp->last_name = dn_name; // we successfully returned this one; update!
dirp->release_count = 0; // last_name no longer match cache index
if (r > 0)
return r;
}