diff --git a/branches/sage/cephmds2/TODO b/branches/sage/cephmds2/TODO index 4aad8cfe360..e4f750dbd51 100644 --- a/branches/sage/cephmds2/TODO +++ b/branches/sage/cephmds2/TODO @@ -53,8 +53,8 @@ mds / - clean up multi-auth_pin code paths (e.g. link_local) - FIXME how to journal root and stray inode content? - - in particular, i care about dirfragtree! - - and dir sizes, if i add that... + - in particular, i care about dirfragtree.. get it on rejoin? + - and dir sizes, if i add that... also on rejoin? - mds failure vs clients / - clean up client op redirection @@ -68,8 +68,8 @@ mds / - local unlink - local rename / - fix dir renames vs subtrees - - how notify replicas... - - stray purge + - how to notify replicas... +/ - stray purge - stray reintegration - remote link - remote unlink @@ -88,6 +88,41 @@ mds - statfs? +foreign link +- + +foreign rename +- question: can we generalize foreign and local rename? +- initiated by dest. + - if we get into race with lock acquisition, drop locks and forward to new dest. +- how to do pre-auth pinning? + - is it sufficient to wait on, then grab, all local auth pins, _then_ do foreign locks? + - local auth pins can hold subtrees in freezing state, preventing exports, and additional auth_pins. + - so, wait, then grab all local auth_pins, + - then work on locks in proper order (*), + - if we detect we are missing a local auth_pin (i.e. migration race), drop all auth_pins and wait/restart + - need to more carefully look at lock dependencies to avoid deadlock... + - establish a complete full ordering on locks, based on any lock dependencies? + - is it possible to "leak" locks, e.g. get inode_hard lock, work on something else, but inode moves and we dont notice? + - pin paths for those locks? + - can we pin when we choose order, so that locks are sure to proceed? +- we can change active_requests to key of reqid (also unique), and use the same key for foreign locks + - clean up dentry_xlock_request.. 
just merge it into destroy_xlock_start, if !is_auth(). +- renamer will + - check preconditions (i.e. i am dest) + - grab all locks (avoiding deadlock) + - verify preconditions are still true, else forward/retry (actually, this already happens w/ the way we structure the lock acquisition code...) + - prepare foreign bits (using foreign request_auth_pins, locks, etc.) + - source unlink, + - anchortable update (if source is anchored), + - dest nlink-- (if dest is remote link on foreign host) + - make sure replicas have either both source+dest pinned in cache (or neither...) + - use foreign request_pins? + - log update + - do update locally + - async commit + unlock +- rejoin will need to explicitly resolve uncommitted items. + - fully implement link/unlink first, and use that as a model? monitor - finish generic paxos diff --git a/branches/sage/cephmds2/doc/mds_locks.txt b/branches/sage/cephmds2/doc/mds_locks.txt index 1a337c3bc3d..783139ab90a 100644 --- a/branches/sage/cephmds2/doc/mds_locks.txt +++ b/branches/sage/cephmds2/doc/mds_locks.txt @@ -2,21 +2,35 @@ path_pin = read lock on /some/random/path - - prevents a dnxlock + - blocks a dentry xlock dnxlock = exclusive lock on /some/random/path - - prevents dn read + - locking: prevents subsequent path pins. + - locked: prevents dn read - on auth - - does auth_pin + - requires auth_pin + +-> grab _all_ path pins at once; hold none while waiting. +-> grab xlocks in order. auth_pin = pin to authority, on *dir, *in - - prevents freeze - - can hang up export - - thus blocking other auth_pins + - prevents freezing -> frozen. + - freezing blocks new auth pins, thus blocking other local auth_pins. (hangs up local export.) + - does not block remote auth_pins, because remote side is not auth (or frozen!) until after local subtree is frozen. + +-> grab _all_ auth pins at once; hold none while waiting. 
hard/file_wrlock = exlusive lock on inode content - prevents inode read - on auth - - does auth_pin + - requires auth_pin + +-> grab locks in order. + +ORDERING +- path pins < dn xlocks < inode read locks < inode write locks. +- order dentries on (dirino, dname) +- order inodes on (ino); + diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index 911de33ef74..ab2ce34f072 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -3007,9 +3007,8 @@ bool MDCache::request_start(Message *req, assert(active_requests.count(req) == 0); // pin path - if (trace.size()) { + if (!trace.empty()) if (!path_pin(trace, req, new C_MDS_RetryMessage(mds,req))) return false; - } dout(7) << "request_start " << *req << endl; diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index d3c33744c2c..b3d4af0c100 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -1362,8 +1362,7 @@ void Server::handle_client_link_2(int r, MClientRequest *req, CInode *diri, vect // make dentry and inode, xlock dentry. r = prepare_null_dentry(req, diri, &dir, &dn); - if (!r) - return; // wait on something + if (!r) return; // wait or forward or something assert(dir); assert(dn); @@ -1397,7 +1396,7 @@ public: void Server::_link_local(MClientRequest *req, CInode *diri, - CDentry *dn, CInode *targeti) + CDentry *dn, CInode *targeti) { dout(10) << "_link_local " << *dn << " to " << *targeti << endl;