Merge branch 'auth_change_waiters' into unstable

This commit is contained in:
Sage Weil 2010-08-23 20:31:17 -07:00
commit 1d8770be27
11 changed files with 110 additions and 20 deletions

View File

@ -548,3 +548,8 @@ void CDentry::remove_client_lease(ClientLease *l, Locker *locker)
}
// Return the containing subtree for this dentry.
// A dentry does not compute this itself: it forwards the question to the
// directory returned by get_dir().
// NOTE(review): presumably this is the CDentry implementation of the pure
// virtual MDSCacheObject::get_containing_subtree() -- confirm.
CDir *CDentry::get_containing_subtree()
{
return get_dir()->get_containing_subtree();
}

View File

@ -258,6 +258,8 @@ public:
lru_unpin();
}
CDir *get_containing_subtree();
// auth pins
bool can_auth_pin();
void auth_pin(void *by);

View File

@ -139,7 +139,7 @@ ostream& CDir::print_db_line_prefix(ostream& out)
// CDir
CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) :
item_dirty(this), item_new(this)
item_dirty(this), item_new(this), waiting_on_auth_change(member_offset(MDSCacheObject, item_waiting_on_auth_change))
{
g_num_dir++;
g_num_dira++;
@ -880,7 +880,7 @@ void CDir::add_waiter(uint64_t tag, Context *c)
/* NOTE: this checks dentry waiters too */
/* NOTE: this checks dentry and authchange waiters too */
void CDir::take_waiting(uint64_t mask, list<Context*>& ls)
{
if ((mask & WAIT_DENTRY) && waiting_on_dentry.size()) {
@ -895,6 +895,15 @@ void CDir::take_waiting(uint64_t mask, list<Context*>& ls)
}
put(PIN_DNWAITER);
}
if (mask & MDSCacheObject::WAIT_AUTHCHANGE) {
elist<MDSCacheObject*>::iterator p = waiting_on_auth_change.begin();
while (!p.end()) {
MDSCacheObject *o = *p;
++p;
o->take_waiting(MDSCacheObject::WAIT_AUTHCHANGE, ls); // careful, this removes *o from the elist
}
}
// waiting
MDSCacheObject::take_waiting(mask, ls);
@ -2217,5 +2226,9 @@ void CDir::unfreeze_dir()
// Return the containing subtree for this directory, as reported by the
// cache's get_subtree_root() lookup on this object.
CDir *CDir::get_containing_subtree()
{
return cache->get_subtree_root(this);
}

View File

@ -254,6 +254,16 @@ protected:
int num_dentries_auth_subtree_nested;
// extra wait stuff
elist<MDSCacheObject*> waiting_on_auth_change; // only on subtree roots
public:
// Register o as waiting for an authority change on this directory by
// appending o's embedded list item to waiting_on_auth_change.
// NOTE(review): callers rely on push_back unlinking the item from whatever
// list it was on before (see the "removes *o from ... list" remarks at the
// call sites) -- confirm against elist.
void add_auth_change_waiter(MDSCacheObject *o) {
  elist<MDSCacheObject*>::item *link = &o->item_waiting_on_auth_change;
  waiting_on_auth_change.push_back(link);
}
protected:
// friends
friend class Migrator;
friend class CInode;
@ -333,6 +343,8 @@ private:
public:
bool try_trim_snap_dentry(CDentry *dn, const set<snapid_t>& snaps);
CDir *get_containing_subtree();
public:
void split(int bits, list<CDir*>& subs, list<Context*>& waiters, bool replay);

View File

@ -416,6 +416,10 @@ void CInode::put_stickydirs()
// Return the containing subtree for this inode, resolved through the
// directory of its projected parent dentry.
// NOTE(review): the result of get_projected_parent_dn() is dereferenced
// without a check; presumably this is never called on an inode that has no
// parent dentry -- confirm against callers.
CDir *CInode::get_containing_subtree()
{
return get_projected_parent_dn()->get_dir()->get_containing_subtree();
}
// pins

View File

@ -264,6 +264,9 @@ public:
void get_stickydirs();
void put_stickydirs();
CDir *get_containing_subtree();
protected:
// parent dentries in cache
CDentry *parent; // primary link

View File

@ -197,11 +197,6 @@ bool Locker::acquire_locks(MDRequest *mdr,
sorted.insert(*p);
if ((*p)->get_parent()->is_auth())
mustpin.insert(*p);
else if ((*p)->get_type() == CEPH_LOCK_IFILE &&
!(*p)->get_parent()->is_auth() && !(*p)->can_wrlock(client)) { // we might have to request a scatter
dout(15) << " will also auth_pin " << *(*p)->get_parent() << " in case we need to request a scatter" << dendl;
mustpin.insert(*p);
}
}
// rdlocks
@ -212,11 +207,6 @@ bool Locker::acquire_locks(MDRequest *mdr,
sorted.insert(*p);
if ((*p)->get_parent()->is_auth())
mustpin.insert(*p);
else if ((*p)->get_type() == CEPH_LOCK_IFILE &&
!(*p)->get_parent()->is_auth() && !(*p)->can_rdlock(client)) { // we might have to request an rdlock
dout(15) << " will also auth_pin " << *(*p)->get_parent() << " in case we need to request a rdlock" << dendl;
mustpin.insert(*p);
}
}
@ -808,11 +798,18 @@ bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mut, bool as_anon)
}
// wait!
int wait_on;
uint64_t wait_on;
if (lock->get_parent()->is_auth() && lock->is_stable())
wait_on = SimpleLock::WAIT_RD;
else
wait_on = SimpleLock::WAIT_STABLE; // REQRDLOCK is ignored if lock is unstable, so we need to retry.
else {
// REQRDLOCK is ignored if lock is unstable, so we need to retry on stable OR auth change
wait_on = SimpleLock::WAIT_STABLE;
if (!lock->get_parent()->is_auth()) {
wait_on |= MDSCacheObject::WAIT_AUTHCHANGE;
CDir *subtree = lock->get_parent()->get_containing_subtree();
subtree->add_auth_change_waiter(in);
}
}
dout(7) << "rdlock_start waiting on " << *lock << " on " << *lock->get_parent() << dendl;
lock->add_waiter(wait_on, new C_MDS_RetryRequest(mdcache, mut));
nudge_log(lock);
@ -904,7 +901,6 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mut, bool nowait)
} else {
// replica.
// auth should be auth_pinned (see acquire_locks wrlock weird mustpin case).
int auth = lock->get_parent()->authority().first;
dout(10) << "requesting scatter from auth on "
<< *lock << " on " << *lock->get_parent() << dendl;
@ -915,7 +911,13 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mut, bool nowait)
if (!nowait) {
dout(7) << "wrlock_start waiting on " << *lock << " on " << *lock->get_parent() << dendl;
lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut));
uint64_t mask = SimpleLock::WAIT_STABLE;
if (!lock->get_parent()->is_auth()) {
mask |= MDSCacheObject::WAIT_AUTHCHANGE;
CDir *subtree = lock->get_parent()->get_containing_subtree();
subtree->add_auth_change_waiter(in);
}
lock->add_waiter(mask, new C_MDS_RetryRequest(mdcache, mut));
nudge_log(lock);
}

View File

@ -752,6 +752,13 @@ void MDCache::try_subtree_merge_at(CDir *dir)
subtrees.erase(dir);
subtrees[parent].erase(dir);
// move auth change waiters
while (!dir->waiting_on_auth_change.empty()) {
MDSCacheObject *o = dir->waiting_on_auth_change.front();
parent->add_auth_change_waiter(o);
dout(10) << " moved auth change waiter " << *o << dendl;
}
// adjust popularity?
if (dir->is_auth()) {
utime_t now = g_clock.now();
@ -870,6 +877,17 @@ void MDCache::adjust_bounded_subtree_auth(CDir *dir, set<CDir*>& bounds, pair<in
}
p = next;
}
// move auth change waiters
elist<MDSCacheObject*>::iterator q = root->waiting_on_auth_change.begin();
while (!q.end()) {
MDSCacheObject *o = *q;
++q;
if (o->get_containing_subtree() == dir) {
dout(20) << " moving auth change waiter " << *o << dendl;
dir->add_auth_change_waiter(o); // careful, this removes *o from root's list
}
}
// i am a bound of the parent subtree.
subtrees[root].insert(dir);
@ -1006,6 +1024,7 @@ void MDCache::remove_subtree(CDir *dir)
assert(subtrees[p].count(dir));
subtrees[p].erase(dir);
}
assert(dir->waiting_on_auth_change.empty());
}
void MDCache::get_subtree_bounds(CDir *dir, set<CDir*>& bounds)

View File

@ -2258,6 +2258,15 @@ void Migrator::handle_export_notify(MExportDirNotify *m)
set<CDir*> have;
cache->map_dirfrag_set(m->get_bounds(), have);
cache->adjust_bounded_subtree_auth(dir, have, new_auth);
if (new_auth.second == CDIR_AUTH_UNKNOWN) {
// wake up any auth change waiters
list<Context*> ls;
dir->take_waiting(MDSCacheObject::WAIT_AUTHCHANGE, ls);
if (!ls.empty())
dout(10) << "handle_export_notify woke up some AUTHCHANGE waiters" << dendl;
mds->queue_waiters(ls);
}
// induce a merge?
cache->try_subtree_merge(dir);

View File

@ -288,7 +288,8 @@ public:
parent->take_waiting(mask << get_wait_shift(), ls);
}
void add_waiter(uint64_t mask, Context *c) {
parent->add_waiter(mask << get_wait_shift(), c);
// preserve WAIT_AUTHCHANGE bit unshifted, if present.
parent->add_waiter((mask << get_wait_shift()) | (mask & MDSCacheObject::WAIT_AUTHCHANGE), c);
}
bool is_waiter_for(uint64_t mask) {
return parent->is_waiter_for(mask << get_wait_shift());

View File

@ -16,6 +16,7 @@ using namespace std;
#include "include/frag.h"
#include "include/xlist.h"
#include "include/elist.h"
#include <boost/pool/pool.hpp>
@ -1665,6 +1666,8 @@ class SimpleLock;
class MDSCacheObject;
class CDir;
// -- authority delegation --
// directory authority types
// >= 0 is the auth mds
@ -1772,6 +1775,7 @@ class MDSCacheObject {
// -- wait --
const static uint64_t WAIT_SINGLEAUTH = (1ull<<60);
const static uint64_t WAIT_UNFREEZE = (1ull<<59); // pka AUTHPINNABLE
// Waiter should also be woken when authority changes. Locker ORs this into
// the wait mask for non-auth objects, and CDir::take_waiting() collects such
// waiters when asked for this bit.
const static uint64_t WAIT_AUTHCHANGE = (1ull<<58);
// ============================================
@ -1780,7 +1784,8 @@ class MDSCacheObject {
MDSCacheObject() :
state(0),
ref(0),
replica_nonce(0) {}
replica_nonce(0),
waiting_on_auth_change(0) {}
virtual ~MDSCacheObject() {}
// printing
@ -1812,6 +1817,7 @@ class MDSCacheObject {
// Authority is ambiguous whenever the second element of authority() holds
// anything other than CDIR_AUTH_UNKNOWN.
bool is_ambiguous_auth() {
  if (authority().second == CDIR_AUTH_UNKNOWN)
    return false;
  return true;
}
virtual CDir *get_containing_subtree() = 0;
// --------------------------------------------
// pins
@ -1957,7 +1963,10 @@ protected:
// waiting
protected:
// registered waiters, keyed by wait mask
multimap<uint64_t, Context*> waiting;
// Number of entries in `waiting` whose mask includes WAIT_AUTHCHANGE:
// incremented in add_waiter(), decremented in take_waiting().
// NOTE(review): CDir declares an elist member with this same name; if CDir
// derives from this class, that member hides this counter inside CDir --
// confirm this is intended.
int waiting_on_auth_change;
// List link used by CDir::add_auth_change_waiter() to put this object on a
// directory's auth-change waiter list; take_waiting() unlinks it once the
// counter above drops to zero.
elist<MDSCacheObject*>::item item_waiting_on_auth_change;
// CDir needs access to item_waiting_on_auth_change (protected here).
friend class CDir;
public:
bool is_waiter_for(uint64_t mask, uint64_t min=0) {
if (!min) {
@ -1977,6 +1986,10 @@ protected:
if (waiting.empty())
get(PIN_WAITER);
waiting.insert(pair<uint64_t,Context*>(mask, c));
if (mask & WAIT_AUTHCHANGE)
waiting_on_auth_change++;
pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
<< "add_waiter " << hex << mask << dec << " " << c
<< " on " << *this
@ -1989,6 +2002,13 @@ protected:
while (it != waiting.end()) {
if (it->first & mask) {
ls.push_back(it->second);
if (it->first & WAIT_AUTHCHANGE) {
waiting_on_auth_change--;
if (!waiting_on_auth_change)
item_waiting_on_auth_change.remove_myself();
}
pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
<< "take_waiting mask " << hex << mask << dec << " took " << it->second
<< " tag " << it->first