mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
* scatterlock. untested.
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1346 29311d96-e01e-0410-9327-a35deaab8ce9
This commit is contained in:
parent
bd8c9409fd
commit
53abf9474d
@ -42,6 +42,9 @@ mds
|
||||
- discover
|
||||
- open_remote_ino needs major work...
|
||||
|
||||
- scatterlock
|
||||
- unlink, link, rename need to pre_dirty and update dir inode's mtime
|
||||
|
||||
- FIXME how to journal root and stray inode content?
|
||||
- in particular, i care about dirfragtree.. get it on rejoin?
|
||||
- and dir sizes, if i add that... also on rejoin?
|
||||
|
@ -64,6 +64,7 @@ ostream& operator<<(ostream& out, CInode& in)
|
||||
out << " link=" << in.linklock;
|
||||
out << " dft=" << in.dirfragtreelock;
|
||||
out << " file=" << in.filelock;
|
||||
out << " dir=" << in.dirlock;
|
||||
|
||||
if (in.get_num_ref()) {
|
||||
out << " |";
|
||||
@ -332,12 +333,14 @@ void CInode::encode_lock_state(int type, bufferlist& bl)
|
||||
{
|
||||
switch (type) {
|
||||
case LOCK_OTYPE_IAUTH:
|
||||
::_encode(inode.ctime, bl);
|
||||
::_encode(inode.mode, bl);
|
||||
::_encode(inode.uid, bl);
|
||||
::_encode(inode.gid, bl);
|
||||
break;
|
||||
|
||||
case LOCK_OTYPE_ILINK:
|
||||
::_encode(inode.ctime, bl);
|
||||
::_encode(inode.nlink, bl);
|
||||
::_encode(inode.anchored, bl);
|
||||
break;
|
||||
@ -351,6 +354,19 @@ void CInode::encode_lock_state(int type, bufferlist& bl)
|
||||
::_encode(inode.mtime, bl);
|
||||
::_encode(inode.atime, bl);
|
||||
break;
|
||||
|
||||
case LOCK_OTYPE_IDIR:
|
||||
::_encode(inode.mtime, bl);
|
||||
{
|
||||
map<frag_t,int> dfsz;
|
||||
for (map<frag_t,CDir*>::iterator p = dirfrags.begin();
|
||||
p != dirfrags.end();
|
||||
++p)
|
||||
if (p->second->is_auth())
|
||||
dfsz[p->first] = p->second->get_nitems();
|
||||
::_encode(dfsz, bl);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
@ -360,14 +376,20 @@ void CInode::encode_lock_state(int type, bufferlist& bl)
|
||||
void CInode::decode_lock_state(int type, bufferlist& bl)
|
||||
{
|
||||
int off = 0;
|
||||
utime_t tm;
|
||||
|
||||
switch (type) {
|
||||
case LOCK_OTYPE_IAUTH:
|
||||
::_decode(tm, bl, off);
|
||||
if (inode.ctime < tm) inode.ctime = tm;
|
||||
::_decode(inode.mode, bl, off);
|
||||
::_decode(inode.uid, bl, off);
|
||||
::_decode(inode.gid, bl, off);
|
||||
break;
|
||||
|
||||
case LOCK_OTYPE_ILINK:
|
||||
::_decode(tm, bl, off);
|
||||
if (inode.ctime < tm) inode.ctime = tm;
|
||||
::_decode(inode.nlink, bl, off);
|
||||
::_decode(inode.anchored, bl, off);
|
||||
break;
|
||||
@ -382,6 +404,17 @@ void CInode::decode_lock_state(int type, bufferlist& bl)
|
||||
::_decode(inode.atime, bl, off);
|
||||
break;
|
||||
|
||||
case LOCK_OTYPE_IDIR:
|
||||
//::_decode(inode.size, bl, off);
|
||||
::_decode(tm, bl, off);
|
||||
if (inode.mtime < tm) inode.mtime = tm;
|
||||
{
|
||||
map<frag_t,int> dfsz;
|
||||
::_decode(dfsz, bl, off);
|
||||
// hmm which to keep?
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "CDentry.h"
|
||||
#include "SimpleLock.h"
|
||||
#include "FileLock.h"
|
||||
#include "ScatterLock.h"
|
||||
#include "Capability.h"
|
||||
|
||||
|
||||
@ -97,17 +98,16 @@ class CInode : public MDSCacheObject {
|
||||
static const int WAIT_SLAVEAGREE = (1<<0);
|
||||
static const int WAIT_AUTHPINNABLE = (1<<1);
|
||||
//static const int WAIT_SINGLEAUTH = (1<<2);
|
||||
static const int WAIT_DIR = (1<<3);
|
||||
static const int WAIT_LINK = (1<<4); // as in remotely nlink++
|
||||
static const int WAIT_ANCHORED = (1<<5);
|
||||
static const int WAIT_UNANCHORED = (1<<6);
|
||||
static const int WAIT_UNLINK = (1<<7); // as in remotely nlink--
|
||||
static const int WAIT_CAPS = (1<<8);
|
||||
static const int WAIT_DIR = (1<<2);
|
||||
static const int WAIT_ANCHORED = (1<<3);
|
||||
static const int WAIT_UNANCHORED = (1<<4);
|
||||
static const int WAIT_CAPS = (1<<5);
|
||||
|
||||
static const int WAIT_AUTHLOCK_OFFSET = 9;
|
||||
static const int WAIT_LINKLOCK_OFFSET = 9 + SimpleLock::WAIT_BITS;
|
||||
static const int WAIT_DIRFRAGTREELOCK_OFFSET = 9 + 2*SimpleLock::WAIT_BITS;;
|
||||
static const int WAIT_FILELOCK_OFFSET = 9 + 3*SimpleLock::WAIT_BITS;;
|
||||
static const int WAIT_AUTHLOCK_OFFSET = 6;
|
||||
static const int WAIT_LINKLOCK_OFFSET = 6 + SimpleLock::WAIT_BITS;
|
||||
static const int WAIT_DIRFRAGTREELOCK_OFFSET = 6 + 2*SimpleLock::WAIT_BITS;
|
||||
static const int WAIT_FILELOCK_OFFSET = 6 + 3*SimpleLock::WAIT_BITS;
|
||||
static const int WAIT_DIRLOCK_OFFSET = 6 + 4*SimpleLock::WAIT_BITS;
|
||||
|
||||
static const int WAIT_ANY = 0xffffffff;
|
||||
|
||||
@ -123,6 +123,7 @@ class CInode : public MDSCacheObject {
|
||||
inode_t inode; // the inode itself
|
||||
string symlink; // symlink dest, if symlink
|
||||
fragtree_t dirfragtree; // dir frag tree, if any
|
||||
map<frag_t,int> dirfrag_size; // size of each dirfrag
|
||||
|
||||
off_t last_open_journaled; // log offset for the last journaled EOpen
|
||||
|
||||
@ -187,7 +188,8 @@ protected:
|
||||
authlock(this, LOCK_OTYPE_IAUTH, WAIT_AUTHLOCK_OFFSET),
|
||||
linklock(this, LOCK_OTYPE_ILINK, WAIT_LINKLOCK_OFFSET),
|
||||
dirfragtreelock(this, LOCK_OTYPE_IDIRFRAGTREE, WAIT_DIRFRAGTREELOCK_OFFSET),
|
||||
filelock(this, LOCK_OTYPE_IFILE, WAIT_FILELOCK_OFFSET)
|
||||
filelock(this, LOCK_OTYPE_IFILE, WAIT_FILELOCK_OFFSET),
|
||||
dirlock(this, LOCK_OTYPE_IDIR, WAIT_DIRLOCK_OFFSET)
|
||||
{
|
||||
state = 0;
|
||||
if (auth) state_set(STATE_AUTH);
|
||||
@ -253,6 +255,7 @@ public:
|
||||
SimpleLock linklock;
|
||||
SimpleLock dirfragtreelock;
|
||||
FileLock filelock;
|
||||
ScatterLock dirlock;
|
||||
|
||||
SimpleLock* get_lock(int type) {
|
||||
switch (type) {
|
||||
@ -260,6 +263,7 @@ public:
|
||||
case LOCK_OTYPE_IAUTH: return &authlock;
|
||||
case LOCK_OTYPE_ILINK: return &linklock;
|
||||
case LOCK_OTYPE_IDIRFRAGTREE: return &dirfragtreelock;
|
||||
case LOCK_OTYPE_IDIR: return &dirlock;
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
@ -359,6 +363,8 @@ public:
|
||||
|
||||
if (get_caps_issued() & (CAP_FILE_WR|CAP_FILE_WRBUFFER) == 0)
|
||||
filelock.replicate_relax();
|
||||
|
||||
dirlock.replicate_relax();
|
||||
}
|
||||
|
||||
|
||||
|
@ -214,8 +214,8 @@ inline ostream& operator<<(ostream& out, FileLock& l)
|
||||
//out << get_lock_type_name(l.get_type()) << " ";
|
||||
out << get_filelock_state_name(l.get_state());
|
||||
if (!l.get_gather_set().empty()) out << " g=" << l.get_gather_set();
|
||||
if (l.get_num_rdlock())
|
||||
out << " r=" << l.get_num_rdlock();
|
||||
if (l.is_rdlocked())
|
||||
out << " r=" << l.get_num_rdlocks();
|
||||
if (l.is_xlocked())
|
||||
out << " x=" << l.get_xlocked_by();
|
||||
out << ")";
|
||||
|
@ -116,6 +116,7 @@ void Locker::send_lock_message(SimpleLock *lock, int msg, bufferlist &data)
|
||||
|
||||
bool Locker::acquire_locks(MDRequest *mdr,
|
||||
set<SimpleLock*> &rdlocks,
|
||||
set<SimpleLock*> &wrlocks,
|
||||
set<SimpleLock*> &xlocks)
|
||||
{
|
||||
dout(10) << "acquire_locks " << *mdr << endl;
|
||||
@ -125,13 +126,20 @@ bool Locker::acquire_locks(MDRequest *mdr,
|
||||
|
||||
// (local) AUTH PINS
|
||||
|
||||
// can i auth_pin everything?
|
||||
for (set<SimpleLock*>::iterator p = xlocks.begin();
|
||||
p != xlocks.end();
|
||||
// make list of items to authpin
|
||||
set<SimpleLock*> mustpin = xlocks;
|
||||
for (set<SimpleLock*>::iterator p = wrlocks.begin(); p != wrlocks.end(); ++p)
|
||||
mustpin.insert(*p);
|
||||
|
||||
// can i auth pin them all now?
|
||||
for (set<SimpleLock*>::iterator p = mustpin.begin();
|
||||
p != mustpin.end();
|
||||
++p) {
|
||||
dout(10) << "will xlock " << **p << " " << *(*p)->get_parent() << endl;
|
||||
dout(10) << "must authpin " << **p << " " << *(*p)->get_parent() << endl;
|
||||
|
||||
// sort in
|
||||
sorted.insert(*p);
|
||||
|
||||
if ((*p)->get_type() == LOCK_OTYPE_DN) {
|
||||
CDir *dir = ((CDentry*)(*p)->get_parent())->dir;
|
||||
dout(10) << "might auth_pin " << *dir << endl;
|
||||
@ -159,8 +167,8 @@ bool Locker::acquire_locks(MDRequest *mdr,
|
||||
}
|
||||
|
||||
// ok, grab the auth pins
|
||||
for (set<SimpleLock*>::iterator p = xlocks.begin();
|
||||
p != xlocks.end();
|
||||
for (set<SimpleLock*>::iterator p = mustpin.begin();
|
||||
p != mustpin.end();
|
||||
++p) {
|
||||
if ((*p)->get_type() == LOCK_OTYPE_DN) {
|
||||
CDir *dir = ((CDentry*)(*p)->get_parent())->dir;
|
||||
@ -175,6 +183,7 @@ bool Locker::acquire_locks(MDRequest *mdr,
|
||||
}
|
||||
}
|
||||
|
||||
// sort in rdlocks too
|
||||
for (set<SimpleLock*>::iterator p = rdlocks.begin();
|
||||
p != rdlocks.end();
|
||||
++p) {
|
||||
@ -193,7 +202,9 @@ bool Locker::acquire_locks(MDRequest *mdr,
|
||||
if (existing != mdr->locks.end() && *existing == *p) {
|
||||
// right kind?
|
||||
SimpleLock *had = *existing;
|
||||
if (xlocks.count(*p) == (had->get_xlocked_by() == mdr)) {
|
||||
if (xlocks.count(*p) == mdr->xlocks.count(*p) &&
|
||||
wrlocks.count(*p) == mdr->wrlocks.count(*p) &&
|
||||
rdlocks.count(*p) == mdr->rdlocks.count(*p)) {
|
||||
dout(10) << "acquire_locks already locked " << *had << " " << *had->get_parent() << endl;
|
||||
existing++;
|
||||
continue;
|
||||
@ -206,19 +217,25 @@ bool Locker::acquire_locks(MDRequest *mdr,
|
||||
existing++;
|
||||
dout(10) << "acquire_locks unlocking out-of-order " << **existing
|
||||
<< " " << *(*existing)->get_parent() << endl;
|
||||
if (had->get_xlocked_by() == mdr)
|
||||
if (mdr->xlocks.count(had))
|
||||
xlock_finish(had, mdr);
|
||||
else if (mdr->wrlocks.count(had))
|
||||
wrlock_finish(had, mdr);
|
||||
else
|
||||
rdlock_finish(had, mdr);
|
||||
}
|
||||
|
||||
// lock
|
||||
if (xlocks.count(*p)) {
|
||||
if (!xlock_start(*p, mdr))
|
||||
if (!xlock_start(*p, mdr))
|
||||
return false;
|
||||
dout(10) << "acquire_locks got xlock on " << **p << " " << *(*p)->get_parent() << endl;
|
||||
} else if (wrlocks.count(*p)) {
|
||||
if (!wrlock_start(*p, mdr))
|
||||
return false;
|
||||
dout(10) << "acquire_locks got wrlock on " << **p << " " << *(*p)->get_parent() << endl;
|
||||
} else {
|
||||
if (!rdlock_start(*p, mdr))
|
||||
if (!rdlock_start(*p, mdr))
|
||||
return false;
|
||||
dout(10) << "acquire_locks got rdlock on " << **p << " " << *(*p)->get_parent() << endl;
|
||||
}
|
||||
@ -228,8 +245,10 @@ bool Locker::acquire_locks(MDRequest *mdr,
|
||||
while (existing != mdr->locks.end()) {
|
||||
dout(10) << "acquire_locks unlocking " << *existing
|
||||
<< " " << *(*existing)->get_parent() << endl;
|
||||
if ((*existing)->get_xlocked_by() == mdr)
|
||||
if (mdr->xlocks.count(*existing))
|
||||
xlock_finish(*existing, mdr);
|
||||
else if (mdr->wrlocks.count(*existing))
|
||||
wrlock_finish(*existing, mdr);
|
||||
else
|
||||
rdlock_finish(*existing, mdr);
|
||||
}
|
||||
@ -241,23 +260,13 @@ bool Locker::acquire_locks(MDRequest *mdr,
|
||||
|
||||
// generics
|
||||
|
||||
/*
|
||||
bool Locker::rdlock_try(SimpleLock *lock, Context *con)
|
||||
{
|
||||
switch (lock->get_type()) {
|
||||
case LOCK_OTYPE_IFILE:
|
||||
return file_rdlock_try((FileLock*)lock, con);
|
||||
default:
|
||||
return simple_rdlock_try(lock, con);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mdr)
|
||||
{
|
||||
switch (lock->get_type()) {
|
||||
case LOCK_OTYPE_IFILE:
|
||||
return file_rdlock_start((FileLock*)lock, mdr);
|
||||
case LOCK_OTYPE_IDIR:
|
||||
return scatter_rdlock_start((ScatterLock*)lock, mdr);
|
||||
default:
|
||||
return simple_rdlock_start(lock, mdr);
|
||||
}
|
||||
@ -268,16 +277,40 @@ void Locker::rdlock_finish(SimpleLock *lock, MDRequest *mdr)
|
||||
switch (lock->get_type()) {
|
||||
case LOCK_OTYPE_IFILE:
|
||||
return file_rdlock_finish((FileLock*)lock, mdr);
|
||||
case LOCK_OTYPE_IDIR:
|
||||
return scatter_rdlock_finish((ScatterLock*)lock, mdr);
|
||||
default:
|
||||
return simple_rdlock_finish(lock, mdr);
|
||||
}
|
||||
}
|
||||
|
||||
bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mdr)
|
||||
{
|
||||
switch (lock->get_type()) {
|
||||
case LOCK_OTYPE_IDIR:
|
||||
return scatter_wrlock_start((ScatterLock*)lock, mdr);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
void Locker::wrlock_finish(SimpleLock *lock, MDRequest *mdr)
|
||||
{
|
||||
switch (lock->get_type()) {
|
||||
case LOCK_OTYPE_IDIR:
|
||||
return scatter_wrlock_finish((ScatterLock*)lock, mdr);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
bool Locker::xlock_start(SimpleLock *lock, MDRequest *mdr)
|
||||
{
|
||||
switch (lock->get_type()) {
|
||||
case LOCK_OTYPE_IFILE:
|
||||
return file_xlock_start((FileLock*)lock, mdr);
|
||||
case LOCK_OTYPE_IDIR:
|
||||
assert(0);
|
||||
default:
|
||||
return simple_xlock_start(lock, mdr);
|
||||
}
|
||||
@ -288,6 +321,8 @@ void Locker::xlock_finish(SimpleLock *lock, MDRequest *mdr)
|
||||
switch (lock->get_type()) {
|
||||
case LOCK_OTYPE_IFILE:
|
||||
return file_xlock_finish((FileLock*)lock, mdr);
|
||||
case LOCK_OTYPE_IDIR:
|
||||
assert(0);
|
||||
default:
|
||||
return simple_xlock_finish(lock, mdr);
|
||||
}
|
||||
@ -712,6 +747,18 @@ void Locker::handle_lock(MLock *m)
|
||||
}
|
||||
break;
|
||||
|
||||
case LOCK_OTYPE_IDIR:
|
||||
{
|
||||
CInode *in = mdcache->get_inode(m->get_ino());
|
||||
if (!in) {
|
||||
dout(7) << "dont' have ino " << m->get_ino() << endl;
|
||||
delete m;
|
||||
return;
|
||||
}
|
||||
handle_scatter_lock(&in->dirlock, m);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
dout(7) << "handle_lock got otype " << m->get_otype() << endl;
|
||||
assert(0);
|
||||
@ -721,6 +768,11 @@ void Locker::handle_lock(MLock *m)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// ==========================================================================
|
||||
// simple lock
|
||||
|
||||
void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
|
||||
{
|
||||
int from = m->get_asker();
|
||||
@ -739,11 +791,11 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
|
||||
//|| lock->get_state() == LOCK_GLOCKR);
|
||||
|
||||
// wait for readers to finish?
|
||||
if (lock->get_num_rdlock() > 0) {
|
||||
dout(7) << "handle_simple_lock has reader, waiting before ack on " << *lock << " on " << *lock->get_parent()
|
||||
<< endl;
|
||||
if (lock->is_rdlocked()) {
|
||||
dout(7) << "handle_simple_lock has reader, waiting before ack on " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
lock->set_state(LOCK_GLOCKR);
|
||||
lock->add_waiter(SimpleLock::WAIT_NORD, new C_MDS_RetryMessage(mds, m));
|
||||
lock->add_waiter(SimpleLock::WAIT_NOLOCKS, new C_MDS_RetryMessage(mds, m));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -836,7 +888,7 @@ void Locker::simple_eval(SimpleLock *lock)
|
||||
switch (lock->get_state()) {
|
||||
case LOCK_GLOCKR:
|
||||
lock->set_state(LOCK_LOCK);
|
||||
lock->finish_waiters(SimpleLock::WAIT_LOCK|SimpleLock::WAIT_STABLE);
|
||||
lock->finish_waiters(SimpleLock::WAIT_STABLE);
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -851,7 +903,8 @@ void Locker::simple_eval(SimpleLock *lock)
|
||||
if (lock->get_state() != LOCK_SYNC &&
|
||||
lock->get_parent()->is_replicated() &&
|
||||
!lock->is_waiter_for(SimpleLock::WAIT_WR)) {
|
||||
dout(7) << "simple_eval stable, syncing " << *lock << " on " << *lock->get_parent() << endl;
|
||||
dout(7) << "simple_eval stable, syncing " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
simple_sync(lock);
|
||||
}
|
||||
|
||||
@ -965,9 +1018,9 @@ void Locker::simple_rdlock_finish(SimpleLock *lock, MDRequest *mdr)
|
||||
dout(7) << "simple_rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
|
||||
|
||||
if (lock->get_state() == LOCK_GLOCKR &&
|
||||
lock->get_num_rdlock() == 0) {
|
||||
!lock->is_rdlocked()) {
|
||||
lock->set_state(LOCK_SYNC); // return state to sync, in case the unpinner flails
|
||||
lock->finish_waiters(SimpleLock::WAIT_NORD);
|
||||
lock->finish_waiters(SimpleLock::WAIT_NOLOCKS);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1003,7 +1056,7 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
|
||||
return true;
|
||||
} else {
|
||||
// wait for lock
|
||||
lock->add_waiter(SimpleLock::WAIT_LOCK, new C_MDS_RetryRequest(mdcache, mdr));
|
||||
lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr));
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
@ -1118,9 +1171,264 @@ void Locker::dentry_anon_rdlock_trace_finish(vector<CDentry*>& trace)
|
||||
|
||||
|
||||
|
||||
// ==========================================================================
|
||||
// scatter lock
|
||||
|
||||
bool Locker::scatter_rdlock_start(ScatterLock *lock, MDRequest *mdr)
|
||||
{
|
||||
dout(7) << "scatter_rdlock_start on " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
|
||||
// pre-twiddle?
|
||||
if (lock->get_state() == LOCK_SCATTER &&
|
||||
lock->get_parent()->is_auth() &&
|
||||
!lock->get_parent()->is_replicated() &&
|
||||
!lock->is_wrlocked())
|
||||
scatter_sync(lock);
|
||||
|
||||
// can rdlock?
|
||||
if (lock->can_rdlock(mdr)) {
|
||||
lock->get_rdlock();
|
||||
mdr->rdlocks.insert(lock);
|
||||
mdr->locks.insert(lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
// wait for read.
|
||||
lock->add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mdr));
|
||||
|
||||
// initiate sync?
|
||||
if (lock->get_state() == LOCK_SCATTER &&
|
||||
lock->get_parent()->is_auth())
|
||||
scatter_sync(lock);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Locker::scatter_rdlock_finish(ScatterLock *lock, MDRequest *mdr)
|
||||
{
|
||||
dout(7) << "scatter_rdlock_finish on " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
lock->put_rdlock();
|
||||
if (mdr) {
|
||||
mdr->rdlocks.erase(lock);
|
||||
mdr->locks.erase(lock);
|
||||
}
|
||||
|
||||
scatter_eval(lock);
|
||||
}
|
||||
|
||||
|
||||
// ===============================
|
||||
bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr)
|
||||
{
|
||||
dout(7) << "scatter_wrlock_start on " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
|
||||
// pre-twiddle?
|
||||
if (lock->get_state() == LOCK_SYNC &&
|
||||
lock->get_parent()->is_auth() &&
|
||||
!lock->get_parent()->is_replicated() &&
|
||||
!lock->is_rdlocked())
|
||||
scatter_scatter(lock);
|
||||
|
||||
// can wrlock?
|
||||
if (lock->can_wrlock()) {
|
||||
lock->get_wrlock();
|
||||
mdr->wrlocks.insert(lock);
|
||||
mdr->locks.insert(lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
// wait for write.
|
||||
lock->add_waiter(SimpleLock::WAIT_WR, new C_MDS_RetryRequest(mdcache, mdr));
|
||||
|
||||
// initiate scatter?
|
||||
if (lock->get_state() == LOCK_SYNC &&
|
||||
lock->get_parent()->is_auth())
|
||||
scatter_scatter(lock);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Locker::scatter_wrlock_finish(ScatterLock *lock, MDRequest *mdr)
|
||||
{
|
||||
dout(7) << "scatter_wrlock_finish on " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
lock->put_wrlock();
|
||||
if (mdr) {
|
||||
mdr->wrlocks.erase(lock);
|
||||
mdr->locks.erase(lock);
|
||||
}
|
||||
|
||||
scatter_eval(lock);
|
||||
}
|
||||
|
||||
void Locker::scatter_eval(ScatterLock *lock)
|
||||
{
|
||||
if (!lock->get_parent()->is_auth()) {
|
||||
// REPLICA
|
||||
|
||||
if (lock->get_state() == LOCK_GSYNCS &&
|
||||
!lock->is_wrlocked()) {
|
||||
dout(10) << "scatter_eval no wrlocks, acking sync" << endl;
|
||||
bufferlist data;
|
||||
lock->encode_locked_state(data);
|
||||
mds->send_message_mds(new MLock(lock, LOCK_AC_SYNCACK, mds->get_nodeid(), data),
|
||||
lock->get_parent()->authority().first, MDS_PORT_LOCKER);
|
||||
lock->set_state(LOCK_SYNC);
|
||||
}
|
||||
|
||||
} else {
|
||||
// AUTH
|
||||
|
||||
// gsyncs -> sync?
|
||||
if (lock->get_state() == LOCK_GSYNCS &&
|
||||
!lock->is_gathering() &&
|
||||
!lock->is_wrlocked()) {
|
||||
dout(7) << "scatter_eval finished gather/un-wrlock on " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
lock->set_state(LOCK_SYNC);
|
||||
lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_RD|SimpleLock::WAIT_NOLOCKS);
|
||||
}
|
||||
|
||||
// gscatters -> scatter?
|
||||
if (lock->get_state() == LOCK_GSCATTERS &&
|
||||
!lock->is_rdlocked()) {
|
||||
assert(lock->get_parent()->is_auth());
|
||||
if (lock->get_parent()->is_replicated()) {
|
||||
// encode and bcast
|
||||
bufferlist data;
|
||||
lock->encode_locked_state(data);
|
||||
send_lock_message(lock, LOCK_AC_SCATTER, data);
|
||||
}
|
||||
|
||||
lock->set_state(LOCK_SCATTER);
|
||||
lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
|
||||
}
|
||||
|
||||
// waiting for rd?
|
||||
if (lock->get_state() == LOCK_SCATTER &&
|
||||
!lock->is_wrlocked() &&
|
||||
lock->is_waiter_for(SimpleLock::WAIT_RD)) {
|
||||
dout(10) << "scatter_eval no wrlocks, read waiter, syncing" << endl;
|
||||
scatter_sync(lock);
|
||||
}
|
||||
|
||||
// re-scatter?
|
||||
if (lock->get_state() == LOCK_SYNC &&
|
||||
!lock->is_rdlocked()) {
|
||||
dout(10) << "scatter_eval no rdlocks, scattering" << endl;
|
||||
scatter_scatter(lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Locker::scatter_sync(ScatterLock *lock)
|
||||
{
|
||||
dout(10) << "scatter_sync " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
assert(lock->get_parent()->is_auth());
|
||||
|
||||
if (lock->get_state() == LOCK_SYNC) return;
|
||||
assert(lock->get_state() == LOCK_SCATTER);
|
||||
|
||||
// bcast
|
||||
if (lock->get_parent()->is_replicated()) {
|
||||
send_lock_message(lock, LOCK_AC_SYNC);
|
||||
lock->set_state(LOCK_GSYNCS);
|
||||
lock->init_gather();
|
||||
}
|
||||
else if (lock->is_wrlocked()) {
|
||||
lock->set_state(LOCK_GSYNCS);
|
||||
} else {
|
||||
lock->set_state(LOCK_SYNC);
|
||||
lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Locker::scatter_scatter(ScatterLock *lock)
|
||||
{
|
||||
dout(10) << "scatter_scatter " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
assert(lock->get_parent()->is_auth());
|
||||
|
||||
if (lock->get_state() == LOCK_SCATTER) return;
|
||||
assert(lock->get_state() == LOCK_SYNC);
|
||||
|
||||
if (lock->is_rdlocked()) {
|
||||
lock->set_state(LOCK_GSCATTERS);
|
||||
} else {
|
||||
if (lock->get_parent()->is_replicated()) {
|
||||
// encode and bcast
|
||||
bufferlist data;
|
||||
lock->encode_locked_state(data);
|
||||
send_lock_message(lock, LOCK_AC_SCATTER, data);
|
||||
}
|
||||
lock->set_state(LOCK_SCATTER);
|
||||
lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
|
||||
{
|
||||
int from = m->get_asker();
|
||||
|
||||
switch (m->get_action()) {
|
||||
// -- replica --
|
||||
case LOCK_AC_SYNC:
|
||||
assert(lock->get_state() == LOCK_SCATTER);
|
||||
|
||||
// wait for wrlocks to close?
|
||||
if (lock->is_wrlocked()) {
|
||||
dout(7) << "handle_scatter_lock has wrlocks, waiting on " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
lock->set_state(LOCK_GSYNCS);
|
||||
} else {
|
||||
// encode and reply
|
||||
bufferlist data;
|
||||
lock->encode_locked_state(data);
|
||||
mds->send_message_mds(new MLock(lock, LOCK_AC_SYNCACK, mds->get_nodeid(), data),
|
||||
from, MDS_PORT_LOCKER);
|
||||
}
|
||||
break;
|
||||
|
||||
case LOCK_AC_SCATTER:
|
||||
assert(lock->get_state() == LOCK_SYNC);
|
||||
lock->decode_locked_state(m->get_data());
|
||||
lock->set_state(LOCK_SCATTER);
|
||||
lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
|
||||
break;
|
||||
|
||||
// -- for auth --
|
||||
case LOCK_AC_SYNCACK:
|
||||
assert(lock->get_state() == LOCK_GSYNCS);
|
||||
assert(lock->is_gathering(from));
|
||||
lock->remove_gather(from);
|
||||
lock->decode_locked_state(m->get_data());
|
||||
|
||||
if (lock->is_gathering()) {
|
||||
dout(7) << "handle_scatter_lock " << *lock << " on " << *lock->get_parent()
|
||||
<< " from " << from << ", still gathering " << lock->get_gather_set()
|
||||
<< endl;
|
||||
} else {
|
||||
dout(7) << "handle_scatter_lock " << *lock << " on " << *lock->get_parent()
|
||||
<< " from " << from << ", last one"
|
||||
<< endl;
|
||||
simple_eval(lock);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
delete m;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==========================================================================
|
||||
// file lock
|
||||
|
||||
|
||||
@ -1154,7 +1462,7 @@ bool Locker::file_rdlock_start(FileLock *lock, MDRequest *mdr)
|
||||
mdr->rdlocks.insert(lock);
|
||||
mdr->locks.insert(lock);
|
||||
|
||||
lock->finish_waiters(SimpleLock::WAIT_LOCK|SimpleLock::WAIT_STABLE);
|
||||
lock->finish_waiters(SimpleLock::WAIT_STABLE);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
@ -1202,8 +1510,8 @@ void Locker::file_rdlock_finish(FileLock *lock, MDRequest *mdr)
|
||||
|
||||
dout(7) << "rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
|
||||
|
||||
if (lock->get_num_rdlock() == 0) {
|
||||
lock->finish_waiters(SimpleLock::WAIT_NORD);
|
||||
if (!lock->is_rdlocked()) {
|
||||
lock->finish_waiters(SimpleLock::WAIT_NOLOCKS);
|
||||
file_eval(lock);
|
||||
}
|
||||
}
|
||||
@ -1262,7 +1570,7 @@ void Locker::file_xlock_finish(FileLock *lock, MDRequest *mdr)
|
||||
dout(7) << "file_xlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
|
||||
|
||||
// drop lock?
|
||||
if (!lock->is_waiter_for(SimpleLock::WAIT_LOCK))
|
||||
if (!lock->is_waiter_for(SimpleLock::WAIT_STABLE))
|
||||
file_eval(lock);
|
||||
}
|
||||
|
||||
@ -1297,7 +1605,7 @@ void Locker::file_eval(FileLock *lock)
|
||||
|
||||
// waiters
|
||||
lock->get_rdlock();
|
||||
lock->finish_waiters(SimpleLock::WAIT_LOCK|SimpleLock::WAIT_STABLE);
|
||||
lock->finish_waiters(SimpleLock::WAIT_STABLE);
|
||||
lock->put_rdlock();
|
||||
}
|
||||
break;
|
||||
@ -1415,7 +1723,7 @@ void Locker::file_eval(FileLock *lock)
|
||||
<< endl;
|
||||
|
||||
// * -> loner?
|
||||
if (lock->get_num_rdlock() == 0 &&
|
||||
if (!lock->is_rdlocked() &&
|
||||
!lock->is_waiter_for(SimpleLock::WAIT_WR) &&
|
||||
(wanted & CAP_FILE_WR) &&
|
||||
loner &&
|
||||
@ -1425,7 +1733,7 @@ void Locker::file_eval(FileLock *lock)
|
||||
}
|
||||
|
||||
// * -> mixed?
|
||||
else if (lock->get_num_rdlock() == 0 &&
|
||||
else if (!lock->is_rdlocked() &&
|
||||
!lock->is_waiter_for(SimpleLock::WAIT_WR) &&
|
||||
(wanted & CAP_FILE_RD) &&
|
||||
(wanted & CAP_FILE_WR) &&
|
||||
@ -1796,9 +2104,9 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m)
|
||||
dout(7) << "handle_file_lock client readers, gathering caps on " << *in << endl;
|
||||
issue_caps(in);
|
||||
}
|
||||
if (lock->get_num_rdlock() > 0) {
|
||||
dout(7) << "handle_file_lock readers, waiting before ack on " << *in << endl;
|
||||
in->add_waiter(SimpleLock::WAIT_NORD, new C_MDS_RetryMessage(mds, m));
|
||||
if (lock->is_rdlocked()) {
|
||||
dout(7) << "handle_file_lock rdlocked, waiting before ack on " << *in << endl;
|
||||
in->add_waiter(SimpleLock::WAIT_NOLOCKS, new C_MDS_RetryMessage(mds, m));
|
||||
lock->set_state(LOCK_GLOCKR);
|
||||
assert(0);// i am broken.. why retry message when state captures all the info i need?
|
||||
return;
|
||||
|
@ -45,6 +45,7 @@ class Capability;
|
||||
|
||||
class SimpleLock;
|
||||
class FileLock;
|
||||
class ScatterLock;
|
||||
|
||||
class Locker {
|
||||
private:
|
||||
@ -63,13 +64,15 @@ private:
|
||||
// -- locks --
|
||||
bool acquire_locks(MDRequest *mdr,
|
||||
set<SimpleLock*> &rdlocks,
|
||||
set<SimpleLock*> &wrlocks,
|
||||
set<SimpleLock*> &xlocks);
|
||||
|
||||
bool rdlock_try(SimpleLock *lock, Context *con);
|
||||
bool rdlock_start(SimpleLock *lock, MDRequest *mdr);
|
||||
void rdlock_finish(SimpleLock *lock, MDRequest *mdr);
|
||||
bool xlock_start(SimpleLock *lock, MDRequest *mdr);
|
||||
void xlock_finish(SimpleLock *lock, MDRequest *mdr);
|
||||
bool wrlock_start(SimpleLock *lock, MDRequest *mdr);
|
||||
void wrlock_finish(SimpleLock *lock, MDRequest *mdr);
|
||||
|
||||
// simple
|
||||
void handle_simple_lock(SimpleLock *lock, MLock *m);
|
||||
@ -86,6 +89,16 @@ private:
|
||||
void dentry_anon_rdlock_trace_start(vector<CDentry*>& trace);
|
||||
void dentry_anon_rdlock_trace_finish(vector<CDentry*>& trace);
|
||||
|
||||
// scatter
|
||||
void handle_scatter_lock(ScatterLock *lock, MLock *m);
|
||||
void scatter_eval(ScatterLock *lock);
|
||||
void scatter_sync(ScatterLock *lock);
|
||||
void scatter_scatter(ScatterLock *lock);
|
||||
bool scatter_rdlock_start(ScatterLock *lock, MDRequest *mdr);
|
||||
void scatter_rdlock_finish(ScatterLock *lock, MDRequest *mdr);
|
||||
bool scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr);
|
||||
void scatter_wrlock_finish(ScatterLock *lock, MDRequest *mdr);
|
||||
|
||||
// file
|
||||
void handle_file_lock(FileLock *lock, MLock *m);
|
||||
void file_eval(FileLock *lock);
|
||||
|
@ -3348,27 +3348,8 @@ void MDCache::request_drop_locks(MDRequest *mdr)
|
||||
mds->locker->xlock_finish(*mdr->xlocks.begin(), mdr);
|
||||
while (!mdr->rdlocks.empty())
|
||||
mds->locker->rdlock_finish(*mdr->rdlocks.begin(), mdr);
|
||||
|
||||
/*
|
||||
// foreign xlocks?
|
||||
if (active_requests[req].foreign_xlocks.size()) {
|
||||
set<CDentry*> dns = active_requests[req].foreign_xlocks;
|
||||
active_requests[req].foreign_xlocks.clear();
|
||||
|
||||
for (set<CDentry*>::iterator it = dns.begin();
|
||||
it != dns.end();
|
||||
it++) {
|
||||
CDentry *dn = *it;
|
||||
|
||||
dout(7) << "request_cleanup sending unxlock for foreign xlock on " << *dn << endl;
|
||||
assert(dn->is_xlocked());
|
||||
int dauth = dn->dir->dentry_authority(dn->name).first;
|
||||
MLock *m = new MLock(LOCK_AC_UNXLOCK, mds->get_nodeid());
|
||||
m->set_dn(dn->dir->dirfrag(), dn->name);
|
||||
mds->send_message_mds(m, dauth, MDS_PORT_CACHE);
|
||||
}
|
||||
}
|
||||
*/
|
||||
while (!mdr->wrlocks.empty())
|
||||
mds->locker->wrlock_finish(*mdr->wrlocks.begin(), mdr);
|
||||
|
||||
// make sure ref and trace are empty
|
||||
// if we are doing our own locking, we can't use them!
|
||||
|
@ -79,7 +79,8 @@ struct MDRequest {
|
||||
|
||||
// held locks
|
||||
set< SimpleLock* > rdlocks; // always local.
|
||||
set< SimpleLock* > xlocks; // may be remote.
|
||||
set< SimpleLock* > wrlocks; // always local.
|
||||
set< SimpleLock* > xlocks; // local or remote.
|
||||
set< SimpleLock*, SimpleLock::ptr_lt > locks; // full ordering
|
||||
|
||||
// projected updates
|
||||
|
102
branches/sage/cephmds2/mds/ScatterLock.h
Normal file
102
branches/sage/cephmds2/mds/ScatterLock.h
Normal file
@ -0,0 +1,102 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
|
||||
*
|
||||
* This is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software
|
||||
* Foundation. See file COPYING.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __SCATTERLOCK_H
|
||||
#define __SCATTERLOCK_H
|
||||
|
||||
#include "SimpleLock.h"
|
||||
|
||||
|
||||
// lock state machine states.
|
||||
#define LOCK_SYNC__ // rdlocks allowed (e.g., for stat)
|
||||
#define LOCK_GSYNCS -20 // waiting for replicas to gather
|
||||
#define LOCK_SCATTER 21 // mtime updates on replicas allowed, no reads.
|
||||
#define LOCK_GSCATTERS 22 // waiting for rdlocks to release
|
||||
|
||||
inline const char *get_scatterlock_state_name(int s) {
|
||||
switch(s) {
|
||||
case LOCK_SYNC: return "sync";
|
||||
case LOCK_GSYNCS: return "gsyncs";
|
||||
case LOCK_SCATTER: return "scatter";
|
||||
case LOCK_GSCATTERS: return "gscatters";
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
class ScatterLock : public SimpleLock {
|
||||
int num_wrlock;
|
||||
|
||||
public:
|
||||
ScatterLock(MDSCacheObject *o, int t, int wo) : SimpleLock(o, t, wo) {}
|
||||
|
||||
char get_replica_state() {
|
||||
switch (state) {
|
||||
case LOCK_SYNC:
|
||||
case LOCK_GSYNCS:
|
||||
case LOCK_GSCATTERS:
|
||||
return LOCK_SYNC;
|
||||
case LOCK_SCATTER:
|
||||
return LOCK_SCATTER;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
void replicate_relax() {
|
||||
if (state == LOCK_SYNC && !is_rdlocked())
|
||||
state = LOCK_SCATTER;
|
||||
}
|
||||
|
||||
// rdlock
|
||||
bool can_rdlock(MDRequest *mdr) {
|
||||
return state == LOCK_SYNC;
|
||||
}
|
||||
bool can_rdlock_soon() {
|
||||
return state == LOCK_SYNC || state == LOCK_GSYNCS;
|
||||
}
|
||||
|
||||
// wrlock
|
||||
bool can_wrlock() {
|
||||
return state == LOCK_SCATTER;
|
||||
}
|
||||
void get_wrlock() {
|
||||
assert(state == LOCK_SCATTER);
|
||||
++num_wrlock;
|
||||
}
|
||||
void put_wrlock() {
|
||||
--num_wrlock;
|
||||
}
|
||||
bool is_wrlocked() { return num_wrlock > 0; }
|
||||
int get_num_wrlocks() { return num_wrlock; }
|
||||
|
||||
};
|
||||
|
||||
inline ostream& operator<<(ostream& out, ScatterLock& l)
|
||||
{
|
||||
out << "(";
|
||||
//out << get_lock_type_name(l.get_type()) << " ";
|
||||
out << get_scatterlock_state_name(l.get_state());
|
||||
if (!l.get_gather_set().empty()) out << " g=" << l.get_gather_set();
|
||||
if (l.is_rdlocked())
|
||||
out << " r=" << l.get_num_rdlocks();
|
||||
//if (l.is_xlocked())
|
||||
//out << " x=" << l.get_xlocked_by();
|
||||
if (l.is_wrlocked())
|
||||
out << " wr=" << l.get_num_wrlocks();
|
||||
out << ")";
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
@ -721,13 +721,12 @@ CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, bool want_auth)
|
||||
}
|
||||
|
||||
// lock the path
|
||||
set<SimpleLock*> rdlocks;
|
||||
set<SimpleLock*> xlocks;
|
||||
set<SimpleLock*> rdlocks, empty;
|
||||
|
||||
for (unsigned i=0; i<trace.size(); i++)
|
||||
rdlocks.insert(&trace[i]->lock);
|
||||
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, xlocks))
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, empty, empty))
|
||||
return 0;
|
||||
|
||||
// set and pin ref
|
||||
@ -796,17 +795,17 @@ CDentry* Server::rdlock_path_xlock_dentry(MDRequest *mdr, bool okexist, bool mus
|
||||
}
|
||||
|
||||
// -- lock --
|
||||
set<SimpleLock*> rdlocks;
|
||||
set<SimpleLock*> xlocks;
|
||||
set<SimpleLock*> rdlocks, wrlocks, xlocks;
|
||||
|
||||
for (unsigned i=0; i<trace.size(); i++)
|
||||
rdlocks.insert(&trace[i]->lock);
|
||||
if (dn->is_null())
|
||||
xlocks.insert(&dn->lock); // new dn, xlock
|
||||
else
|
||||
if (dn->is_null()) {
|
||||
xlocks.insert(&dn->lock); // new dn, xlock
|
||||
wrlocks.insert(&dn->dir->inode->dirlock); // also, wrlock on dir mtime
|
||||
} else
|
||||
rdlocks.insert(&dn->lock); // existing dn, rdlock
|
||||
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, xlocks))
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
|
||||
return 0;
|
||||
|
||||
// save the locked trace.
|
||||
@ -900,23 +899,24 @@ void Server::handle_client_stat(MDRequest *mdr)
|
||||
CInode *ref = rdlock_path_pin_ref(mdr, false);
|
||||
if (!ref) return;
|
||||
|
||||
// FIXME: this is really not the way to handle the statlite mask.
|
||||
|
||||
// do I need file info?
|
||||
// which inode locks do I want?
|
||||
/* note: this works because we include existing locks in our lists,
|
||||
and because all new locks are on inodes and sort to the right of
|
||||
the dentry rdlocks previous acquired by rdlock_path_pin_ref(). */
|
||||
set<SimpleLock*> rdlocks = mdr->rdlocks;
|
||||
set<SimpleLock*> wrlocks = mdr->wrlocks;
|
||||
set<SimpleLock*> xlocks = mdr->xlocks;
|
||||
|
||||
int mask = req->args.stat.mask;
|
||||
if (mask & (INODE_MASK_SIZE|INODE_MASK_MTIME)) {
|
||||
// yes. do a full stat.
|
||||
if (!mds->locker->rdlock_start(&ref->filelock, mdr))
|
||||
return; // syncing
|
||||
mds->locker->rdlock_finish(&ref->filelock, mdr);
|
||||
} else {
|
||||
// nope! easy peasy.
|
||||
}
|
||||
|
||||
mds->balancer->hit_inode(ref, META_POP_IRD);
|
||||
|
||||
if (mask & INODE_MASK_LINK) rdlocks.insert(&ref->linklock);
|
||||
if (mask & INODE_MASK_AUTH) rdlocks.insert(&ref->authlock);
|
||||
if (ref->is_file() && mask & INODE_MASK_FILE) rdlocks.insert(&ref->filelock);
|
||||
if (ref->is_dir() && mask & INODE_MASK_MTIME) rdlocks.insert(&ref->dirlock);
|
||||
|
||||
mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks);
|
||||
|
||||
// reply
|
||||
//dout(10) << "reply to " << *req << " stat " << ref->inode.mtime << endl;
|
||||
dout(10) << "reply to stat on " << *req << endl;
|
||||
MClientReply *reply = new MClientReply(req);
|
||||
reply_request(mdr, reply, ref);
|
||||
}
|
||||
@ -1451,17 +1451,17 @@ void Server::handle_client_link(MDRequest *mdr)
|
||||
if (!dn) return;
|
||||
|
||||
// create lock lists
|
||||
set<SimpleLock*> rdlocks;
|
||||
set<SimpleLock*> xlocks;
|
||||
set<SimpleLock*> rdlocks, wrlocks, xlocks;
|
||||
|
||||
for (unsigned i=0; i<linktrace.size(); i++)
|
||||
rdlocks.insert(&linktrace[i]->lock);
|
||||
xlocks.insert(&dn->lock);
|
||||
wrlocks.insert(&dn->dir->inode->dirlock);
|
||||
for (unsigned i=0; i<targettrace.size(); i++)
|
||||
rdlocks.insert(&targettrace[i]->lock);
|
||||
xlocks.insert(&targeti->linklock);
|
||||
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, xlocks))
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
|
||||
return;
|
||||
|
||||
// go!
|
||||
@ -1699,15 +1699,15 @@ void Server::handle_client_unlink(MDRequest *mdr)
|
||||
}
|
||||
|
||||
// lock
|
||||
set<SimpleLock*> rdlocks;
|
||||
set<SimpleLock*> xlocks;
|
||||
set<SimpleLock*> rdlocks, wrlocks, xlocks;
|
||||
|
||||
for (unsigned i=0; i<trace.size()-1; i++)
|
||||
rdlocks.insert(&trace[i]->lock);
|
||||
xlocks.insert(&dn->lock);
|
||||
wrlocks.insert(&dn->dir->inode->dirlock);
|
||||
xlocks.insert(&in->linklock);
|
||||
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, xlocks))
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
|
||||
return;
|
||||
|
||||
// ok!
|
||||
@ -2087,23 +2087,24 @@ void Server::handle_client_rename(MDRequest *mdr)
|
||||
|
||||
|
||||
// -- locks --
|
||||
set<SimpleLock*> rdlocks;
|
||||
set<SimpleLock*> xlocks;
|
||||
set<SimpleLock*> rdlocks, wrlocks, xlocks;
|
||||
|
||||
// rdlock sourcedir path, xlock src dentry
|
||||
for (unsigned i=0; i<srctrace.size()-1; i++)
|
||||
rdlocks.insert(&srctrace[i]->lock);
|
||||
xlocks.insert(&srcdn->lock);
|
||||
wrlocks.insert(&srcdn->dir->inode->dirlock);
|
||||
|
||||
// rdlock destdir path, xlock dest dentry
|
||||
for (unsigned i=0; i<desttrace.size(); i++)
|
||||
rdlocks.insert(&desttrace[i]->lock);
|
||||
xlocks.insert(&destdn->lock);
|
||||
wrlocks.insert(&destdn->dir->inode->dirlock);
|
||||
|
||||
// xlock oldin
|
||||
if (oldin) xlocks.insert(&oldin->linklock);
|
||||
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, xlocks))
|
||||
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
|
||||
return;
|
||||
|
||||
|
||||
|
@ -23,8 +23,9 @@
|
||||
#define LOCK_OTYPE_IAUTH 3
|
||||
#define LOCK_OTYPE_ILINK 4
|
||||
#define LOCK_OTYPE_IDIRFRAGTREE 5
|
||||
#define LOCK_OTYPE_IDIR 6
|
||||
|
||||
#define LOCK_OTYPE_DIR 7 // not used
|
||||
//#define LOCK_OTYPE_DIR 7 // not used
|
||||
|
||||
inline const char *get_lock_type_name(int t) {
|
||||
switch (t) {
|
||||
@ -33,6 +34,7 @@ inline const char *get_lock_type_name(int t) {
|
||||
case LOCK_OTYPE_IAUTH: return "inode_auth";
|
||||
case LOCK_OTYPE_ILINK: return "inode_link";
|
||||
case LOCK_OTYPE_IDIRFRAGTREE: return "inode_dirfragtree";
|
||||
case LOCK_OTYPE_IDIR: return "inode_dir";
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
@ -59,12 +61,12 @@ class MDRequest;
|
||||
class SimpleLock {
|
||||
public:
|
||||
static const int WAIT_RD = (1<<0); // to read
|
||||
static const int WAIT_NORD = (1<<1); // for last rdlock to finish
|
||||
static const int WAIT_WR = (1<<2); // to write
|
||||
static const int WAIT_LOCK = (1<<3); // for locked state
|
||||
static const int WAIT_STABLE = (1<<4); // for a stable state
|
||||
static const int WAIT_REMOTEXLOCK = (1<<5); // for a remote xlock
|
||||
static const int WAIT_BITS = 6;
|
||||
static const int WAIT_WR = (1<<1); // to write
|
||||
static const int WAIT_NOLOCKS = (1<<2); // for last rdlock to finish
|
||||
//static const int WAIT_LOCK = (1<<3); // for locked state
|
||||
static const int WAIT_STABLE = (1<<3); // for a stable state
|
||||
static const int WAIT_REMOTEXLOCK = (1<<4); // for a remote xlock
|
||||
static const int WAIT_BITS = 5;
|
||||
|
||||
protected:
|
||||
// parent (what i lock)
|
||||
@ -154,7 +156,7 @@ public:
|
||||
assert(num_rdlock>0);
|
||||
return --num_rdlock;
|
||||
}
|
||||
int get_num_rdlock() { return num_rdlock; }
|
||||
int get_num_rdlocks() { return num_rdlock; }
|
||||
|
||||
void get_xlock(MDRequest *who) {
|
||||
assert(xlock_by == 0);
|
||||
@ -248,8 +250,8 @@ inline ostream& operator<<(ostream& out, SimpleLock& l)
|
||||
//out << get_lock_type_name(l.get_type()) << " ";
|
||||
out << get_simplelock_state_name(l.get_state());
|
||||
if (!l.get_gather_set().empty()) out << " g=" << l.get_gather_set();
|
||||
if (l.get_num_rdlock())
|
||||
out << " r=" << l.get_num_rdlock();
|
||||
if (l.is_rdlocked())
|
||||
out << " r=" << l.get_num_rdlocks();
|
||||
if (l.is_xlocked())
|
||||
out << " w=" << l.get_xlocked_by();
|
||||
out << ")";
|
||||
|
@ -26,8 +26,7 @@
|
||||
#define LOCK_AC_REQXLOCKACK -4 // req dentry xlock
|
||||
#define LOCK_AC_REQXLOCKNAK -5 // req dentry xlock
|
||||
|
||||
#define LOCK_AC_FOR_REPLICA(a) ((a) < 0)
|
||||
#define LOCK_AC_FOR_AUTH(a) ((a) > 0)
|
||||
#define LOCK_AC_SCATTER -6
|
||||
|
||||
// for auth
|
||||
#define LOCK_AC_SYNCACK 1
|
||||
@ -42,6 +41,9 @@
|
||||
#define LOCK_AC_FINISH 8
|
||||
|
||||
|
||||
#define LOCK_AC_FOR_REPLICA(a) ((a) < 0)
|
||||
#define LOCK_AC_FOR_AUTH(a) ((a) > 0)
|
||||
|
||||
|
||||
class MLock : public Message {
|
||||
int asker; // who is initiating this request
|
||||
@ -79,6 +81,14 @@ class MLock : public Message {
|
||||
this->action = action;
|
||||
this->asker = asker;
|
||||
}
|
||||
MLock(SimpleLock *lock, int action, int asker, bufferlist& bl) :
|
||||
Message(MSG_MDS_LOCK) {
|
||||
this->otype = lock->get_type();
|
||||
lock->get_parent()->set_mlock_info(this);
|
||||
this->action = action;
|
||||
this->asker = asker;
|
||||
data.claim(bl);
|
||||
}
|
||||
virtual char *get_type_name() { return "ILock"; }
|
||||
|
||||
void set_ino(inodeno_t ino, char ot) {
|
||||
@ -88,10 +98,12 @@ class MLock : public Message {
|
||||
void set_ino(inodeno_t ino) {
|
||||
this->ino = ino;
|
||||
}
|
||||
/*
|
||||
void set_dirfrag(dirfrag_t df) {
|
||||
otype = LOCK_OTYPE_DIR;
|
||||
this->dirfrag = df;
|
||||
}
|
||||
*/
|
||||
void set_dn(dirfrag_t df, const string& dn) {
|
||||
otype = LOCK_OTYPE_DN;
|
||||
this->dirfrag = df;
|
||||
|
Loading…
Reference in New Issue
Block a user