mirror of
https://github.com/ceph/ceph
synced 2025-01-03 01:22:53 +00:00
mds: introduce fine-grained discover dirfrag wait queue
Current discover dirfrag code only allows discovering one dirfrag at a time. This can cause a deadlock if there are directories that are fragmented into several dirfrags. For example:

      mds.0                            mds.1
-----------------------------------------------------------------
                                 freeze subtree (1.*) with bound (2.1*)
discover (2.0*)            ->
                                 handle discover (2.0*), frozen tree, wait
                           <-    export subtree (1.*) to mds.0 with bound (2.1*)
discover (2.1*), wait

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
This commit is contained in:
parent
2c909cda0e
commit
1ff776669b
@ -531,7 +531,7 @@ protected:
|
||||
map< inodeno_t, list<Context*> > waiting_on_ino;
|
||||
|
||||
public:
|
||||
// Reports whether waiting_on_dentry holds an entry keyed by the
// (dname, snap) pair.
// NOTE(review): two signature lines appear below because this text is a
// rendered diff with its +/- markers stripped; presumably the
// `const char *` form is the removed line and the `const string&` form is
// its replacement -- confirm against the real header.
bool is_waiting_for_dentry(const char *dname, snapid_t snap) {
|
||||
bool is_waiting_for_dentry(const string& dname, snapid_t snap) {
|
||||
return waiting_on_dentry.count(string_snap_t(dname, snap));
|
||||
}
|
||||
void add_dentry_waiter(const string& dentry, snapid_t snap, Context *c);
|
||||
|
@ -1959,6 +1959,30 @@ bool CInode::is_freezing()
|
||||
return false;
|
||||
}
|
||||
|
||||
// Queue context `c` on the wait list kept for dirfrag `fg` of this inode.
//
// If no dirfrag waiter was registered before this call, a PIN_DIRWAITER
// reference is taken first; take_dir_waiting()/take_waiting() drop it again
// once waiting_on_dir becomes empty.
void CInode::add_dir_waiter(frag_t fg, Context *c)
{
  const bool had_no_waiters = waiting_on_dir.empty();
  if (had_no_waiters)
    get(PIN_DIRWAITER);

  // operator[] creates the per-frag list on first use.
  list<Context*>& queue = waiting_on_dir[fg];
  queue.push_back(c);

  dout(10) << "add_dir_waiter frag " << fg << " " << c << " on " << *this << dendl;
}
|
||||
|
||||
// Move every context waiting on dirfrag `fg` to the end of `ls` and forget
// that frag's wait list. Does nothing when nobody waits on `fg`.
//
// When the removed list was the last one in waiting_on_dir, the
// PIN_DIRWAITER reference is released.
void CInode::take_dir_waiting(frag_t fg, list<Context*>& ls)
{
  // find() on an empty map yields end(), so one check covers both the
  // "no waiters at all" and the "no waiters for this frag" cases.
  map<frag_t, list<Context*> >::iterator entry = waiting_on_dir.find(fg);
  if (entry == waiting_on_dir.end())
    return;

  dout(10) << "take_dir_waiting frag " << fg << " on " << *this << dendl;
  ls.splice(ls.end(), entry->second);
  waiting_on_dir.erase(entry);

  if (waiting_on_dir.empty())
    put(PIN_DIRWAITER);
}
|
||||
|
||||
void CInode::add_waiter(uint64_t tag, Context *c)
|
||||
{
|
||||
dout(10) << "add_waiter tag " << std::hex << tag << std::dec << " " << c
|
||||
@ -1979,6 +2003,23 @@ void CInode::add_waiter(uint64_t tag, Context *c)
|
||||
MDSCacheObject::add_waiter(tag, c);
|
||||
}
|
||||
|
||||
void CInode::take_waiting(uint64_t mask, list<Context*>& ls)
|
||||
{
|
||||
if ((mask & WAIT_DIR) && !waiting_on_dir.empty()) {
|
||||
// take all dentry waiters
|
||||
while (!waiting_on_dir.empty()) {
|
||||
map<frag_t, list<Context*> >::iterator p = waiting_on_dir.begin();
|
||||
dout(10) << "take_waiting dirfrag " << p->first << " on " << *this << dendl;
|
||||
ls.splice(ls.end(), p->second);
|
||||
waiting_on_dir.erase(p);
|
||||
}
|
||||
put(PIN_DIRWAITER);
|
||||
}
|
||||
|
||||
// waiting
|
||||
MDSCacheObject::take_waiting(mask, ls);
|
||||
}
|
||||
|
||||
bool CInode::freeze_inode(int auth_pin_allowance)
|
||||
{
|
||||
assert(auth_pin_allowance > 0); // otherwise we need to adjust parent's nested_auth_pins
|
||||
|
@ -110,6 +110,7 @@ public:
|
||||
static const int PIN_DIRTYRSTAT = 21;
|
||||
static const int PIN_EXPORTINGCAPS = 22;
|
||||
static const int PIN_DIRTYPARENT = 23;
|
||||
static const int PIN_DIRWAITER = 24;
|
||||
|
||||
const char *pin_name(int p) {
|
||||
switch (p) {
|
||||
@ -135,6 +136,7 @@ public:
|
||||
case PIN_NEEDSNAPFLUSH: return "needsnapflush";
|
||||
case PIN_DIRTYRSTAT: return "dirtyrstat";
|
||||
case PIN_DIRTYPARENT: return "dirtyparent";
|
||||
case PIN_DIRWAITER: return "dirwaiter";
|
||||
default: return generic_pin_name(p);
|
||||
}
|
||||
}
|
||||
@ -570,10 +572,17 @@ private:
|
||||
_decode_locks_state(p, is_new);
|
||||
}
|
||||
|
||||
|
||||
// -- waiting --
|
||||
protected:
|
||||
map<frag_t, list<Context*> > waiting_on_dir;
|
||||
public:
|
||||
void add_dir_waiter(frag_t fg, Context *c);
|
||||
void take_dir_waiting(frag_t fg, list<Context*>& ls);
|
||||
// True when waiting_on_dir has a wait list registered for dirfrag `fg`.
bool is_waiting_for_dir(frag_t fg) {
  return waiting_on_dir.find(fg) != waiting_on_dir.end();
}
|
||||
void add_waiter(uint64_t tag, Context *c);
|
||||
|
||||
void take_waiting(uint64_t tag, list<Context*>& ls);
|
||||
|
||||
// -- encode/decode helpers --
|
||||
void _encode_base(bufferlist& bl);
|
||||
@ -584,7 +593,6 @@ private:
|
||||
void _decode_locks_state(bufferlist::iterator& p, bool is_new);
|
||||
void _decode_locks_rejoin(bufferlist::iterator& p, list<Context*>& waiters);
|
||||
|
||||
|
||||
// -- import/export --
|
||||
void encode_export(bufferlist& bl);
|
||||
void finish_export(utime_t now);
|
||||
|
@ -9727,7 +9727,7 @@ void MDCache::discover_dir_frag(CInode *base,
|
||||
dout(7) << "discover_dir_frag " << df
|
||||
<< " from mds." << from << dendl;
|
||||
|
||||
if (!base->is_waiter_for(CInode::WAIT_DIR) || !onfinish) { // FIXME: this is kind of weak!
|
||||
if (!base->is_waiting_for_dir(approx_fg) || !onfinish) {
|
||||
discover_info_t& d = _create_discover(from);
|
||||
d.ino = base->ino();
|
||||
d.frag = approx_fg;
|
||||
@ -9736,7 +9736,7 @@ void MDCache::discover_dir_frag(CInode *base,
|
||||
}
|
||||
|
||||
if (onfinish)
|
||||
base->add_waiter(CInode::WAIT_DIR, onfinish);
|
||||
base->add_dir_waiter(approx_fg, onfinish);
|
||||
}
|
||||
|
||||
struct C_MDC_RetryDiscoverPath : public Context {
|
||||
@ -9779,10 +9779,12 @@ void MDCache::discover_path(CInode *base,
|
||||
return;
|
||||
}
|
||||
|
||||
frag_t fg = base->pick_dirfrag(want_path[0]);
|
||||
if ((want_xlocked && want_path.depth() == 1) ||
|
||||
!base->is_waiter_for(CInode::WAIT_DIR) || !onfinish) { // FIXME: weak!
|
||||
!base->is_waiting_for_dir(fg) || !onfinish) {
|
||||
discover_info_t& d = _create_discover(from);
|
||||
d.ino = base->ino();
|
||||
d.frag = fg;
|
||||
d.snap = snap;
|
||||
d.want_path = want_path;
|
||||
d.want_base_dir = true;
|
||||
@ -9792,7 +9794,7 @@ void MDCache::discover_path(CInode *base,
|
||||
|
||||
// register + wait
|
||||
if (onfinish)
|
||||
base->add_waiter(CInode::WAIT_DIR, onfinish);
|
||||
base->add_dir_waiter(fg, onfinish);
|
||||
}
|
||||
|
||||
struct C_MDC_RetryDiscoverPath2 : public Context {
|
||||
@ -10329,46 +10331,36 @@ void MDCache::handle_discover_reply(MDiscoverReply *m)
|
||||
if (who >= 0)
|
||||
dout(7) << " dir_auth_hint is " << m->get_dir_auth_hint() << dendl;
|
||||
|
||||
// try again?
|
||||
if (m->get_error_dentry().length()) {
|
||||
// wanted a dentry
|
||||
frag_t fg = cur->pick_dirfrag(m->get_error_dentry());
|
||||
CDir *dir = cur->get_dirfrag(fg);
|
||||
filepath relpath(m->get_error_dentry(), 0);
|
||||
frag_t fg = m->get_base_dir_frag();
|
||||
CDir *dir = cur->get_dirfrag(fg);
|
||||
|
||||
if (cur->is_waiter_for(CInode::WAIT_DIR)) {
|
||||
if (cur->is_auth() || dir)
|
||||
cur->take_waiting(CInode::WAIT_DIR, finished);
|
||||
else
|
||||
discover_path(cur, m->get_wanted_snapid(), relpath, 0, m->get_wanted_xlocked(), who);
|
||||
} else
|
||||
dout(7) << " doing nothing, nobody is waiting for dir" << dendl;
|
||||
|
||||
if (dir) {
|
||||
// don't actually need the hint, now
|
||||
if (dir->is_waiting_for_dentry(m->get_error_dentry().c_str(), m->get_wanted_snapid())) {
|
||||
if (dir->is_auth() || dir->lookup(m->get_error_dentry()))
|
||||
dir->take_dentry_waiting(m->get_error_dentry(), m->get_wanted_snapid(),
|
||||
m->get_wanted_snapid(), finished);
|
||||
else
|
||||
discover_path(dir, m->get_wanted_snapid(), relpath, 0, m->get_wanted_xlocked());
|
||||
} else
|
||||
dout(7) << " doing nothing, have dir but nobody is waiting on dentry "
|
||||
<< m->get_error_dentry() << dendl;
|
||||
}
|
||||
} else {
|
||||
// wanted dir or ino
|
||||
frag_t fg = m->get_base_dir_frag();
|
||||
CDir *dir = cur->get_dirfrag(fg);
|
||||
|
||||
if (cur->is_waiter_for(CInode::WAIT_DIR)) {
|
||||
if (cur->is_auth() || dir)
|
||||
if (m->get_wanted_base_dir()) {
|
||||
if (cur->is_waiting_for_dir(fg)) {
|
||||
if (cur->is_auth())
|
||||
cur->take_waiting(CInode::WAIT_DIR, finished);
|
||||
else if (dir)
|
||||
cur->take_dir_waiting(fg, finished);
|
||||
else
|
||||
discover_dir_frag(cur, fg, 0, who);
|
||||
} else
|
||||
dout(7) << " doing nothing, nobody is waiting for dir" << dendl;
|
||||
}
|
||||
|
||||
// try again?
|
||||
if (m->get_error_dentry().length()) {
|
||||
// wanted a dentry
|
||||
if (dir && dir->is_waiting_for_dentry(m->get_error_dentry(), m->get_wanted_snapid())) {
|
||||
if (dir->is_auth() || dir->lookup(m->get_error_dentry())) {
|
||||
dir->take_dentry_waiting(m->get_error_dentry(), m->get_wanted_snapid(),
|
||||
m->get_wanted_snapid(), finished);
|
||||
} else {
|
||||
filepath relpath(m->get_error_dentry(), 0);
|
||||
discover_path(dir, m->get_wanted_snapid(), relpath, 0, m->get_wanted_xlocked());
|
||||
}
|
||||
} else
|
||||
dout(7) << " doing nothing, have dir but nobody is waiting on dentry "
|
||||
<< m->get_error_dentry() << dendl;
|
||||
} else {
|
||||
if (dir && m->get_wanted_ino() && dir->is_waiting_for_ino(m->get_wanted_ino())) {
|
||||
if (dir->is_auth() || get_inode(m->get_wanted_ino()))
|
||||
dir->take_ino_waiting(m->get_wanted_ino(), finished);
|
||||
@ -10428,7 +10420,7 @@ CDir *MDCache::add_replica_dir(bufferlist::iterator& p, CInode *diri, int from,
|
||||
dout(7) << "add_replica_dir added " << *dir << " nonce " << dir->replica_nonce << dendl;
|
||||
|
||||
// get waiters
|
||||
diri->take_waiting(CInode::WAIT_DIR, finished);
|
||||
diri->take_dir_waiting(df.frag, finished);
|
||||
}
|
||||
|
||||
return dir;
|
||||
@ -11510,11 +11502,13 @@ void MDCache::handle_fragment_notify(MMDSFragmentNotify *notify)
|
||||
// refragment
|
||||
list<Context*> waiters;
|
||||
list<CDir*> resultfrags;
|
||||
adjust_dir_fragments(diri, base, bits,
|
||||
resultfrags, waiters, false);
|
||||
adjust_dir_fragments(diri, base, bits, resultfrags, waiters, false);
|
||||
if (g_conf->mds_debug_frag)
|
||||
diri->verify_dirfrags();
|
||||
|
||||
for (list<CDir*>::iterator p = resultfrags.begin(); p != resultfrags.end(); ++p)
|
||||
diri->take_dir_waiting((*p)->get_frag(), waiters);
|
||||
|
||||
/*
|
||||
// add new replica dirs values
|
||||
bufferlist::iterator p = notify->basebl.begin();
|
||||
|
Loading…
Reference in New Issue
Block a user