// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */


#include "MDS.h"
#include "MDCache.h"
#include "Locker.h"
#include "Server.h"
#include "CInode.h"
#include "CDir.h"
#include "CDentry.h"
#include "Migrator.h"

#include "MDBalancer.h"
#include "MDLog.h"
#include "MDSMap.h"

#include "include/filepath.h"

#include "events/EInodeUpdate.h"
#include "events/EDirUpdate.h"
#include "events/EUnlink.h"

#include "msg/Messenger.h"

#include "messages/MGenericMessage.h"
#include "messages/MDiscover.h"
#include "messages/MDiscoverReply.h"

#include "messages/MDirUpdate.h"

#include "messages/MInodeFileCaps.h"

#include "messages/MInodeLink.h"
#include "messages/MInodeLinkAck.h"
#include "messages/MInodeUnlink.h"
#include "messages/MInodeUnlinkAck.h"

#include "messages/MLock.h"
#include "messages/MDentryUnlink.h"

#include "messages/MClientRequest.h"
#include "messages/MClientFileCaps.h"

#include <errno.h>
#include <assert.h>

#include "config.h"
#undef dout
#define dout(l)  if (l<=g_conf.debug || l <= g_conf.debug_mds) cout << g_clock.now() << " mds" << mds->get_nodeid() << ".locker "


void Locker::dispatch(Message *m)
{
  switch (m->get_type()) {

    // locking
  case MSG_MDS_LOCK:
    handle_lock((MLock*)m);
    break;

    // cache fun
  case MSG_MDS_INODEFILECAPS:
    handle_inode_file_caps((MInodeFileCaps*)m);
    break;

  case MSG_CLIENT_FILECAPS:
    handle_client_file_caps((MClientFileCaps*)m);
    break;

  default:
    assert(0);
  }
}




// file i/o -----------------------------------------

__uint64_t Locker::issue_file_data_version(CInode *in)
{
  dout(7) << "issue_file_data_version on " << *in << endl;
  return in->inode.file_data_version;
}
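
// Editorial example of the want-mask computation used by issue_new_caps()
// below (a sketch; the cap constants are defined elsewhere in the tree): a
// client opening with FILE_MODE_R|FILE_MODE_W ends up wanting
//   CAP_FILE_RDCACHE | CAP_FILE_RD | CAP_FILE_WRBUFFER | CAP_FILE_WR,
// i.e. both the "live" read/write caps and their cacheable variants.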

Capability* Locker::issue_new_caps(CInode *in,
                                   int mode,
                                   MClientRequest *req)
{
  dout(7) << "issue_new_caps for mode " << mode << " on " << *in << endl;

  // my needs
  int my_client = req->get_client();
  int my_want = 0;
  if (mode & FILE_MODE_R) my_want |= CAP_FILE_RDCACHE | CAP_FILE_RD;
  if (mode & FILE_MODE_W) my_want |= CAP_FILE_WRBUFFER | CAP_FILE_WR;

  // register a capability
  Capability *cap = in->get_client_cap(my_client);
  if (!cap) {
    // new cap
    Capability c(my_want);
    in->add_client_cap(my_client, c);
    cap = in->get_client_cap(my_client);

    // note client addr
    mds->clientmap.add_open(my_client, req->get_client_inst());

  } else {
    // make sure it has sufficient caps
    if (cap->wanted() & ~my_want) {
      // augment wanted caps for this client
      cap->set_wanted( cap->wanted() | my_want );
    }
  }

  // suppress file cap messages for this guy for a few moments (we'll bundle with the open() reply)
  cap->set_suppress(true);
  int before = cap->pending();

  if (in->is_auth()) {
    // [auth] twiddle mode?
    inode_file_eval(in);
  } else {
    // [replica] tell auth about any new caps wanted
    request_inode_file_caps(in);
  }

  // issue caps (pot. incl new one)
  issue_caps(in);  // note: _eval above may have done this already...

  // re-issue whatever we can
  cap->issue(cap->pending());

  // ok, stop suppressing.
  cap->set_suppress(false);

  int now = cap->pending();
  if (before != now &&
      (before & CAP_FILE_WR) == 0 &&
      (now & CAP_FILE_WR)) {
    // FIXME FIXME FIXME
  }

  // twiddle file_data_version?
  if ((before & CAP_FILE_WRBUFFER) == 0 &&
      (now & CAP_FILE_WRBUFFER)) {
    in->inode.file_data_version++;
    dout(7) << " incrementing file_data_version, now " << in->inode.file_data_version << " for " << *in << endl;
  }

  return cap;
}


bool Locker::issue_caps(CInode *in)
{
  // allowed caps are determined by the lock mode.
  int allowed = in->filelock.caps_allowed(in->is_auth());
  dout(7) << "issue_caps filelock allows=" << cap_string(allowed)
          << " on " << *in << endl;

  // count how many caps we actually (re)issue
  int nissued = 0;

  // client caps
  for (map<int, Capability>::iterator it = in->client_caps.begin();
       it != in->client_caps.end();
       it++) {
    if (it->second.issued() != (it->second.wanted() & allowed)) {
      // issue
      nissued++;

      int before = it->second.pending();
      long seq = it->second.issue(it->second.wanted() & allowed);
      int after = it->second.pending();

      // twiddle file_data_version?
      if (!(before & CAP_FILE_WRBUFFER) &&
          (after & CAP_FILE_WRBUFFER)) {
        dout(7) << " incrementing file_data_version for " << *in << endl;
        in->inode.file_data_version++;
      }

      if (seq > 0 &&
          !it->second.is_suppress()) {
        dout(7) << " sending MClientFileCaps to client" << it->first << " seq " << it->second.get_last_seq() << " new pending " << cap_string(it->second.pending()) << " was " << cap_string(before) << endl;
        mds->messenger->send_message(new MClientFileCaps(in->inode,
                                                         it->second.get_last_seq(),
                                                         it->second.pending(),
                                                         it->second.wanted()),
                                     MSG_ADDR_CLIENT(it->first), mds->clientmap.get_inst(it->first),
                                     0, MDS_PORT_LOCKER);
      }
    }
  }

  return (nissued == 0);  // true if nothing re-issued, no callbacks
}
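
// Editorial observation (not original documentation): issue_caps() returns
// true only when every client's issued caps already matched (wanted & allowed),
// i.e. nothing needed to be sent. The lock-transition code below mostly tracks
// outstanding client caps via in->get_caps_issued() rather than this return
// value.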


void Locker::request_inode_file_caps(CInode *in)
{
  int wanted = in->get_caps_wanted();
  if (wanted != in->replica_caps_wanted) {

    if (wanted == 0) {
      if (in->replica_caps_wanted_keep_until > g_clock.recent_now()) {
        // ok, release them finally!
        in->replica_caps_wanted_keep_until.sec_ref() = 0;
        dout(7) << "request_inode_file_caps " << cap_string(wanted)
                << " was " << cap_string(in->replica_caps_wanted)
                << ", not keeping anymore"
                << " on " << *in
                << endl;
      }
      else if (in->replica_caps_wanted_keep_until.sec() == 0) {
        in->replica_caps_wanted_keep_until = g_clock.recent_now();
        in->replica_caps_wanted_keep_until.sec_ref() += 2;

        dout(7) << "request_inode_file_caps " << cap_string(wanted)
                << " was " << cap_string(in->replica_caps_wanted)
                << " keeping until " << in->replica_caps_wanted_keep_until
                << " on " << *in
                << endl;
        return;
      } else {
        // wait longer
        return;
      }
    } else {
      in->replica_caps_wanted_keep_until.sec_ref() = 0;
    }
    assert(!in->is_auth());

    int auth = in->authority();
    dout(7) << "request_inode_file_caps " << cap_string(wanted)
            << " was " << cap_string(in->replica_caps_wanted)
            << " on " << *in << " to mds" << auth << endl;
    assert(!in->is_auth());

    in->replica_caps_wanted = wanted;
    mds->send_message_mds(new MInodeFileCaps(in->ino(), mds->get_nodeid(),
                                             in->replica_caps_wanted),
                          auth, MDS_PORT_LOCKER);
  } else {
    in->replica_caps_wanted_keep_until.sec_ref() = 0;
  }
}
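
// Editorial note on the keep-until logic above: per the in-code comments, the
// intent is that when a replica's wanted set drops to zero it does not
// immediately tell the auth; it arms a ~2 second window
// (replica_caps_wanted_keep_until) and only propagates the zero-caps update
// once that window resolves, damping flapping when a client quickly reopens
// the same file.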

void Locker::handle_inode_file_caps(MInodeFileCaps *m)
{
  CInode *in = mdcache->get_inode(m->get_ino());
  assert(in);
  assert(in->is_auth() || in->is_proxy());

  dout(7) << "handle_inode_file_caps replica mds" << m->get_from() << " wants caps " << cap_string(m->get_caps()) << " on " << *in << endl;

  if (in->is_proxy()) {
    dout(7) << "proxy, fw" << endl;
    mds->send_message_mds(m, in->authority(), MDS_PORT_LOCKER);
    return;
  }

  if (m->get_caps())
    in->mds_caps_wanted[m->get_from()] = m->get_caps();
  else
    in->mds_caps_wanted.erase(m->get_from());

  inode_file_eval(in);
  delete m;
}


/*
 * note: we only get these from the client if
 *  - we are calling back previously issued caps (fewer than the client previously had)
 *  - or if the client releases (any of) its caps on its own
 */
void Locker::handle_client_file_caps(MClientFileCaps *m)
{
  int client = m->get_source().num();
  CInode *in = mdcache->get_inode(m->get_ino());
  Capability *cap = 0;
  if (in)
    cap = in->get_client_cap(client);

  if (!in || !cap) {
    if (!in) {
      dout(7) << "handle_client_file_caps on unknown ino " << m->get_ino() << ", dropping" << endl;
    } else {
      dout(7) << "handle_client_file_caps no cap for client" << client << " on " << *in << endl;
    }
    delete m;
    return;
  }

  assert(cap);

  // filter wanted based on what we could ever give out (given auth/replica status)
  int wanted = m->get_wanted() & in->filelock.caps_allowed_ever(in->is_auth());

  dout(7) << "handle_client_file_caps seq " << m->get_seq()
          << " confirms caps " << cap_string(m->get_caps())
          << " wants " << cap_string(wanted)
          << " from client" << client
          << " on " << *in
          << endl;

  // update wanted
  if (cap->wanted() != wanted)
    cap->set_wanted(wanted);

  // confirm caps
  int had = cap->confirm_receipt(m->get_seq(), m->get_caps());
  int has = cap->confirmed();
  if (cap->is_null()) {
    dout(7) << " cap for client" << client << " is now null, removing from " << *in << endl;
    in->remove_client_cap(client);
    if (!in->is_auth())
      request_inode_file_caps(in);

    // dec client addr counter
    mds->clientmap.dec_open(client);

    // tell client.
    MClientFileCaps *r = new MClientFileCaps(in->inode,
                                             0, 0, 0,
                                             MClientFileCaps::FILECAP_RELEASE);
    mds->messenger->send_message(r, m->get_source(), m->get_source_inst(), 0, MDS_PORT_LOCKER);
  }

  // merge in atime?
  if (m->get_inode().atime > in->inode.atime) {
    dout(7) << " taking atime " << m->get_inode().atime << " > "
            << in->inode.atime << " for " << *in << endl;
    in->inode.atime = m->get_inode().atime;
  }

  if ((has|had) & CAP_FILE_WR) {
    bool dirty = false;

    // mtime
    if (m->get_inode().mtime > in->inode.mtime) {
      dout(7) << " taking mtime " << m->get_inode().mtime << " > "
              << in->inode.mtime << " for " << *in << endl;
      in->inode.mtime = m->get_inode().mtime;
      dirty = true;
    }
    // size
    if (m->get_inode().size > in->inode.size) {
      dout(7) << " taking size " << m->get_inode().size << " > "
              << in->inode.size << " for " << *in << endl;
      in->inode.size = m->get_inode().size;
      dirty = true;
    }

    if (dirty)
      mds->mdlog->submit_entry(new EInodeUpdate(in));
  }

  // reevaluate, waiters
  inode_file_eval(in);
  in->finish_waiting(CINODE_WAIT_CAPS, 0);

  delete m;
}
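
// Editorial note: the merge above is deliberately "take the max" — atime,
// mtime and size from the client are adopted only when they exceed what the
// MDS already has, and mtime/size are only trusted from a client that held
// (or is releasing) a write cap; only those two mark the inode dirty in the
// log.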




// locks ----------------------------------------------------------------

/*

INODES:

= two types of inode metadata:
   hard  - uid/gid, mode
   file  - mtime, size
 ? atime - atime  (*)      <-- we want a lazy update strategy?

= correspondingly, two types of inode locks:
   hardlock - hard metadata
   filelock - file metadata

   -> These locks are completely orthogonal!

= metadata ops and how they affect inode metadata:
        sma=size mtime atime
   HARD FILE OP
  files:
    R   RRR stat
    RW      chmod/chown
    R    W  touch   ?ctime
    R       openr
          W read    atime
    R       openw
    Wc      openwc  ?ctime
        WW  write   size mtime
            close

  dirs:
    R     W readdir atime
        RRR  ( + implied stats on files)
    Rc  WW  mkdir         (ctime on new dir, size+mtime on parent dir)
    R   WW  link/unlink/rename/rmdir  (size+mtime on dir)


= relationship to client (writers):

  - ops in question are
    - stat ... need reasonable value for mtime (+ atime?)
      - maybe we want a "quicksync" type operation instead of full lock
    - truncate ... need to stop writers for the atomic truncate operation
      - need a full lock


= modes
  - SYNC
              Rauth  Rreplica  Wauth  Wreplica
        sync


ALSO:

  dirlock  - no dir changes (prior to unhashing)
  denlock  - dentry lock    (prior to unlink, rename)

*/
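
// Editorial summary of the filelock states used below (a sketch inferred
// from this file, not original documentation). The stable states are:
//   LOCK_SYNC   - replicas may read; no client write caps
//   LOCK_LOCK   - the mds holds it exclusively; no client caps outstanding
//   LOCK_MIXED  - read+write caps allowed, but no cacheable
//                 (RDCACHE/WRBUFFER) caps
//   LOCK_LONER  - a single client holds everything
// The LOCK_G* states (GLOCKR, GMIXEDR, GSYNCM, GLONERR, ...) are the
// transitional "gather" states: they wait for replica acks and/or client cap
// releases before landing on the corresponding stable state.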


void Locker::handle_lock(MLock *m)
{
  switch (m->get_otype()) {
  case LOCK_OTYPE_IHARD:
    handle_lock_inode_hard(m);
    break;

  case LOCK_OTYPE_IFILE:
    handle_lock_inode_file(m);
    break;

  case LOCK_OTYPE_DIR:
    handle_lock_dir(m);
    break;

  case LOCK_OTYPE_DN:
    handle_lock_dn(m);
    break;

  default:
    dout(7) << "handle_lock got otype " << m->get_otype() << endl;
    assert(0);
    break;
  }
}




// ===============================
// hard inode metadata

bool Locker::inode_hard_read_try(CInode *in, Context *con)
{
  dout(7) << "inode_hard_read_try on " << *in << endl;

  // can read?  grab ref.
  if (in->hardlock.can_read(in->is_auth()))
    return true;

  assert(!in->is_auth());

  // wait!
  dout(7) << "inode_hard_read_try waiting on " << *in << endl;
  in->add_waiter(CINODE_WAIT_HARDR, con);
  return false;
}

bool Locker::inode_hard_read_start(CInode *in, MClientRequest *m)
{
  dout(7) << "inode_hard_read_start on " << *in << endl;

  // can read?  grab ref.
  if (in->hardlock.can_read(in->is_auth())) {
    in->hardlock.get_read();
    return true;
  }

  // can't read, and replicated.
  assert(!in->is_auth());

  // wait!
  dout(7) << "inode_hard_read_start waiting on " << *in << endl;
  in->add_waiter(CINODE_WAIT_HARDR, new C_MDS_RetryRequest(mds, m, in));
  return false;
}


void Locker::inode_hard_read_finish(CInode *in)
{
  // drop ref
  assert(in->hardlock.can_read(in->is_auth()));
  in->hardlock.put_read();

  dout(7) << "inode_hard_read_finish on " << *in << endl;

  //if (in->hardlock.get_nread() == 0) in->finish_waiting(CINODE_WAIT_HARDNORD);
}


bool Locker::inode_hard_write_start(CInode *in, MClientRequest *m)
{
  dout(7) << "inode_hard_write_start on " << *in << endl;

  // if not replicated, i can twiddle lock at will
  if (in->is_auth() &&
      !in->is_cached_by_anyone() &&
      in->hardlock.get_state() != LOCK_LOCK)
    in->hardlock.set_state(LOCK_LOCK);

  // can write?  grab ref.
  if (in->hardlock.can_write(in->is_auth())) {
    assert(in->is_auth());
    if (!in->can_auth_pin()) {
      dout(7) << "inode_hard_write_start waiting for authpinnable on " << *in << endl;
      in->add_waiter(CINODE_WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, m, in));
      return false;
    }

    in->auth_pin();  // ugh, can't condition this on nwrite==0 bc we twiddle that in handle_lock_*
    in->hardlock.get_write();
    return true;
  }

  // can't write, replicated.
  if (in->is_auth()) {
    // auth
    if (in->hardlock.can_write_soon(in->is_auth())) {
      // just wait
    } else {
      // initiate lock
      inode_hard_lock(in);
    }

    dout(7) << "inode_hard_write_start waiting on " << *in << endl;
    in->add_waiter(CINODE_WAIT_HARDW, new C_MDS_RetryRequest(mds, m, in));

    return false;
  } else {
    // replica
    // fw to auth
    int auth = in->authority();
    dout(7) << "inode_hard_write_start " << *in << " on replica, fw to auth " << auth << endl;
    assert(auth != mds->get_nodeid());
    mdcache->request_forward(m, auth);
    return false;
  }
}


void Locker::inode_hard_write_finish(CInode *in)
{
  // drop ref
  assert(in->hardlock.can_write(in->is_auth()));
  in->hardlock.put_write();
  in->auth_unpin();
  dout(7) << "inode_hard_write_finish on " << *in << endl;

  // drop lock?
  if (in->hardlock.get_nwrite() == 0) {

    // auto-sync if alone.
    if (in->is_auth() &&
        !in->is_cached_by_anyone() &&
        in->hardlock.get_state() != LOCK_SYNC)
      in->hardlock.set_state(LOCK_SYNC);

    inode_hard_eval(in);
  }
}


void Locker::inode_hard_eval(CInode *in)
{
  // finished gather?
  if (in->is_auth() &&
      !in->hardlock.is_stable() &&
      in->hardlock.gather_set.empty()) {
    dout(7) << "inode_hard_eval finished gather on " << *in << endl;
    switch (in->hardlock.get_state()) {
    case LOCK_GLOCKR:
      in->hardlock.set_state(LOCK_LOCK);

      // waiters
      in->hardlock.get_write();
      in->finish_waiting(CINODE_WAIT_HARDRWB|CINODE_WAIT_HARDSTABLE);
      in->hardlock.put_write();
      break;

    default:
      assert(0);
    }
  }
  if (!in->hardlock.is_stable()) return;

  if (in->is_auth()) {

    // sync?
    if (in->is_cached_by_anyone() &&
        in->hardlock.get_nwrite() == 0 &&
        in->hardlock.get_state() != LOCK_SYNC) {
      dout(7) << "inode_hard_eval stable, syncing " << *in << endl;
      inode_hard_sync(in);
    }

  } else {
    // replica
  }
}


// mid

void Locker::inode_hard_sync(CInode *in)
{
  dout(7) << "inode_hard_sync on " << *in << endl;
  assert(in->is_auth());

  // check state
  if (in->hardlock.get_state() == LOCK_SYNC)
    return; // already sync
  if (in->hardlock.get_state() == LOCK_GLOCKR)
    assert(0); // um... hmm!
  assert(in->hardlock.get_state() == LOCK_LOCK);

  // hard data
  bufferlist harddata;
  in->encode_hard_state(harddata);

  // bcast to replicas
  for (set<int>::iterator it = in->cached_by_begin();
       it != in->cached_by_end();
       it++) {
    MLock *m = new MLock(LOCK_AC_SYNC, mds->get_nodeid());
    m->set_ino(in->ino(), LOCK_OTYPE_IHARD);
    m->set_data(harddata);
    mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
  }

  // change lock
  in->hardlock.set_state(LOCK_SYNC);

  // waiters?
  in->finish_waiting(CINODE_WAIT_HARDSTABLE);
}

void Locker::inode_hard_lock(CInode *in)
{
  dout(7) << "inode_hard_lock on " << *in << " hardlock=" << in->hardlock << endl;
  assert(in->is_auth());

  // check state
  if (in->hardlock.get_state() == LOCK_LOCK ||
      in->hardlock.get_state() == LOCK_GLOCKR)
    return;  // already lock or locking
  assert(in->hardlock.get_state() == LOCK_SYNC);

  // bcast to replicas
  for (set<int>::iterator it = in->cached_by_begin();
       it != in->cached_by_end();
       it++) {
    MLock *m = new MLock(LOCK_AC_LOCK, mds->get_nodeid());
    m->set_ino(in->ino(), LOCK_OTYPE_IHARD);
    mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
  }

  // change lock
  in->hardlock.set_state(LOCK_GLOCKR);
  in->hardlock.init_gather(in->get_cached_by());
}
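
// Editorial note: the broadcast-and-gather pattern above (send an MLock to
// every replica in cached_by, flip to a LOCK_G* state, init_gather(), then
// count acks back in the message handler) is the same pattern the filelock
// code below uses; only the set of states and the accompanying client-cap
// handling differ.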




// messenger

void Locker::handle_lock_inode_hard(MLock *m)
{
  assert(m->get_otype() == LOCK_OTYPE_IHARD);

  if (mds->logger) mds->logger->inc("lih");

  int from = m->get_asker();
  CInode *in = mdcache->get_inode(m->get_ino());

  if (LOCK_AC_FOR_AUTH(m->get_action())) {
    // auth
    assert(in);
    assert(in->is_auth() || in->is_proxy());
    dout(7) << "handle_lock_inode_hard " << *in << " hardlock=" << in->hardlock << endl;

    if (in->is_proxy()) {
      // fw
      int newauth = in->authority();
      assert(newauth >= 0);
      if (from == newauth) {
        dout(7) << "handle_lock " << m->get_ino() << " from " << from << ": proxy, but from new auth, dropping" << endl;
        delete m;
      } else {
        dout(7) << "handle_lock " << m->get_ino() << " from " << from << ": proxy, fw to " << newauth << endl;
        mds->send_message_mds(m, newauth, MDS_PORT_LOCKER);
      }
      return;
    }
  } else {
    // replica
    if (!in) {
      dout(7) << "handle_lock_inode_hard " << m->get_ino() << ": don't have it anymore" << endl;
      /* do NOT nak.. if we go that route we need to duplicate all the nonce funkiness
         to keep gather_set a proper/correct subset of cached_by.  better to use the existing
         cacheexpire mechanism instead!
      */
      delete m;
      return;
    }

    assert(!in->is_auth());
  }

  dout(7) << "handle_lock_inode_hard a=" << m->get_action() << " from " << from << " " << *in << " hardlock=" << in->hardlock << endl;

  CLock *lock = &in->hardlock;

  switch (m->get_action()) {
    // -- replica --
  case LOCK_AC_SYNC:
    assert(lock->get_state() == LOCK_LOCK);

    { // assim data
      int off = 0;
      in->decode_hard_state(m->get_data(), off);
    }

    // update lock
    lock->set_state(LOCK_SYNC);

    // no need to reply

    // waiters
    in->finish_waiting(CINODE_WAIT_HARDR|CINODE_WAIT_HARDSTABLE);
    break;

  case LOCK_AC_LOCK:
    assert(lock->get_state() == LOCK_SYNC);
    //|| lock->get_state() == LOCK_GLOCKR);

    // wait for readers to finish?
    if (lock->get_nread() > 0) {
      dout(7) << "handle_lock_inode_hard readers, waiting before ack on " << *in << endl;
      lock->set_state(LOCK_GLOCKR);
      in->add_waiter(CINODE_WAIT_HARDNORD,
                     new C_MDS_RetryMessage(mds,m));
      assert(0);  // does this ever happen?  (if so, fix hard_read_finish, and CInodeExport.update_inode!)
      return;
    } else {

      // update lock and reply
      lock->set_state(LOCK_LOCK);

      {
        MLock *reply = new MLock(LOCK_AC_LOCKACK, mds->get_nodeid());
        reply->set_ino(in->ino(), LOCK_OTYPE_IHARD);
        mds->send_message_mds(reply, from, MDS_PORT_LOCKER);
      }
    }
    break;


    // -- auth --
  case LOCK_AC_LOCKACK:
    assert(lock->state == LOCK_GLOCKR);
    assert(lock->gather_set.count(from));
    lock->gather_set.erase(from);

    if (lock->gather_set.size()) {
      dout(7) << "handle_lock_inode_hard " << *in << " from " << from << ", still gathering " << lock->gather_set << endl;
    } else {
      dout(7) << "handle_lock_inode_hard " << *in << " from " << from << ", last one" << endl;
      inode_hard_eval(in);
    }
  }
  delete m;
}




// =====================
// soft inode metadata


bool Locker::inode_file_read_start(CInode *in, MClientRequest *m)
{
  dout(7) << "inode_file_read_start " << *in << " filelock=" << in->filelock << endl;

  // can read?  grab ref.
  if (in->filelock.can_read(in->is_auth())) {
    in->filelock.get_read();
    return true;
  }

  // can't read, and replicated.
  if (in->filelock.can_read_soon(in->is_auth())) {
    // wait
    dout(7) << "inode_file_read_start can_read_soon " << *in << endl;
  } else {
    if (in->is_auth()) {
      // auth

      // FIXME or qsync?

      if (in->filelock.is_stable()) {
        inode_file_lock(in);     // lock, bc easiest to back off

        if (in->filelock.can_read(in->is_auth())) {
          in->filelock.get_read();

          in->filelock.get_write();
          in->finish_waiting(CINODE_WAIT_FILERWB|CINODE_WAIT_FILESTABLE);
          in->filelock.put_write();
          return true;
        }
      } else {
        dout(7) << "inode_file_read_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl;
        in->add_waiter(CINODE_WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, in));
        return false;
      }
    } else {
      // replica
      if (in->filelock.is_stable()) {

        // fw to auth
        int auth = in->authority();
        dout(7) << "inode_file_read_start " << *in << " on replica and async, fw to auth " << auth << endl;
        assert(auth != mds->get_nodeid());
        mdcache->request_forward(m, auth);
        return false;

      } else {
        // wait until stable
        dout(7) << "inode_file_read_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl;
        in->add_waiter(CINODE_WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, in));
        return false;
      }
    }
  }

  // wait
  dout(7) << "inode_file_read_start waiting on " << *in << ", filelock=" << in->filelock << endl;
  in->add_waiter(CINODE_WAIT_FILER, new C_MDS_RetryRequest(mds, m, in));

  return false;
}


void Locker::inode_file_read_finish(CInode *in)
{
  // drop ref
  assert(in->filelock.can_read(in->is_auth()));
  in->filelock.put_read();

  dout(7) << "inode_file_read_finish on " << *in << ", filelock=" << in->filelock << endl;

  if (in->filelock.get_nread() == 0) {
    in->finish_waiting(CINODE_WAIT_FILENORD);
    inode_file_eval(in);
  }
}
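
// Editorial note on the refcount pattern above: every *_start() that returns
// true has taken a read or write ref on the CLock (get_read/get_write), and
// the matching *_finish() drops it (put_read/put_write) and re-evaluates the
// lock once the count hits zero. A caller is expected to pair them, roughly:
//
//   if (locker->inode_file_read_start(in, req)) {
//     // ... do the read-side work ...
//     locker->inode_file_read_finish(in);
//   }  // else: a retry callback was queued and the request re-enters later.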


bool Locker::inode_file_write_start(CInode *in, MClientRequest *m)
{
  // can write?  grab ref.
  if (in->filelock.can_write(in->is_auth())) {
    in->filelock.get_write();
    return true;
  }

  // can't write, replicated.
  if (in->is_auth()) {
    // auth
    if (in->filelock.can_write_soon(in->is_auth())) {
      // just wait
    } else {
      if (!in->filelock.is_stable()) {
        dout(7) << "inode_file_write_start on auth, waiting for stable on " << *in << endl;
        in->add_waiter(CINODE_WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, in));
        return false;
      }

      // initiate lock
      inode_file_lock(in);

      if (in->filelock.can_write(in->is_auth())) {
        in->filelock.get_write();

        in->filelock.get_read();
        in->finish_waiting(CINODE_WAIT_FILERWB|CINODE_WAIT_FILESTABLE);
        in->filelock.put_read();
        return true;
      }
    }

    dout(7) << "inode_file_write_start on auth, waiting for write on " << *in << endl;
    in->add_waiter(CINODE_WAIT_FILEW, new C_MDS_RetryRequest(mds, m, in));
    return false;
  } else {
    // replica
    // fw to auth
    int auth = in->authority();
    dout(7) << "inode_file_write_start " << *in << " on replica, fw to auth " << auth << endl;
    assert(auth != mds->get_nodeid());
    mdcache->request_forward(m, auth);
    return false;
  }
}


void Locker::inode_file_write_finish(CInode *in)
{
  // drop ref
  assert(in->filelock.can_write(in->is_auth()));
  in->filelock.put_write();
  dout(7) << "inode_file_write_finish on " << *in << ", filelock=" << in->filelock << endl;

  // drop lock?
  if (in->filelock.get_nwrite() == 0) {
    in->finish_waiting(CINODE_WAIT_FILENOWR);
    inode_file_eval(in);
  }
}


/*
 * ...
 *
 * also called after client caps are acked to us
 * - checks if we're in an unstable soft state and can now move on to the next state
 * - checks if the soft state should change (e.g. because the last writer closed)
 */

void Locker::inode_file_eval(CInode *in)
{
  int issued = in->get_caps_issued();

  // [auth] finished gather?
  if (in->is_auth() &&
      !in->filelock.is_stable() &&
      in->filelock.gather_set.size() == 0) {
    dout(7) << "inode_file_eval finished mds gather on " << *in << endl;

    switch (in->filelock.get_state()) {
      // to lock
    case LOCK_GLOCKR:
    case LOCK_GLOCKM:
    case LOCK_GLOCKL:
      if (issued == 0) {
        in->filelock.set_state(LOCK_LOCK);

        // waiters
        in->filelock.get_read();
        in->filelock.get_write();
        in->finish_waiting(CINODE_WAIT_FILERWB|CINODE_WAIT_FILESTABLE);
        in->filelock.put_read();
        in->filelock.put_write();
      }
      break;

      // to mixed
    case LOCK_GMIXEDR:
      if ((issued & ~(CAP_FILE_RD)) == 0) {
        in->filelock.set_state(LOCK_MIXED);
        in->finish_waiting(CINODE_WAIT_FILESTABLE);
      }
      break;

    case LOCK_GMIXEDL:
      if ((issued & ~(CAP_FILE_WR)) == 0) {
        in->filelock.set_state(LOCK_MIXED);

        if (in->is_cached_by_anyone()) {
          // data
          bufferlist softdata;
          in->encode_file_state(softdata);

          // bcast to replicas
          for (set<int>::iterator it = in->cached_by_begin();
               it != in->cached_by_end();
               it++) {
            MLock *m = new MLock(LOCK_AC_MIXED, mds->get_nodeid());
            m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
            m->set_data(softdata);
            mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
          }
        }

        in->finish_waiting(CINODE_WAIT_FILESTABLE);
      }
      break;

      // to loner
    case LOCK_GLONERR:
      if (issued == 0) {
        in->filelock.set_state(LOCK_LONER);
        in->finish_waiting(CINODE_WAIT_FILESTABLE);
      }
      break;

    case LOCK_GLONERM:
      if ((issued & ~CAP_FILE_WR) == 0) {
        in->filelock.set_state(LOCK_LONER);
        in->finish_waiting(CINODE_WAIT_FILESTABLE);
      }
      break;

      // to sync
    case LOCK_GSYNCL:
    case LOCK_GSYNCM:
      if ((issued & ~(CAP_FILE_RD)) == 0) {
        in->filelock.set_state(LOCK_SYNC);

        { // bcast data to replicas
          bufferlist softdata;
          in->encode_file_state(softdata);

          for (set<int>::iterator it = in->cached_by_begin();
               it != in->cached_by_end();
               it++) {
            MLock *reply = new MLock(LOCK_AC_SYNC, mds->get_nodeid());
            reply->set_ino(in->ino(), LOCK_OTYPE_IFILE);
            reply->set_data(softdata);
            mds->send_message_mds(reply, *it, MDS_PORT_LOCKER);
          }
        }

        // waiters
        in->filelock.get_read();
        in->finish_waiting(CINODE_WAIT_FILER|CINODE_WAIT_FILESTABLE);
        in->filelock.put_read();
      }
      break;

    default:
      assert(0);
    }

    issue_caps(in);
  }

  // [replica] finished caps gather?
  if (!in->is_auth() &&
      !in->filelock.is_stable()) {
    switch (in->filelock.get_state()) {
    case LOCK_GMIXEDR:
      if ((issued & ~(CAP_FILE_RD)) == 0) {
        in->filelock.set_state(LOCK_MIXED);

        // ack
        MLock *reply = new MLock(LOCK_AC_MIXEDACK, mds->get_nodeid());
        reply->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        mds->send_message_mds(reply, in->authority(), MDS_PORT_LOCKER);
      }
      break;

    case LOCK_GLOCKR:
      if (issued == 0) {
        in->filelock.set_state(LOCK_LOCK);

        // ack
        MLock *reply = new MLock(LOCK_AC_LOCKACK, mds->get_nodeid());
        reply->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        mds->send_message_mds(reply, in->authority(), MDS_PORT_LOCKER);
      }
      break;

    default:
      assert(0);
    }
  }

  // !stable -> do nothing.
  if (!in->filelock.is_stable()) return;


  // stable.
  assert(in->filelock.is_stable());

  if (in->is_auth()) {
    // [auth]
    int wanted = in->get_caps_wanted();
    bool loner = (in->client_caps.size() == 1) && in->mds_caps_wanted.empty();
    dout(7) << "inode_file_eval wanted=" << cap_string(wanted)
            << "  filelock=" << in->filelock
            << "  loner=" << loner
            << endl;

    // * -> loner?
    if (in->filelock.get_nread() == 0 &&
        in->filelock.get_nwrite() == 0 &&
        (wanted & CAP_FILE_WR) &&
        loner &&
        in->filelock.get_state() != LOCK_LONER) {
      dout(7) << "inode_file_eval stable, bump to loner " << *in << ", filelock=" << in->filelock << endl;
      inode_file_loner(in);
    }

    // * -> mixed?
    else if (in->filelock.get_nread() == 0 &&
             in->filelock.get_nwrite() == 0 &&
             (wanted & CAP_FILE_RD) &&
             (wanted & CAP_FILE_WR) &&
             !(loner && in->filelock.get_state() == LOCK_LONER) &&
             in->filelock.get_state() != LOCK_MIXED) {
      dout(7) << "inode_file_eval stable, bump to mixed " << *in << ", filelock=" << in->filelock << endl;
      inode_file_mixed(in);
    }

    // * -> sync?
    else if (in->filelock.get_nwrite() == 0 &&
             !(wanted & CAP_FILE_WR) &&
             ((wanted & CAP_FILE_RD) ||
              in->is_cached_by_anyone() ||
              (!loner && in->filelock.get_state() == LOCK_LONER)) &&
             in->filelock.get_state() != LOCK_SYNC) {
      dout(7) << "inode_file_eval stable, bump to sync " << *in << ", filelock=" << in->filelock << endl;
      inode_file_sync(in);
    }

    // * -> lock?  (if not replicated or open)
    else if (!in->is_cached_by_anyone() &&
             wanted == 0 &&
             in->filelock.get_state() != LOCK_LOCK) {
      inode_file_lock(in);
    }

  } else {
    // replica
    // recall?  check waiters?  XXX
  }
}
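
// Editorial summary of the stable-state heuristic above (inferred from the
// code, not original documentation): with the lock stable and no local
// read/write refs held, the auth steers the filelock toward
//   LONER  if exactly one client (and no other mds) wants caps incl. write,
//   MIXED  if multiple parties want both read and write,
//   SYNC   if nobody wants write but someone wants read (or replicas exist),
//   LOCK   if the inode is unreplicated and nobody wants any caps.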


// mid

bool Locker::inode_file_sync(CInode *in)
{
  dout(7) << "inode_file_sync " << *in << " filelock=" << in->filelock << endl;

  assert(in->is_auth());

  // check state
  if (in->filelock.get_state() == LOCK_SYNC ||
      in->filelock.get_state() == LOCK_GSYNCL ||
      in->filelock.get_state() == LOCK_GSYNCM)
    return true;

  assert(in->filelock.is_stable());

  int issued = in->get_caps_issued();

  assert((in->get_caps_wanted() & CAP_FILE_WR) == 0);

  if (in->filelock.get_state() == LOCK_LOCK) {
    if (in->is_cached_by_anyone()) {
      // soft data
      bufferlist softdata;
      in->encode_file_state(softdata);

      // bcast to replicas
      for (set<int>::iterator it = in->cached_by_begin();
           it != in->cached_by_end();
           it++) {
        MLock *m = new MLock(LOCK_AC_SYNC, mds->get_nodeid());
        m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        m->set_data(softdata);
        mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
      }
    }

    // change lock
    in->filelock.set_state(LOCK_SYNC);

    // reissue caps
    issue_caps(in);
    return true;
  }

  else if (in->filelock.get_state() == LOCK_MIXED) {
    // writers?
    if (issued & CAP_FILE_WR) {
      // gather client write caps
      in->filelock.set_state(LOCK_GSYNCM);
      issue_caps(in);
    } else {
      // no writers, go straight to sync

      if (in->is_cached_by_anyone()) {
        // bcast to replicas
        for (set<int>::iterator it = in->cached_by_begin();
             it != in->cached_by_end();
             it++) {
          MLock *m = new MLock(LOCK_AC_SYNC, mds->get_nodeid());
          m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
          mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
        }
      }

      // change lock
      in->filelock.set_state(LOCK_SYNC);
    }
    return false;
  }

  else if (in->filelock.get_state() == LOCK_LONER) {
    // writers?
    if (issued & CAP_FILE_WR) {
      // gather client write caps
      in->filelock.set_state(LOCK_GSYNCL);
      issue_caps(in);
    } else {
      // no writers, go straight to sync
      if (in->is_cached_by_anyone()) {
        // bcast to replicas
        for (set<int>::iterator it = in->cached_by_begin();
             it != in->cached_by_end();
             it++) {
          MLock *m = new MLock(LOCK_AC_SYNC, mds->get_nodeid());
          m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
          mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
        }
      }

      // change lock
      in->filelock.set_state(LOCK_SYNC);
    }
    return false;
  }
  else
    assert(0); // wtf.

  return false;
}


void Locker::inode_file_lock(CInode *in)
{
  dout(7) << "inode_file_lock " << *in << " filelock=" << in->filelock << endl;

  assert(in->is_auth());

  // check state
  if (in->filelock.get_state() == LOCK_LOCK ||
      in->filelock.get_state() == LOCK_GLOCKR ||
      in->filelock.get_state() == LOCK_GLOCKM ||
      in->filelock.get_state() == LOCK_GLOCKL)
    return;  // lock or locking

  assert(in->filelock.is_stable());

  int issued = in->get_caps_issued();

  if (in->filelock.get_state() == LOCK_SYNC) {
    if (in->is_cached_by_anyone()) {
      // bcast to replicas
      for (set<int>::iterator it = in->cached_by_begin();
           it != in->cached_by_end();
           it++) {
        MLock *m = new MLock(LOCK_AC_LOCK, mds->get_nodeid());
        m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
      }
      in->filelock.init_gather(in->get_cached_by());

      // change lock
      in->filelock.set_state(LOCK_GLOCKR);

      // call back caps
      if (issued)
        issue_caps(in);
    } else {
      if (issued) {
        // call back caps
        in->filelock.set_state(LOCK_GLOCKR);
        issue_caps(in);
      } else {
        in->filelock.set_state(LOCK_LOCK);
      }
    }
  }

  else if (in->filelock.get_state() == LOCK_MIXED) {
    if (in->is_cached_by_anyone()) {
      // bcast to replicas
      for (set<int>::iterator it = in->cached_by_begin();
           it != in->cached_by_end();
           it++) {
        MLock *m = new MLock(LOCK_AC_LOCK, mds->get_nodeid());
        m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
      }
      in->filelock.init_gather(in->get_cached_by());

      // change lock
      in->filelock.set_state(LOCK_GLOCKM);

      // call back caps
      issue_caps(in);
    } else {
      //assert(issued);  // ??? -sage 2/19/06
      if (issued) {
        // change lock
        in->filelock.set_state(LOCK_GLOCKM);

        // call back caps
        issue_caps(in);
      } else {
        in->filelock.set_state(LOCK_LOCK);
      }
    }

  }
  else if (in->filelock.get_state() == LOCK_LONER) {
    if (issued & CAP_FILE_WR) {
      // change lock
      in->filelock.set_state(LOCK_GLOCKL);

      // call back caps
      issue_caps(in);
    } else {
      in->filelock.set_state(LOCK_LOCK);
    }
  }
  else
    assert(0); // wtf.
}


void Locker::inode_file_mixed(CInode *in)
{
  dout(7) << "inode_file_mixed " << *in << " filelock=" << in->filelock << endl;

  assert(in->is_auth());

  // check state
  if (in->filelock.get_state() == LOCK_GMIXEDR ||
      in->filelock.get_state() == LOCK_GMIXEDL)
    return;     // mixed or mixing

  assert(in->filelock.is_stable());

  int issued = in->get_caps_issued();

  if (in->filelock.get_state() == LOCK_SYNC) {
    if (in->is_cached_by_anyone()) {
      // bcast to replicas
      for (set<int>::iterator it = in->cached_by_begin();
           it != in->cached_by_end();
           it++) {
        MLock *m = new MLock(LOCK_AC_MIXED, mds->get_nodeid());
        m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
      }
      in->filelock.init_gather(in->get_cached_by());

      in->filelock.set_state(LOCK_GMIXEDR);
      issue_caps(in);
    } else {
      if (issued) {
        in->filelock.set_state(LOCK_GMIXEDR);
        issue_caps(in);
      } else {
        in->filelock.set_state(LOCK_MIXED);
      }
    }
  }

  else if (in->filelock.get_state() == LOCK_LOCK) {
    if (in->is_cached_by_anyone()) {
      // data
      bufferlist softdata;
      in->encode_file_state(softdata);

      // bcast to replicas
      for (set<int>::iterator it = in->cached_by_begin();
           it != in->cached_by_end();
           it++) {
        MLock *m = new MLock(LOCK_AC_MIXED, mds->get_nodeid());
        m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        m->set_data(softdata);
        mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
      }
    }

    // change lock
    in->filelock.set_state(LOCK_MIXED);
    issue_caps(in);
  }

  else if (in->filelock.get_state() == LOCK_LONER) {
    if (issued & CAP_FILE_WRBUFFER) {
      // gather up WRBUFFER caps
      in->filelock.set_state(LOCK_GMIXEDL);
      issue_caps(in);
    }
    else if (in->is_cached_by_anyone()) {
      // bcast to replicas
      for (set<int>::iterator it = in->cached_by_begin();
           it != in->cached_by_end();
           it++) {
        MLock *m = new MLock(LOCK_AC_MIXED, mds->get_nodeid());
        m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
      }
      in->filelock.set_state(LOCK_MIXED);
      issue_caps(in);
    } else {
      in->filelock.set_state(LOCK_MIXED);
      issue_caps(in);
    }
  }

  else
    assert(0); // wtf.
}


void Locker::inode_file_loner(CInode *in)
{
  dout(7) << "inode_file_loner " << *in << " filelock=" << in->filelock << endl;

  assert(in->is_auth());

  // check state
  if (in->filelock.get_state() == LOCK_LONER ||
      in->filelock.get_state() == LOCK_GLONERR ||
      in->filelock.get_state() == LOCK_GLONERM)
    return;

  assert(in->filelock.is_stable());
  assert((in->client_caps.size() == 1) && in->mds_caps_wanted.empty());

  if (in->filelock.get_state() == LOCK_SYNC) {
    if (in->is_cached_by_anyone()) {
      // bcast to replicas
      for (set<int>::iterator it = in->cached_by_begin();
           it != in->cached_by_end();
           it++) {
        MLock *m = new MLock(LOCK_AC_LOCK, mds->get_nodeid());
        m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
      }
      in->filelock.init_gather(in->get_cached_by());

      // change lock
      in->filelock.set_state(LOCK_GLONERR);
    } else {
      // only one guy with file open, who gets it all, so
      in->filelock.set_state(LOCK_LONER);
      issue_caps(in);
    }
  }

  else if (in->filelock.get_state() == LOCK_LOCK) {
    // change lock.  ignore replicas; they don't know about LONER.
    in->filelock.set_state(LOCK_LONER);
    issue_caps(in);
  }

  else if (in->filelock.get_state() == LOCK_MIXED) {
    if (in->is_cached_by_anyone()) {
      // bcast to replicas
      for (set<int>::iterator it = in->cached_by_begin();
           it != in->cached_by_end();
           it++) {
        MLock *m = new MLock(LOCK_AC_LOCK, mds->get_nodeid());
        m->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
      }
      in->filelock.init_gather(in->get_cached_by());

      // change lock
      in->filelock.set_state(LOCK_GLONERM);
    } else {
      in->filelock.set_state(LOCK_LONER);
      issue_caps(in);
    }
  }

  else
    assert(0);
}

// messenger

void Locker::handle_lock_inode_file(MLock *m)
{
  assert(m->get_otype() == LOCK_OTYPE_IFILE);

  if (mds->logger) mds->logger->inc("lif");

  CInode *in = mdcache->get_inode(m->get_ino());
  int from = m->get_asker();

  if (LOCK_AC_FOR_AUTH(m->get_action())) {
    // auth
    assert(in);
    assert(in->is_auth() || in->is_proxy());
    dout(7) << "handle_lock_inode_file " << *in << " hardlock=" << in->hardlock << endl;

    if (in->is_proxy()) {
      // fw
      int newauth = in->authority();
      assert(newauth >= 0);
      if (from == newauth) {
        dout(7) << "handle_lock " << m->get_ino() << " from " << from << ": proxy, but from new auth, dropping" << endl;
        delete m;
      } else {
        dout(7) << "handle_lock " << m->get_ino() << " from " << from << ": proxy, fw to " << newauth << endl;
        mds->send_message_mds(m, newauth, MDS_PORT_LOCKER);
      }
      return;
    }
  } else {
    // replica
    if (!in) {
      // drop it.  don't nak.
      dout(7) << "handle_lock " << m->get_ino() << ": don't have it anymore" << endl;
      delete m;
      return;
    }

    assert(!in->is_auth());
  }

  dout(7) << "handle_lock_inode_file a=" << m->get_action() << " from " << from << " " << *in << " filelock=" << in->filelock << endl;

  CLock *lock = &in->filelock;
  int issued = in->get_caps_issued();

  switch (m->get_action()) {
    // -- replica --
  case LOCK_AC_SYNC:
    assert(lock->get_state() == LOCK_LOCK ||
           lock->get_state() == LOCK_MIXED);

    { // assim data
      int off = 0;
      in->decode_file_state(m->get_data(), off);
    }

    // update lock
    lock->set_state(LOCK_SYNC);

    // no need to reply.

    // waiters
    in->filelock.get_read();
    in->finish_waiting(CINODE_WAIT_FILER|CINODE_WAIT_FILESTABLE);
    in->filelock.put_read();
    inode_file_eval(in);
    break;

  case LOCK_AC_LOCK:
    assert(lock->get_state() == LOCK_SYNC ||
           lock->get_state() == LOCK_MIXED);

    // call back caps?
    if (issued & CAP_FILE_RD) {
      dout(7) << "handle_lock_inode_file client readers, gathering caps on " << *in << endl;
      issue_caps(in);
    }
    if (lock->get_nread() > 0) {
      dout(7) << "handle_lock_inode_file readers, waiting before ack on " << *in << endl;
      in->add_waiter(CINODE_WAIT_FILENORD,
                     new C_MDS_RetryMessage(mds,m));
      lock->set_state(LOCK_GLOCKR);
      assert(0);  // i am broken.. why retry message when state captures all the info i need?
      return;
    }
    if (issued & CAP_FILE_RD) {
      lock->set_state(LOCK_GLOCKR);
      break;
    }

    // nothing to wait for, lock and ack.
    {
      lock->set_state(LOCK_LOCK);

      MLock *reply = new MLock(LOCK_AC_LOCKACK, mds->get_nodeid());
      reply->set_ino(in->ino(), LOCK_OTYPE_IFILE);
      mds->send_message_mds(reply, from, MDS_PORT_LOCKER);
    }
    break;

  case LOCK_AC_MIXED:
    assert(lock->get_state() == LOCK_SYNC ||
           lock->get_state() == LOCK_LOCK);

    if (lock->get_state() == LOCK_SYNC) {
      // MIXED
      if (issued & CAP_FILE_RD) {
        // call back client caps
        lock->set_state(LOCK_GMIXEDR);
        issue_caps(in);
        break;
      } else {
        // no clients, go straight to mixed
        lock->set_state(LOCK_MIXED);

        // ack
        MLock *reply = new MLock(LOCK_AC_MIXEDACK, mds->get_nodeid());
        reply->set_ino(in->ino(), LOCK_OTYPE_IFILE);
        mds->send_message_mds(reply, from, MDS_PORT_LOCKER);
      }
    } else {
      // LOCK
      lock->set_state(LOCK_MIXED);

      // no ack needed.
    }

    issue_caps(in);

    // waiters
    in->filelock.get_write();
    in->finish_waiting(CINODE_WAIT_FILEW|CINODE_WAIT_FILESTABLE);
    in->filelock.put_write();
    inode_file_eval(in);
    break;


    // -- auth --
  case LOCK_AC_LOCKACK:
    assert(lock->state == LOCK_GLOCKR ||
           lock->state == LOCK_GLOCKM ||
           lock->state == LOCK_GLONERM ||
           lock->state == LOCK_GLONERR);
    assert(lock->gather_set.count(from));
    lock->gather_set.erase(from);

    if (lock->gather_set.size()) {
      dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", still gathering " << lock->gather_set << endl;
    } else {
      dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", last one" << endl;
      inode_file_eval(in);
    }
    break;

  case LOCK_AC_SYNCACK:
    assert(lock->state == LOCK_GSYNCM);
    assert(lock->gather_set.count(from));
    lock->gather_set.erase(from);

    /* not used currently
    {
      // merge data  (keep largest size, mtime, etc.)
      int off = 0;
      in->decode_merge_file_state(m->get_data(), off);
    }
    */

    if (lock->gather_set.size()) {
      dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", still gathering " << lock->gather_set << endl;
    } else {
      dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", last one" << endl;
      inode_file_eval(in);
    }
    break;

  case LOCK_AC_MIXEDACK:
    assert(lock->state == LOCK_GMIXEDR);
    assert(lock->gather_set.count(from));
    lock->gather_set.erase(from);

    if (lock->gather_set.size()) {
      dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", still gathering " << lock->gather_set << endl;
    } else {
      dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", last one" << endl;
      inode_file_eval(in);
    }
    break;


  default:
    assert(0);
  }

  delete m;
}






void Locker::handle_lock_dir(MLock *m)
{

}



// DENTRY

bool Locker::dentry_xlock_start(CDentry *dn, Message *m, CInode *ref)
{
  dout(7) << "dentry_xlock_start on " << *dn << endl;

  // locked?
  if (dn->lockstate == DN_LOCK_XLOCK) {
    if (dn->xlockedby == m) return true;  // locked by me!

    // not by me, wait
    dout(7) << "dentry " << *dn << " xlock by someone else" << endl;
    dn->dir->add_waiter(CDIR_WAIT_DNREAD, dn->name,
                        new C_MDS_RetryRequest(mds,m,ref));
    return false;
  }

  // prelock?
  if (dn->lockstate == DN_LOCK_PREXLOCK) {
    if (dn->xlockedby == m) {
      dout(7) << "dentry " << *dn << " prexlock by me" << endl;
      dn->dir->add_waiter(CDIR_WAIT_DNLOCK, dn->name,
                          new C_MDS_RetryRequest(mds,m,ref));
    } else {
      dout(7) << "dentry " << *dn << " prexlock by someone else" << endl;
      dn->dir->add_waiter(CDIR_WAIT_DNREAD, dn->name,
                          new C_MDS_RetryRequest(mds,m,ref));
    }
    return false;
  }


  // lockable!
  assert(dn->lockstate == DN_LOCK_SYNC ||
         dn->lockstate == DN_LOCK_UNPINNING);

  // dir auth pinnable?
  if (!dn->dir->can_auth_pin()) {
    dout(7) << "dentry " << *dn << " dir not pinnable, waiting" << endl;
    dn->dir->add_waiter(CDIR_WAIT_AUTHPINNABLE,
                        new C_MDS_RetryRequest(mds,m,ref));
    return false;
  }

  // is dentry path pinned?
  if (dn->is_pinned()) {
    dout(7) << "dentry " << *dn << " pinned, waiting" << endl;
    dn->lockstate = DN_LOCK_UNPINNING;
    dn->dir->add_waiter(CDIR_WAIT_DNUNPINNED,
                        dn->name,
                        new C_MDS_RetryRequest(mds,m,ref));
    return false;
  }

  // pin path up to dentry!  (if success, point of no return)
  CDentry *pdn = dn->dir->inode->get_parent_dn();
  if (pdn) {
    if (mdcache->active_requests[m].traces.count(pdn)) {
      dout(7) << "already path pinned parent dentry " << *pdn << endl;
    } else {
      dout(7) << "pinning parent dentry " << *pdn << endl;
      vector<CDentry*> trace;
      mdcache->make_trace(trace, pdn->inode);
      assert(trace.size());

      if (!mdcache->path_pin(trace, m, new C_MDS_RetryRequest(mds, m, ref))) return false;

      mdcache->active_requests[m].traces[trace[trace.size()-1]] = trace;
    }
  }

  // pin dir!
  dn->dir->auth_pin();

  // mine!
  dn->xlockedby = m;

  if (dn->dir->is_open_by_anyone()) {
    dn->lockstate = DN_LOCK_PREXLOCK;

    // xlock with whom?
    set<int> who = dn->dir->get_open_by();
    dn->gather_set = who;

    // make path
    string path;
    dn->make_path(path);
    dout(10) << "path is " << path << " for " << *dn << endl;

    for (set<int>::iterator it = who.begin();
         it != who.end();
         it++) {
      MLock *m = new MLock(LOCK_AC_LOCK, mds->get_nodeid());
      m->set_dn(dn->dir->ino(), dn->name);
      m->set_path(path);
      mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
    }

    // wait
    dout(7) << "dentry_xlock_start locking, waiting for replicas " << endl;
    dn->dir->add_waiter(CDIR_WAIT_DNLOCK, dn->name,
                        new C_MDS_RetryRequest(mds, m, ref));
    return false;
  } else {
    dn->lockstate = DN_LOCK_XLOCK;
    mdcache->active_requests[dn->xlockedby].xlocks.insert(dn);
    return true;
  }
}
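
// Editorial sketch of the dentry xlock protocol above (inferred from the
// code): if any other mds has the dir open, the auth moves the dentry to
// DN_LOCK_PREXLOCK, sends LOCK_AC_LOCK to each of them, and records them in
// gather_set; the LOCK_AC_LOCKACK/LOCKNAK handling in handle_lock_dn() below
// drains that set and only then flips the dentry to DN_LOCK_XLOCK and wakes
// the waiting request.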

void Locker::dentry_xlock_finish(CDentry *dn, bool quiet)
{
  dout(7) << "dentry_xlock_finish on " << *dn << endl;

  assert(dn->xlockedby);
  if (dn->xlockedby == DN_XLOCK_FOREIGN) {
    dout(7) << "this was a foreign xlock" << endl;
  } else {
    // remove from request record
    assert(mdcache->active_requests[dn->xlockedby].xlocks.count(dn) == 1);
    mdcache->active_requests[dn->xlockedby].xlocks.erase(dn);
  }

  dn->xlockedby = 0;
  dn->lockstate = DN_LOCK_SYNC;

  // unpin parent dir?
  // -> no?  because we might have xlocked 2 things in this dir.
  //         instead, we let request_finish clean up the mess.

  // tell replicas?
  if (!quiet) {
    // tell even if dn is null.
    if (dn->dir->is_open_by_anyone()) {
      for (set<int>::iterator it = dn->dir->open_by_begin();
           it != dn->dir->open_by_end();
           it++) {
        MLock *m = new MLock(LOCK_AC_SYNC, mds->get_nodeid());
        m->set_dn(dn->dir->ino(), dn->name);
        mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
      }
    }
  }

  // unpin dir
  dn->dir->auth_unpin();
}

/*
 * onfinish->finish() will be called with
 * 0 on successful xlock,
 * -1 on failure
 */

class C_MDC_XlockRequest : public Context {
  Locker *mdc;
  CDir *dir;
  string dname;
  Message *req;
  Context *finisher;
public:
  C_MDC_XlockRequest(Locker *mdc,
                     CDir *dir, string& dname,
                     Message *req,
                     Context *finisher) {
    this->mdc = mdc;
    this->dir = dir;
    this->dname = dname;
    this->req = req;
    this->finisher = finisher;
  }

  void finish(int r) {
    mdc->dentry_xlock_request_finish(r, dir, dname, req, finisher);
  }
};

void Locker::dentry_xlock_request_finish(int r,
                                         CDir *dir, string& dname,
                                         Message *req,
                                         Context *finisher)
{
  dout(10) << "dentry_xlock_request_finish r = " << r << endl;
  if (r == 1) {  // 1 for xlock request success
    CDentry *dn = dir->lookup(dname);
    if (dn && dn->xlockedby == 0) {
      // success
      dn->xlockedby = req;   // our request was the winner
      dout(10) << "xlock request success, now xlocked by req " << req << " dn " << *dn << endl;

      // remember!
      mdcache->active_requests[req].foreign_xlocks.insert(dn);
    }
  }

  // retry request (or whatever)
  finisher->finish(0);
  delete finisher;
}

void Locker::dentry_xlock_request(CDir *dir, string& dname, bool create,
                                  Message *req, Context *onfinish)
{
  dout(10) << "dentry_xlock_request on dn " << dname << " create=" << create << " in " << *dir << endl;
  // send request
  int dauth = dir->dentry_authority(dname);
  MLock *m = new MLock(create ? LOCK_AC_REQXLOCKC : LOCK_AC_REQXLOCK, mds->get_nodeid());
  m->set_dn(dir->ino(), dname);
  mds->send_message_mds(m, dauth, MDS_PORT_LOCKER);

  // add waiter
  dir->add_waiter(CDIR_WAIT_DNREQXLOCK, dname,
                  new C_MDC_XlockRequest(this,
                                         dir, dname, req,
                                         onfinish));
}
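
// Editorial sketch of the remote xlock flow (inferred from this file): a
// non-auth mds calls dentry_xlock_request(), which sends LOCK_AC_REQXLOCK(C)
// to the dentry's auth and parks a C_MDC_XlockRequest waiter. The auth's
// handle_lock_dn() tries dentry_xlock_start() on the requester's behalf and
// answers LOCK_AC_REQXLOCKACK (holding the xlock and pins for the requester)
// or LOCK_AC_REQXLOCKNAK; back on the requester, that ack/nak fires the
// waiter via dentry_xlock_request_finish().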


void Locker::handle_lock_dn(MLock *m)
{
  assert(m->get_otype() == LOCK_OTYPE_DN);

  CInode *diri = mdcache->get_inode(m->get_ino());  // may be null
  CDir *dir = 0;
  if (diri) dir = diri->dir;           // may be null
  string dname = m->get_dn();
  int from = m->get_asker();
  CDentry *dn = 0;

  if (LOCK_AC_FOR_AUTH(m->get_action())) {
    // auth

    // normally we have it always
    if (diri && dir) {
      int dauth = dir->dentry_authority(dname);
      assert(dauth == mds->get_nodeid() || dir->is_proxy() ||  // mine or proxy,
             m->get_action() == LOCK_AC_REQXLOCKACK ||         // or we did a REQXLOCK and this is our ack/nak
             m->get_action() == LOCK_AC_REQXLOCKNAK);

      if (dir->is_proxy()) {

        assert(dauth >= 0);

        if (dauth == m->get_asker() &&
            (m->get_action() == LOCK_AC_REQXLOCK ||
             m->get_action() == LOCK_AC_REQXLOCKC)) {
          dout(7) << "handle_lock_dn got reqxlock from " << dauth << " and they are auth.. dropping on floor (their import will have woken them up)" << endl;
          if (mdcache->active_requests.count(m))
            mdcache->request_finish(m);
          else
            delete m;
          return;
        }

        dout(7) << "handle_lock_dn " << m << " " << m->get_ino() << " dname " << dname << " from " << from << ": proxy, fw to " << dauth << endl;

        // forward
        if (mdcache->active_requests.count(m)) {
          // xlock requests are requests, use request_* functions!
          assert(m->get_action() == LOCK_AC_REQXLOCK ||
                 m->get_action() == LOCK_AC_REQXLOCKC);
          // forward as a request
          mdcache->request_forward(m, dauth, MDS_PORT_LOCKER);
        } else {
          // not an xlock req, or it is and we just didn't register the request yet
          // forward normally
          mds->send_message_mds(m, dauth, MDS_PORT_LOCKER);
        }
        return;
      }

      dn = dir->lookup(dname);
    }

    // except with.. an xlock request?
    if (!dn) {
      assert(dir);  // we should still have the dir, though!  the requester has the dir open.
      switch (m->get_action()) {

      case LOCK_AC_LOCK:
        dout(7) << "handle_lock_dn xlock on " << dname << ", adding (null)" << endl;
        dn = dir->add_dentry(dname);
        break;

      case LOCK_AC_REQXLOCK:
        // send nak
        if (dir->state_test(CDIR_STATE_DELETED)) {
          dout(7) << "handle_lock_dn reqxlock on deleted dir " << *dir << ", nak" << endl;
        } else {
          dout(7) << "handle_lock_dn reqxlock on " << dname << " in " << *dir << " dne, nak" << endl;
        }
        {
          MLock *reply = new MLock(LOCK_AC_REQXLOCKNAK, mds->get_nodeid());
          reply->set_dn(dir->ino(), dname);
          reply->set_path(m->get_path());
          mds->send_message_mds(reply, m->get_asker(), MDS_PORT_LOCKER);
        }

        // finish request (if we got that far)
        if (mdcache->active_requests.count(m))
          mdcache->request_finish(m);

        delete m;
        return;

      case LOCK_AC_REQXLOCKC:
        dout(7) << "handle_lock_dn reqxlockc on " << dname << " in " << *dir << " dne (yet!)" << endl;
        break;

      default:
        assert(0);
      }
    }
  } else {
    // replica
    if (dir) dn = dir->lookup(dname);
    if (!dn) {
      dout(7) << "handle_lock_dn " << m << " don't have " << m->get_ino() << " dname " << dname << endl;

      if (m->get_action() == LOCK_AC_REQXLOCKACK ||
          m->get_action() == LOCK_AC_REQXLOCKNAK) {
        dout(7) << "handle_lock_dn got reqxlockack/nak, but don't have dn " << m->get_path() << ", discovering" << endl;
        //assert(0);  // how can this happen?  tell me now!

        vector<CDentry*> trace;
        filepath path = m->get_path();
        int r = mdcache->path_traverse(path, trace, true,
                                       m, new C_MDS_RetryMessage(mds,m),
                                       MDS_TRAVERSE_DISCOVER);
        assert(r>0);
        return;
      }

      if (m->get_action() == LOCK_AC_LOCK) {
        if (0) { // not anymore
          dout(7) << "handle_lock_dn don't have " << m->get_path() << ", discovering" << endl;

          vector<CDentry*> trace;
          filepath path = m->get_path();
          int r = mdcache->path_traverse(path, trace, true,
                                         m, new C_MDS_RetryMessage(mds,m),
                                         MDS_TRAVERSE_DISCOVER);
          assert(r>0);
        }
        if (1) {
          // NAK
          MLock *reply = new MLock(LOCK_AC_LOCKNAK, mds->get_nodeid());
          reply->set_dn(m->get_ino(), dname);
          mds->send_message_mds(reply, m->get_asker(), MDS_PORT_LOCKER);
        }
      } else {
        dout(7) << "safely ignoring." << endl;
        delete m;
      }
      return;
    }

    assert(dn);
  }

  if (dn) {
    dout(7) << "handle_lock_dn a=" << m->get_action() << " from " << from << " " << *dn << endl;
  } else {
    dout(7) << "handle_lock_dn a=" << m->get_action() << " from " << from << " " << dname << " in " << *dir << endl;
  }

  switch (m->get_action()) {
    // -- replica --
  case LOCK_AC_LOCK:
    assert(dn->lockstate == DN_LOCK_SYNC ||
           dn->lockstate == DN_LOCK_UNPINNING ||
           dn->lockstate == DN_LOCK_XLOCK);   // <-- bc the handle_lock_dn did the discover!

    if (dn->is_pinned()) {
      dn->lockstate = DN_LOCK_UNPINNING;

      // wait
      dout(7) << "dn pinned, waiting " << *dn << endl;
      dn->dir->add_waiter(CDIR_WAIT_DNUNPINNED,
                          dn->name,
                          new C_MDS_RetryMessage(mds, m));
      return;
    } else {
      dn->lockstate = DN_LOCK_XLOCK;
      dn->xlockedby = 0;

      // ack now
      MLock *reply = new MLock(LOCK_AC_LOCKACK, mds->get_nodeid());
      reply->set_dn(diri->ino(), dname);
      mds->send_message_mds(reply, from, MDS_PORT_LOCKER);
    }

    // wake up waiters
    dir->finish_waiting(CDIR_WAIT_DNLOCK, dname);   // ? will this happen on replica ?
    break;

  case LOCK_AC_SYNC:
    assert(dn->lockstate == DN_LOCK_XLOCK);
    dn->lockstate = DN_LOCK_SYNC;
    dn->xlockedby = 0;

    // null?  hose it.
    if (dn->is_null()) {
      dout(7) << "hosing null (and now sync) dentry " << *dn << endl;
      dir->remove_dentry(dn);
    }

    // wake up waiters
    dir->finish_waiting(CDIR_WAIT_DNREAD, dname);   // will this happen either?  YES: if a rename lock backs out
    break;

  case LOCK_AC_REQXLOCKACK:
  case LOCK_AC_REQXLOCKNAK:
    {
      dout(10) << "handle_lock_dn got ack/nak on a reqxlock for " << *dn << endl;
      list<Context*> finished;
      dir->take_waiting(CDIR_WAIT_DNREQXLOCK, m->get_dn(), finished, 1);  // TAKE ONE ONLY!
      finish_contexts(finished,
                      (m->get_action() == LOCK_AC_REQXLOCKACK) ? 1 : -1);
    }
    break;


    // -- auth --
  case LOCK_AC_LOCKACK:
  case LOCK_AC_LOCKNAK:
    assert(dn->gather_set.count(from) == 1);
    dn->gather_set.erase(from);
    if (dn->gather_set.size() == 0) {
      dout(7) << "handle_lock_dn finish gather, now xlock on " << *dn << endl;
      dn->lockstate = DN_LOCK_XLOCK;
      mdcache->active_requests[dn->xlockedby].xlocks.insert(dn);
      dir->finish_waiting(CDIR_WAIT_DNLOCK, dname);
    }
    break;


  case LOCK_AC_REQXLOCKC:
    // make sure it's a _file_, if it exists.
    if (dn && dn->inode && dn->inode->is_dir()) {
      dout(7) << "handle_lock_dn failing, reqxlockc on dir " << *dn->inode << endl;

      // nak
      string path;
      dn->make_path(path);

      MLock *reply = new MLock(LOCK_AC_REQXLOCKNAK, mds->get_nodeid());
      reply->set_dn(dir->ino(), dname);
      reply->set_path(path);
      mds->send_message_mds(reply, m->get_asker(), MDS_PORT_LOCKER);

      // done
      if (mdcache->active_requests.count(m))
        mdcache->request_finish(m);
      else
        delete m;
      return;
    }
    // fall-thru: otherwise handle it like a plain reqxlock.

  case LOCK_AC_REQXLOCK:
    if (dn) {
      dout(7) << "handle_lock_dn reqxlock on " << *dn << endl;
    } else {
      dout(7) << "handle_lock_dn reqxlock on " << dname << " in " << *dir << endl;
    }

    // start request?
    if (!mdcache->active_requests.count(m)) {
      vector<CDentry*> trace;
      if (!mdcache->request_start(m, dir->inode, trace))
        return;  // waiting for pin
    }

    // try to xlock!
    if (!dn) {
      assert(m->get_action() == LOCK_AC_REQXLOCKC);
      dn = dir->add_dentry(dname);
    }

    if (dn->xlockedby != m) {
      if (!dentry_xlock_start(dn, m, dir->inode)) {
        // hose null dn if we're waiting on something
        if (dn->is_clean() && dn->is_null() && dn->is_sync()) dir->remove_dentry(dn);
        return;  // waiting for xlock
      }
    } else {
      // successfully xlocked!  on behalf of requestor.
      string path;
      dn->make_path(path);

      dout(7) << "handle_lock_dn reqxlock success for " << m->get_asker() << " on " << *dn << ", acking" << endl;

      // ACK xlock request
      MLock *reply = new MLock(LOCK_AC_REQXLOCKACK, mds->get_nodeid());
      reply->set_dn(dir->ino(), dname);
      reply->set_path(path);
      mds->send_message_mds(reply, m->get_asker(), MDS_PORT_LOCKER);

      // note: keep request around in memory (to hold the xlock/pins on behalf of requester)
      return;
    }
    break;

  case LOCK_AC_UNXLOCK:
    dout(7) << "handle_lock_dn unxlock on " << *dn << endl;
    {
      string dname = dn->name;
      Message *m = dn->xlockedby;

      // finish request
      mdcache->request_finish(m);  // this will drop the locks (and unpin paths!)
      return;
    }
    break;

  default:
    assert(0);
  }

  delete m;
}