mirror of
https://github.com/ceph/ceph
synced 2025-03-19 17:06:24 +00:00
merged branches/sage/cephmds2 into trunk/ceph
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1407 29311d96-e01e-0410-9327-a35deaab8ce9
This commit is contained in:
parent
3b6ca5d05b
commit
26c8a9e688
@ -12,7 +12,7 @@ CFLAGS = -g -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE -DDARWI
|
||||
LDINC = ar -rc
|
||||
else
|
||||
# For linux
|
||||
CFLAGS = -g -fPIC -Wall -I. -D_FILE_OFFSET_BITS=64 -DMPICH_IGNORE_CXX_SEEK -D_REENTRANT -D_THREAD_SAFE
|
||||
CFLAGS = -g -fPIC -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE
|
||||
LDINC = ld -i -o
|
||||
endif
|
||||
|
||||
|
@ -46,22 +46,52 @@ sage doc
|
||||
|
||||
|
||||
sage mds
|
||||
- journal+recovery
|
||||
- local rename
|
||||
- how to notify replicas...
|
||||
/ - stray purge
|
||||
- stray reintegration
|
||||
- remote link
|
||||
- impl remote inode xlock
|
||||
- ESlaveUpdate replay, resolution, etc.
|
||||
- remote unlink
|
||||
- remote rename
|
||||
- file capabilities i/o
|
||||
- dirfrag split/merge
|
||||
- client readdir for dirfrags
|
||||
- consistency points/snapshots
|
||||
- dentry versions vs dirfrags...
|
||||
- statfs?
|
||||
|
||||
- finish multistage rejoin
|
||||
- trim_on_rejoin
|
||||
|
||||
- more testing of failures + thrashing.
|
||||
- is export prep dir open deadlock properly fixed by forge_replica_dir()?
|
||||
|
||||
- locker vs node failure
|
||||
- osd needs a set_floor_and_read op for safe failover/STOGITH-like semantics.
|
||||
- failures during recovery stages (resolve, rejoin)... make sure rejoin still works!
|
||||
- fix mds initial osdmap weirdness (which will currently screw up on standby -> almost anything)
|
||||
- incremental mdsmaps?
|
||||
- client failure
|
||||
|
||||
- dirfrag split
|
||||
- make sure we are freezing _before_ we fetch to complete the dirfrag, else
|
||||
we break commit()'s preconditions when it fetches an incomplete dir.
|
||||
|
||||
- detect and deal with client failure
|
||||
|
||||
- recovering open files
|
||||
- recovery will either have inode (from EOpen), or will provide path+cap to reassert open state.
|
||||
- path+cap window will require some fetching of metadata from disk before doing the rejoin
|
||||
- failures during migration.. what about client stale/reap stuff and misplaced WR caps?
|
||||
|
||||
- inode.max_size
|
||||
|
||||
- real chdir (directory "open")
|
||||
- relative metadata ops
|
||||
|
||||
|
||||
|
||||
- osd needs a set_floor_and_read op for safe failover/STOGITH-like semantics.
|
||||
|
||||
- incremental mdsmaps?
|
||||
|
||||
- EMetablob should return 'expired' if they have higher versions (and are thus described by a newer journal entry)
|
||||
- dir version/committed/etc versus migration, log expires.
|
||||
- DOCUMENT.
|
||||
@ -77,45 +107,14 @@ sage mds
|
||||
|
||||
- test open_remote_ino
|
||||
|
||||
- scatterlock
|
||||
- unlink, link, rename need to pre_dirty and update dir inode's mtime
|
||||
- tho need to make sure that behaves when dirfrag's inode is non-auth...
|
||||
|
||||
- FIXME how to journal root and stray inode content?
|
||||
- in particular, i care about dirfragtree.. get it on rejoin?
|
||||
- and dir sizes, if i add that... also on rejoin?
|
||||
|
||||
- recovering open files
|
||||
- recovery will either have inode (from EOpen), or will provide path+cap to reassert open state.
|
||||
- path+cap window will require some fetching of metadata from disk before doing the rejoin
|
||||
- failures during migration.. what about client stale/reap stuff and misplaced WR caps?
|
||||
|
||||
- inode.max_size
|
||||
|
||||
- journal+recovery
|
||||
- local rename
|
||||
- how to notify replicas...
|
||||
/ - stray purge
|
||||
- stray reintegration
|
||||
- remote link
|
||||
- impl remote inode xlock
|
||||
- ESlaveUpdate replay, resolution, etc.
|
||||
- remote unlink
|
||||
- rewrite to look link _link
|
||||
- remote rename
|
||||
- file capabilities i/o
|
||||
- filelock to control directory mtime, dentry changes
|
||||
- hmm, may have to change lock ordering, and Server::rdlock_path_pin_ref()
|
||||
- dirfrag split/merge
|
||||
- client readdir for dirfrags
|
||||
- consistency points/snapshots
|
||||
- dentry versions vs dirfrags...
|
||||
- real chdir (directory "open")
|
||||
- relative metadata ops
|
||||
- statfs?
|
||||
|
||||
|
||||
- fix lock caps gather ack versus ambiguous auth
|
||||
|
||||
|
||||
|
||||
|
||||
foreign rename
|
||||
@ -215,9 +214,13 @@ rados snapshots
|
||||
|
||||
|
||||
objecter
|
||||
- transaction prepare/commit
|
||||
- read+floor_lockout
|
||||
|
||||
osd/rados
|
||||
- transaction prepare/commit
|
||||
- rollback
|
||||
- rollback logging (to fix slow prepare vs rollback race)
|
||||
- read+floor_lockout for clean STOGITH-like/fencing semantics after failover.
|
||||
- separate out replication code into a PG class, to pave way for RAID
|
||||
|
||||
|
@ -150,6 +150,7 @@ md_config_t g_conf = {
|
||||
|
||||
// --- journaler ---
|
||||
journaler_allow_split_entries: true,
|
||||
journaler_safe: false, // wait for COMMIT on journal writes
|
||||
|
||||
// --- mds ---
|
||||
mds_cache_size: MDS_CACHE_SIZE,
|
||||
@ -158,14 +159,13 @@ md_config_t g_conf = {
|
||||
mds_decay_halflife: 30,
|
||||
|
||||
mds_beacon_interval: 5.0,
|
||||
mds_beacon_grace: 100.0,
|
||||
mds_beacon_grace: 10.0,
|
||||
|
||||
mds_log: true,
|
||||
mds_log_max_len: MDS_CACHE_SIZE / 3,
|
||||
mds_log_max_trimming: 10000,
|
||||
mds_log_read_inc: 1<<20,
|
||||
mds_log_pad_entry: 128,//256,//64,
|
||||
mds_log_before_reply: true,
|
||||
mds_log_flush_on_shutdown: true,
|
||||
mds_log_import_map_interval: 1024*1024, // frequency (in bytes) of EImportMap in log
|
||||
mds_log_eopen_size: 100, // # open inodes per log entry
|
||||
@ -189,6 +189,7 @@ md_config_t g_conf = {
|
||||
mds_bal_midchunk: .3, // any sub bigger than this taken in full
|
||||
mds_bal_minchunk: .001, // never take anything smaller than this
|
||||
|
||||
mds_trim_on_rejoin: true,
|
||||
mds_commit_on_shutdown: true,
|
||||
mds_shutdown_check: 0, //30,
|
||||
mds_shutdown_on_last_unmount: true,
|
||||
@ -231,7 +232,7 @@ md_config_t g_conf = {
|
||||
ebofs_cloneable: false,
|
||||
ebofs_verify: false,
|
||||
ebofs_commit_ms: 2000, // 0 = no forced commit timeout (for debugging/tracing)
|
||||
ebofs_idle_commit_ms: 100, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms
|
||||
ebofs_idle_commit_ms: 20, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms
|
||||
ebofs_oc_size: 10000, // onode cache
|
||||
ebofs_cc_size: 10000, // cnode cache
|
||||
ebofs_bc_size: (80 *256), // 4k blocks, *256 for MB
|
||||
@ -563,6 +564,9 @@ void parse_config_options(std::vector<char*>& args)
|
||||
else if (strcmp(args[i], "--objecter_buffer_uncommitted") == 0)
|
||||
g_conf.objecter_buffer_uncommitted = atoi(args[++i]);
|
||||
|
||||
else if (strcmp(args[i], "--journaler_safe") == 0)
|
||||
g_conf.journaler_safe = atoi(args[++i]);
|
||||
|
||||
else if (strcmp(args[i], "--mds_cache_size") == 0)
|
||||
g_conf.mds_cache_size = atoi(args[++i]);
|
||||
|
||||
@ -573,8 +577,6 @@ void parse_config_options(std::vector<char*>& args)
|
||||
|
||||
else if (strcmp(args[i], "--mds_log") == 0)
|
||||
g_conf.mds_log = atoi(args[++i]);
|
||||
else if (strcmp(args[i], "--mds_log_before_reply") == 0)
|
||||
g_conf.mds_log_before_reply = atoi(args[++i]);
|
||||
else if (strcmp(args[i], "--mds_log_max_len") == 0)
|
||||
g_conf.mds_log_max_len = atoi(args[++i]);
|
||||
else if (strcmp(args[i], "--mds_log_read_inc") == 0)
|
||||
|
@ -151,7 +151,8 @@ struct md_config_t {
|
||||
|
||||
// journaler
|
||||
bool journaler_allow_split_entries;
|
||||
|
||||
bool journaler_safe;
|
||||
|
||||
// mds
|
||||
int mds_cache_size;
|
||||
float mds_cache_mid;
|
||||
@ -166,7 +167,6 @@ struct md_config_t {
|
||||
int mds_log_max_trimming;
|
||||
int mds_log_read_inc;
|
||||
int mds_log_pad_entry;
|
||||
bool mds_log_before_reply;
|
||||
bool mds_log_flush_on_shutdown;
|
||||
off_t mds_log_import_map_interval;
|
||||
int mds_log_eopen_size;
|
||||
@ -190,9 +190,11 @@ struct md_config_t {
|
||||
float mds_bal_midchunk;
|
||||
float mds_bal_minchunk;
|
||||
|
||||
bool mds_trim_on_rejoin;
|
||||
bool mds_commit_on_shutdown;
|
||||
int mds_shutdown_check;
|
||||
bool mds_shutdown_on_last_unmount;
|
||||
|
||||
bool mds_verify_export_dirauth; // debug flag
|
||||
|
||||
bool mds_local_osd;
|
||||
@ -322,6 +324,8 @@ extern md_config_t g_debug_after_conf;
|
||||
#define dout(x) if ((x) <= g_conf.debug) std::cout
|
||||
#define dout2(x) if ((x) <= g_conf.debug) std::cout
|
||||
|
||||
#define pdout(x,p) if ((x) <= (p)) std::cout
|
||||
|
||||
/**
|
||||
* for cleaner output, bracket each line with
|
||||
* dbeginl (in the dout macro) and dendl (in place of endl).
|
||||
|
@ -1187,6 +1187,14 @@ void CDir::freeze_tree(Context *c)
|
||||
|
||||
void CDir::freeze_tree_finish(Context *c)
|
||||
{
|
||||
// still freezing? (we may have been canceled)
|
||||
if (!is_freezing()) {
|
||||
dout(10) << "freeze_tree_finish no longer freezing, done on " << *this << endl;
|
||||
c->finish(-1);
|
||||
delete c;
|
||||
return;
|
||||
}
|
||||
|
||||
// freezeable now?
|
||||
if (!is_freezeable()) {
|
||||
// wait again!
|
||||
@ -1242,6 +1250,7 @@ void CDir::unfreeze_tree()
|
||||
state_clear(STATE_FREEZINGTREE);
|
||||
|
||||
// cancel freeze waiters
|
||||
finish_waiting(WAIT_UNFREEZE);
|
||||
finish_waiting(WAIT_FREEZEABLE, -1);
|
||||
}
|
||||
}
|
||||
|
@ -61,17 +61,13 @@ ostream& operator<<(ostream& out, CInode& in)
|
||||
|
||||
out << " v" << in.get_version();
|
||||
|
||||
out << " auth=" << in.authlock;
|
||||
out << " link=" << in.linklock;
|
||||
out << " dft=" << in.dirfragtreelock;
|
||||
out << " file=" << in.filelock;
|
||||
out << " dir=" << in.dirlock;
|
||||
// locks
|
||||
out << " " << in.authlock;
|
||||
out << " " << in.linklock;
|
||||
out << " " << in.dirfragtreelock;
|
||||
out << " " << in.filelock;
|
||||
out << " " << in.dirlock;
|
||||
|
||||
if (in.get_num_ref()) {
|
||||
out << " |";
|
||||
in.print_pin_set(out);
|
||||
}
|
||||
|
||||
// hack: spit out crap on which clients have caps
|
||||
if (!in.get_client_caps().empty()) {
|
||||
out << " caps={";
|
||||
@ -83,6 +79,12 @@ ostream& operator<<(ostream& out, CInode& in)
|
||||
}
|
||||
out << "}";
|
||||
}
|
||||
|
||||
if (in.get_num_ref()) {
|
||||
out << " |";
|
||||
in.print_pin_set(out);
|
||||
}
|
||||
|
||||
out << " " << ∈
|
||||
out << "]";
|
||||
return out;
|
||||
|
@ -210,7 +210,7 @@ class FileLock : public SimpleLock {
|
||||
|
||||
void print(ostream& out) {
|
||||
out << "(";
|
||||
//out << get_lock_type_name(l.get_type()) << " ";
|
||||
out << get_lock_type_name(get_type()) << " ";
|
||||
out << get_filelock_state_name(get_state());
|
||||
if (!get_gather_set().empty()) out << " g=" << get_gather_set();
|
||||
if (is_rdlocked())
|
||||
|
@ -60,6 +60,7 @@
|
||||
|
||||
void Locker::dispatch(Message *m)
|
||||
{
|
||||
|
||||
switch (m->get_type()) {
|
||||
|
||||
// locking
|
||||
@ -89,16 +90,20 @@ void Locker::send_lock_message(SimpleLock *lock, int msg)
|
||||
for (map<int,int>::iterator it = lock->get_parent()->replicas_begin();
|
||||
it != lock->get_parent()->replicas_end();
|
||||
it++) {
|
||||
if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN)
|
||||
continue;
|
||||
MLock *m = new MLock(lock, msg, mds->get_nodeid());
|
||||
mds->send_message_mds(m, it->first, MDS_PORT_LOCKER);
|
||||
}
|
||||
}
|
||||
|
||||
void Locker::send_lock_message(SimpleLock *lock, int msg, bufferlist &data)
|
||||
void Locker::send_lock_message(SimpleLock *lock, int msg, const bufferlist &data)
|
||||
{
|
||||
for (map<int,int>::iterator it = lock->get_parent()->replicas_begin();
|
||||
it != lock->get_parent()->replicas_end();
|
||||
it++) {
|
||||
if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN)
|
||||
continue;
|
||||
MLock *m = new MLock(lock, msg, mds->get_nodeid());
|
||||
m->set_data(data);
|
||||
mds->send_message_mds(m, it->first, MDS_PORT_LOCKER);
|
||||
@ -499,9 +504,11 @@ void Locker::request_inode_file_caps(CInode *in)
|
||||
assert(!in->is_auth());
|
||||
|
||||
in->replica_caps_wanted = wanted;
|
||||
mds->send_message_mds(new MInodeFileCaps(in->ino(), mds->get_nodeid(),
|
||||
in->replica_caps_wanted),
|
||||
auth, MDS_PORT_LOCKER);
|
||||
|
||||
if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
|
||||
mds->send_message_mds(new MInodeFileCaps(in->ino(), mds->get_nodeid(),
|
||||
in->replica_caps_wanted),
|
||||
auth, MDS_PORT_LOCKER);
|
||||
} else {
|
||||
in->replica_caps_wanted_keep_until.sec_ref() = 0;
|
||||
}
|
||||
@ -509,18 +516,23 @@ void Locker::request_inode_file_caps(CInode *in)
|
||||
|
||||
void Locker::handle_inode_file_caps(MInodeFileCaps *m)
|
||||
{
|
||||
// nobody should be talking to us during recovery.
|
||||
assert(mds->is_rejoin() || mds->is_active() || mds->is_stopping());
|
||||
|
||||
// ok
|
||||
CInode *in = mdcache->get_inode(m->get_ino());
|
||||
assert(in);
|
||||
assert(in->is_auth());// || in->is_proxy());
|
||||
|
||||
dout(7) << "handle_inode_file_caps replica mds" << m->get_from() << " wants caps " << cap_string(m->get_caps()) << " on " << *in << endl;
|
||||
assert(in->is_auth());
|
||||
|
||||
/*if (in->is_proxy()) {
|
||||
dout(7) << "proxy, fw" << endl;
|
||||
mds->send_message_mds(m, in->authority().first, MDS_PORT_LOCKER);
|
||||
if (mds->is_rejoin() &&
|
||||
in->is_rejoining()) {
|
||||
dout(7) << "handle_inode_file_caps still rejoining " << *in << ", dropping " << *m << endl;
|
||||
delete m;
|
||||
return;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
dout(7) << "handle_inode_file_caps replica mds" << m->get_from() << " wants caps " << cap_string(m->get_caps()) << " on " << *in << endl;
|
||||
|
||||
if (m->get_caps())
|
||||
in->mds_caps_wanted[m->get_from()] = m->get_caps();
|
||||
@ -703,6 +715,9 @@ ALSO:
|
||||
|
||||
void Locker::handle_lock(MLock *m)
|
||||
{
|
||||
// nobody should be talking to us during recovery.
|
||||
assert(mds->is_rejoin() || mds->is_active() || mds->is_stopping());
|
||||
|
||||
switch (m->get_otype()) {
|
||||
case LOCK_OTYPE_DN:
|
||||
{
|
||||
@ -778,6 +793,15 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
|
||||
{
|
||||
int from = m->get_asker();
|
||||
|
||||
if (mds->is_rejoin()) {
|
||||
if (lock->get_parent()->is_rejoining()) {
|
||||
dout(7) << "handle_simple_lock still rejoining " << *lock->get_parent()
|
||||
<< ", dropping " << *m << endl;
|
||||
delete m;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
switch (m->get_action()) {
|
||||
// -- replica --
|
||||
case LOCK_AC_SYNC:
|
||||
@ -796,15 +820,12 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
|
||||
dout(7) << "handle_simple_lock has reader, waiting before ack on " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
lock->set_state(LOCK_GLOCKR);
|
||||
lock->add_waiter(SimpleLock::WAIT_NOLOCKS, new C_MDS_RetryMessage(mds, m));
|
||||
return;
|
||||
} else {
|
||||
// update lock and reply
|
||||
lock->set_state(LOCK_LOCK);
|
||||
mds->send_message_mds(new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid()),
|
||||
from, MDS_PORT_LOCKER);
|
||||
}
|
||||
|
||||
// update lock and reply
|
||||
lock->set_state(LOCK_LOCK);
|
||||
|
||||
mds->send_message_mds(new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid()),
|
||||
from, MDS_PORT_LOCKER);
|
||||
break;
|
||||
|
||||
|
||||
@ -815,6 +836,7 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
|
||||
MDRequest *mdr = mdcache->request_get(m->get_reqid());
|
||||
mdr->xlocks.insert(lock);
|
||||
mdr->locks.insert(lock);
|
||||
lock->set_state(LOCK_REMOTEXLOCK);
|
||||
lock->finish_waiters(SimpleLock::WAIT_REMOTEXLOCK);
|
||||
}
|
||||
break;
|
||||
@ -879,38 +901,66 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
|
||||
}
|
||||
|
||||
|
||||
class C_Locker_SimpleEval : public Context {
|
||||
Locker *locker;
|
||||
SimpleLock *lock;
|
||||
public:
|
||||
C_Locker_SimpleEval(Locker *l, SimpleLock *lk) : locker(l), lock(lk) {}
|
||||
void finish(int r) {
|
||||
locker->simple_eval(lock);
|
||||
}
|
||||
};
|
||||
|
||||
void Locker::simple_eval(SimpleLock *lock)
|
||||
{
|
||||
// finished gather?
|
||||
if (lock->get_parent()->is_auth() &&
|
||||
!lock->is_stable() &&
|
||||
!lock->is_gathering()) {
|
||||
dout(7) << "simple_eval finished gather on " << *lock << " on " << *lock->get_parent() << endl;
|
||||
switch (lock->get_state()) {
|
||||
case LOCK_GLOCKR:
|
||||
lock->set_state(LOCK_LOCK);
|
||||
lock->finish_waiters(SimpleLock::WAIT_STABLE);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
// unstable and ambiguous auth?
|
||||
if (!lock->is_stable() &&
|
||||
lock->get_parent()->is_ambiguous_auth()) {
|
||||
dout(7) << "simple_eval not stable and ambiguous auth, waiting on " << *lock->get_parent() << endl;
|
||||
//if (!lock->get_parent()->is_waiter(MDSCacheObject::WAIT_SINGLEAUTH))
|
||||
lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, new C_Locker_SimpleEval(this, lock));
|
||||
return;
|
||||
}
|
||||
if (!lock->is_stable()) return;
|
||||
|
||||
if (lock->get_parent()->is_auth()) {
|
||||
|
||||
// sync?
|
||||
if (lock->get_state() != LOCK_SYNC &&
|
||||
lock->get_parent()->is_replicated() &&
|
||||
!lock->is_waiter_for(SimpleLock::WAIT_WR)) {
|
||||
dout(7) << "simple_eval stable, syncing " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
simple_sync(lock);
|
||||
}
|
||||
|
||||
} else {
|
||||
// replica
|
||||
// finished remote xlock?
|
||||
if (lock->get_state() == LOCK_REMOTEXLOCK &&
|
||||
!lock->is_xlocked()) {
|
||||
// tell auth
|
||||
assert(!lock->get_parent()->is_auth()); // should be auth_pinned on the auth
|
||||
dout(7) << "simple_eval releasing remote xlock on " << *lock->get_parent() << endl;
|
||||
int auth = lock->get_parent()->authority().first;
|
||||
if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
|
||||
mds->send_message_mds(new MLock(lock, LOCK_AC_UNXLOCK, mds->get_nodeid()),
|
||||
auth, MDS_PORT_LOCKER);
|
||||
lock->set_state(LOCK_LOCK);
|
||||
}
|
||||
|
||||
// finished gathering?
|
||||
if (lock->get_state() == LOCK_GLOCKR &&
|
||||
!lock->is_gathering() &&
|
||||
!lock->is_rdlocked()) {
|
||||
dout(7) << "simple_eval finished gather on " << *lock << " on " << *lock->get_parent() << endl;
|
||||
|
||||
// replica: tell auth
|
||||
if (!lock->get_parent()->is_auth()) {
|
||||
int auth = lock->get_parent()->authority().first;
|
||||
if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
|
||||
mds->send_message_mds(new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid()),
|
||||
lock->get_parent()->authority().first, MDS_PORT_LOCKER);
|
||||
}
|
||||
|
||||
lock->set_state(LOCK_LOCK);
|
||||
lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_WR);
|
||||
}
|
||||
|
||||
// stable -> sync?
|
||||
if (lock->get_parent()->is_auth() &&
|
||||
lock->is_stable() &&
|
||||
lock->get_state() != LOCK_SYNC &&
|
||||
!lock->is_waiter_for(SimpleLock::WAIT_WR)) {
|
||||
dout(7) << "simple_eval stable, syncing " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
simple_sync(lock);
|
||||
}
|
||||
|
||||
}
|
||||
@ -929,13 +979,16 @@ void Locker::simple_sync(SimpleLock *lock)
|
||||
if (lock->get_state() == LOCK_GLOCKR)
|
||||
assert(0); // um... hmm!
|
||||
assert(lock->get_state() == LOCK_LOCK);
|
||||
|
||||
// hard data
|
||||
bufferlist data;
|
||||
lock->encode_locked_state(data);
|
||||
|
||||
// bcast to replicas
|
||||
send_lock_message(lock, LOCK_AC_SYNC, data);
|
||||
|
||||
// sync.
|
||||
if (lock->get_parent()->is_replicated()) {
|
||||
// hard data
|
||||
bufferlist data;
|
||||
lock->encode_locked_state(data);
|
||||
|
||||
// bcast to replicas
|
||||
send_lock_message(lock, LOCK_AC_SYNC, data);
|
||||
}
|
||||
|
||||
// change lock
|
||||
lock->set_state(LOCK_SYNC);
|
||||
@ -1015,11 +1068,9 @@ void Locker::simple_rdlock_finish(SimpleLock *lock, MDRequest *mdr)
|
||||
|
||||
dout(7) << "simple_rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
|
||||
|
||||
if (lock->get_state() == LOCK_GLOCKR &&
|
||||
!lock->is_rdlocked()) {
|
||||
lock->set_state(LOCK_SYNC); // return state to sync, in case the unpinner flails
|
||||
lock->finish_waiters(SimpleLock::WAIT_NOLOCKS);
|
||||
}
|
||||
// last one?
|
||||
if (!lock->is_rdlocked())
|
||||
simple_eval(lock);
|
||||
}
|
||||
|
||||
bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
|
||||
@ -1066,6 +1117,7 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
|
||||
new C_MDS_RetryRequest(mdcache, mdr));
|
||||
return false;
|
||||
}
|
||||
int auth = lock->get_parent()->authority().first;
|
||||
|
||||
// wait for sync.
|
||||
// (???????????)
|
||||
@ -1075,7 +1127,6 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
|
||||
}
|
||||
|
||||
// send lock request
|
||||
int auth = lock->get_parent()->authority().first;
|
||||
MLock *m = new MLock(lock, LOCK_AC_REQXLOCK, mds->get_nodeid());
|
||||
mds->send_message_mds(m, auth, MDS_PORT_LOCKER);
|
||||
|
||||
@ -1091,29 +1142,16 @@ void Locker::simple_xlock_finish(SimpleLock *lock, MDRequest *mdr)
|
||||
// drop ref
|
||||
assert(lock->can_xlock(mdr));
|
||||
lock->put_xlock();
|
||||
assert(mdr);
|
||||
mdr->xlocks.erase(lock);
|
||||
mdr->locks.erase(lock);
|
||||
dout(7) << "simple_xlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
|
||||
|
||||
// slave?
|
||||
if (!lock->get_parent()->is_auth()) {
|
||||
mds->send_message_mds(new MLock(lock, LOCK_AC_UNXLOCK, mds->get_nodeid()),
|
||||
lock->get_parent()->authority().first, MDS_PORT_LOCKER);
|
||||
}
|
||||
|
||||
// others waiting?
|
||||
if (lock->is_waiter_for(SimpleLock::WAIT_WR)) {
|
||||
// wake 'em up
|
||||
lock->finish_waiters(SimpleLock::WAIT_WR, 0);
|
||||
} else {
|
||||
// auto-sync if alone.
|
||||
if (lock->get_parent()->is_auth() &&
|
||||
!lock->get_parent()->is_replicated() &&
|
||||
lock->get_state() != LOCK_SYNC)
|
||||
lock->set_state(LOCK_SYNC);
|
||||
|
||||
simple_eval(lock);
|
||||
}
|
||||
lock->finish_waiters(SimpleLock::WAIT_WR, 0);
|
||||
|
||||
// eval
|
||||
simple_eval(lock);
|
||||
}
|
||||
|
||||
|
||||
@ -1261,19 +1299,43 @@ void Locker::scatter_wrlock_finish(ScatterLock *lock, MDRequest *mdr)
|
||||
scatter_eval(lock);
|
||||
}
|
||||
|
||||
|
||||
class C_Locker_ScatterEval : public Context {
|
||||
Locker *locker;
|
||||
ScatterLock *lock;
|
||||
public:
|
||||
C_Locker_ScatterEval(Locker *l, ScatterLock *lk) : locker(l), lock(lk) {}
|
||||
void finish(int r) {
|
||||
locker->scatter_eval(lock);
|
||||
}
|
||||
};
|
||||
|
||||
void Locker::scatter_eval(ScatterLock *lock)
|
||||
{
|
||||
// unstable and ambiguous auth?
|
||||
if (!lock->is_stable() &&
|
||||
lock->get_parent()->is_ambiguous_auth()) {
|
||||
dout(7) << "scatter_eval not stable and ambiguous auth, waiting on " << *lock->get_parent() << endl;
|
||||
//if (!lock->get_parent()->is_waiter(MDSCacheObject::WAIT_SINGLEAUTH))
|
||||
lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, new C_Locker_ScatterEval(this, lock));
|
||||
return;
|
||||
}
|
||||
|
||||
if (!lock->get_parent()->is_auth()) {
|
||||
// REPLICA
|
||||
|
||||
if (lock->get_state() == LOCK_GSYNCS &&
|
||||
!lock->is_wrlocked()) {
|
||||
dout(10) << "scatter_eval no wrlocks, acking sync" << endl;
|
||||
bufferlist data;
|
||||
lock->encode_locked_state(data);
|
||||
mds->send_message_mds(new MLock(lock, LOCK_AC_SYNCACK, mds->get_nodeid(), data),
|
||||
lock->get_parent()->authority().first, MDS_PORT_LOCKER);
|
||||
int auth = lock->get_parent()->authority().first;
|
||||
if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) {
|
||||
bufferlist data;
|
||||
lock->encode_locked_state(data);
|
||||
mds->send_message_mds(new MLock(lock, LOCK_AC_SYNCACK, mds->get_nodeid(), data),
|
||||
auth, MDS_PORT_LOCKER);
|
||||
}
|
||||
lock->set_state(LOCK_SYNC);
|
||||
lock->finish_waiters(ScatterLock::WAIT_STABLE); // ?
|
||||
}
|
||||
|
||||
} else {
|
||||
@ -1286,7 +1348,7 @@ void Locker::scatter_eval(ScatterLock *lock)
|
||||
dout(7) << "scatter_eval finished gather/un-wrlock on " << *lock
|
||||
<< " on " << *lock->get_parent() << endl;
|
||||
lock->set_state(LOCK_SYNC);
|
||||
lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_RD|SimpleLock::WAIT_NOLOCKS);
|
||||
lock->finish_waiters(ScatterLock::WAIT_STABLE|ScatterLock::WAIT_RD);
|
||||
}
|
||||
|
||||
// gscatters -> scatter?
|
||||
@ -1301,13 +1363,15 @@ void Locker::scatter_eval(ScatterLock *lock)
|
||||
}
|
||||
|
||||
lock->set_state(LOCK_SCATTER);
|
||||
lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
|
||||
lock->get_wrlock();
|
||||
lock->finish_waiters(ScatterLock::WAIT_WR|ScatterLock::WAIT_STABLE);
|
||||
lock->put_wrlock();
|
||||
}
|
||||
|
||||
// waiting for rd?
|
||||
if (lock->get_state() == LOCK_SCATTER &&
|
||||
!lock->is_wrlocked() &&
|
||||
lock->is_waiter_for(SimpleLock::WAIT_RD)) {
|
||||
lock->is_waiter_for(ScatterLock::WAIT_RD)) {
|
||||
dout(10) << "scatter_eval no wrlocks, read waiter, syncing" << endl;
|
||||
scatter_sync(lock);
|
||||
}
|
||||
@ -1339,9 +1403,10 @@ void Locker::scatter_sync(ScatterLock *lock)
|
||||
}
|
||||
else if (lock->is_wrlocked()) {
|
||||
lock->set_state(LOCK_GSYNCS);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
lock->set_state(LOCK_SYNC);
|
||||
lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE);
|
||||
lock->finish_waiters(ScatterLock::WAIT_RD|ScatterLock::WAIT_STABLE);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1365,7 +1430,7 @@ void Locker::scatter_scatter(ScatterLock *lock)
|
||||
send_lock_message(lock, LOCK_AC_SCATTER, data);
|
||||
}
|
||||
lock->set_state(LOCK_SCATTER);
|
||||
lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
|
||||
lock->finish_waiters(ScatterLock::WAIT_WR|ScatterLock::WAIT_STABLE);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1375,6 +1440,15 @@ void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
|
||||
{
|
||||
int from = m->get_asker();
|
||||
|
||||
if (mds->is_rejoin()) {
|
||||
if (lock->get_parent()->is_rejoining()) {
|
||||
dout(7) << "handle_scatter_lock still rejoining " << *lock->get_parent()
|
||||
<< ", dropping " << *m << endl;
|
||||
delete m;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
switch (m->get_action()) {
|
||||
// -- replica --
|
||||
case LOCK_AC_SYNC:
|
||||
@ -1398,7 +1472,7 @@ void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
|
||||
assert(lock->get_state() == LOCK_SYNC);
|
||||
lock->decode_locked_state(m->get_data());
|
||||
lock->set_state(LOCK_SCATTER);
|
||||
lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
|
||||
lock->finish_waiters(ScatterLock::WAIT_WR|ScatterLock::WAIT_STABLE);
|
||||
break;
|
||||
|
||||
// -- for auth --
|
||||
@ -1416,7 +1490,7 @@ void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
|
||||
dout(7) << "handle_scatter_lock " << *lock << " on " << *lock->get_parent()
|
||||
<< " from " << from << ", last one"
|
||||
<< endl;
|
||||
simple_eval(lock);
|
||||
scatter_eval(lock);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1508,10 +1582,8 @@ void Locker::file_rdlock_finish(FileLock *lock, MDRequest *mdr)
|
||||
|
||||
dout(7) << "rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
|
||||
|
||||
if (!lock->is_rdlocked()) {
|
||||
lock->finish_waiters(SimpleLock::WAIT_NOLOCKS);
|
||||
if (!lock->is_rdlocked())
|
||||
file_eval(lock);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1568,10 +1640,13 @@ void Locker::file_xlock_finish(FileLock *lock, MDRequest *mdr)
|
||||
dout(7) << "file_xlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
|
||||
|
||||
assert(lock->get_parent()->is_auth()); // or implement remote xlocks
|
||||
|
||||
// drop lock?
|
||||
if (!lock->is_waiter_for(SimpleLock::WAIT_STABLE))
|
||||
file_eval(lock);
|
||||
|
||||
// others waiting?
|
||||
lock->finish_waiters(SimpleLock::WAIT_WR, 0);
|
||||
|
||||
//// drop lock?
|
||||
//if (!lock->is_waiter_for(SimpleLock::WAIT_STABLE))
|
||||
file_eval(lock);
|
||||
}
|
||||
|
||||
|
||||
@ -1582,11 +1657,31 @@ void Locker::file_xlock_finish(FileLock *lock, MDRequest *mdr)
|
||||
* - checks if we're in unstable sfot state and can now move on to next state
|
||||
* - checks if soft state should change (eg bc last writer closed)
|
||||
*/
|
||||
class C_Locker_FileEval : public Context {
|
||||
Locker *locker;
|
||||
FileLock *lock;
|
||||
public:
|
||||
C_Locker_FileEval(Locker *l, FileLock *lk) : locker(l), lock(lk) {}
|
||||
void finish(int r) {
|
||||
locker->file_eval(lock);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void Locker::file_eval(FileLock *lock)
|
||||
{
|
||||
CInode *in = (CInode*)lock->get_parent();
|
||||
|
||||
// unstable and ambiguous auth?
|
||||
if (!lock->is_stable() &&
|
||||
in->is_ambiguous_auth()) {
|
||||
dout(7) << "file_eval not stable and ambiguous auth, waiting on " << *in << endl;
|
||||
//if (!lock->get_parent()->is_waiter(MDSCacheObject::WAIT_SINGLEAUTH))
|
||||
in->add_waiter(CInode::WAIT_SINGLEAUTH, new C_Locker_FileEval(this, lock));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
int issued = in->get_caps_issued();
|
||||
|
||||
// [auth] finished gather?
|
||||
@ -1605,7 +1700,7 @@ void Locker::file_eval(FileLock *lock)
|
||||
|
||||
// waiters
|
||||
lock->get_rdlock();
|
||||
lock->finish_waiters(SimpleLock::WAIT_STABLE);
|
||||
lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_WR|SimpleLock::WAIT_RD);
|
||||
lock->put_rdlock();
|
||||
}
|
||||
break;
|
||||
@ -1789,19 +1884,15 @@ bool Locker::file_sync(FileLock *lock)
|
||||
|
||||
if (lock->get_state() == LOCK_LOCK) {
|
||||
if (in->is_replicated()) {
|
||||
// soft data
|
||||
bufferlist softdata;
|
||||
lock->encode_locked_state(softdata);
|
||||
|
||||
// bcast to replicas
|
||||
send_lock_message(lock, LOCK_AC_SYNC, softdata);
|
||||
}
|
||||
|
||||
|
||||
// change lock
|
||||
lock->set_state(LOCK_SYNC);
|
||||
|
||||
// reissue caps
|
||||
issue_caps(in);
|
||||
issue_caps(in); // reissue caps
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1813,10 +1904,10 @@ bool Locker::file_sync(FileLock *lock)
|
||||
issue_caps(in);
|
||||
} else {
|
||||
// no writers, go straight to sync
|
||||
|
||||
if (in->is_replicated()) {
|
||||
// bcast to replicas
|
||||
send_lock_message(lock, LOCK_AC_SYNC);
|
||||
bufferlist softdata;
|
||||
lock->encode_locked_state(softdata);
|
||||
send_lock_message(lock, LOCK_AC_SYNC, softdata);
|
||||
}
|
||||
|
||||
// change lock
|
||||
@ -1834,8 +1925,9 @@ bool Locker::file_sync(FileLock *lock)
|
||||
} else {
|
||||
// no writers, go straight to sync
|
||||
if (in->is_replicated()) {
|
||||
// bcast to replicas
|
||||
send_lock_message(lock, LOCK_AC_SYNC);
|
||||
bufferlist softdata;
|
||||
lock->encode_locked_state(softdata);
|
||||
send_lock_message(lock, LOCK_AC_SYNC, softdata);
|
||||
}
|
||||
|
||||
// change lock
|
||||
@ -2070,6 +2162,16 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m)
|
||||
CInode *in = (CInode*)lock->get_parent();
|
||||
int from = m->get_asker();
|
||||
|
||||
if (mds->is_rejoin()) {
|
||||
if (in->is_rejoining()) {
|
||||
dout(7) << "handle_file_lock still rejoining " << *in
|
||||
<< ", dropping " << *m << endl;
|
||||
delete m;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
dout(7) << "handle_file_lock a=" << m->get_action() << " from " << from << " "
|
||||
<< *in << " filelock=" << *lock << endl;
|
||||
|
||||
@ -2104,16 +2206,15 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m)
|
||||
}
|
||||
if (lock->is_rdlocked()) {
|
||||
dout(7) << "handle_file_lock rdlocked, waiting before ack on " << *in << endl;
|
||||
in->add_waiter(SimpleLock::WAIT_NOLOCKS, new C_MDS_RetryMessage(mds, m));
|
||||
lock->set_state(LOCK_GLOCKR);
|
||||
assert(0);// i am broken.. why retry message when state captures all the info i need?
|
||||
return;
|
||||
break;
|
||||
}
|
||||
if (issued & CAP_FILE_RD) {
|
||||
dout(7) << "handle_file_lock RD cap issued, waiting before ack on " << *in << endl;
|
||||
lock->set_state(LOCK_GLOCKR);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
// nothing to wait for, lock and ack.
|
||||
{
|
||||
lock->set_state(LOCK_LOCK);
|
||||
|
@ -60,7 +60,7 @@ private:
|
||||
void handle_lock(MLock *m);
|
||||
|
||||
void send_lock_message(SimpleLock *lock, int msg);
|
||||
void send_lock_message(SimpleLock *lock, int msg, bufferlist &data);
|
||||
void send_lock_message(SimpleLock *lock, int msg, const bufferlist &data);
|
||||
|
||||
// -- locks --
|
||||
bool acquire_locks(MDRequest *mdr,
|
||||
|
@ -706,13 +706,22 @@ void MDBalancer::find_exports(CDir *dir,
|
||||
void MDBalancer::hit_inode(CInode *in, int type)
|
||||
{
|
||||
// hit me
|
||||
in->popularity[MDS_POP_JUSTME].pop[type].hit();
|
||||
in->popularity[MDS_POP_NESTED].pop[type].hit();
|
||||
float me = in->popularity[MDS_POP_JUSTME].pop[type].hit();
|
||||
float nested = in->popularity[MDS_POP_NESTED].pop[type].hit();
|
||||
float curdom = 0;
|
||||
float anydom = 0;
|
||||
if (in->is_auth()) {
|
||||
in->popularity[MDS_POP_CURDOM].pop[type].hit();
|
||||
in->popularity[MDS_POP_ANYDOM].pop[type].hit();
|
||||
curdom = in->popularity[MDS_POP_CURDOM].pop[type].hit();
|
||||
anydom = in->popularity[MDS_POP_ANYDOM].pop[type].hit();
|
||||
}
|
||||
|
||||
|
||||
dout(20) << "hit_inode " << type << " pop " << me << " me, "
|
||||
<< nested << " nested, "
|
||||
<< curdom << " curdom, "
|
||||
<< anydom << " anydom"
|
||||
<< " on " << *in
|
||||
<< endl;
|
||||
|
||||
// hit auth up to import
|
||||
CDir *dir = in->get_parent_dir();
|
||||
if (dir) hit_dir(dir, type);
|
||||
@ -728,7 +737,8 @@ void MDBalancer::hit_dir(CDir *dir, int type)
|
||||
if (g_conf.num_mds > 2 && // FIXME >2 thing
|
||||
!dir->inode->is_root() && // not root (for now at least)
|
||||
dir->is_auth()) {
|
||||
//dout(-20) << "hit_dir " << type << " pop is " << v << " " << *dir << endl;
|
||||
dout(20) << "hit_dir " << type << " pop " << v << " me "
|
||||
<< *dir << endl;
|
||||
|
||||
// hash this dir? (later?)
|
||||
if (((v > g_conf.mds_bal_hash_rd && type == META_POP_IRD) ||
|
||||
@ -756,6 +766,8 @@ void MDBalancer::hit_recursive(CDir *dir, int type)
|
||||
// replicate?
|
||||
float dir_pop = dir->popularity[MDS_POP_CURDOM].pop[type].get(); // hmm??
|
||||
|
||||
dout(20) << "hit_recursive " << type << " pop " << dir_pop << " curdom " << *dir << endl;
|
||||
|
||||
if (dir->is_auth()) {
|
||||
if (!dir->is_rep() &&
|
||||
dir_pop >= g_conf.mds_bal_replicate_threshold) {
|
||||
@ -764,7 +776,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type)
|
||||
rd_adj = rdp / mds->get_mds_map()->get_num_mds() - rdp;
|
||||
rd_adj /= 2.0; // temper somewhat
|
||||
|
||||
dout(1) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl;
|
||||
dout(2) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl;
|
||||
|
||||
dir->dir_rep = CDir::REP_ALL;
|
||||
mds->mdcache->send_dir_updates(dir, true);
|
||||
@ -777,7 +789,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type)
|
||||
dir->is_rep() &&
|
||||
dir_pop < g_conf.mds_bal_unreplicate_threshold) {
|
||||
// unreplicate
|
||||
dout(1) << "unreplicating dir " << *dir << " pop " << dir_pop << endl;
|
||||
dout(2) << "unreplicating dir " << *dir << " pop " << dir_pop << endl;
|
||||
|
||||
dir->dir_rep = CDir::REP_NONE;
|
||||
mds->mdcache->send_dir_updates(dir);
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include "events/ESlaveUpdate.h"
|
||||
#include "events/EString.h"
|
||||
#include "events/EPurgeFinish.h"
|
||||
#include "events/EImportFinish.h"
|
||||
|
||||
#include "messages/MGenericMessage.h"
|
||||
|
||||
@ -1200,9 +1201,11 @@ void MDCache::disambiguate_imports()
|
||||
if (dir->authority().first != CDIR_AUTH_UNKNOWN) {
|
||||
dout(10) << "ambiguous import auth known, must not be me " << *dir << endl;
|
||||
cancel_ambiguous_import(q->first);
|
||||
mds->mdlog->submit_entry(new EImportFinish(dir, false));
|
||||
} else {
|
||||
dout(10) << "ambiguous import auth unknown, must be me " << *dir << endl;
|
||||
finish_ambiguous_import(q->first);
|
||||
mds->mdlog->submit_entry(new EImportFinish(dir, true));
|
||||
}
|
||||
}
|
||||
assert(my_ambiguous_imports.empty());
|
||||
@ -1262,50 +1265,71 @@ void MDCache::finish_ambiguous_import(dirfrag_t df)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
/** recalc_auth_bits()
|
||||
* once subtree auth is disambiguated, we need to adjust all the
|
||||
* auth (and dirty) bits in our cache before moving on.
|
||||
* auth and dirty bits in our cache before moving on.
|
||||
*/
|
||||
void MDCache::recalc_auth_bits()
|
||||
{
|
||||
dout(7) << "recalc_auth_bits" << endl;
|
||||
|
||||
for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
|
||||
p != inode_map.end();
|
||||
for (map<CDir*,set<CDir*> >::iterator p = subtrees.begin();
|
||||
p != subtrees.end();
|
||||
++p) {
|
||||
CInode *in = p->second;
|
||||
if (in->authority().first == mds->get_nodeid())
|
||||
in->state_set(CInode::STATE_AUTH);
|
||||
else {
|
||||
in->state_clear(CInode::STATE_AUTH);
|
||||
if (in->is_dirty())
|
||||
in->mark_clean();
|
||||
}
|
||||
list<CDir*> dfq; // dirfrag queue
|
||||
dfq.push_back(p->first);
|
||||
|
||||
if (in->parent) {
|
||||
if (in->parent->authority().first == mds->get_nodeid())
|
||||
in->parent->state_set(CDentry::STATE_AUTH);
|
||||
else {
|
||||
in->parent->state_clear(CDentry::STATE_AUTH);
|
||||
if (in->parent->is_dirty())
|
||||
in->parent->mark_clean();
|
||||
}
|
||||
}
|
||||
bool auth = p->first->authority().first == mds->get_nodeid();
|
||||
dout(10) << " subtree auth=" << auth << " for " << *p->first << endl;
|
||||
|
||||
list<CDir*> ls;
|
||||
for (list<CDir*>::iterator p = ls.begin();
|
||||
p != ls.end();
|
||||
++p) {
|
||||
CDir *dir = *p;
|
||||
if (dir->authority().first == mds->get_nodeid())
|
||||
while (!dfq.empty()) {
|
||||
CDir *dir = dfq.front();
|
||||
dfq.pop_front();
|
||||
|
||||
// dir
|
||||
if (auth)
|
||||
dir->state_set(CDir::STATE_AUTH);
|
||||
else {
|
||||
dir->state_set(CDir::STATE_REJOINING);
|
||||
dir->state_clear(CDir::STATE_AUTH);
|
||||
if (dir->is_dirty())
|
||||
dir->mark_clean();
|
||||
}
|
||||
|
||||
// dentries in this dir
|
||||
for (map<string,CDentry*>::iterator q = dir->items.begin();
|
||||
q != dir->items.end();
|
||||
++q) {
|
||||
// dn
|
||||
CDentry *dn = q->second;
|
||||
if (auth)
|
||||
dn->state_set(CDentry::STATE_AUTH);
|
||||
else {
|
||||
dn->state_set(CDentry::STATE_REJOINING);
|
||||
dn->state_clear(CDentry::STATE_AUTH);
|
||||
if (dn->is_dirty())
|
||||
dn->mark_clean();
|
||||
}
|
||||
|
||||
if (dn->is_primary()) {
|
||||
// inode
|
||||
if (auth)
|
||||
dn->inode->state_set(CInode::STATE_AUTH);
|
||||
else {
|
||||
dn->inode->state_set(CInode::STATE_REJOINING);
|
||||
dn->inode->state_clear(CInode::STATE_AUTH);
|
||||
if (dn->inode->is_dirty())
|
||||
dn->inode->mark_clean();
|
||||
}
|
||||
|
||||
// recurse?
|
||||
if (dn->inode->is_dir())
|
||||
dn->inode->get_nested_dirfrags(dfq);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
show_subtrees();
|
||||
show_cache();
|
||||
}
|
||||
@ -1410,7 +1434,8 @@ void MDCache::cache_rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin)
|
||||
in->authlock.get_state(),
|
||||
in->linklock.get_state(),
|
||||
in->dirfragtreelock.get_state(),
|
||||
in->filelock.get_state());
|
||||
in->filelock.get_state(),
|
||||
in->dirlock.get_state());
|
||||
if (in->authlock.is_xlocked())
|
||||
rejoin->add_inode_xlock(in->ino(), in->authlock.get_type(),
|
||||
in->authlock.get_xlocked_by()->reqid);
|
||||
@ -1489,7 +1514,7 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
|
||||
if (mds->is_active() || mds->is_stopping()) {
|
||||
dout(10) << "i am active. removing stale cache replicas" << endl;
|
||||
|
||||
// first, scour cache of replica references
|
||||
// first, scour cache of unmentioned replica references
|
||||
for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
|
||||
p != inode_map.end();
|
||||
++p) {
|
||||
@ -1548,7 +1573,8 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
|
||||
if (dn) {
|
||||
int nonce = dn->add_replica(from);
|
||||
dout(10) << " have " << *dn << endl;
|
||||
ack->add_strong_dentry(*p, *q, dn->lock.get_state(), nonce);
|
||||
if (ack)
|
||||
ack->add_strong_dentry(*p, *q, dn->lock.get_state(), nonce);
|
||||
} else {
|
||||
dout(10) << " missing " << *p << " " << *q << endl;
|
||||
if (!missing) missing = new MMDSCacheRejoin(MMDSCacheRejoin::OP_MISSING);
|
||||
@ -1578,19 +1604,22 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
|
||||
if (in) {
|
||||
int nonce = in->add_replica(from);
|
||||
in->mds_caps_wanted.erase(from);
|
||||
in->authlock.remove_gather(from); // just in case
|
||||
in->linklock.remove_gather(from); // just in case
|
||||
in->dirfragtreelock.remove_gather(from); // just in case
|
||||
in->filelock.remove_gather(from); // just in case
|
||||
dout(10) << " have (weak) " << *in << endl;
|
||||
if (ack)
|
||||
if (ack) {
|
||||
in->authlock.remove_gather(from);
|
||||
in->linklock.remove_gather(from);
|
||||
in->dirfragtreelock.remove_gather(from);
|
||||
in->filelock.remove_gather(from);
|
||||
in->dirlock.remove_gather(from);
|
||||
ack->add_strong_inode(in->ino(),
|
||||
nonce,
|
||||
0,
|
||||
in->authlock.get_replica_state(),
|
||||
in->linklock.get_replica_state(),
|
||||
in->dirfragtreelock.get_replica_state(),
|
||||
in->filelock.get_replica_state());
|
||||
in->filelock.get_replica_state(),
|
||||
in->dirlock.get_replica_state());
|
||||
}
|
||||
} else {
|
||||
dout(10) << " missing " << *p << endl;
|
||||
if (!missing) missing = new MMDSCacheRejoin(MMDSCacheRejoin::OP_MISSING);
|
||||
@ -1609,23 +1638,34 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
|
||||
in->mds_caps_wanted[from] = p->second.caps_wanted;
|
||||
else
|
||||
in->mds_caps_wanted.erase(from);
|
||||
in->authlock.remove_gather(from); // just in case
|
||||
in->linklock.remove_gather(from); // just in case
|
||||
in->dirfragtreelock.remove_gather(from); // just in case
|
||||
in->filelock.remove_gather(from); // just in case
|
||||
dout(10) << " have (strong) " << *in << endl;
|
||||
if (ack) {
|
||||
// i had inode, just tell replica the correct state
|
||||
in->authlock.remove_gather(from);
|
||||
in->linklock.remove_gather(from);
|
||||
in->dirfragtreelock.remove_gather(from);
|
||||
in->filelock.remove_gather(from);
|
||||
in->dirlock.remove_gather(from);
|
||||
ack->add_strong_inode(in->ino(),
|
||||
nonce,
|
||||
0,
|
||||
in->authlock.get_replica_state(),
|
||||
in->linklock.get_replica_state(),
|
||||
in->dirfragtreelock.get_replica_state(),
|
||||
in->filelock.get_replica_state());
|
||||
in->filelock.get_replica_state(),
|
||||
in->dirlock.get_replica_state());
|
||||
} else {
|
||||
// note strong replica filelock state requests
|
||||
//if (p->second.filelock & CAP_FILE_RD)
|
||||
//filelock_replica_readers.insert(in);
|
||||
// take note of replica state values.
|
||||
// SimpleLock --
|
||||
// we can ignore; locked replicas can be safely changed to sync.
|
||||
// FileLock --
|
||||
// we can also ignore.
|
||||
// replicas will at most issue RDCACHE|RD, which is covered by the default SYNC,
|
||||
// so only _locally_ opened files are significant.
|
||||
// ScatterLock -- adjust accordingly
|
||||
if (p->second.dirlock == LOCK_SCATTER ||
|
||||
p->second.dirlock == LOCK_GSCATTERS) // replica still has rdlocks
|
||||
in->dirlock.set_state(LOCK_SCATTER);
|
||||
}
|
||||
} else {
|
||||
dout(10) << " missing " << p->first << endl;
|
||||
@ -1711,6 +1751,7 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *m)
|
||||
assert(dir);
|
||||
|
||||
dir->set_replica_nonce(p->second.nonce);
|
||||
dir->state_clear(CDir::STATE_REJOINING);
|
||||
dout(10) << " got " << *dir << endl;
|
||||
|
||||
// dentries
|
||||
@ -1721,6 +1762,7 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *m)
|
||||
assert(dn);
|
||||
dn->set_replica_nonce(q->second.nonce);
|
||||
dn->lock.set_state(q->second.lock);
|
||||
dn->state_clear(CDentry::STATE_REJOINING);
|
||||
dout(10) << " got " << *dn << endl;
|
||||
}
|
||||
}
|
||||
@ -1736,6 +1778,8 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *m)
|
||||
in->linklock.set_state(p->second.linklock);
|
||||
in->dirfragtreelock.set_state(p->second.dirfragtreelock);
|
||||
in->filelock.set_state(p->second.filelock);
|
||||
in->dirlock.set_state(p->second.dirlock);
|
||||
in->state_clear(CInode::STATE_REJOINING);
|
||||
dout(10) << " got " << *in << endl;
|
||||
}
|
||||
|
||||
@ -1767,7 +1811,11 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
|
||||
p != m->weak_dirfrags.end();
|
||||
++p) {
|
||||
CDir *dir = get_dirfrag(*p);
|
||||
assert(dir);
|
||||
if (!dir) {
|
||||
dout(10) << " don't have dirfrag " << *p << endl;
|
||||
continue; // we must have trimmed it after the original rejoin
|
||||
}
|
||||
|
||||
dout(10) << " sending " << *dir << endl;
|
||||
|
||||
// dentries
|
||||
@ -1775,7 +1823,10 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
|
||||
q != m->weak_dentries[*p].end();
|
||||
++q) {
|
||||
CDentry *dn = dir->lookup(*q);
|
||||
assert(dn);
|
||||
if (!dn) {
|
||||
dout(10) << " don't have dentry " << *q << " in " << *dir << endl;
|
||||
continue; // we must have trimmed it after our original rejoin
|
||||
}
|
||||
dout(10) << " sending " << *dn << endl;
|
||||
if (mds->is_rejoin())
|
||||
full->add_weak_dentry(*p, *q);
|
||||
@ -1789,7 +1840,10 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
|
||||
p != m->weak_inodes.end();
|
||||
++p) {
|
||||
CInode *in = get_inode(*p);
|
||||
assert(in);
|
||||
if (!in) {
|
||||
dout(10) << " don't have inode " << *p << endl;
|
||||
continue; // we must have trimmed it after the originalo rejoin
|
||||
}
|
||||
|
||||
dout(10) << " sending " << *in << endl;
|
||||
full->add_full_inode(in->inode, in->symlink, in->dirfragtree);
|
||||
@ -1802,7 +1856,8 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
|
||||
in->authlock.get_replica_state(),
|
||||
in->linklock.get_replica_state(),
|
||||
in->dirfragtreelock.get_replica_state(),
|
||||
in->filelock.get_replica_state());
|
||||
in->filelock.get_replica_state(),
|
||||
in->dirlock.get_replica_state());
|
||||
}
|
||||
|
||||
mds->send_message_mds(full, m->get_source().num(), MDS_PORT_CACHE);
|
||||
@ -1810,7 +1865,13 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
|
||||
|
||||
void MDCache::handle_cache_rejoin_full(MMDSCacheRejoin *m)
|
||||
{
|
||||
dout(7) << "handle_cache_rejoin_full from " << m->get_source() << endl;
|
||||
|
||||
|
||||
assert(0); // write me
|
||||
|
||||
|
||||
delete m;
|
||||
}
|
||||
|
||||
void MDCache::send_cache_rejoin_acks()
|
||||
@ -1893,7 +1954,8 @@ void MDCache::send_cache_rejoin_acks()
|
||||
in->authlock.get_replica_state(),
|
||||
in->linklock.get_replica_state(),
|
||||
in->dirfragtreelock.get_replica_state(),
|
||||
in->filelock.get_replica_state());
|
||||
in->filelock.get_replica_state(),
|
||||
in->dirlock.get_replica_state());
|
||||
}
|
||||
|
||||
// subdirs in this subtree?
|
||||
@ -1914,6 +1976,7 @@ void MDCache::send_cache_rejoin_acks()
|
||||
|
||||
// ===============================================================================
|
||||
|
||||
/*
|
||||
void MDCache::rename_file(CDentry *srcdn,
|
||||
CDentry *destdn)
|
||||
{
|
||||
@ -1928,7 +1991,7 @@ void MDCache::rename_file(CDentry *srcdn,
|
||||
// link inode w/ dentry
|
||||
destdn->dir->link_inode( destdn, in );
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
|
||||
void MDCache::set_root(CInode *in)
|
||||
@ -4647,9 +4710,9 @@ void MDCache::show_cache()
|
||||
p != dir->items.end();
|
||||
++p) {
|
||||
CDentry *dn = p->second;
|
||||
dout(7) << " dentry " << *dn << endl;
|
||||
dout(7) << " dentry " << *dn << endl;
|
||||
if (dn->is_primary() && dn->inode)
|
||||
dout(7) << " inode " << *dn->inode << endl;
|
||||
dout(7) << " inode " << *dn->inode << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -441,6 +441,7 @@ void MDS::beacon_kill(utime_t lab)
|
||||
|
||||
void MDS::handle_mds_map(MMDSMap *m)
|
||||
{
|
||||
version_t hadepoch = mdsmap->get_epoch();
|
||||
version_t epoch = m->get_epoch();
|
||||
dout(5) << "handle_mds_map epoch " << epoch << " from " << m->get_source() << endl;
|
||||
|
||||
@ -671,6 +672,12 @@ void MDS::handle_mds_map(MMDSMap *m)
|
||||
}
|
||||
*/
|
||||
|
||||
// just got mdsmap+osdmap?
|
||||
if (hadepoch == 0 &&
|
||||
mdsmap->get_epoch() > 0 &&
|
||||
osdmap->get_epoch() > 0)
|
||||
boot();
|
||||
|
||||
delete m;
|
||||
}
|
||||
|
||||
@ -691,24 +698,30 @@ void MDS::bcast_mds_map()
|
||||
|
||||
void MDS::handle_osd_map(MOSDMap *m)
|
||||
{
|
||||
version_t had = osdmap->get_epoch();
|
||||
version_t hadepoch = osdmap->get_epoch();
|
||||
dout(10) << "handle_osd_map had " << hadepoch << endl;
|
||||
|
||||
dout(10) << "handle_osd_map had " << had << endl;
|
||||
|
||||
// process locally
|
||||
// process
|
||||
objecter->handle_osd_map(m);
|
||||
|
||||
if (had == 0 && osdmap->get_epoch() > 0) {
|
||||
if (is_creating())
|
||||
boot_create(); // new tables, journal
|
||||
else if (is_starting())
|
||||
boot_start(); // old tables, empty journal
|
||||
else if (is_replay())
|
||||
boot_replay(); // replay, join
|
||||
else
|
||||
assert(is_standby());
|
||||
}
|
||||
|
||||
// just got mdsmap+osdmap?
|
||||
if (hadepoch == 0 &&
|
||||
osdmap->get_epoch() > 0 &&
|
||||
mdsmap->get_epoch() > 0)
|
||||
boot();
|
||||
}
|
||||
|
||||
|
||||
void MDS::boot()
|
||||
{
|
||||
if (is_creating())
|
||||
boot_create(); // new tables, journal
|
||||
else if (is_starting())
|
||||
boot_start(); // old tables, empty journal
|
||||
else if (is_replay())
|
||||
boot_replay(); // replay, join
|
||||
else
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
@ -1050,6 +1063,7 @@ void MDS::my_dispatch(Message *m)
|
||||
// hack: thrash exports
|
||||
for (int i=0; i<g_conf.mds_thrash_exports; i++) {
|
||||
set<int> s;
|
||||
if (!is_active()) break;
|
||||
mdsmap->get_mds_set(s, MDSMap::STATE_ACTIVE);
|
||||
if (s.size() < 2 || mdcache->get_num_inodes() < 10)
|
||||
break; // need peers for this to work.
|
||||
|
@ -189,6 +189,7 @@ class MDS : public Dispatcher {
|
||||
int init(bool standby=false);
|
||||
void reopen_logger();
|
||||
|
||||
void boot();
|
||||
void boot_create(); // i am new mds.
|
||||
void boot_start(); // i am old but empty (was down:out) mds.
|
||||
void boot_replay(int step=0); // i am recovering existing (down:failed) mds.
|
||||
|
@ -84,7 +84,7 @@ public:
|
||||
|
||||
void print(ostream& out) {
|
||||
out << "(";
|
||||
//out << get_lock_type_name(l.get_type()) << " ";
|
||||
out << get_lock_type_name(get_type()) << " ";
|
||||
out << get_scatterlock_state_name(get_state());
|
||||
if (!get_gather_set().empty()) out << " g=" << get_gather_set();
|
||||
if (is_rdlocked())
|
||||
|
@ -891,6 +891,54 @@ CDir* Server::try_open_dir(CInode *diri, frag_t fg, MDRequest *mdr)
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/** predirty_dn_diri
|
||||
* predirty the directory inode for a new dentry, if it is auth (and not root)
|
||||
* BUG: root inode doesn't get dirtied properly, currently. blech.
|
||||
*/
|
||||
version_t Server::predirty_dn_diri(CDentry *dn, EMetaBlob *blob, utime_t mtime)
|
||||
{
|
||||
version_t dirpv = 0;
|
||||
CInode *diri = dn->dir->inode;
|
||||
|
||||
if (diri->is_auth() && !diri->is_root()) {
|
||||
dirpv = diri->pre_dirty();
|
||||
inode_t *pi = blob->add_primary_dentry(diri->get_parent_dn(), true);
|
||||
pi->version = dirpv;
|
||||
pi->ctime = pi->mtime = mtime;
|
||||
dout(10) << "predirty_dn_diri ctime/mtime " << mtime << " pv " << dirpv << " on " << *diri << endl;
|
||||
}
|
||||
|
||||
return dirpv;
|
||||
}
|
||||
|
||||
/** dirty_dn_diri
|
||||
* follow-up with actual dirty of inode after journal entry commits.
|
||||
*/
|
||||
void Server::dirty_dn_diri(CDentry *dn, version_t dirpv, utime_t mtime)
|
||||
{
|
||||
CInode *diri = dn->dir->inode;
|
||||
|
||||
// make the udpate
|
||||
diri->inode.ctime = diri->inode.mtime = mtime;
|
||||
|
||||
if (diri->is_auth() && !diri->is_root()) {
|
||||
// we're auth.
|
||||
diri->mark_dirty(dirpv);
|
||||
dout(10) << "dirty_dn_diri ctime/mtime " << mtime << " v " << diri->inode.version << " on " << *diri << endl;
|
||||
} else {
|
||||
// we're not auth. dirlock scatterlock will propagate the update.
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// ===============================================================================
|
||||
// STAT
|
||||
|
||||
@ -1238,10 +1286,11 @@ class C_MDS_mknod_finish : public Context {
|
||||
CDentry *dn;
|
||||
CInode *newi;
|
||||
version_t pv;
|
||||
version_t dirpv;
|
||||
public:
|
||||
C_MDS_mknod_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni) :
|
||||
C_MDS_mknod_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni, version_t dirpv_) :
|
||||
mds(m), mdr(r), dn(d), newi(ni),
|
||||
pv(d->get_projected_version()) {}
|
||||
pv(d->get_projected_version()), dirpv(dirpv_) {}
|
||||
void finish(int r) {
|
||||
assert(r == 0);
|
||||
|
||||
@ -1252,9 +1301,8 @@ public:
|
||||
newi->mark_dirty(pv);
|
||||
|
||||
// dir inode's mtime
|
||||
dn->get_dir()->get_inode()->inode.mtime = MAX(dn->get_dir()->get_inode()->inode.mtime,
|
||||
newi->inode.ctime);
|
||||
|
||||
mds->server->dirty_dn_diri(dn, dirpv, newi->inode.ctime);
|
||||
|
||||
// hit pop
|
||||
mds->balancer->hit_inode(newi, META_POP_IWR);
|
||||
|
||||
@ -1265,6 +1313,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
void Server::handle_client_mknod(MDRequest *mdr)
|
||||
{
|
||||
MClientRequest *req = mdr->client_request();
|
||||
@ -1282,14 +1332,17 @@ void Server::handle_client_mknod(MDRequest *mdr)
|
||||
newi->inode.mode |= INODE_MODE_FILE;
|
||||
|
||||
// prepare finisher
|
||||
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
|
||||
EUpdate *le = new EUpdate("mknod");
|
||||
le->metablob.add_client_req(req->get_reqid());
|
||||
|
||||
version_t dirpv = predirty_dn_diri(dn, &le->metablob, newi->inode.ctime); // dir mtime too
|
||||
|
||||
le->metablob.add_dir_context(dn->dir);
|
||||
inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
|
||||
pi->version = dn->get_projected_version();
|
||||
|
||||
// log + wait
|
||||
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv);
|
||||
mdlog->submit_entry(le);
|
||||
mdlog->wait_for_sync(fin);
|
||||
}
|
||||
@ -1322,15 +1375,16 @@ void Server::handle_client_mkdir(MDRequest *mdr)
|
||||
newdir->mark_dirty(newdir->pre_dirty());
|
||||
|
||||
// prepare finisher
|
||||
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
|
||||
EUpdate *le = new EUpdate("mkdir");
|
||||
le->metablob.add_client_req(req->get_reqid());
|
||||
version_t dirpv = predirty_dn_diri(dn, &le->metablob, newi->inode.ctime); // dir mtime too
|
||||
le->metablob.add_dir_context(dn->dir);
|
||||
inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
|
||||
pi->version = dn->get_projected_version();
|
||||
le->metablob.add_dir(newdir, true);
|
||||
|
||||
// log + wait
|
||||
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv);
|
||||
mdlog->submit_entry(le);
|
||||
mdlog->wait_for_sync(fin);
|
||||
|
||||
@ -1370,14 +1424,15 @@ void Server::handle_client_symlink(MDRequest *mdr)
|
||||
newi->symlink = req->get_sarg();
|
||||
|
||||
// prepare finisher
|
||||
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
|
||||
EUpdate *le = new EUpdate("symlink");
|
||||
le->metablob.add_client_req(req->get_reqid());
|
||||
version_t dirpv = predirty_dn_diri(dn, &le->metablob, newi->inode.ctime); // dir mtime too
|
||||
le->metablob.add_dir_context(dn->dir);
|
||||
inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
|
||||
pi->version = dn->get_projected_version();
|
||||
|
||||
// log + wait
|
||||
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv);
|
||||
mdlog->submit_entry(le);
|
||||
mdlog->wait_for_sync(fin);
|
||||
}
|
||||
@ -1490,15 +1545,17 @@ class C_MDS_link_local_finish : public Context {
|
||||
version_t dpv;
|
||||
utime_t tctime;
|
||||
version_t tpv;
|
||||
version_t dirpv;
|
||||
public:
|
||||
C_MDS_link_local_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ti, utime_t ct) :
|
||||
C_MDS_link_local_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ti, version_t dirpv_, utime_t ct) :
|
||||
mds(m), mdr(r), dn(d), targeti(ti),
|
||||
dpv(d->get_projected_version()),
|
||||
tctime(ct),
|
||||
tpv(targeti->get_parent_dn()->get_projected_version()) {}
|
||||
tpv(targeti->get_parent_dn()->get_projected_version()),
|
||||
dirpv(dirpv_) { }
|
||||
void finish(int r) {
|
||||
assert(r == 0);
|
||||
mds->server->_link_local_finish(mdr, dn, targeti, dpv, tctime, tpv);
|
||||
mds->server->_link_local_finish(mdr, dn, targeti, dpv, tctime, tpv, dirpv);
|
||||
}
|
||||
};
|
||||
|
||||
@ -1517,6 +1574,8 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)
|
||||
version_t tpdv = targeti->pre_dirty();
|
||||
|
||||
// add to event
|
||||
utime_t now = g_clock.real_now();
|
||||
version_t dirpv = predirty_dn_diri(dn, &le->metablob, now); // dir inode's mtime
|
||||
le->metablob.add_dir_context(dn->get_dir());
|
||||
le->metablob.add_remote_dentry(dn, true, targeti->ino()); // new remote
|
||||
le->metablob.add_dir_context(targeti->get_parent_dir());
|
||||
@ -1524,11 +1583,11 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)
|
||||
|
||||
// update journaled target inode
|
||||
pi->nlink++;
|
||||
pi->ctime = g_clock.real_now();
|
||||
pi->ctime = now;
|
||||
pi->version = tpdv;
|
||||
|
||||
// finisher
|
||||
C_MDS_link_local_finish *fin = new C_MDS_link_local_finish(mds, mdr, dn, targeti, pi->ctime);
|
||||
C_MDS_link_local_finish *fin = new C_MDS_link_local_finish(mds, mdr, dn, targeti, dirpv, now);
|
||||
|
||||
// log + wait
|
||||
mdlog->submit_entry(le);
|
||||
@ -1536,7 +1595,7 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)
|
||||
}
|
||||
|
||||
void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti,
|
||||
version_t dpv, utime_t tctime, version_t tpv)
|
||||
version_t dpv, utime_t tctime, version_t tpv, version_t dirpv)
|
||||
{
|
||||
dout(10) << "_link_local_finish " << *dn << " to " << *targeti << endl;
|
||||
|
||||
@ -1551,8 +1610,7 @@ void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti,
|
||||
targeti->mark_dirty(tpv);
|
||||
|
||||
// dir inode's mtime
|
||||
dn->get_dir()->get_inode()->inode.mtime = MAX(dn->get_dir()->get_inode()->inode.mtime,
|
||||
tctime);
|
||||
dirty_dn_diri(dn, dirpv, tctime);
|
||||
|
||||
// bump target popularity
|
||||
mds->balancer->hit_inode(targeti, META_POP_IWR);
|
||||
@ -1738,16 +1796,17 @@ class C_MDS_unlink_local_finish : public Context {
|
||||
CDentry *straydn;
|
||||
version_t ipv; // referred inode
|
||||
utime_t ictime;
|
||||
version_t dpv; // deleted dentry
|
||||
version_t dnpv; // deleted dentry
|
||||
version_t dirpv;
|
||||
public:
|
||||
C_MDS_unlink_local_finish(MDS *m, MDRequest *r, CDentry *d, CDentry *sd,
|
||||
version_t v, utime_t ct) :
|
||||
version_t v, version_t dirpv_, utime_t ct) :
|
||||
mds(m), mdr(r), dn(d), straydn(sd),
|
||||
ipv(v), ictime(ct),
|
||||
dpv(d->get_projected_version()) { }
|
||||
dnpv(d->get_projected_version()), dirpv(dirpv_) { }
|
||||
void finish(int r) {
|
||||
assert(r == 0);
|
||||
mds->server->_unlink_local_finish(mdr, dn, straydn, ipv, ictime, dpv);
|
||||
mds->server->_unlink_local_finish(mdr, dn, straydn, ipv, ictime, dnpv, dirpv);
|
||||
}
|
||||
};
|
||||
|
||||
@ -1790,18 +1849,20 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn)
|
||||
}
|
||||
|
||||
// the unlinked dentry
|
||||
utime_t now = g_clock.real_now();
|
||||
dn->pre_dirty();
|
||||
version_t dirpv = predirty_dn_diri(dn, &le->metablob, now);
|
||||
le->metablob.add_dir_context(dn->get_dir());
|
||||
le->metablob.add_null_dentry(dn, true);
|
||||
|
||||
// update journaled target inode
|
||||
pi->nlink--;
|
||||
pi->ctime = g_clock.real_now();
|
||||
pi->ctime = now;
|
||||
pi->version = ipv;
|
||||
|
||||
// finisher
|
||||
C_MDS_unlink_local_finish *fin = new C_MDS_unlink_local_finish(mds, mdr, dn, straydn,
|
||||
ipv, pi->ctime);
|
||||
ipv, dirpv, now);
|
||||
|
||||
journal_opens(); // journal pending opens, just in case
|
||||
|
||||
@ -1814,7 +1875,7 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn)
|
||||
|
||||
void Server::_unlink_local_finish(MDRequest *mdr,
|
||||
CDentry *dn, CDentry *straydn,
|
||||
version_t ipv, utime_t ictime, version_t dpv)
|
||||
version_t ipv, utime_t ictime, version_t dnpv, version_t dirpv)
|
||||
{
|
||||
dout(10) << "_unlink_local " << *dn << endl;
|
||||
|
||||
@ -1829,11 +1890,10 @@ void Server::_unlink_local_finish(MDRequest *mdr,
|
||||
in->inode.ctime = ictime;
|
||||
in->inode.nlink--;
|
||||
in->mark_dirty(ipv); // dirty inode
|
||||
dn->mark_dirty(dpv); // dirty old dentry
|
||||
dn->mark_dirty(dnpv); // dirty old dentry
|
||||
|
||||
// dir inode's mtime
|
||||
dn->get_dir()->get_inode()->inode.mtime = MAX(dn->get_dir()->get_inode()->inode.mtime,
|
||||
ictime);
|
||||
dirty_dn_diri(dn, dirpv, ictime);
|
||||
|
||||
// share unlink news with replicas
|
||||
for (map<int,int>::iterator it = dn->replicas_begin();
|
||||
@ -2147,25 +2207,27 @@ class C_MDS_rename_local_finish : public Context {
|
||||
version_t straypv;
|
||||
version_t destpv;
|
||||
version_t srcpv;
|
||||
version_t ddirpv, sdirpv;
|
||||
utime_t ictime;
|
||||
public:
|
||||
version_t atid1;
|
||||
version_t atid2;
|
||||
C_MDS_rename_local_finish(MDS *m, MDRequest *r,
|
||||
CDentry *sdn, CDentry *ddn, CDentry *stdn,
|
||||
version_t v, utime_t ct) :
|
||||
version_t v, version_t ddirpv_, version_t sdirpv_, utime_t ct) :
|
||||
mds(m), mdr(r),
|
||||
srcdn(sdn), destdn(ddn), straydn(stdn),
|
||||
ipv(v),
|
||||
straypv(straydn ? straydn->get_projected_version():0),
|
||||
destpv(destdn->get_projected_version()),
|
||||
srcpv(srcdn->get_projected_version()),
|
||||
ddirpv(ddirpv_), sdirpv(sdirpv_),
|
||||
ictime(ct),
|
||||
atid1(0), atid2(0) { }
|
||||
void finish(int r) {
|
||||
assert(r == 0);
|
||||
mds->server->_rename_local_finish(mdr, srcdn, destdn, straydn,
|
||||
srcpv, destpv, straypv, ipv, ictime,
|
||||
srcpv, destpv, straypv, ipv, ddirpv, sdirpv, ictime,
|
||||
atid1, atid2);
|
||||
}
|
||||
};
|
||||
@ -2194,6 +2256,8 @@ void Server::_rename_local(MDRequest *mdr,
|
||||
EUpdate *le = new EUpdate("rename_local");
|
||||
le->metablob.add_client_req(mdr->reqid);
|
||||
|
||||
utime_t now = g_clock.real_now();
|
||||
|
||||
CDentry *straydn = 0;
|
||||
inode_t *pi = 0;
|
||||
version_t ipv = 0;
|
||||
@ -2204,9 +2268,14 @@ void Server::_rename_local(MDRequest *mdr,
|
||||
// primary+remote link merge?
|
||||
bool linkmerge = (srcdn->inode == destdn->inode &&
|
||||
(srcdn->is_primary() || destdn->is_primary()));
|
||||
|
||||
// dir mtimes
|
||||
version_t ddirpv = predirty_dn_diri(destdn, &le->metablob, now);
|
||||
version_t sdirpv = predirty_dn_diri(srcdn, &le->metablob, now);
|
||||
|
||||
if (linkmerge) {
|
||||
dout(10) << "will merge remote+primary links" << endl;
|
||||
|
||||
|
||||
// destdn -> primary
|
||||
le->metablob.add_dir_context(destdn->dir);
|
||||
ipv = destdn->pre_dirty(destdn->inode->inode.version);
|
||||
@ -2300,13 +2369,13 @@ void Server::_rename_local(MDRequest *mdr,
|
||||
if (pi) {
|
||||
// update journaled target inode
|
||||
pi->nlink--;
|
||||
pi->ctime = g_clock.real_now();
|
||||
pi->ctime = now;
|
||||
pi->version = ipv;
|
||||
}
|
||||
|
||||
C_MDS_rename_local_finish *fin = new C_MDS_rename_local_finish(mds, mdr,
|
||||
srcdn, destdn, straydn,
|
||||
ipv, pi ? pi->ctime:utime_t());
|
||||
ipv, ddirpv, sdirpv, now);
|
||||
|
||||
journal_opens(); // journal pending opens, just in case
|
||||
|
||||
@ -2340,6 +2409,7 @@ void Server::_rename_local_reanchored(LogEvent *le, C_MDS_rename_local_finish *f
|
||||
void Server::_rename_local_finish(MDRequest *mdr,
|
||||
CDentry *srcdn, CDentry *destdn, CDentry *straydn,
|
||||
version_t srcpv, version_t destpv, version_t straypv, version_t ipv,
|
||||
version_t ddirpv, version_t sdirpv,
|
||||
utime_t ictime,
|
||||
version_t atid1, version_t atid2)
|
||||
{
|
||||
@ -2352,8 +2422,13 @@ void Server::_rename_local_finish(MDRequest *mdr,
|
||||
bool linkmerge = (srcdn->inode == destdn->inode &&
|
||||
(srcdn->is_primary() || destdn->is_primary()));
|
||||
|
||||
// dir mtimes
|
||||
dirty_dn_diri(destdn, ddirpv, ictime);
|
||||
dirty_dn_diri(srcdn, sdirpv, ictime);
|
||||
|
||||
if (linkmerge) {
|
||||
assert(ipv);
|
||||
|
||||
if (destdn->is_primary()) {
|
||||
dout(10) << "merging remote onto primary link" << endl;
|
||||
|
||||
@ -2755,7 +2830,11 @@ void Server::_do_open(MDRequest *mdr, CInode *cur)
|
||||
<< " on " << *cur << endl;
|
||||
|
||||
// hit pop
|
||||
mds->balancer->hit_inode(cur, META_POP_IRD);
|
||||
if (cmode == FILE_MODE_RW ||
|
||||
cmode == FILE_MODE_W)
|
||||
mds->balancer->hit_inode(cur, META_POP_IWR);
|
||||
else
|
||||
mds->balancer->hit_inode(cur, META_POP_IRD);
|
||||
|
||||
// reply
|
||||
MClientReply *reply = new MClientReply(req, 0);
|
||||
|
@ -70,6 +70,9 @@ public:
|
||||
CDir* try_open_auth_dir(CInode *diri, frag_t fg, MDRequest *mdr);
|
||||
//CDir* try_open_dir(CInode *diri, frag_t fg, MDRequest *mdr);
|
||||
|
||||
version_t predirty_dn_diri(CDentry *dn, class EMetaBlob *blob, utime_t mtime);
|
||||
void dirty_dn_diri(CDentry *dn, version_t dirpv, utime_t mtime);
|
||||
|
||||
// requests on existing inodes.
|
||||
void handle_client_stat(MDRequest *mdr);
|
||||
void handle_client_utime(MDRequest *mdr);
|
||||
@ -108,7 +111,7 @@ public:
|
||||
void _link_local(MDRequest *mdr, CDentry *dn, CInode *targeti);
|
||||
void _link_local_finish(MDRequest *mdr,
|
||||
CDentry *dn, CInode *targeti,
|
||||
version_t, utime_t, version_t);
|
||||
version_t, utime_t, version_t, version_t);
|
||||
void _link_remote(MDRequest *mdr, CDentry *dn, CInode *targeti);
|
||||
|
||||
// unlink
|
||||
@ -117,7 +120,7 @@ public:
|
||||
void _unlink_local(MDRequest *mdr, CDentry *dn);
|
||||
void _unlink_local_finish(MDRequest *mdr,
|
||||
CDentry *dn, CDentry *straydn,
|
||||
version_t, utime_t, version_t);
|
||||
version_t, utime_t, version_t, version_t);
|
||||
void _unlink_remote(MDRequest *mdr, CDentry *dn);
|
||||
|
||||
// rename
|
||||
@ -134,7 +137,7 @@ public:
|
||||
void _rename_local_finish(MDRequest *mdr,
|
||||
CDentry *srcdn, CDentry *destdn, CDentry *straydn,
|
||||
version_t srcpv, version_t destpv, version_t straypv, version_t ipv,
|
||||
utime_t ictime,
|
||||
version_t ddirpv, version_t sdirpv, utime_t ictime,
|
||||
version_t atid1, version_t atid2);
|
||||
};
|
||||
|
||||
|
@ -30,29 +30,31 @@
|
||||
|
||||
inline const char *get_lock_type_name(int t) {
|
||||
switch (t) {
|
||||
case LOCK_OTYPE_DN: return "dentry";
|
||||
case LOCK_OTYPE_IFILE: return "inode_file";
|
||||
case LOCK_OTYPE_IAUTH: return "inode_auth";
|
||||
case LOCK_OTYPE_ILINK: return "inode_link";
|
||||
case LOCK_OTYPE_IDIRFRAGTREE: return "inode_dirfragtree";
|
||||
case LOCK_OTYPE_IDIR: return "inode_dir";
|
||||
case LOCK_OTYPE_DN: return "dn";
|
||||
case LOCK_OTYPE_IFILE: return "ifile";
|
||||
case LOCK_OTYPE_IAUTH: return "iauth";
|
||||
case LOCK_OTYPE_ILINK: return "ilink";
|
||||
case LOCK_OTYPE_IDIRFRAGTREE: return "idft";
|
||||
case LOCK_OTYPE_IDIR: return "idir";
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
// -- lock states --
|
||||
#define LOCK_UNDEF 0
|
||||
// auth rep
|
||||
// auth rep
|
||||
#define LOCK_SYNC 1 // AR R . R .
|
||||
#define LOCK_LOCK 2 // AR R W . .
|
||||
#define LOCK_GLOCKR -3 // AR R . . .
|
||||
#define LOCK_REMOTEXLOCK -50 // on NON-auth
|
||||
|
||||
inline const char *get_simplelock_state_name(int n) {
|
||||
switch (n) {
|
||||
case LOCK_UNDEF: return "undef";
|
||||
case LOCK_UNDEF: return "UNDEF";
|
||||
case LOCK_SYNC: return "sync";
|
||||
case LOCK_LOCK: return "lock";
|
||||
case LOCK_GLOCKR: return "glockr";
|
||||
case LOCK_REMOTEXLOCK: return "remote_xlock";
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
@ -63,8 +65,7 @@ class SimpleLock {
|
||||
public:
|
||||
static const int WAIT_RD = (1<<0); // to read
|
||||
static const int WAIT_WR = (1<<1); // to write
|
||||
static const int WAIT_NOLOCKS = (1<<2); // for last rdlock to finish
|
||||
//static const int WAIT_LOCK = (1<<3); // for locked state
|
||||
static const int WAIT_SINGLEAUTH = (1<<2);
|
||||
static const int WAIT_STABLE = (1<<3); // for a stable state
|
||||
static const int WAIT_REMOTEXLOCK = (1<<4); // for a remote xlock
|
||||
static const int WAIT_BITS = 5;
|
||||
@ -248,7 +249,7 @@ public:
|
||||
|
||||
virtual void print(ostream& out) {
|
||||
out << "(";
|
||||
//out << get_lock_type_name(l.get_type()) << " ";
|
||||
out << get_lock_type_name(get_type()) << " ";
|
||||
out << get_simplelock_state_name(get_state());
|
||||
if (!get_gather_set().empty()) out << " g=" << get_gather_set();
|
||||
if (is_rdlocked())
|
||||
|
@ -40,7 +40,7 @@ public:
|
||||
set<dirfrag_t> &get_bounds() { return bounds; }
|
||||
|
||||
void print(ostream& out) {
|
||||
out << "export " << base << " " << metablob;
|
||||
out << "EExport " << base << " " << metablob;
|
||||
}
|
||||
|
||||
virtual void encode_payload(bufferlist& bl) {
|
||||
|
@ -33,7 +33,7 @@ class EImportFinish : public LogEvent {
|
||||
EImportFinish() : LogEvent(EVENT_IMPORTFINISH) { }
|
||||
|
||||
void print(ostream& out) {
|
||||
out << "import_finish " << base;
|
||||
out << "EImportFinish " << base;
|
||||
if (success)
|
||||
out << " success";
|
||||
else
|
||||
|
@ -292,8 +292,9 @@ class MDSCacheObject {
|
||||
}
|
||||
|
||||
// -- state --
|
||||
const static int STATE_AUTH = (1<<30);
|
||||
const static int STATE_DIRTY = (1<<29);
|
||||
const static int STATE_AUTH = (1<<30);
|
||||
const static int STATE_DIRTY = (1<<29);
|
||||
const static int STATE_REJOINING = (1<<28); // replica has not joined w/ primary copy
|
||||
|
||||
// -- wait --
|
||||
const static int WAIT_SINGLEAUTH = (1<<30);
|
||||
@ -327,8 +328,9 @@ class MDSCacheObject {
|
||||
void state_reset(unsigned s) { state = s; }
|
||||
|
||||
bool is_auth() { return state_test(STATE_AUTH); }
|
||||
bool is_dirty() { return state & STATE_DIRTY; }
|
||||
bool is_dirty() { return state_test(STATE_DIRTY); }
|
||||
bool is_clean() { return !is_dirty(); }
|
||||
bool is_rejoining() { return state_test(STATE_REJOINING); }
|
||||
|
||||
// --------------------------------------------
|
||||
// authority
|
||||
@ -457,7 +459,7 @@ protected:
|
||||
if (waiting.empty())
|
||||
get(PIN_WAITER);
|
||||
waiting.insert(pair<int,Context*>(mask, c));
|
||||
dout(10) << (mdsco_db_line_prefix(this))
|
||||
pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
|
||||
<< "add_waiter " << mask << " " << c
|
||||
<< " on " << *this
|
||||
<< endl;
|
||||
@ -469,14 +471,14 @@ protected:
|
||||
while (it != waiting.end()) {
|
||||
if (it->first & mask) {
|
||||
ls.push_back(it->second);
|
||||
dout(10) << (mdsco_db_line_prefix(this))
|
||||
pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
|
||||
<< "take_waiting mask " << mask << " took " << it->second
|
||||
<< " tag " << it->first
|
||||
<< " on " << *this
|
||||
<< endl;
|
||||
waiting.erase(it++);
|
||||
} else {
|
||||
dout(10) << "take_waiting mask " << mask << " SKIPPING " << it->second
|
||||
pdout(10,g_conf.debug_mds) << "take_waiting mask " << mask << " SKIPPING " << it->second
|
||||
<< " tag " << it->first
|
||||
<< " on " << *this
|
||||
<< endl;
|
||||
|
@ -185,7 +185,9 @@ class MClientReply : public Message {
|
||||
virtual char *get_type_name() { return "creply"; }
|
||||
void print(ostream& o) {
|
||||
o << "creply(" << env.dst.name << "." << st.tid;
|
||||
if (st.result) o << " = " << st.result;
|
||||
o << " = " << st.result;
|
||||
if (st.result <= 0)
|
||||
o << " " << strerror(-st.result);
|
||||
o << ")";
|
||||
}
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
#define __MLOCK_H
|
||||
|
||||
#include "msg/Message.h"
|
||||
|
||||
#include "mds/SimpleLock.h"
|
||||
|
||||
// for replicas
|
||||
#define LOCK_AC_SYNC -1
|
||||
@ -91,6 +91,12 @@ class MLock : public Message {
|
||||
data.claim(bl);
|
||||
}
|
||||
virtual char *get_type_name() { return "ILock"; }
|
||||
void print(ostream& out) {
|
||||
out << "lock(a=" << action
|
||||
<< " " << ino
|
||||
<< " " << get_lock_type_name(otype)
|
||||
<< ")";
|
||||
}
|
||||
|
||||
void set_ino(inodeno_t ino, char ot) {
|
||||
otype = ot;
|
||||
@ -111,32 +117,27 @@ class MLock : public Message {
|
||||
this->dn = dn;
|
||||
}
|
||||
void set_reqid(metareqid_t ri) { reqid = ri; }
|
||||
void set_data(bufferlist& data) {
|
||||
this->data.claim( data );
|
||||
void set_data(const bufferlist& data) {
|
||||
this->data = data;
|
||||
}
|
||||
|
||||
void decode_payload() {
|
||||
int off = 0;
|
||||
payload.copy(off,sizeof(action), (char*)&action);
|
||||
off += sizeof(action);
|
||||
payload.copy(off,sizeof(asker), (char*)&asker);
|
||||
off += sizeof(asker);
|
||||
payload.copy(off,sizeof(otype), (char*)&otype);
|
||||
off += sizeof(otype);
|
||||
payload.copy(off,sizeof(ino), (char*)&ino);
|
||||
off += sizeof(ino);
|
||||
payload.copy(off,sizeof(dirfrag), (char*)&dirfrag);
|
||||
off += sizeof(dirfrag);
|
||||
::_decode(action, payload, off);
|
||||
::_decode(asker, payload, off);
|
||||
::_decode(otype, payload, off);
|
||||
::_decode(ino, payload, off);
|
||||
::_decode(dirfrag, payload, off);
|
||||
::_decode(reqid, payload, off);
|
||||
::_decode(dn, payload, off);
|
||||
::_decode(data, payload, off);
|
||||
}
|
||||
virtual void encode_payload() {
|
||||
payload.append((char*)&action, sizeof(action));
|
||||
payload.append((char*)&asker, sizeof(asker));
|
||||
payload.append((char*)&otype, sizeof(otype));
|
||||
payload.append((char*)&ino, sizeof(ino));
|
||||
payload.append((char*)&dirfrag, sizeof(dirfrag));
|
||||
::_encode(action, payload);
|
||||
::_encode(asker, payload);
|
||||
::_encode(otype, payload);
|
||||
::_encode(ino, payload);
|
||||
::_encode(dirfrag, payload);
|
||||
::_encode(reqid, payload);
|
||||
::_encode(dn, payload);
|
||||
::_encode(data, payload);
|
||||
|
@ -45,11 +45,12 @@ class MMDSCacheRejoin : public Message {
|
||||
int32_t linklock;
|
||||
int32_t dirfragtreelock;
|
||||
int32_t filelock;
|
||||
__int32_t dirlock;
|
||||
inode_strong() {}
|
||||
inode_strong(int n, int cw=0, int a=0, int l=0, int dft=0, int f=0) :
|
||||
inode_strong(int n, int cw=0, int a=0, int l=0, int dft=0, int f=0, int dl=0) :
|
||||
caps_wanted(cw),
|
||||
nonce(n),
|
||||
authlock(a), linklock(l), dirfragtreelock(dft), filelock(f) { }
|
||||
authlock(a), linklock(l), dirfragtreelock(dft), filelock(f), dirlock(dl) { }
|
||||
};
|
||||
struct inode_full {
|
||||
inode_t inode;
|
||||
@ -112,8 +113,8 @@ class MMDSCacheRejoin : public Message {
|
||||
void add_weak_inode(inodeno_t ino) {
|
||||
weak_inodes.insert(ino);
|
||||
}
|
||||
void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f) {
|
||||
strong_inodes[i] = inode_strong(n, cw, a, l, dft, f);
|
||||
void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f, int dl) {
|
||||
strong_inodes[i] = inode_strong(n, cw, a, l, dft, f, dl);
|
||||
}
|
||||
void add_full_inode(inode_t &i, const string& s, const fragtree_t &f) {
|
||||
full_inodes.push_back(inode_full(i, s, f));
|
||||
|
@ -169,7 +169,8 @@ void Journaler::write_head(Context *oncommit)
|
||||
bufferlist bl;
|
||||
bl.append((char*)&last_written, sizeof(last_written));
|
||||
filer.write(inode, 0, bl.length(), bl, 0,
|
||||
0, new C_WriteHead(this, last_written, oncommit));
|
||||
0,
|
||||
new C_WriteHead(this, last_written, oncommit));
|
||||
}
|
||||
|
||||
void Journaler::_finish_write_head(Header &wrote, Context *oncommit)
|
||||
@ -293,8 +294,10 @@ void Journaler::flush(Context *onsync)
|
||||
dout(10) << "flush flushing " << flush_pos << "~" << len << endl;
|
||||
|
||||
// submit write for anything pending
|
||||
// flush _start_ pos to _finish_flush
|
||||
filer.write(inode, flush_pos, len, write_buf, 0,
|
||||
new C_Flush(this, flush_pos), 0); // flush _start_ pos to _finish_flush
|
||||
g_conf.journaler_safe ? 0:new C_Flush(this, flush_pos), // on ACK
|
||||
g_conf.journaler_safe ? new C_Flush(this, flush_pos):0); // on COMMIT
|
||||
pending_flush[flush_pos] = g_clock.now();
|
||||
|
||||
// adjust pointers
|
||||
|
Loading…
Reference in New Issue
Block a user