commit 26c8a9e688 (parent 3b6ca5d05b)

    merged branches/sage/cephmds2 into trunk/ceph

    git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1407 29311d96-e01e-0410-9327-a35deaab8ce9
@@ -12,7 +12,7 @@ CFLAGS = -g -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE -DDARWI
 LDINC = ar -rc
 else
 # For linux
-CFLAGS = -g -fPIC -Wall -I. -D_FILE_OFFSET_BITS=64 -DMPICH_IGNORE_CXX_SEEK -D_REENTRANT -D_THREAD_SAFE
+CFLAGS = -g -fPIC -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE
 LDINC = ld -i -o
 endif
 
@@ -46,22 +46,52 @@ sage doc
 
 
 sage mds
+- journal+recovery
+- local rename
+- how to notify replicas...
+/ - stray purge
+- stray reintegration
+- remote link
+- impl remote inode xlock
+- ESlaveUpdate replay, resolution, etc.
+- remote unlink
+- remote rename
+- file capabilities i/o
+- dirfrag split/merge
+- client readdir for dirfrags
+- consistency points/snapshots
+- dentry versions vs dirfrags...
+- statfs?
 
 - finish multistage rejoin
+- trim_on_rejoin
 
 - more testing of failures + thrashing.
 - is export prep dir open deadlock properly fixed by forge_replica_dir()?
 
-- locker vs node failure
-- osd needs a set_floor_and_read op for safe failover/STOGITH-like semantics.
 - failures during recovery stages (resolve, rejoin)... make sure rejoin still works!
-- fix mds initial osdmap weirdness (which will currently screw up on standby -> almost anything)
-- incremental mdsmaps?
-- client failure
 
 - dirfrag split
 - make sure we are freezing _before_ we fetch to complete the dirfrag, else
   we break commit()'s preconditions when it fetches an incomplete dir.
 
+- detect and deal with client failure
 
+- recovering open files
+- recovery will either have inode (from EOpen), or will provide path+cap to reassert open state.
+- path+cap window will require some fetching of metadata from disk before doing the rejoin
+- failures during migration.. what about client stale/reap stuff and misplaced WR caps?
 
+- inode.max_size
 
+- real chdir (directory "open")
+- relative metadata ops
 
 
 
+- osd needs a set_floor_and_read op for safe failover/STOGITH-like semantics.
 
+- incremental mdsmaps?
 
 - EMetablob should return 'expired' if they have higher versions (and are thus described by a newer journal entry)
 - dir version/committed/etc versus migration, log expires.
 - DOCUMENT.
@@ -77,45 +107,14 @@ sage mds
 
 - test open_remote_ino
 
-- scatterlock
-- unlink, link, rename need to pre_dirty and update dir inode's mtime
-- tho need to make sure that behaves when dirfrag's inode is non-auth...
 
 - FIXME how to journal root and stray inode content?
 - in particular, i care about dirfragtree.. get it on rejoin?
 - and dir sizes, if i add that... also on rejoin?
 
-- recovering open files
-- recovery will either have inode (from EOpen), or will provide path+cap to reassert open state.
-- path+cap window will require some fetching of metadata from disk before doing the rejoin
-- failures during migration.. what about client stale/reap stuff and misplaced WR caps?
 
-- inode.max_size
 
-- journal+recovery
-- local rename
-- how to notify replicas...
-/ - stray purge
-- stray reintegration
-- remote link
-- impl remote inode xlock
-- ESlaveUpdate replay, resolution, etc.
-- remote unlink
-- rewrite to look link _link
-- remote rename
-- file capabilities i/o
-- filelock to control directory mtime, dentry changes
-- hmm, may have to change lock ordering, and Server::rdlock_path_pin_ref()
-- dirfrag split/merge
-- client readdir for dirfrags
-- consistency points/snapshots
-- dentry versions vs dirfrags...
-- real chdir (directory "open")
-- relative metadata ops
-- statfs?
 
 
-- fix lock caps gather ack versus ambiguous auth
 
 
 
 foreign rename
@@ -215,9 +214,13 @@ rados snapshots
 
 
 objecter
+- transaction prepare/commit
 - read+floor_lockout
 
 osd/rados
+- transaction prepare/commit
+- rollback
+- rollback logging (to fix slow prepare vs rollback race)
 - read+floor_lockout for clean STOGITH-like/fencing semantics after failover.
 - separate out replication code into a PG class, to pave way for RAID
 
@@ -150,6 +150,7 @@ md_config_t g_conf = {
 
 // --- journaler ---
 journaler_allow_split_entries: true,
+journaler_safe: false,  // wait for COMMIT on journal writes
 
 // --- mds ---
 mds_cache_size: MDS_CACHE_SIZE,
@@ -158,14 +159,13 @@ md_config_t g_conf = {
 mds_decay_halflife: 30,
 
 mds_beacon_interval: 5.0,
-mds_beacon_grace: 100.0,
+mds_beacon_grace: 10.0,
 
 mds_log: true,
 mds_log_max_len: MDS_CACHE_SIZE / 3,
 mds_log_max_trimming: 10000,
 mds_log_read_inc: 1<<20,
 mds_log_pad_entry: 128,//256,//64,
-mds_log_before_reply: true,
 mds_log_flush_on_shutdown: true,
 mds_log_import_map_interval: 1024*1024, // frequency (in bytes) of EImportMap in log
 mds_log_eopen_size: 100, // # open inodes per log entry
@@ -189,6 +189,7 @@ md_config_t g_conf = {
 mds_bal_midchunk: .3, // any sub bigger than this taken in full
 mds_bal_minchunk: .001, // never take anything smaller than this
 
+mds_trim_on_rejoin: true,
 mds_commit_on_shutdown: true,
 mds_shutdown_check: 0, //30,
 mds_shutdown_on_last_unmount: true,
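Note: this hunk adds the mds_trim_on_rejoin default, but none of the hunks shown here wire it into parse_config_options(). A hypothetical clause, mirroring the --journaler_safe parsing added later in this same commit, could look like:

    else if (strcmp(args[i], "--mds_trim_on_rejoin") == 0)
      g_conf.mds_trim_on_rejoin = atoi(args[++i]);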
@@ -231,7 +232,7 @@ md_config_t g_conf = {
 ebofs_cloneable: false,
 ebofs_verify: false,
 ebofs_commit_ms: 2000, // 0 = no forced commit timeout (for debugging/tracing)
-ebofs_idle_commit_ms: 100, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms
+ebofs_idle_commit_ms: 20, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms
 ebofs_oc_size: 10000, // onode cache
 ebofs_cc_size: 10000, // cnode cache
 ebofs_bc_size: (80 *256), // 4k blocks, *256 for MB
@@ -563,6 +564,9 @@ void parse_config_options(std::vector<char*>& args)
 else if (strcmp(args[i], "--objecter_buffer_uncommitted") == 0)
 g_conf.objecter_buffer_uncommitted = atoi(args[++i]);
 
+else if (strcmp(args[i], "--journaler_safe") == 0)
+g_conf.journaler_safe = atoi(args[++i]);
+
 else if (strcmp(args[i], "--mds_cache_size") == 0)
 g_conf.mds_cache_size = atoi(args[++i]);
 
@@ -573,8 +577,6 @@ void parse_config_options(std::vector<char*>& args)
 
 else if (strcmp(args[i], "--mds_log") == 0)
 g_conf.mds_log = atoi(args[++i]);
-else if (strcmp(args[i], "--mds_log_before_reply") == 0)
-g_conf.mds_log_before_reply = atoi(args[++i]);
 else if (strcmp(args[i], "--mds_log_max_len") == 0)
 g_conf.mds_log_max_len = atoi(args[++i]);
 else if (strcmp(args[i], "--mds_log_read_inc") == 0)
@@ -151,6 +151,7 @@ struct md_config_t {
 
 // journaler
 bool journaler_allow_split_entries;
+bool journaler_safe;
 
 // mds
 int mds_cache_size;
@@ -166,7 +167,6 @@ struct md_config_t {
 int mds_log_max_trimming;
 int mds_log_read_inc;
 int mds_log_pad_entry;
-bool mds_log_before_reply;
 bool mds_log_flush_on_shutdown;
 off_t mds_log_import_map_interval;
 int mds_log_eopen_size;
@@ -190,9 +190,11 @@ struct md_config_t {
 float mds_bal_midchunk;
 float mds_bal_minchunk;
 
+bool mds_trim_on_rejoin;
 bool mds_commit_on_shutdown;
 int mds_shutdown_check;
 bool mds_shutdown_on_last_unmount;
 
 bool mds_verify_export_dirauth; // debug flag
 
 bool mds_local_osd;
@@ -322,6 +324,8 @@ extern md_config_t g_debug_after_conf;
 #define dout(x) if ((x) <= g_conf.debug) std::cout
 #define dout2(x) if ((x) <= g_conf.debug) std::cout
 
+#define pdout(x,p) if ((x) <= (p)) std::cout
+
 /**
 * for cleaner output, bracket each line with
 * dbeginl (in the dout macro) and dendl (in place of endl).
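Note: the new pdout(x,p) macro gates output on a caller-supplied threshold p instead of the global g_conf.debug. A hypothetical usage sketch (the level variable here is illustrative, not from this commit):

    int subsys_debug = 15;                              // assumed per-subsystem debug level
    pdout(10, subsys_debug) << "balancer tick" << endl; // emitted because 10 <= 15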
@@ -1187,6 +1187,14 @@ void CDir::freeze_tree(Context *c)
 
 void CDir::freeze_tree_finish(Context *c)
 {
+// still freezing? (we may have been canceled)
+if (!is_freezing()) {
+dout(10) << "freeze_tree_finish no longer freezing, done on " << *this << endl;
+c->finish(-1);
+delete c;
+return;
+}
+
 // freezeable now?
 if (!is_freezeable()) {
 // wait again!
@@ -1242,6 +1250,7 @@ void CDir::unfreeze_tree()
 state_clear(STATE_FREEZINGTREE);
 
 // cancel freeze waiters
+finish_waiting(WAIT_UNFREEZE);
 finish_waiting(WAIT_FREEZEABLE, -1);
 }
 }
@@ -61,16 +61,12 @@ ostream& operator<<(ostream& out, CInode& in)
 
 out << " v" << in.get_version();
 
-out << " auth=" << in.authlock;
-out << " link=" << in.linklock;
-out << " dft=" << in.dirfragtreelock;
-out << " file=" << in.filelock;
-out << " dir=" << in.dirlock;
-if (in.get_num_ref()) {
-out << " |";
-in.print_pin_set(out);
-}
+// locks
+out << " " << in.authlock;
+out << " " << in.linklock;
+out << " " << in.dirfragtreelock;
+out << " " << in.filelock;
+out << " " << in.dirlock;
 
 // hack: spit out crap on which clients have caps
 if (!in.get_client_caps().empty()) {
@@ -83,6 +79,12 @@ ostream& operator<<(ostream& out, CInode& in)
 }
 out << "}";
 }
 
+if (in.get_num_ref()) {
+out << " |";
+in.print_pin_set(out);
+}
+
 out << " " << &in;
 out << "]";
 return out;
@@ -210,7 +210,7 @@ class FileLock : public SimpleLock {
 
 void print(ostream& out) {
 out << "(";
-//out << get_lock_type_name(l.get_type()) << " ";
+out << get_lock_type_name(get_type()) << " ";
 out << get_filelock_state_name(get_state());
 if (!get_gather_set().empty()) out << " g=" << get_gather_set();
 if (is_rdlocked())
@@ -60,6 +60,7 @@
 
 void Locker::dispatch(Message *m)
 {
+
 switch (m->get_type()) {
 
 // locking
@@ -89,16 +90,20 @@ void Locker::send_lock_message(SimpleLock *lock, int msg)
 for (map<int,int>::iterator it = lock->get_parent()->replicas_begin();
 it != lock->get_parent()->replicas_end();
 it++) {
+if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN)
+continue;
 MLock *m = new MLock(lock, msg, mds->get_nodeid());
 mds->send_message_mds(m, it->first, MDS_PORT_LOCKER);
 }
 }
 
-void Locker::send_lock_message(SimpleLock *lock, int msg, bufferlist &data)
+void Locker::send_lock_message(SimpleLock *lock, int msg, const bufferlist &data)
 {
 for (map<int,int>::iterator it = lock->get_parent()->replicas_begin();
 it != lock->get_parent()->replicas_end();
 it++) {
+if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN)
+continue;
 MLock *m = new MLock(lock, msg, mds->get_nodeid());
 m->set_data(data);
 mds->send_message_mds(m, it->first, MDS_PORT_LOCKER);
@@ -499,6 +504,8 @@ void Locker::request_inode_file_caps(CInode *in)
 assert(!in->is_auth());
 
 in->replica_caps_wanted = wanted;
 
+if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
 mds->send_message_mds(new MInodeFileCaps(in->ino(), mds->get_nodeid(),
 in->replica_caps_wanted),
 auth, MDS_PORT_LOCKER);
@@ -509,18 +516,23 @@ void Locker::request_inode_file_caps(CInode *in)
 
 void Locker::handle_inode_file_caps(MInodeFileCaps *m)
 {
+// nobody should be talking to us during recovery.
+assert(mds->is_rejoin() || mds->is_active() || mds->is_stopping());
+
+// ok
 CInode *in = mdcache->get_inode(m->get_ino());
 assert(in);
-assert(in->is_auth());// || in->is_proxy());
+assert(in->is_auth());
 
-dout(7) << "handle_inode_file_caps replica mds" << m->get_from() << " wants caps " << cap_string(m->get_caps()) << " on " << *in << endl;
-
-/*if (in->is_proxy()) {
-dout(7) << "proxy, fw" << endl;
-mds->send_message_mds(m, in->authority().first, MDS_PORT_LOCKER);
+if (mds->is_rejoin() &&
+in->is_rejoining()) {
+dout(7) << "handle_inode_file_caps still rejoining " << *in << ", dropping " << *m << endl;
+delete m;
 return;
 }
-*/
+
+dout(7) << "handle_inode_file_caps replica mds" << m->get_from() << " wants caps " << cap_string(m->get_caps()) << " on " << *in << endl;
 
 if (m->get_caps())
 in->mds_caps_wanted[m->get_from()] = m->get_caps();
@@ -703,6 +715,9 @@ ALSO:
 
 void Locker::handle_lock(MLock *m)
 {
+// nobody should be talking to us during recovery.
+assert(mds->is_rejoin() || mds->is_active() || mds->is_stopping());
+
 switch (m->get_otype()) {
 case LOCK_OTYPE_DN:
 {
@@ -778,6 +793,15 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
 {
 int from = m->get_asker();
 
+if (mds->is_rejoin()) {
+if (lock->get_parent()->is_rejoining()) {
+dout(7) << "handle_simple_lock still rejoining " << *lock->get_parent()
+<< ", dropping " << *m << endl;
+delete m;
+return;
+}
+}
+
 switch (m->get_action()) {
 // -- replica --
 case LOCK_AC_SYNC:
@@ -796,15 +820,12 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
 dout(7) << "handle_simple_lock has reader, waiting before ack on " << *lock
 << " on " << *lock->get_parent() << endl;
 lock->set_state(LOCK_GLOCKR);
-lock->add_waiter(SimpleLock::WAIT_NOLOCKS, new C_MDS_RetryMessage(mds, m));
-return;
-}
-
+} else {
 // update lock and reply
 lock->set_state(LOCK_LOCK);
 
 mds->send_message_mds(new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid()),
 from, MDS_PORT_LOCKER);
+}
 break;
 
 
@@ -815,6 +836,7 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
 MDRequest *mdr = mdcache->request_get(m->get_reqid());
 mdr->xlocks.insert(lock);
 mdr->locks.insert(lock);
+lock->set_state(LOCK_REMOTEXLOCK);
 lock->finish_waiters(SimpleLock::WAIT_REMOTEXLOCK);
 }
 break;
@@ -879,40 +901,68 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
 }
 
 
+class C_Locker_SimpleEval : public Context {
+Locker *locker;
+SimpleLock *lock;
+public:
+C_Locker_SimpleEval(Locker *l, SimpleLock *lk) : locker(l), lock(lk) {}
+void finish(int r) {
+locker->simple_eval(lock);
+}
+};
+
 void Locker::simple_eval(SimpleLock *lock)
 {
-// finished gather?
-if (lock->get_parent()->is_auth() &&
-!lock->is_stable() &&
-!lock->is_gathering()) {
-dout(7) << "simple_eval finished gather on " << *lock << " on " << *lock->get_parent() << endl;
-switch (lock->get_state()) {
-case LOCK_GLOCKR:
+// unstable and ambiguous auth?
+if (!lock->is_stable() &&
+lock->get_parent()->is_ambiguous_auth()) {
+dout(7) << "simple_eval not stable and ambiguous auth, waiting on " << *lock->get_parent() << endl;
+//if (!lock->get_parent()->is_waiter(MDSCacheObject::WAIT_SINGLEAUTH))
+lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, new C_Locker_SimpleEval(this, lock));
+return;
+}
+
+// finished remote xlock?
+if (lock->get_state() == LOCK_REMOTEXLOCK &&
+!lock->is_xlocked()) {
+// tell auth
+assert(!lock->get_parent()->is_auth()); // should be auth_pinned on the auth
+dout(7) << "simple_eval releasing remote xlock on " << *lock->get_parent() << endl;
+int auth = lock->get_parent()->authority().first;
+if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
+mds->send_message_mds(new MLock(lock, LOCK_AC_UNXLOCK, mds->get_nodeid()),
+auth, MDS_PORT_LOCKER);
 lock->set_state(LOCK_LOCK);
-lock->finish_waiters(SimpleLock::WAIT_STABLE);
-break;
-
-default:
-assert(0);
 }
 
+// finished gathering?
+if (lock->get_state() == LOCK_GLOCKR &&
+!lock->is_gathering() &&
+!lock->is_rdlocked()) {
+dout(7) << "simple_eval finished gather on " << *lock << " on " << *lock->get_parent() << endl;
+
+// replica: tell auth
+if (!lock->get_parent()->is_auth()) {
+int auth = lock->get_parent()->authority().first;
+if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
+mds->send_message_mds(new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid()),
+lock->get_parent()->authority().first, MDS_PORT_LOCKER);
 }
-if (!lock->is_stable()) return;
 
-if (lock->get_parent()->is_auth()) {
+lock->set_state(LOCK_LOCK);
+lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_WR);
+}
 
-// sync?
-if (lock->get_state() != LOCK_SYNC &&
-lock->get_parent()->is_replicated() &&
+// stable -> sync?
+if (lock->get_parent()->is_auth() &&
+lock->is_stable() &&
+lock->get_state() != LOCK_SYNC &&
 !lock->is_waiter_for(SimpleLock::WAIT_WR)) {
 dout(7) << "simple_eval stable, syncing " << *lock
 << " on " << *lock->get_parent() << endl;
 simple_sync(lock);
 }
 
-} else {
-// replica
-}
-
 }
 
 
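The C_Locker_SimpleEval class added above (and its ScatterEval/FileEval counterparts later in this diff) follows the Context completion-callback idiom: when evaluation cannot proceed because subtree auth is still ambiguous, a small Context is parked as a waiter and simply re-runs the evaluation once the condition clears. A minimal standalone sketch of that idiom, using hypothetical names rather than the real MDS classes:

    // sketch only: the generic "queue a callback that re-runs eval" pattern
    #include <list>
    #include <iostream>

    struct Context {
      virtual ~Context() {}
      virtual void finish(int r) = 0;
    };

    struct Lock {
      bool ambiguous_auth = true;
      std::list<Context*> waiters;          // parked until auth is resolved
    };

    struct Locker {
      void eval(Lock *l);
    };

    class C_ReEval : public Context {
      Locker *locker; Lock *lock;
    public:
      C_ReEval(Locker *l, Lock *lk) : locker(l), lock(lk) {}
      void finish(int) { locker->eval(lock); }  // re-run the evaluation
    };

    void Locker::eval(Lock *l) {
      if (l->ambiguous_auth) {                  // cannot decide yet: park a waiter
        l->waiters.push_back(new C_ReEval(this, l));
        return;
      }
      std::cout << "lock state can now be evaluated\n";
    }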
@@ -930,12 +980,15 @@ void Locker::simple_sync(SimpleLock *lock)
 assert(0); // um... hmm!
 assert(lock->get_state() == LOCK_LOCK);
 
+// sync.
+if (lock->get_parent()->is_replicated()) {
 // hard data
 bufferlist data;
 lock->encode_locked_state(data);
 
 // bcast to replicas
 send_lock_message(lock, LOCK_AC_SYNC, data);
+}
 
 // change lock
 lock->set_state(LOCK_SYNC);
@@ -1015,11 +1068,9 @@ void Locker::simple_rdlock_finish(SimpleLock *lock, MDRequest *mdr)
 
 dout(7) << "simple_rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
 
-if (lock->get_state() == LOCK_GLOCKR &&
-!lock->is_rdlocked()) {
-lock->set_state(LOCK_SYNC); // return state to sync, in case the unpinner flails
-lock->finish_waiters(SimpleLock::WAIT_NOLOCKS);
-}
+// last one?
+if (!lock->is_rdlocked())
+simple_eval(lock);
 }
 
 bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
@@ -1066,6 +1117,7 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
 new C_MDS_RetryRequest(mdcache, mdr));
 return false;
 }
+int auth = lock->get_parent()->authority().first;
 
 // wait for sync.
 // (???????????)
@@ -1075,7 +1127,6 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
 }
 
 // send lock request
-int auth = lock->get_parent()->authority().first;
 MLock *m = new MLock(lock, LOCK_AC_REQXLOCK, mds->get_nodeid());
 mds->send_message_mds(m, auth, MDS_PORT_LOCKER);
 
@@ -1091,29 +1142,16 @@ void Locker::simple_xlock_finish(SimpleLock *lock, MDRequest *mdr)
 // drop ref
 assert(lock->can_xlock(mdr));
 lock->put_xlock();
+assert(mdr);
 mdr->xlocks.erase(lock);
 mdr->locks.erase(lock);
 dout(7) << "simple_xlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
 
-// slave?
-if (!lock->get_parent()->is_auth()) {
-mds->send_message_mds(new MLock(lock, LOCK_AC_UNXLOCK, mds->get_nodeid()),
-lock->get_parent()->authority().first, MDS_PORT_LOCKER);
-}
-
 // others waiting?
-if (lock->is_waiter_for(SimpleLock::WAIT_WR)) {
-// wake 'em up
 lock->finish_waiters(SimpleLock::WAIT_WR, 0);
-} else {
-// auto-sync if alone.
-if (lock->get_parent()->is_auth() &&
-!lock->get_parent()->is_replicated() &&
-lock->get_state() != LOCK_SYNC)
-lock->set_state(LOCK_SYNC);
 
+// eval
 simple_eval(lock);
-}
 }
 
 
@@ -1261,19 +1299,43 @@ void Locker::scatter_wrlock_finish(ScatterLock *lock, MDRequest *mdr)
 scatter_eval(lock);
 }
 
 
+class C_Locker_ScatterEval : public Context {
+Locker *locker;
+ScatterLock *lock;
+public:
+C_Locker_ScatterEval(Locker *l, ScatterLock *lk) : locker(l), lock(lk) {}
+void finish(int r) {
+locker->scatter_eval(lock);
+}
+};
+
 void Locker::scatter_eval(ScatterLock *lock)
 {
+// unstable and ambiguous auth?
+if (!lock->is_stable() &&
+lock->get_parent()->is_ambiguous_auth()) {
+dout(7) << "scatter_eval not stable and ambiguous auth, waiting on " << *lock->get_parent() << endl;
+//if (!lock->get_parent()->is_waiter(MDSCacheObject::WAIT_SINGLEAUTH))
+lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, new C_Locker_ScatterEval(this, lock));
+return;
+}
+
 if (!lock->get_parent()->is_auth()) {
 // REPLICA
 
 if (lock->get_state() == LOCK_GSYNCS &&
 !lock->is_wrlocked()) {
 dout(10) << "scatter_eval no wrlocks, acking sync" << endl;
+int auth = lock->get_parent()->authority().first;
+if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) {
 bufferlist data;
 lock->encode_locked_state(data);
 mds->send_message_mds(new MLock(lock, LOCK_AC_SYNCACK, mds->get_nodeid(), data),
-lock->get_parent()->authority().first, MDS_PORT_LOCKER);
+auth, MDS_PORT_LOCKER);
+}
 lock->set_state(LOCK_SYNC);
+lock->finish_waiters(ScatterLock::WAIT_STABLE); // ?
 }
 
 } else {
@@ -1286,7 +1348,7 @@ void Locker::scatter_eval(ScatterLock *lock)
 dout(7) << "scatter_eval finished gather/un-wrlock on " << *lock
 << " on " << *lock->get_parent() << endl;
 lock->set_state(LOCK_SYNC);
-lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_RD|SimpleLock::WAIT_NOLOCKS);
+lock->finish_waiters(ScatterLock::WAIT_STABLE|ScatterLock::WAIT_RD);
 }
 
 // gscatters -> scatter?
@@ -1301,13 +1363,15 @@ void Locker::scatter_eval(ScatterLock *lock)
 }
 
 lock->set_state(LOCK_SCATTER);
-lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
+lock->get_wrlock();
+lock->finish_waiters(ScatterLock::WAIT_WR|ScatterLock::WAIT_STABLE);
+lock->put_wrlock();
 }
 
 // waiting for rd?
 if (lock->get_state() == LOCK_SCATTER &&
 !lock->is_wrlocked() &&
-lock->is_waiter_for(SimpleLock::WAIT_RD)) {
+lock->is_waiter_for(ScatterLock::WAIT_RD)) {
 dout(10) << "scatter_eval no wrlocks, read waiter, syncing" << endl;
 scatter_sync(lock);
 }
@@ -1339,9 +1403,10 @@ void Locker::scatter_sync(ScatterLock *lock)
 }
 else if (lock->is_wrlocked()) {
 lock->set_state(LOCK_GSYNCS);
-} else {
+}
+else {
 lock->set_state(LOCK_SYNC);
-lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE);
+lock->finish_waiters(ScatterLock::WAIT_RD|ScatterLock::WAIT_STABLE);
 }
 }
 
@@ -1365,7 +1430,7 @@ void Locker::scatter_scatter(ScatterLock *lock)
 send_lock_message(lock, LOCK_AC_SCATTER, data);
 }
 lock->set_state(LOCK_SCATTER);
-lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
+lock->finish_waiters(ScatterLock::WAIT_WR|ScatterLock::WAIT_STABLE);
 }
 }
 
@@ -1375,6 +1440,15 @@ void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
 {
 int from = m->get_asker();
 
+if (mds->is_rejoin()) {
+if (lock->get_parent()->is_rejoining()) {
+dout(7) << "handle_scatter_lock still rejoining " << *lock->get_parent()
+<< ", dropping " << *m << endl;
+delete m;
+return;
+}
+}
+
 switch (m->get_action()) {
 // -- replica --
 case LOCK_AC_SYNC:
@@ -1398,7 +1472,7 @@ void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
 assert(lock->get_state() == LOCK_SYNC);
 lock->decode_locked_state(m->get_data());
 lock->set_state(LOCK_SCATTER);
-lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
+lock->finish_waiters(ScatterLock::WAIT_WR|ScatterLock::WAIT_STABLE);
 break;
 
 // -- for auth --
@@ -1416,7 +1490,7 @@ void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
 dout(7) << "handle_scatter_lock " << *lock << " on " << *lock->get_parent()
 << " from " << from << ", last one"
 << endl;
-simple_eval(lock);
+scatter_eval(lock);
 }
 break;
 }
@@ -1508,10 +1582,8 @@ void Locker::file_rdlock_finish(FileLock *lock, MDRequest *mdr)
 
 dout(7) << "rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
 
-if (!lock->is_rdlocked()) {
-lock->finish_waiters(SimpleLock::WAIT_NOLOCKS);
+if (!lock->is_rdlocked())
 file_eval(lock);
-}
 }
 
 
@@ -1569,8 +1641,11 @@ void Locker::file_xlock_finish(FileLock *lock, MDRequest *mdr)
 
 assert(lock->get_parent()->is_auth()); // or implement remote xlocks
 
-// drop lock?
-if (!lock->is_waiter_for(SimpleLock::WAIT_STABLE))
+// others waiting?
+lock->finish_waiters(SimpleLock::WAIT_WR, 0);
+
+//// drop lock?
+//if (!lock->is_waiter_for(SimpleLock::WAIT_STABLE))
 file_eval(lock);
 }
 
@@ -1582,11 +1657,31 @@ void Locker::file_xlock_finish(FileLock *lock, MDRequest *mdr)
 * - checks if we're in unstable sfot state and can now move on to next state
 * - checks if soft state should change (eg bc last writer closed)
 */
+class C_Locker_FileEval : public Context {
+Locker *locker;
+FileLock *lock;
+public:
+C_Locker_FileEval(Locker *l, FileLock *lk) : locker(l), lock(lk) {}
+void finish(int r) {
+locker->file_eval(lock);
+}
+};
+
 
 void Locker::file_eval(FileLock *lock)
 {
 CInode *in = (CInode*)lock->get_parent();
 
+// unstable and ambiguous auth?
+if (!lock->is_stable() &&
+in->is_ambiguous_auth()) {
+dout(7) << "file_eval not stable and ambiguous auth, waiting on " << *in << endl;
+//if (!lock->get_parent()->is_waiter(MDSCacheObject::WAIT_SINGLEAUTH))
+in->add_waiter(CInode::WAIT_SINGLEAUTH, new C_Locker_FileEval(this, lock));
+return;
+}
+
 
 int issued = in->get_caps_issued();
 
 // [auth] finished gather?
@@ -1605,7 +1700,7 @@ void Locker::file_eval(FileLock *lock)
 
 // waiters
 lock->get_rdlock();
-lock->finish_waiters(SimpleLock::WAIT_STABLE);
+lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_WR|SimpleLock::WAIT_RD);
 lock->put_rdlock();
 }
 break;
@ -1789,19 +1884,15 @@ bool Locker::file_sync(FileLock *lock)
|
|||||||
|
|
||||||
if (lock->get_state() == LOCK_LOCK) {
|
if (lock->get_state() == LOCK_LOCK) {
|
||||||
if (in->is_replicated()) {
|
if (in->is_replicated()) {
|
||||||
// soft data
|
|
||||||
bufferlist softdata;
|
bufferlist softdata;
|
||||||
lock->encode_locked_state(softdata);
|
lock->encode_locked_state(softdata);
|
||||||
|
|
||||||
// bcast to replicas
|
|
||||||
send_lock_message(lock, LOCK_AC_SYNC, softdata);
|
send_lock_message(lock, LOCK_AC_SYNC, softdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
// change lock
|
// change lock
|
||||||
lock->set_state(LOCK_SYNC);
|
lock->set_state(LOCK_SYNC);
|
||||||
|
|
||||||
// reissue caps
|
issue_caps(in); // reissue caps
|
||||||
issue_caps(in);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1813,10 +1904,10 @@ bool Locker::file_sync(FileLock *lock)
|
|||||||
issue_caps(in);
|
issue_caps(in);
|
||||||
} else {
|
} else {
|
||||||
// no writers, go straight to sync
|
// no writers, go straight to sync
|
||||||
|
|
||||||
if (in->is_replicated()) {
|
if (in->is_replicated()) {
|
||||||
// bcast to replicas
|
bufferlist softdata;
|
||||||
send_lock_message(lock, LOCK_AC_SYNC);
|
lock->encode_locked_state(softdata);
|
||||||
|
send_lock_message(lock, LOCK_AC_SYNC, softdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
// change lock
|
// change lock
|
||||||
@ -1834,8 +1925,9 @@ bool Locker::file_sync(FileLock *lock)
|
|||||||
} else {
|
} else {
|
||||||
// no writers, go straight to sync
|
// no writers, go straight to sync
|
||||||
if (in->is_replicated()) {
|
if (in->is_replicated()) {
|
||||||
// bcast to replicas
|
bufferlist softdata;
|
||||||
send_lock_message(lock, LOCK_AC_SYNC);
|
lock->encode_locked_state(softdata);
|
||||||
|
send_lock_message(lock, LOCK_AC_SYNC, softdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
// change lock
|
// change lock
|
||||||
@ -2070,6 +2162,16 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m)
|
|||||||
CInode *in = (CInode*)lock->get_parent();
|
CInode *in = (CInode*)lock->get_parent();
|
||||||
int from = m->get_asker();
|
int from = m->get_asker();
|
||||||
|
|
||||||
|
if (mds->is_rejoin()) {
|
||||||
|
if (in->is_rejoining()) {
|
||||||
|
dout(7) << "handle_file_lock still rejoining " << *in
|
||||||
|
<< ", dropping " << *m << endl;
|
||||||
|
delete m;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
dout(7) << "handle_file_lock a=" << m->get_action() << " from " << from << " "
|
dout(7) << "handle_file_lock a=" << m->get_action() << " from " << from << " "
|
||||||
<< *in << " filelock=" << *lock << endl;
|
<< *in << " filelock=" << *lock << endl;
|
||||||
|
|
||||||
@ -2104,12 +2206,11 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m)
|
|||||||
}
|
}
|
||||||
if (lock->is_rdlocked()) {
|
if (lock->is_rdlocked()) {
|
||||||
dout(7) << "handle_file_lock rdlocked, waiting before ack on " << *in << endl;
|
dout(7) << "handle_file_lock rdlocked, waiting before ack on " << *in << endl;
|
||||||
in->add_waiter(SimpleLock::WAIT_NOLOCKS, new C_MDS_RetryMessage(mds, m));
|
|
||||||
lock->set_state(LOCK_GLOCKR);
|
lock->set_state(LOCK_GLOCKR);
|
||||||
assert(0);// i am broken.. why retry message when state captures all the info i need?
|
break;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if (issued & CAP_FILE_RD) {
|
if (issued & CAP_FILE_RD) {
|
||||||
|
dout(7) << "handle_file_lock RD cap issued, waiting before ack on " << *in << endl;
|
||||||
lock->set_state(LOCK_GLOCKR);
|
lock->set_state(LOCK_GLOCKR);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -60,7 +60,7 @@ private:
|
|||||||
void handle_lock(MLock *m);
|
void handle_lock(MLock *m);
|
||||||
|
|
||||||
void send_lock_message(SimpleLock *lock, int msg);
|
void send_lock_message(SimpleLock *lock, int msg);
|
||||||
void send_lock_message(SimpleLock *lock, int msg, bufferlist &data);
|
void send_lock_message(SimpleLock *lock, int msg, const bufferlist &data);
|
||||||
|
|
||||||
// -- locks --
|
// -- locks --
|
||||||
bool acquire_locks(MDRequest *mdr,
|
bool acquire_locks(MDRequest *mdr,
|
||||||
|
@ -706,13 +706,22 @@ void MDBalancer::find_exports(CDir *dir,
|
|||||||
void MDBalancer::hit_inode(CInode *in, int type)
|
void MDBalancer::hit_inode(CInode *in, int type)
|
||||||
{
|
{
|
||||||
// hit me
|
// hit me
|
||||||
in->popularity[MDS_POP_JUSTME].pop[type].hit();
|
float me = in->popularity[MDS_POP_JUSTME].pop[type].hit();
|
||||||
in->popularity[MDS_POP_NESTED].pop[type].hit();
|
float nested = in->popularity[MDS_POP_NESTED].pop[type].hit();
|
||||||
|
float curdom = 0;
|
||||||
|
float anydom = 0;
|
||||||
if (in->is_auth()) {
|
if (in->is_auth()) {
|
||||||
in->popularity[MDS_POP_CURDOM].pop[type].hit();
|
curdom = in->popularity[MDS_POP_CURDOM].pop[type].hit();
|
||||||
in->popularity[MDS_POP_ANYDOM].pop[type].hit();
|
anydom = in->popularity[MDS_POP_ANYDOM].pop[type].hit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dout(20) << "hit_inode " << type << " pop " << me << " me, "
|
||||||
|
<< nested << " nested, "
|
||||||
|
<< curdom << " curdom, "
|
||||||
|
<< anydom << " anydom"
|
||||||
|
<< " on " << *in
|
||||||
|
<< endl;
|
||||||
|
|
||||||
// hit auth up to import
|
// hit auth up to import
|
||||||
CDir *dir = in->get_parent_dir();
|
CDir *dir = in->get_parent_dir();
|
||||||
if (dir) hit_dir(dir, type);
|
if (dir) hit_dir(dir, type);
|
||||||
@ -728,7 +737,8 @@ void MDBalancer::hit_dir(CDir *dir, int type)
|
|||||||
if (g_conf.num_mds > 2 && // FIXME >2 thing
|
if (g_conf.num_mds > 2 && // FIXME >2 thing
|
||||||
!dir->inode->is_root() && // not root (for now at least)
|
!dir->inode->is_root() && // not root (for now at least)
|
||||||
dir->is_auth()) {
|
dir->is_auth()) {
|
||||||
//dout(-20) << "hit_dir " << type << " pop is " << v << " " << *dir << endl;
|
dout(20) << "hit_dir " << type << " pop " << v << " me "
|
||||||
|
<< *dir << endl;
|
||||||
|
|
||||||
// hash this dir? (later?)
|
// hash this dir? (later?)
|
||||||
if (((v > g_conf.mds_bal_hash_rd && type == META_POP_IRD) ||
|
if (((v > g_conf.mds_bal_hash_rd && type == META_POP_IRD) ||
|
||||||
@ -756,6 +766,8 @@ void MDBalancer::hit_recursive(CDir *dir, int type)
|
|||||||
// replicate?
|
// replicate?
|
||||||
float dir_pop = dir->popularity[MDS_POP_CURDOM].pop[type].get(); // hmm??
|
float dir_pop = dir->popularity[MDS_POP_CURDOM].pop[type].get(); // hmm??
|
||||||
|
|
||||||
|
dout(20) << "hit_recursive " << type << " pop " << dir_pop << " curdom " << *dir << endl;
|
||||||
|
|
||||||
if (dir->is_auth()) {
|
if (dir->is_auth()) {
|
||||||
if (!dir->is_rep() &&
|
if (!dir->is_rep() &&
|
||||||
dir_pop >= g_conf.mds_bal_replicate_threshold) {
|
dir_pop >= g_conf.mds_bal_replicate_threshold) {
|
||||||
@ -764,7 +776,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type)
|
|||||||
rd_adj = rdp / mds->get_mds_map()->get_num_mds() - rdp;
|
rd_adj = rdp / mds->get_mds_map()->get_num_mds() - rdp;
|
||||||
rd_adj /= 2.0; // temper somewhat
|
rd_adj /= 2.0; // temper somewhat
|
||||||
|
|
||||||
dout(1) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl;
|
dout(2) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl;
|
||||||
|
|
||||||
dir->dir_rep = CDir::REP_ALL;
|
dir->dir_rep = CDir::REP_ALL;
|
||||||
mds->mdcache->send_dir_updates(dir, true);
|
mds->mdcache->send_dir_updates(dir, true);
|
||||||
@ -777,7 +789,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type)
|
|||||||
dir->is_rep() &&
|
dir->is_rep() &&
|
||||||
dir_pop < g_conf.mds_bal_unreplicate_threshold) {
|
dir_pop < g_conf.mds_bal_unreplicate_threshold) {
|
||||||
// unreplicate
|
// unreplicate
|
||||||
dout(1) << "unreplicating dir " << *dir << " pop " << dir_pop << endl;
|
dout(2) << "unreplicating dir " << *dir << " pop " << dir_pop << endl;
|
||||||
|
|
||||||
dir->dir_rep = CDir::REP_NONE;
|
dir->dir_rep = CDir::REP_NONE;
|
||||||
mds->mdcache->send_dir_updates(dir);
|
mds->mdcache->send_dir_updates(dir);
|
||||||
|
@ -42,6 +42,7 @@
|
|||||||
#include "events/ESlaveUpdate.h"
|
#include "events/ESlaveUpdate.h"
|
||||||
#include "events/EString.h"
|
#include "events/EString.h"
|
||||||
#include "events/EPurgeFinish.h"
|
#include "events/EPurgeFinish.h"
|
||||||
|
#include "events/EImportFinish.h"
|
||||||
|
|
||||||
#include "messages/MGenericMessage.h"
|
#include "messages/MGenericMessage.h"
|
||||||
|
|
||||||
@ -1200,9 +1201,11 @@ void MDCache::disambiguate_imports()
|
|||||||
if (dir->authority().first != CDIR_AUTH_UNKNOWN) {
|
if (dir->authority().first != CDIR_AUTH_UNKNOWN) {
|
||||||
dout(10) << "ambiguous import auth known, must not be me " << *dir << endl;
|
dout(10) << "ambiguous import auth known, must not be me " << *dir << endl;
|
||||||
cancel_ambiguous_import(q->first);
|
cancel_ambiguous_import(q->first);
|
||||||
|
mds->mdlog->submit_entry(new EImportFinish(dir, false));
|
||||||
} else {
|
} else {
|
||||||
dout(10) << "ambiguous import auth unknown, must be me " << *dir << endl;
|
dout(10) << "ambiguous import auth unknown, must be me " << *dir << endl;
|
||||||
finish_ambiguous_import(q->first);
|
finish_ambiguous_import(q->first);
|
||||||
|
mds->mdlog->submit_entry(new EImportFinish(dir, true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(my_ambiguous_imports.empty());
|
assert(my_ambiguous_imports.empty());
|
||||||
@ -1262,50 +1265,71 @@ void MDCache::finish_ambiguous_import(dirfrag_t df)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/** recalc_auth_bits()
|
||||||
* once subtree auth is disambiguated, we need to adjust all the
|
* once subtree auth is disambiguated, we need to adjust all the
|
||||||
* auth (and dirty) bits in our cache before moving on.
|
* auth and dirty bits in our cache before moving on.
|
||||||
*/
|
*/
|
||||||
void MDCache::recalc_auth_bits()
|
void MDCache::recalc_auth_bits()
|
||||||
{
|
{
|
||||||
dout(7) << "recalc_auth_bits" << endl;
|
dout(7) << "recalc_auth_bits" << endl;
|
||||||
|
|
||||||
for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
|
for (map<CDir*,set<CDir*> >::iterator p = subtrees.begin();
|
||||||
p != inode_map.end();
|
p != subtrees.end();
|
||||||
++p) {
|
++p) {
|
||||||
CInode *in = p->second;
|
list<CDir*> dfq; // dirfrag queue
|
||||||
if (in->authority().first == mds->get_nodeid())
|
dfq.push_back(p->first);
|
||||||
in->state_set(CInode::STATE_AUTH);
|
|
||||||
else {
|
|
||||||
in->state_clear(CInode::STATE_AUTH);
|
|
||||||
if (in->is_dirty())
|
|
||||||
in->mark_clean();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (in->parent) {
|
bool auth = p->first->authority().first == mds->get_nodeid();
|
||||||
if (in->parent->authority().first == mds->get_nodeid())
|
dout(10) << " subtree auth=" << auth << " for " << *p->first << endl;
|
||||||
in->parent->state_set(CDentry::STATE_AUTH);
|
|
||||||
else {
|
|
||||||
in->parent->state_clear(CDentry::STATE_AUTH);
|
|
||||||
if (in->parent->is_dirty())
|
|
||||||
in->parent->mark_clean();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
list<CDir*> ls;
|
while (!dfq.empty()) {
|
||||||
for (list<CDir*>::iterator p = ls.begin();
|
CDir *dir = dfq.front();
|
||||||
p != ls.end();
|
dfq.pop_front();
|
||||||
++p) {
|
|
||||||
CDir *dir = *p;
|
// dir
|
||||||
if (dir->authority().first == mds->get_nodeid())
|
if (auth)
|
||||||
dir->state_set(CDir::STATE_AUTH);
|
dir->state_set(CDir::STATE_AUTH);
|
||||||
else {
|
else {
|
||||||
|
dir->state_set(CDir::STATE_REJOINING);
|
||||||
dir->state_clear(CDir::STATE_AUTH);
|
dir->state_clear(CDir::STATE_AUTH);
|
||||||
if (dir->is_dirty())
|
if (dir->is_dirty())
|
||||||
dir->mark_clean();
|
dir->mark_clean();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// dentries in this dir
|
||||||
|
for (map<string,CDentry*>::iterator q = dir->items.begin();
|
||||||
|
q != dir->items.end();
|
||||||
|
++q) {
|
||||||
|
// dn
|
||||||
|
CDentry *dn = q->second;
|
||||||
|
if (auth)
|
||||||
|
dn->state_set(CDentry::STATE_AUTH);
|
||||||
|
else {
|
||||||
|
dn->state_set(CDentry::STATE_REJOINING);
|
||||||
|
dn->state_clear(CDentry::STATE_AUTH);
|
||||||
|
if (dn->is_dirty())
|
||||||
|
dn->mark_clean();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dn->is_primary()) {
|
||||||
|
// inode
|
||||||
|
if (auth)
|
||||||
|
dn->inode->state_set(CInode::STATE_AUTH);
|
||||||
|
else {
|
||||||
|
dn->inode->state_set(CInode::STATE_REJOINING);
|
||||||
|
dn->inode->state_clear(CInode::STATE_AUTH);
|
||||||
|
if (dn->inode->is_dirty())
|
||||||
|
dn->inode->mark_clean();
|
||||||
|
}
|
||||||
|
|
||||||
|
// recurse?
|
||||||
|
if (dn->inode->is_dir())
|
||||||
|
dn->inode->get_nested_dirfrags(dfq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
show_subtrees();
|
show_subtrees();
|
||||||
show_cache();
|
show_cache();
|
||||||
}
|
}
|
||||||
@ -1410,7 +1434,8 @@ void MDCache::cache_rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin)
|
|||||||
in->authlock.get_state(),
|
in->authlock.get_state(),
|
||||||
in->linklock.get_state(),
|
in->linklock.get_state(),
|
||||||
in->dirfragtreelock.get_state(),
|
in->dirfragtreelock.get_state(),
|
||||||
in->filelock.get_state());
|
in->filelock.get_state(),
|
||||||
|
in->dirlock.get_state());
|
||||||
if (in->authlock.is_xlocked())
|
if (in->authlock.is_xlocked())
|
||||||
rejoin->add_inode_xlock(in->ino(), in->authlock.get_type(),
|
rejoin->add_inode_xlock(in->ino(), in->authlock.get_type(),
|
||||||
in->authlock.get_xlocked_by()->reqid);
|
in->authlock.get_xlocked_by()->reqid);
|
||||||
@ -1489,7 +1514,7 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
|
|||||||
if (mds->is_active() || mds->is_stopping()) {
|
if (mds->is_active() || mds->is_stopping()) {
|
||||||
dout(10) << "i am active. removing stale cache replicas" << endl;
|
dout(10) << "i am active. removing stale cache replicas" << endl;
|
||||||
|
|
||||||
// first, scour cache of replica references
|
// first, scour cache of unmentioned replica references
|
||||||
for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
|
for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
|
||||||
p != inode_map.end();
|
p != inode_map.end();
|
||||||
++p) {
|
++p) {
|
||||||
@ -1548,6 +1573,7 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
|
|||||||
if (dn) {
|
if (dn) {
|
||||||
int nonce = dn->add_replica(from);
|
int nonce = dn->add_replica(from);
|
||||||
dout(10) << " have " << *dn << endl;
|
dout(10) << " have " << *dn << endl;
|
||||||
|
if (ack)
|
||||||
ack->add_strong_dentry(*p, *q, dn->lock.get_state(), nonce);
|
ack->add_strong_dentry(*p, *q, dn->lock.get_state(), nonce);
|
||||||
} else {
|
} else {
|
||||||
dout(10) << " missing " << *p << " " << *q << endl;
|
dout(10) << " missing " << *p << " " << *q << endl;
|
||||||
@ -1578,19 +1604,22 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
|
|||||||
if (in) {
|
if (in) {
|
||||||
int nonce = in->add_replica(from);
|
int nonce = in->add_replica(from);
|
||||||
in->mds_caps_wanted.erase(from);
|
in->mds_caps_wanted.erase(from);
|
||||||
in->authlock.remove_gather(from); // just in case
|
|
||||||
in->linklock.remove_gather(from); // just in case
|
|
||||||
in->dirfragtreelock.remove_gather(from); // just in case
|
|
||||||
in->filelock.remove_gather(from); // just in case
|
|
||||||
dout(10) << " have (weak) " << *in << endl;
|
dout(10) << " have (weak) " << *in << endl;
|
||||||
if (ack)
|
if (ack) {
|
||||||
|
in->authlock.remove_gather(from);
|
||||||
|
in->linklock.remove_gather(from);
|
||||||
|
in->dirfragtreelock.remove_gather(from);
|
||||||
|
in->filelock.remove_gather(from);
|
||||||
|
in->dirlock.remove_gather(from);
|
||||||
ack->add_strong_inode(in->ino(),
|
ack->add_strong_inode(in->ino(),
|
||||||
nonce,
|
nonce,
|
||||||
0,
|
0,
|
||||||
in->authlock.get_replica_state(),
|
in->authlock.get_replica_state(),
|
||||||
in->linklock.get_replica_state(),
|
in->linklock.get_replica_state(),
|
||||||
in->dirfragtreelock.get_replica_state(),
|
in->dirfragtreelock.get_replica_state(),
|
||||||
in->filelock.get_replica_state());
|
in->filelock.get_replica_state(),
|
||||||
|
in->dirlock.get_replica_state());
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
dout(10) << " missing " << *p << endl;
|
dout(10) << " missing " << *p << endl;
|
||||||
if (!missing) missing = new MMDSCacheRejoin(MMDSCacheRejoin::OP_MISSING);
|
if (!missing) missing = new MMDSCacheRejoin(MMDSCacheRejoin::OP_MISSING);
|
||||||
@@ -1609,23 +1638,34 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
       in->mds_caps_wanted[from] = p->second.caps_wanted;
     else
       in->mds_caps_wanted.erase(from);
-    in->authlock.remove_gather(from);  // just in case
-    in->linklock.remove_gather(from);  // just in case
-    in->dirfragtreelock.remove_gather(from);  // just in case
-    in->filelock.remove_gather(from);  // just in case
     dout(10) << " have (strong) " << *in << endl;
     if (ack) {
+      // i had inode, just tell replica the correct state
+      in->authlock.remove_gather(from);
+      in->linklock.remove_gather(from);
+      in->dirfragtreelock.remove_gather(from);
+      in->filelock.remove_gather(from);
+      in->dirlock.remove_gather(from);
       ack->add_strong_inode(in->ino(),
                             nonce,
                             0,
                             in->authlock.get_replica_state(),
                             in->linklock.get_replica_state(),
                             in->dirfragtreelock.get_replica_state(),
-                            in->filelock.get_replica_state());
+                            in->filelock.get_replica_state(),
+                            in->dirlock.get_replica_state());
    } else {
-      // note strong replica filelock state requests
-      //if (p->second.filelock & CAP_FILE_RD)
-      //filelock_replica_readers.insert(in);
+      // take note of replica state values.
+      // SimpleLock --
+      //   we can ignore; locked replicas can be safely changed to sync.
+      // FileLock --
+      //   we can also ignore.
+      //   replicas will at most issue RDCACHE|RD, which is covered by the default SYNC,
+      //   so only _locally_ opened files are significant.
+      // ScatterLock -- adjust accordingly
+      if (p->second.dirlock == LOCK_SCATTER ||
+          p->second.dirlock == LOCK_GSCATTERS)   // replica still has rdlocks
+        in->dirlock.set_state(LOCK_SCATTER);
    }
  } else {
    dout(10) << " missing " << p->first << endl;
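The rejoin hunk above spells out how the auth side treats replica lock state: SimpleLock and FileLock replicas can safely be folded back to sync, while a replica whose dirlock was scattered forces the auth copy back into LOCK_SCATTER. Below is a self-contained sketch of that last rule; it uses toy stand-in types rather than the real CInode/ScatterLock classes, and the enum values are illustrative only.

    #include <iostream>

    // Toy stand-ins for the MDS scatterlock states referenced above.
    enum { LOCK_SYNC = 1, LOCK_SCATTER = 5, LOCK_GSCATTERS = -6 };

    struct ToyScatterLock {
      int state = LOCK_SYNC;
      void set_state(int s) { state = s; }
    };

    // Mirror of the rejoin rule: if the replica still holds rdlocks under a
    // scattered state, the auth copy must come back up in SCATTER as well.
    void adjust_dirlock_for_replica(ToyScatterLock &auth_dirlock, int replica_state) {
      if (replica_state == LOCK_SCATTER || replica_state == LOCK_GSCATTERS)
        auth_dirlock.set_state(LOCK_SCATTER);
      // otherwise leave it; SYNC already covers anything a replica may issue.
    }

    int main() {
      ToyScatterLock dirlock;
      adjust_dirlock_for_replica(dirlock, LOCK_GSCATTERS);
      std::cout << "auth dirlock state: " << dirlock.state << std::endl;  // 5 (SCATTER)
    }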
@@ -1711,6 +1751,7 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *m)
     assert(dir);

     dir->set_replica_nonce(p->second.nonce);
+    dir->state_clear(CDir::STATE_REJOINING);
     dout(10) << " got " << *dir << endl;

     // dentries
@@ -1721,6 +1762,7 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *m)
       assert(dn);
       dn->set_replica_nonce(q->second.nonce);
       dn->lock.set_state(q->second.lock);
+      dn->state_clear(CDentry::STATE_REJOINING);
       dout(10) << " got " << *dn << endl;
     }
   }
@@ -1736,6 +1778,8 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *m)
     in->linklock.set_state(p->second.linklock);
     in->dirfragtreelock.set_state(p->second.dirfragtreelock);
     in->filelock.set_state(p->second.filelock);
+    in->dirlock.set_state(p->second.dirlock);
+    in->state_clear(CInode::STATE_REJOINING);
     dout(10) << " got " << *in << endl;
   }

@@ -1767,7 +1811,11 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
        p != m->weak_dirfrags.end();
        ++p) {
     CDir *dir = get_dirfrag(*p);
-    assert(dir);
+    if (!dir) {
+      dout(10) << " don't have dirfrag " << *p << endl;
+      continue;    // we must have trimmed it after the original rejoin
+    }
+
     dout(10) << " sending " << *dir << endl;

     // dentries
@@ -1775,7 +1823,10 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
          q != m->weak_dentries[*p].end();
          ++q) {
       CDentry *dn = dir->lookup(*q);
-      assert(dn);
+      if (!dn) {
+        dout(10) << " don't have dentry " << *q << " in " << *dir << endl;
+        continue;    // we must have trimmed it after our original rejoin
+      }
       dout(10) << " sending " << *dn << endl;
       if (mds->is_rejoin())
         full->add_weak_dentry(*p, *q);
@@ -1789,7 +1840,10 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
        p != m->weak_inodes.end();
        ++p) {
     CInode *in = get_inode(*p);
-    assert(in);
+    if (!in) {
+      dout(10) << " don't have inode " << *p << endl;
+      continue;    // we must have trimmed it after the originalo rejoin
+    }

     dout(10) << " sending " << *in << endl;
     full->add_full_inode(in->inode, in->symlink, in->dirfragtree);
@@ -1802,7 +1856,8 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
                          in->authlock.get_replica_state(),
                          in->linklock.get_replica_state(),
                          in->dirfragtreelock.get_replica_state(),
-                         in->filelock.get_replica_state());
+                         in->filelock.get_replica_state(),
+                         in->dirlock.get_replica_state());
   }

   mds->send_message_mds(full, m->get_source().num(), MDS_PORT_CACHE);
@@ -1810,7 +1865,13 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)

 void MDCache::handle_cache_rejoin_full(MMDSCacheRejoin *m)
 {
+  dout(7) << "handle_cache_rejoin_full from " << m->get_source() << endl;
+
+
   assert(0); // write me
+
+
+  delete m;
 }

 void MDCache::send_cache_rejoin_acks()
@@ -1893,7 +1954,8 @@ void MDCache::send_cache_rejoin_acks()
                             in->authlock.get_replica_state(),
                             in->linklock.get_replica_state(),
                             in->dirfragtreelock.get_replica_state(),
-                            in->filelock.get_replica_state());
+                            in->filelock.get_replica_state(),
+                            in->dirlock.get_replica_state());
     }

     // subdirs in this subtree?
@@ -1914,6 +1976,7 @@ void MDCache::send_cache_rejoin_acks()

 // ===============================================================================

+/*
 void MDCache::rename_file(CDentry *srcdn,
                           CDentry *destdn)
 {
@@ -1928,7 +1991,7 @@ void MDCache::rename_file(CDentry *srcdn,
   // link inode w/ dentry
   destdn->dir->link_inode( destdn, in );
 }
-
+*/


 void MDCache::set_root(CInode *in)
@@ -441,6 +441,7 @@ void MDS::beacon_kill(utime_t lab)

 void MDS::handle_mds_map(MMDSMap *m)
 {
+  version_t hadepoch = mdsmap->get_epoch();
   version_t epoch = m->get_epoch();
   dout(5) << "handle_mds_map epoch " << epoch << " from " << m->get_source() << endl;

@@ -671,6 +672,12 @@ void MDS::handle_mds_map(MMDSMap *m)
   }
   */

+  // just got mdsmap+osdmap?
+  if (hadepoch == 0 &&
+      mdsmap->get_epoch() > 0 &&
+      osdmap->get_epoch() > 0)
+    boot();
+
   delete m;
 }

@@ -691,14 +698,22 @@ void MDS::bcast_mds_map()

 void MDS::handle_osd_map(MOSDMap *m)
 {
-  version_t had = osdmap->get_epoch();
+  version_t hadepoch = osdmap->get_epoch();
+  dout(10) << "handle_osd_map had " << hadepoch << endl;

-  dout(10) << "handle_osd_map had " << had << endl;
+  // process

-  // process locally
   objecter->handle_osd_map(m);

-  if (had == 0 && osdmap->get_epoch() > 0) {
+  // just got mdsmap+osdmap?
+  if (hadepoch == 0 &&
+      osdmap->get_epoch() > 0 &&
+      mdsmap->get_epoch() > 0)
+    boot();
+}
+
+
+void MDS::boot()
+{
   if (is_creating())
     boot_create();    // new tables, journal
   else if (is_starting())
@@ -706,9 +721,7 @@ void MDS::handle_osd_map(MOSDMap *m)
   else if (is_replay())
     boot_replay();    // replay, join
   else
-    assert(is_standby());
-  }
-
+    assert(0);
 }

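Both map handlers above now share one guard: boot() runs exactly once, when the second of the two cluster maps (mdsmap, osdmap) arrives. A minimal sketch of the idea follows, with plain integers standing in for the map epochs; the real code keys off the epoch it held before the update rather than a boolean, so this is an illustration of the pattern, not the implementation.

    #include <iostream>

    // Stand-in for the MDS: it may only boot once both cluster maps are known.
    struct ToyMDS {
      int mds_epoch = 0, osd_epoch = 0;
      bool booted = false;

      void maybe_boot() {
        // mirrors the "hadepoch == 0 && both epochs > 0" checks above
        if (!booted && mds_epoch > 0 && osd_epoch > 0) {
          booted = true;
          std::cout << "boot()" << std::endl;
        }
      }
      void handle_mds_map(int e) { mds_epoch = e; maybe_boot(); }
      void handle_osd_map(int e) { osd_epoch = e; maybe_boot(); }
    };

    int main() {
      ToyMDS mds;
      mds.handle_osd_map(3);   // not yet: no mdsmap
      mds.handle_mds_map(7);   // boots here
    }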
@@ -1050,6 +1063,7 @@ void MDS::my_dispatch(Message *m)
     // hack: thrash exports
     for (int i=0; i<g_conf.mds_thrash_exports; i++) {
       set<int> s;
+      if (!is_active()) break;
       mdsmap->get_mds_set(s, MDSMap::STATE_ACTIVE);
       if (s.size() < 2 || mdcache->get_num_inodes() < 10)
         break;  // need peers for this to work.
@@ -189,6 +189,7 @@ class MDS : public Dispatcher {
   int init(bool standby=false);
   void reopen_logger();

+  void boot();
   void boot_create();    // i am new mds.
   void boot_start();     // i am old but empty (was down:out) mds.
   void boot_replay(int step=0);  // i am recovering existing (down:failed) mds.
@@ -84,7 +84,7 @@ public:

   void print(ostream& out) {
     out << "(";
-    //out << get_lock_type_name(l.get_type()) << " ";
+    out << get_lock_type_name(get_type()) << " ";
     out << get_scatterlock_state_name(get_state());
     if (!get_gather_set().empty()) out << " g=" << get_gather_set();
     if (is_rdlocked())
@@ -891,6 +891,54 @@ CDir* Server::try_open_dir(CInode *diri, frag_t fg, MDRequest *mdr)
 }
 */


+/** predirty_dn_diri
+ * predirty the directory inode for a new dentry, if it is auth (and not root)
+ * BUG: root inode doesn't get dirtied properly, currently.  blech.
+ */
+version_t Server::predirty_dn_diri(CDentry *dn, EMetaBlob *blob, utime_t mtime)
+{
+  version_t dirpv = 0;
+  CInode *diri = dn->dir->inode;
+
+  if (diri->is_auth() && !diri->is_root()) {
+    dirpv = diri->pre_dirty();
+    inode_t *pi = blob->add_primary_dentry(diri->get_parent_dn(), true);
+    pi->version = dirpv;
+    pi->ctime = pi->mtime = mtime;
+    dout(10) << "predirty_dn_diri ctime/mtime " << mtime << " pv " << dirpv << " on " << *diri << endl;
+  }
+
+  return dirpv;
+}
+
+/** dirty_dn_diri
+ * follow-up with actual dirty of inode after journal entry commits.
+ */
+void Server::dirty_dn_diri(CDentry *dn, version_t dirpv, utime_t mtime)
+{
+  CInode *diri = dn->dir->inode;
+
+  // make the udpate
+  diri->inode.ctime = diri->inode.mtime = mtime;
+
+  if (diri->is_auth() && !diri->is_root()) {
+    // we're auth.
+    diri->mark_dirty(dirpv);
+    dout(10) << "dirty_dn_diri ctime/mtime " << mtime << " v " << diri->inode.version << " on " << *diri << endl;
+  } else {
+    // we're not auth.  dirlock scatterlock will propagate the update.
+  }
+}
+
+
+
+
+
 // ===============================================================================
 // STAT

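predirty_dn_diri/dirty_dn_diri split the directory-inode mtime update into a journal-time step (reserve a version, stash the projected inode in the EMetaBlob) and an apply-time step (set ctime/mtime and mark_dirty once the EUpdate is safely logged); the mknod/mkdir/symlink/link/unlink/rename paths further down all follow this shape. A self-contained sketch of the same two-phase idea is shown below, using toy types; pre_dirty/mark_dirty are simplified stand-ins, not the real CInode interface.

    #include <iostream>

    // Toy directory inode: versions are reserved before journaling and
    // committed afterwards, mirroring pre_dirty()/mark_dirty() above.
    struct ToyDirInode {
      unsigned version = 10, projected = 10;
      long mtime = 0;
      bool dirty = false;
      unsigned pre_dirty() { return ++projected; }        // reserve next version
      void mark_dirty(unsigned pv) { version = pv; dirty = true; }
    };

    // Phase 1: before submitting the log entry, reserve the version that the
    // journaled metablob will carry.
    unsigned predirty(ToyDirInode &diri) { return diri.pre_dirty(); }

    // Phase 2: once the entry is safely journaled, apply the mtime and dirty
    // the inode at the reserved version.
    void dirty(ToyDirInode &diri, unsigned dirpv, long mtime) {
      diri.mtime = mtime;
      diri.mark_dirty(dirpv);
    }

    int main() {
      ToyDirInode diri;
      unsigned dirpv = predirty(diri);   // journal the projected version...
      dirty(diri, dirpv, 12345);         // ...then apply it in the finisher
      std::cout << "v=" << diri.version << " mtime=" << diri.mtime << std::endl;
    }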
@@ -1238,10 +1286,11 @@ class C_MDS_mknod_finish : public Context {
   CDentry *dn;
   CInode *newi;
   version_t pv;
+  version_t dirpv;
 public:
-  C_MDS_mknod_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni) :
+  C_MDS_mknod_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni, version_t dirpv_) :
     mds(m), mdr(r), dn(d), newi(ni),
-    pv(d->get_projected_version()) {}
+    pv(d->get_projected_version()), dirpv(dirpv_) {}
   void finish(int r) {
     assert(r == 0);

@@ -1252,8 +1301,7 @@ public:
     newi->mark_dirty(pv);

     // dir inode's mtime
-    dn->get_dir()->get_inode()->inode.mtime = MAX(dn->get_dir()->get_inode()->inode.mtime,
-                                                  newi->inode.ctime);
+    mds->server->dirty_dn_diri(dn, dirpv, newi->inode.ctime);

     // hit pop
     mds->balancer->hit_inode(newi, META_POP_IWR);
@@ -1265,6 +1313,8 @@ public:
   }
 };

+
+
 void Server::handle_client_mknod(MDRequest *mdr)
 {
   MClientRequest *req = mdr->client_request();
@@ -1282,14 +1332,17 @@ void Server::handle_client_mknod(MDRequest *mdr)
   newi->inode.mode |= INODE_MODE_FILE;

   // prepare finisher
-  C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
   EUpdate *le = new EUpdate("mknod");
   le->metablob.add_client_req(req->get_reqid());

+  version_t dirpv = predirty_dn_diri(dn, &le->metablob, newi->inode.ctime);  // dir mtime too
+
   le->metablob.add_dir_context(dn->dir);
   inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
   pi->version = dn->get_projected_version();

   // log + wait
+  C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv);
   mdlog->submit_entry(le);
   mdlog->wait_for_sync(fin);
 }
@@ -1322,15 +1375,16 @@ void Server::handle_client_mkdir(MDRequest *mdr)
   newdir->mark_dirty(newdir->pre_dirty());

   // prepare finisher
-  C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
   EUpdate *le = new EUpdate("mkdir");
   le->metablob.add_client_req(req->get_reqid());
+  version_t dirpv = predirty_dn_diri(dn, &le->metablob, newi->inode.ctime);  // dir mtime too
   le->metablob.add_dir_context(dn->dir);
   inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
   pi->version = dn->get_projected_version();
   le->metablob.add_dir(newdir, true);

   // log + wait
+  C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv);
   mdlog->submit_entry(le);
   mdlog->wait_for_sync(fin);

@@ -1370,14 +1424,15 @@ void Server::handle_client_symlink(MDRequest *mdr)
   newi->symlink = req->get_sarg();

   // prepare finisher
-  C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
   EUpdate *le = new EUpdate("symlink");
   le->metablob.add_client_req(req->get_reqid());
+  version_t dirpv = predirty_dn_diri(dn, &le->metablob, newi->inode.ctime);  // dir mtime too
   le->metablob.add_dir_context(dn->dir);
   inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
   pi->version = dn->get_projected_version();

   // log + wait
+  C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv);
   mdlog->submit_entry(le);
   mdlog->wait_for_sync(fin);
 }
@@ -1490,15 +1545,17 @@ class C_MDS_link_local_finish : public Context {
   version_t dpv;
   utime_t tctime;
   version_t tpv;
+  version_t dirpv;
 public:
-  C_MDS_link_local_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ti, utime_t ct) :
+  C_MDS_link_local_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ti, version_t dirpv_, utime_t ct) :
     mds(m), mdr(r), dn(d), targeti(ti),
     dpv(d->get_projected_version()),
     tctime(ct),
-    tpv(targeti->get_parent_dn()->get_projected_version()) {}
+    tpv(targeti->get_parent_dn()->get_projected_version()),
+    dirpv(dirpv_) { }
   void finish(int r) {
     assert(r == 0);
-    mds->server->_link_local_finish(mdr, dn, targeti, dpv, tctime, tpv);
+    mds->server->_link_local_finish(mdr, dn, targeti, dpv, tctime, tpv, dirpv);
   }
 };

@@ -1517,6 +1574,8 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)
   version_t tpdv = targeti->pre_dirty();

   // add to event
+  utime_t now = g_clock.real_now();
+  version_t dirpv = predirty_dn_diri(dn, &le->metablob, now);  // dir inode's mtime
   le->metablob.add_dir_context(dn->get_dir());
   le->metablob.add_remote_dentry(dn, true, targeti->ino());  // new remote
   le->metablob.add_dir_context(targeti->get_parent_dir());
@@ -1524,11 +1583,11 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)

   // update journaled target inode
   pi->nlink++;
-  pi->ctime = g_clock.real_now();
+  pi->ctime = now;
   pi->version = tpdv;

   // finisher
-  C_MDS_link_local_finish *fin = new C_MDS_link_local_finish(mds, mdr, dn, targeti, pi->ctime);
+  C_MDS_link_local_finish *fin = new C_MDS_link_local_finish(mds, mdr, dn, targeti, dirpv, now);

   // log + wait
   mdlog->submit_entry(le);
@@ -1536,7 +1595,7 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)
 }

 void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti,
-                                version_t dpv, utime_t tctime, version_t tpv)
+                                version_t dpv, utime_t tctime, version_t tpv, version_t dirpv)
 {
   dout(10) << "_link_local_finish " << *dn << " to " << *targeti << endl;

@@ -1551,8 +1610,7 @@ void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti,
   targeti->mark_dirty(tpv);

   // dir inode's mtime
-  dn->get_dir()->get_inode()->inode.mtime = MAX(dn->get_dir()->get_inode()->inode.mtime,
-                                                tctime);
+  dirty_dn_diri(dn, dirpv, tctime);

   // bump target popularity
   mds->balancer->hit_inode(targeti, META_POP_IWR);
@@ -1738,16 +1796,17 @@ class C_MDS_unlink_local_finish : public Context {
   CDentry *straydn;
   version_t ipv;  // referred inode
   utime_t ictime;
-  version_t dpv;  // deleted dentry
+  version_t dnpv;  // deleted dentry
+  version_t dirpv;
 public:
   C_MDS_unlink_local_finish(MDS *m, MDRequest *r, CDentry *d, CDentry *sd,
-                            version_t v, utime_t ct) :
+                            version_t v, version_t dirpv_, utime_t ct) :
     mds(m), mdr(r), dn(d), straydn(sd),
     ipv(v), ictime(ct),
-    dpv(d->get_projected_version()) { }
+    dnpv(d->get_projected_version()), dirpv(dirpv_) { }
   void finish(int r) {
     assert(r == 0);
-    mds->server->_unlink_local_finish(mdr, dn, straydn, ipv, ictime, dpv);
+    mds->server->_unlink_local_finish(mdr, dn, straydn, ipv, ictime, dnpv, dirpv);
   }
 };

@@ -1790,18 +1849,20 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn)
   }

   // the unlinked dentry
+  utime_t now = g_clock.real_now();
   dn->pre_dirty();
+  version_t dirpv = predirty_dn_diri(dn, &le->metablob, now);
   le->metablob.add_dir_context(dn->get_dir());
   le->metablob.add_null_dentry(dn, true);

   // update journaled target inode
   pi->nlink--;
-  pi->ctime = g_clock.real_now();
+  pi->ctime = now;
   pi->version = ipv;

   // finisher
   C_MDS_unlink_local_finish *fin = new C_MDS_unlink_local_finish(mds, mdr, dn, straydn,
-                                                                 ipv, pi->ctime);
+                                                                 ipv, dirpv, now);

   journal_opens();  // journal pending opens, just in case

@@ -1814,7 +1875,7 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn)

 void Server::_unlink_local_finish(MDRequest *mdr,
                                   CDentry *dn, CDentry *straydn,
-                                  version_t ipv, utime_t ictime, version_t dpv)
+                                  version_t ipv, utime_t ictime, version_t dnpv, version_t dirpv)
 {
   dout(10) << "_unlink_local " << *dn << endl;

@@ -1829,11 +1890,10 @@ void Server::_unlink_local_finish(MDRequest *mdr,
   in->inode.ctime = ictime;
   in->inode.nlink--;
   in->mark_dirty(ipv);  // dirty inode
-  dn->mark_dirty(dpv);  // dirty old dentry
+  dn->mark_dirty(dnpv);  // dirty old dentry

   // dir inode's mtime
-  dn->get_dir()->get_inode()->inode.mtime = MAX(dn->get_dir()->get_inode()->inode.mtime,
-                                                ictime);
+  dirty_dn_diri(dn, dirpv, ictime);

   // share unlink news with replicas
   for (map<int,int>::iterator it = dn->replicas_begin();
@@ -2147,25 +2207,27 @@ class C_MDS_rename_local_finish : public Context {
   version_t straypv;
   version_t destpv;
   version_t srcpv;
+  version_t ddirpv, sdirpv;
   utime_t ictime;
 public:
   version_t atid1;
   version_t atid2;
   C_MDS_rename_local_finish(MDS *m, MDRequest *r,
                             CDentry *sdn, CDentry *ddn, CDentry *stdn,
-                            version_t v, utime_t ct) :
+                            version_t v, version_t ddirpv_, version_t sdirpv_, utime_t ct) :
     mds(m), mdr(r),
     srcdn(sdn), destdn(ddn), straydn(stdn),
     ipv(v),
     straypv(straydn ? straydn->get_projected_version():0),
     destpv(destdn->get_projected_version()),
     srcpv(srcdn->get_projected_version()),
+    ddirpv(ddirpv_), sdirpv(sdirpv_),
     ictime(ct),
     atid1(0), atid2(0) { }
   void finish(int r) {
     assert(r == 0);
     mds->server->_rename_local_finish(mdr, srcdn, destdn, straydn,
-                                      srcpv, destpv, straypv, ipv, ictime,
+                                      srcpv, destpv, straypv, ipv, ddirpv, sdirpv, ictime,
                                       atid1, atid2);
   }
 };
@@ -2194,6 +2256,8 @@ void Server::_rename_local(MDRequest *mdr,
   EUpdate *le = new EUpdate("rename_local");
   le->metablob.add_client_req(mdr->reqid);

+  utime_t now = g_clock.real_now();
+
   CDentry *straydn = 0;
   inode_t *pi = 0;
   version_t ipv = 0;
@@ -2204,6 +2268,11 @@ void Server::_rename_local(MDRequest *mdr,
   // primary+remote link merge?
   bool linkmerge = (srcdn->inode == destdn->inode &&
                     (srcdn->is_primary() || destdn->is_primary()));
+
+  // dir mtimes
+  version_t ddirpv = predirty_dn_diri(destdn, &le->metablob, now);
+  version_t sdirpv = predirty_dn_diri(srcdn, &le->metablob, now);
+
   if (linkmerge) {
     dout(10) << "will merge remote+primary links" << endl;

@@ -2300,13 +2369,13 @@ void Server::_rename_local(MDRequest *mdr,
   if (pi) {
     // update journaled target inode
     pi->nlink--;
-    pi->ctime = g_clock.real_now();
+    pi->ctime = now;
     pi->version = ipv;
   }

   C_MDS_rename_local_finish *fin = new C_MDS_rename_local_finish(mds, mdr,
                                                                  srcdn, destdn, straydn,
-                                                                 ipv, pi ? pi->ctime:utime_t());
+                                                                 ipv, ddirpv, sdirpv, now);

   journal_opens();  // journal pending opens, just in case

@@ -2340,6 +2409,7 @@ void Server::_rename_local_reanchored(LogEvent *le, C_MDS_rename_local_finish *f
 void Server::_rename_local_finish(MDRequest *mdr,
                                   CDentry *srcdn, CDentry *destdn, CDentry *straydn,
                                   version_t srcpv, version_t destpv, version_t straypv, version_t ipv,
+                                  version_t ddirpv, version_t sdirpv,
                                   utime_t ictime,
                                   version_t atid1, version_t atid2)
 {
@@ -2352,8 +2422,13 @@ void Server::_rename_local_finish(MDRequest *mdr,
   bool linkmerge = (srcdn->inode == destdn->inode &&
                     (srcdn->is_primary() || destdn->is_primary()));

+  // dir mtimes
+  dirty_dn_diri(destdn, ddirpv, ictime);
+  dirty_dn_diri(srcdn, sdirpv, ictime);
+
   if (linkmerge) {
     assert(ipv);

     if (destdn->is_primary()) {
       dout(10) << "merging remote onto primary link" << endl;

@@ -2755,6 +2830,10 @@ void Server::_do_open(MDRequest *mdr, CInode *cur)
           << " on " << *cur << endl;

   // hit pop
+  if (cmode == FILE_MODE_RW ||
+      cmode == FILE_MODE_W)
+    mds->balancer->hit_inode(cur, META_POP_IWR);
+  else
     mds->balancer->hit_inode(cur, META_POP_IRD);

   // reply
@@ -70,6 +70,9 @@ public:
   CDir* try_open_auth_dir(CInode *diri, frag_t fg, MDRequest *mdr);
   //CDir* try_open_dir(CInode *diri, frag_t fg, MDRequest *mdr);

+  version_t predirty_dn_diri(CDentry *dn, class EMetaBlob *blob, utime_t mtime);
+  void dirty_dn_diri(CDentry *dn, version_t dirpv, utime_t mtime);
+
   // requests on existing inodes.
   void handle_client_stat(MDRequest *mdr);
   void handle_client_utime(MDRequest *mdr);
@@ -108,7 +111,7 @@ public:
   void _link_local(MDRequest *mdr, CDentry *dn, CInode *targeti);
   void _link_local_finish(MDRequest *mdr,
                           CDentry *dn, CInode *targeti,
-                          version_t, utime_t, version_t);
+                          version_t, utime_t, version_t, version_t);
   void _link_remote(MDRequest *mdr, CDentry *dn, CInode *targeti);

   // unlink
@@ -117,7 +120,7 @@ public:
   void _unlink_local(MDRequest *mdr, CDentry *dn);
   void _unlink_local_finish(MDRequest *mdr,
                             CDentry *dn, CDentry *straydn,
-                            version_t, utime_t, version_t);
+                            version_t, utime_t, version_t, version_t);
   void _unlink_remote(MDRequest *mdr, CDentry *dn);

   // rename
@@ -134,7 +137,7 @@ public:
   void _rename_local_finish(MDRequest *mdr,
                             CDentry *srcdn, CDentry *destdn, CDentry *straydn,
                             version_t srcpv, version_t destpv, version_t straypv, version_t ipv,
-                            utime_t ictime,
+                            version_t ddirpv, version_t sdirpv, utime_t ictime,
                             version_t atid1, version_t atid2);
 };

@@ -30,12 +30,12 @@

 inline const char *get_lock_type_name(int t) {
   switch (t) {
-  case LOCK_OTYPE_DN: return "dentry";
-  case LOCK_OTYPE_IFILE: return "inode_file";
-  case LOCK_OTYPE_IAUTH: return "inode_auth";
-  case LOCK_OTYPE_ILINK: return "inode_link";
-  case LOCK_OTYPE_IDIRFRAGTREE: return "inode_dirfragtree";
-  case LOCK_OTYPE_IDIR: return "inode_dir";
+  case LOCK_OTYPE_DN: return "dn";
+  case LOCK_OTYPE_IFILE: return "ifile";
+  case LOCK_OTYPE_IAUTH: return "iauth";
+  case LOCK_OTYPE_ILINK: return "ilink";
+  case LOCK_OTYPE_IDIRFRAGTREE: return "idft";
+  case LOCK_OTYPE_IDIR: return "idir";
   default: assert(0);
   }
 }
@@ -46,13 +46,15 @@ inline const char *get_lock_type_name(int t) {
 #define LOCK_SYNC      1  // AR   R . R .
 #define LOCK_LOCK      2  // AR   R W . .
 #define LOCK_GLOCKR   -3  // AR   R . . .
+#define LOCK_REMOTEXLOCK  -50  // on NON-auth

 inline const char *get_simplelock_state_name(int n) {
   switch (n) {
-  case LOCK_UNDEF: return "undef";
+  case LOCK_UNDEF: return "UNDEF";
   case LOCK_SYNC: return "sync";
   case LOCK_LOCK: return "lock";
   case LOCK_GLOCKR: return "glockr";
+  case LOCK_REMOTEXLOCK: return "remote_xlock";
   default: assert(0);
   }
 }
@@ -63,8 +65,7 @@ class SimpleLock {
 public:
   static const int WAIT_RD          = (1<<0);  // to read
   static const int WAIT_WR          = (1<<1);  // to write
-  static const int WAIT_NOLOCKS     = (1<<2);  // for last rdlock to finish
-  //static const int WAIT_LOCK       = (1<<3);  // for locked state
+  static const int WAIT_SINGLEAUTH  = (1<<2);
   static const int WAIT_STABLE      = (1<<3);  // for a stable state
   static const int WAIT_REMOTEXLOCK = (1<<4);  // for a remote xlock
   static const int WAIT_BITS        = 5;
@@ -248,7 +249,7 @@ public:

   virtual void print(ostream& out) {
     out << "(";
-    //out << get_lock_type_name(l.get_type()) << " ";
+    out << get_lock_type_name(get_type()) << " ";
     out << get_simplelock_state_name(get_state());
     if (!get_gather_set().empty()) out << " g=" << get_gather_set();
     if (is_rdlocked())
@@ -40,7 +40,7 @@ public:
   set<dirfrag_t> &get_bounds() { return bounds; }

   void print(ostream& out) {
-    out << "export " << base << " " << metablob;
+    out << "EExport " << base << " " << metablob;
   }

   virtual void encode_payload(bufferlist& bl) {

@@ -33,7 +33,7 @@ class EImportFinish : public LogEvent {
   EImportFinish() : LogEvent(EVENT_IMPORTFINISH) { }

   void print(ostream& out) {
-    out << "import_finish " << base;
+    out << "EImportFinish " << base;
     if (success)
       out << " success";
     else
@@ -294,6 +294,7 @@ class MDSCacheObject {
   // -- state --
   const static int STATE_AUTH      = (1<<30);
   const static int STATE_DIRTY     = (1<<29);
+  const static int STATE_REJOINING = (1<<28);  // replica has not joined w/ primary copy

   // -- wait --
   const static int WAIT_SINGLEAUTH  = (1<<30);
@@ -327,8 +328,9 @@ class MDSCacheObject {
   void state_reset(unsigned s) { state = s; }

   bool is_auth() { return state_test(STATE_AUTH); }
-  bool is_dirty() { return state & STATE_DIRTY; }
+  bool is_dirty() { return state_test(STATE_DIRTY); }
   bool is_clean() { return !is_dirty(); }
+  bool is_rejoining() { return state_test(STATE_REJOINING); }

   // --------------------------------------------
   // authority
@@ -457,7 +459,7 @@ protected:
     if (waiting.empty())
       get(PIN_WAITER);
     waiting.insert(pair<int,Context*>(mask, c));
-    dout(10) << (mdsco_db_line_prefix(this))
+    pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
             << "add_waiter " << mask << " " << c
             << " on " << *this
             << endl;
@@ -469,14 +471,14 @@ protected:
     while (it != waiting.end()) {
       if (it->first & mask) {
        ls.push_back(it->second);
-        dout(10) << (mdsco_db_line_prefix(this))
+        pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
                 << "take_waiting mask " << mask << " took " << it->second
                 << " tag " << it->first
                 << " on " << *this
                 << endl;
        waiting.erase(it++);
       } else {
-        dout(10) << "take_waiting mask " << mask << " SKIPPING " << it->second
+        pdout(10,g_conf.debug_mds) << "take_waiting mask " << mask << " SKIPPING " << it->second
                 << " tag " << it->first
                 << " on " << *this
                 << endl;
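With is_dirty() and the new is_rejoining() both routed through state_test(), every MDSCacheObject predicate reads the state word the same way. A tiny self-contained sketch of that bitmask pattern follows; the flag values are chosen to echo the header above but are illustrative only.

    #include <iostream>

    struct ToyCacheObject {
      static const unsigned STATE_DIRTY     = 1u << 29;
      static const unsigned STATE_REJOINING = 1u << 28;
      unsigned state = 0;

      // single accessor for all flag reads, mirroring state_test() above
      bool state_test(unsigned mask) const { return (state & mask) != 0; }
      void state_set(unsigned mask) { state |= mask; }

      bool is_dirty() const     { return state_test(STATE_DIRTY); }
      bool is_rejoining() const { return state_test(STATE_REJOINING); }
    };

    int main() {
      ToyCacheObject o;
      o.state_set(ToyCacheObject::STATE_REJOINING);
      std::cout << o.is_dirty() << " " << o.is_rejoining() << std::endl;  // 0 1
    }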
@@ -185,7 +185,9 @@ class MClientReply : public Message {
   virtual char *get_type_name() { return "creply"; }
   void print(ostream& o) {
     o << "creply(" << env.dst.name << "." << st.tid;
-    if (st.result) o << " = " << st.result;
+    o << " = " << st.result;
+    if (st.result <= 0)
+      o << " " << strerror(-st.result);
     o << ")";
   }

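The reply printer now always shows the numeric result and, when it is zero or negative, decodes it as an errno the way kernel-style negative return codes are read. A small sketch of that formatting (the strerror strings shown in the comments are glibc's):

    #include <cstring>
    #include <iostream>
    #include <sstream>

    // Format a client-reply result the way the print() hunk above does:
    // always show the number, and decode it when it is a (negative) errno.
    std::string format_result(int result) {
      std::ostringstream o;
      o << " = " << result;
      if (result <= 0)
        o << " " << strerror(-result);
      return o.str();
    }

    int main() {
      std::cout << format_result(-2) << std::endl;  // " = -2 No such file or directory"
      std::cout << format_result(0)  << std::endl;  // " = 0 Success" (glibc wording)
    }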
@@ -17,7 +17,7 @@
 #define __MLOCK_H

 #include "msg/Message.h"
-
+#include "mds/SimpleLock.h"

 // for replicas
 #define LOCK_AC_SYNC        -1
@@ -91,6 +91,12 @@ class MLock : public Message {
     data.claim(bl);
   }
   virtual char *get_type_name() { return "ILock"; }
+  void print(ostream& out) {
+    out << "lock(a=" << action
+        << " " << ino
+        << " " << get_lock_type_name(otype)
+        << ")";
+  }

   void set_ino(inodeno_t ino, char ot) {
     otype = ot;
@@ -111,32 +117,27 @@ class MLock : public Message {
     this->dn = dn;
   }
   void set_reqid(metareqid_t ri) { reqid = ri; }
-  void set_data(bufferlist& data) {
-    this->data.claim( data );
+  void set_data(const bufferlist& data) {
+    this->data = data;
   }

   void decode_payload() {
     int off = 0;
-    payload.copy(off,sizeof(action), (char*)&action);
-    off += sizeof(action);
-    payload.copy(off,sizeof(asker), (char*)&asker);
-    off += sizeof(asker);
-    payload.copy(off,sizeof(otype), (char*)&otype);
-    off += sizeof(otype);
-    payload.copy(off,sizeof(ino), (char*)&ino);
-    off += sizeof(ino);
-    payload.copy(off,sizeof(dirfrag), (char*)&dirfrag);
-    off += sizeof(dirfrag);
+    ::_decode(action, payload, off);
+    ::_decode(asker, payload, off);
+    ::_decode(otype, payload, off);
+    ::_decode(ino, payload, off);
+    ::_decode(dirfrag, payload, off);
     ::_decode(reqid, payload, off);
     ::_decode(dn, payload, off);
     ::_decode(data, payload, off);
   }
   virtual void encode_payload() {
-    payload.append((char*)&action, sizeof(action));
-    payload.append((char*)&asker, sizeof(asker));
-    payload.append((char*)&otype, sizeof(otype));
-    payload.append((char*)&ino, sizeof(ino));
-    payload.append((char*)&dirfrag, sizeof(dirfrag));
+    ::_encode(action, payload);
+    ::_encode(asker, payload);
+    ::_encode(otype, payload);
+    ::_encode(ino, payload);
+    ::_encode(dirfrag, payload);
     ::_encode(reqid, payload);
     ::_encode(dn, payload);
     ::_encode(data, payload);
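decode_payload()/encode_payload() now push every field through the generic ::_encode/::_decode helpers instead of hand-maintained offsets; the two lists must stay in the same order or the message is misparsed. Below is a self-contained sketch of that symmetric pattern on a plain byte buffer; the real bufferlist API differs, so this only illustrates the ordering discipline.

    #include <cstring>
    #include <iostream>
    #include <string>

    // Minimal stand-ins for ::_encode / ::_decode over a flat byte buffer.
    template <class T> void _encode(const T &v, std::string &bl) {
      bl.append(reinterpret_cast<const char*>(&v), sizeof(v));
    }
    template <class T> void _decode(T &v, const std::string &bl, size_t &off) {
      std::memcpy(&v, bl.data() + off, sizeof(v));
      off += sizeof(v);
    }

    int main() {
      std::string payload;
      int action = 7, asker = 2; char otype = 'D';
      _encode(action, payload);          // encode order...
      _encode(asker, payload);
      _encode(otype, payload);

      size_t off = 0;
      int a, b; char t;
      _decode(a, payload, off);          // ...must match decode order exactly
      _decode(b, payload, off);
      _decode(t, payload, off);
      std::cout << a << " " << b << " " << t << std::endl;  // 7 2 D
    }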
|
@ -45,11 +45,12 @@ class MMDSCacheRejoin : public Message {
|
|||||||
int32_t linklock;
|
int32_t linklock;
|
||||||
int32_t dirfragtreelock;
|
int32_t dirfragtreelock;
|
||||||
int32_t filelock;
|
int32_t filelock;
|
||||||
|
__int32_t dirlock;
|
||||||
inode_strong() {}
|
inode_strong() {}
|
||||||
inode_strong(int n, int cw=0, int a=0, int l=0, int dft=0, int f=0) :
|
inode_strong(int n, int cw=0, int a=0, int l=0, int dft=0, int f=0, int dl=0) :
|
||||||
caps_wanted(cw),
|
caps_wanted(cw),
|
||||||
nonce(n),
|
nonce(n),
|
||||||
authlock(a), linklock(l), dirfragtreelock(dft), filelock(f) { }
|
authlock(a), linklock(l), dirfragtreelock(dft), filelock(f), dirlock(dl) { }
|
||||||
};
|
};
|
||||||
struct inode_full {
|
struct inode_full {
|
||||||
inode_t inode;
|
inode_t inode;
|
||||||
@ -112,8 +113,8 @@ class MMDSCacheRejoin : public Message {
|
|||||||
void add_weak_inode(inodeno_t ino) {
|
void add_weak_inode(inodeno_t ino) {
|
||||||
weak_inodes.insert(ino);
|
weak_inodes.insert(ino);
|
||||||
}
|
}
|
||||||
void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f) {
|
void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f, int dl) {
|
||||||
strong_inodes[i] = inode_strong(n, cw, a, l, dft, f);
|
strong_inodes[i] = inode_strong(n, cw, a, l, dft, f, dl);
|
||||||
}
|
}
|
||||||
void add_full_inode(inode_t &i, const string& s, const fragtree_t &f) {
|
void add_full_inode(inode_t &i, const string& s, const fragtree_t &f) {
|
||||||
full_inodes.push_back(inode_full(i, s, f));
|
full_inodes.push_back(inode_full(i, s, f));
|
||||||
|
@@ -169,7 +169,8 @@ void Journaler::write_head(Context *oncommit)
   bufferlist bl;
   bl.append((char*)&last_written, sizeof(last_written));
   filer.write(inode, 0, bl.length(), bl, 0,
-              0, new C_WriteHead(this, last_written, oncommit));
+              0,
+              new C_WriteHead(this, last_written, oncommit));
 }

 void Journaler::_finish_write_head(Header &wrote, Context *oncommit)
@@ -293,8 +294,10 @@ void Journaler::flush(Context *onsync)
   dout(10) << "flush flushing " << flush_pos << "~" << len << endl;

   // submit write for anything pending
+  // flush _start_ pos to _finish_flush
   filer.write(inode, flush_pos, len, write_buf, 0,
-              new C_Flush(this, flush_pos), 0);  // flush _start_ pos to _finish_flush
+              g_conf.journaler_safe ? 0:new C_Flush(this, flush_pos),  // on ACK
+              g_conf.journaler_safe ? new C_Flush(this, flush_pos):0); // on COMMIT
   pending_flush[flush_pos] = g_clock.now();

   // adjust pointers
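flush() now attaches its completion to either the ack slot or the commit slot of the write depending on g_conf.journaler_safe, so in safe mode a flush only counts once the OSDs have committed it. A minimal sketch of that callback selection follows; the writer is a toy, and the real Filer::write takes more arguments.

    #include <functional>
    #include <iostream>

    // Toy writer: invokes the ack callback when the write is received and the
    // commit callback when it is durable, mirroring the two Context slots above.
    void toy_write(const std::function<void()> &onack,
                   const std::function<void()> &oncommit) {
      if (onack)    onack();
      if (oncommit) oncommit();
    }

    int main() {
      bool journaler_safe = true;   // stand-in for g_conf.journaler_safe
      auto on_flush = [] { std::cout << "flush complete" << std::endl; };

      // safe mode: wait for commit; otherwise ack is enough.
      toy_write(journaler_safe ? std::function<void()>() : on_flush,
                journaler_safe ? on_flush : std::function<void()>());
    }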