merged branches/sage/cephmds2 into trunk/ceph

git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1407 29311d96-e01e-0410-9327-a35deaab8ce9
sageweil 2007-06-06 22:43:47 +00:00
parent 3b6ca5d05b
commit 26c8a9e688
24 changed files with 630 additions and 327 deletions

View File

@ -12,7 +12,7 @@ CFLAGS = -g -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE -DDARWI
LDINC = ar -rc
else
# For linux
CFLAGS = -g -fPIC -Wall -I. -D_FILE_OFFSET_BITS=64 -DMPICH_IGNORE_CXX_SEEK -D_REENTRANT -D_THREAD_SAFE
CFLAGS = -g -fPIC -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE
LDINC = ld -i -o
endif

View File

@ -46,22 +46,52 @@ sage doc
sage mds
- journal+recovery
- local rename
- how to notify replicas...
/ - stray purge
- stray reintegration
- remote link
- impl remote inode xlock
- ESlaveUpdate replay, resolution, etc.
- remote unlink
- remote rename
- file capabilities i/o
- dirfrag split/merge
- client readdir for dirfrags
- consistency points/snapshots
- dentry versions vs dirfrags...
- statfs?
- finish multistage rejoin
- trim_on_rejoin
- more testing of failures + thrashing.
- is export prep dir open deadlock properly fixed by forge_replica_dir()?
- locker vs node failure
- osd needs a set_floor_and_read op for safe failover/STOGITH-like semantics.
- failures during recovery stages (resolve, rejoin)... make sure rejoin still works!
- fix mds initial osdmap weirdness (which will currently screw up on standby -> almost anything)
- incremental mdsmaps?
- client failure
- dirfrag split
- make sure we are freezing _before_ we fetch to complete the dirfrag, else
we break commit()'s preconditions when it fetches an incomplete dir.
- detect and deal with client failure
- recovering open files
- recovery will either have inode (from EOpen), or will provide path+cap to reassert open state.
- path+cap window will require some fetching of metadata from disk before doing the rejoin
- failures during migration.. what about client stale/reap stuff and misplaced WR caps?
- inode.max_size
- real chdir (directory "open")
- relative metadata ops
- osd needs a set_floor_and_read op for safe failover/STOGITH-like semantics.
- incremental mdsmaps?
- EMetablob should return 'expired' if they have higher versions (and are thus described by a newer journal entry)
- dir version/committed/etc versus migration, log expires.
- DOCUMENT.
@ -77,45 +107,14 @@ sage mds
- test open_remote_ino
- scatterlock
- unlink, link, rename need to pre_dirty and update dir inode's mtime
- tho need to make sure that behaves when dirfrag's inode is non-auth...
- FIXME how to journal root and stray inode content?
- in particular, i care about dirfragtree.. get it on rejoin?
- and dir sizes, if i add that... also on rejoin?
- recovering open files
- recovery will either have inode (from EOpen), or will provide path+cap to reassert open state.
- path+cap window will require some fetching of metadata from disk before doing the rejoin
- failures during migration.. what about client stale/reap stuff and misplaced WR caps?
- inode.max_size
- journal+recovery
- local rename
- how to notify replicas...
/ - stray purge
- stray reintegration
- remote link
- impl remote inode xlock
- ESlaveUpdate replay, resolution, etc.
- remote unlink
- rewrite to look like _link
- remote rename
- file capabilities i/o
- filelock to control directory mtime, dentry changes
- hmm, may have to change lock ordering, and Server::rdlock_path_pin_ref()
- dirfrag split/merge
- client readdir for dirfrags
- consistency points/snapshots
- dentry versions vs dirfrags...
- real chdir (directory "open")
- relative metadata ops
- statfs?
- fix lock caps gather ack versus ambiguous auth
foreign rename
@ -215,9 +214,13 @@ rados snapshots
objecter
- transaction prepare/commit
- read+floor_lockout
osd/rados
- transaction prepare/commit
- rollback
- rollback logging (to fix slow prepare vs rollback race)
- read+floor_lockout for clean STOGITH-like/fencing semantics after failover.
- separate out replication code into a PG class, to pave way for RAID

View File

@ -150,6 +150,7 @@ md_config_t g_conf = {
// --- journaler ---
journaler_allow_split_entries: true,
journaler_safe: false, // wait for COMMIT on journal writes
// --- mds ---
mds_cache_size: MDS_CACHE_SIZE,
@ -158,14 +159,13 @@ md_config_t g_conf = {
mds_decay_halflife: 30,
mds_beacon_interval: 5.0,
mds_beacon_grace: 100.0,
mds_beacon_grace: 10.0,
mds_log: true,
mds_log_max_len: MDS_CACHE_SIZE / 3,
mds_log_max_trimming: 10000,
mds_log_read_inc: 1<<20,
mds_log_pad_entry: 128,//256,//64,
mds_log_before_reply: true,
mds_log_flush_on_shutdown: true,
mds_log_import_map_interval: 1024*1024, // frequency (in bytes) of EImportMap in log
mds_log_eopen_size: 100, // # open inodes per log entry
@ -189,6 +189,7 @@ md_config_t g_conf = {
mds_bal_midchunk: .3, // any sub bigger than this taken in full
mds_bal_minchunk: .001, // never take anything smaller than this
mds_trim_on_rejoin: true,
mds_commit_on_shutdown: true,
mds_shutdown_check: 0, //30,
mds_shutdown_on_last_unmount: true,
@ -231,7 +232,7 @@ md_config_t g_conf = {
ebofs_cloneable: false,
ebofs_verify: false,
ebofs_commit_ms: 2000, // 0 = no forced commit timeout (for debugging/tracing)
ebofs_idle_commit_ms: 100, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms
ebofs_idle_commit_ms: 20, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms
ebofs_oc_size: 10000, // onode cache
ebofs_cc_size: 10000, // cnode cache
ebofs_bc_size: (80 *256), // 4k blocks, *256 for MB
@ -563,6 +564,9 @@ void parse_config_options(std::vector<char*>& args)
else if (strcmp(args[i], "--objecter_buffer_uncommitted") == 0)
g_conf.objecter_buffer_uncommitted = atoi(args[++i]);
else if (strcmp(args[i], "--journaler_safe") == 0)
g_conf.journaler_safe = atoi(args[++i]);
else if (strcmp(args[i], "--mds_cache_size") == 0)
g_conf.mds_cache_size = atoi(args[++i]);
@ -573,8 +577,6 @@ void parse_config_options(std::vector<char*>& args)
else if (strcmp(args[i], "--mds_log") == 0)
g_conf.mds_log = atoi(args[++i]);
else if (strcmp(args[i], "--mds_log_before_reply") == 0)
g_conf.mds_log_before_reply = atoi(args[++i]);
else if (strcmp(args[i], "--mds_log_max_len") == 0)
g_conf.mds_log_max_len = atoi(args[++i]);
else if (strcmp(args[i], "--mds_log_read_inc") == 0)

View File

@ -151,6 +151,7 @@ struct md_config_t {
// journaler
bool journaler_allow_split_entries;
bool journaler_safe;
// mds
int mds_cache_size;
@ -166,7 +167,6 @@ struct md_config_t {
int mds_log_max_trimming;
int mds_log_read_inc;
int mds_log_pad_entry;
bool mds_log_before_reply;
bool mds_log_flush_on_shutdown;
off_t mds_log_import_map_interval;
int mds_log_eopen_size;
@ -190,9 +190,11 @@ struct md_config_t {
float mds_bal_midchunk;
float mds_bal_minchunk;
bool mds_trim_on_rejoin;
bool mds_commit_on_shutdown;
int mds_shutdown_check;
bool mds_shutdown_on_last_unmount;
bool mds_verify_export_dirauth; // debug flag
bool mds_local_osd;
@ -322,6 +324,8 @@ extern md_config_t g_debug_after_conf;
#define dout(x) if ((x) <= g_conf.debug) std::cout
#define dout2(x) if ((x) <= g_conf.debug) std::cout
#define pdout(x,p) if ((x) <= (p)) std::cout
/**
* for cleaner output, bracket each line with
* dbeginl (in the dout macro) and dendl (in place of endl).
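A minimal illustrative sketch of the bracketing idea described in the comment above; these definitions are assumptions for illustration only, not the macros in this tree (which defines dout as shown a few lines up):
// sketch only: build each line in a local stream and emit it as one unit,
// so interleaved output from multiple threads stays readable.
// (assumes <iostream> and <sstream> are included, and g_conf.debug as above)
#define dbeginl  std::ostringstream __doss; __doss
#define dendl    std::endl; std::cout << __doss.str(); } } while (0)
#define dout(x)  do { if ((x) <= g_conf.debug) { dbeginl
// usage: dout(10) << "some message" << dendl;   // dendl replaces std::endl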

View File

@ -1187,6 +1187,14 @@ void CDir::freeze_tree(Context *c)
void CDir::freeze_tree_finish(Context *c)
{
// still freezing? (we may have been canceled)
if (!is_freezing()) {
dout(10) << "freeze_tree_finish no longer freezing, done on " << *this << endl;
c->finish(-1);
delete c;
return;
}
// freezeable now?
if (!is_freezeable()) {
// wait again!
@ -1242,6 +1250,7 @@ void CDir::unfreeze_tree()
state_clear(STATE_FREEZINGTREE);
// cancel freeze waiters
finish_waiting(WAIT_UNFREEZE);
finish_waiting(WAIT_FREEZEABLE, -1);
}
}
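Side note on the cancellation path added in freeze_tree_finish above: waiters are completed with finish(-1) when the freeze is abandoned. A hedged caller-side sketch (the class name is made up for illustration, not from this tree) of a Context that distinguishes the canceled case:
class C_ExampleFreezeWaiter : public Context {  // illustrative name only
public:
  void finish(int r) {
    if (r < 0) {
      // freeze was canceled (e.g. unfreeze_tree ran before the dir became freezeable)
      return;
    }
    // r == 0: freeze completed; safe to proceed with whatever needed the frozen tree
  }
};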

View File

@ -61,16 +61,12 @@ ostream& operator<<(ostream& out, CInode& in)
out << " v" << in.get_version();
out << " auth=" << in.authlock;
out << " link=" << in.linklock;
out << " dft=" << in.dirfragtreelock;
out << " file=" << in.filelock;
out << " dir=" << in.dirlock;
if (in.get_num_ref()) {
out << " |";
in.print_pin_set(out);
}
// locks
out << " " << in.authlock;
out << " " << in.linklock;
out << " " << in.dirfragtreelock;
out << " " << in.filelock;
out << " " << in.dirlock;
// hack: spit out crap on which clients have caps
if (!in.get_client_caps().empty()) {
@ -83,6 +79,12 @@ ostream& operator<<(ostream& out, CInode& in)
}
out << "}";
}
if (in.get_num_ref()) {
out << " |";
in.print_pin_set(out);
}
out << " " << &in;
out << "]";
return out;

View File

@ -210,7 +210,7 @@ class FileLock : public SimpleLock {
void print(ostream& out) {
out << "(";
//out << get_lock_type_name(l.get_type()) << " ";
out << get_lock_type_name(get_type()) << " ";
out << get_filelock_state_name(get_state());
if (!get_gather_set().empty()) out << " g=" << get_gather_set();
if (is_rdlocked())

View File

@ -60,6 +60,7 @@
void Locker::dispatch(Message *m)
{
switch (m->get_type()) {
// locking
@ -89,16 +90,20 @@ void Locker::send_lock_message(SimpleLock *lock, int msg)
for (map<int,int>::iterator it = lock->get_parent()->replicas_begin();
it != lock->get_parent()->replicas_end();
it++) {
if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN)
continue;
MLock *m = new MLock(lock, msg, mds->get_nodeid());
mds->send_message_mds(m, it->first, MDS_PORT_LOCKER);
}
}
void Locker::send_lock_message(SimpleLock *lock, int msg, bufferlist &data)
void Locker::send_lock_message(SimpleLock *lock, int msg, const bufferlist &data)
{
for (map<int,int>::iterator it = lock->get_parent()->replicas_begin();
it != lock->get_parent()->replicas_end();
it++) {
if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN)
continue;
MLock *m = new MLock(lock, msg, mds->get_nodeid());
m->set_data(data);
mds->send_message_mds(m, it->first, MDS_PORT_LOCKER);
@ -499,6 +504,8 @@ void Locker::request_inode_file_caps(CInode *in)
assert(!in->is_auth());
in->replica_caps_wanted = wanted;
if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
mds->send_message_mds(new MInodeFileCaps(in->ino(), mds->get_nodeid(),
in->replica_caps_wanted),
auth, MDS_PORT_LOCKER);
@ -509,18 +516,23 @@ void Locker::request_inode_file_caps(CInode *in)
void Locker::handle_inode_file_caps(MInodeFileCaps *m)
{
// nobody should be talking to us during recovery.
assert(mds->is_rejoin() || mds->is_active() || mds->is_stopping());
// ok
CInode *in = mdcache->get_inode(m->get_ino());
assert(in);
assert(in->is_auth());// || in->is_proxy());
assert(in->is_auth());
dout(7) << "handle_inode_file_caps replica mds" << m->get_from() << " wants caps " << cap_string(m->get_caps()) << " on " << *in << endl;
/*if (in->is_proxy()) {
dout(7) << "proxy, fw" << endl;
mds->send_message_mds(m, in->authority().first, MDS_PORT_LOCKER);
if (mds->is_rejoin() &&
in->is_rejoining()) {
dout(7) << "handle_inode_file_caps still rejoining " << *in << ", dropping " << *m << endl;
delete m;
return;
}
*/
dout(7) << "handle_inode_file_caps replica mds" << m->get_from() << " wants caps " << cap_string(m->get_caps()) << " on " << *in << endl;
if (m->get_caps())
in->mds_caps_wanted[m->get_from()] = m->get_caps();
@ -703,6 +715,9 @@ ALSO:
void Locker::handle_lock(MLock *m)
{
// nobody should be talking to us during recovery.
assert(mds->is_rejoin() || mds->is_active() || mds->is_stopping());
switch (m->get_otype()) {
case LOCK_OTYPE_DN:
{
@ -778,6 +793,15 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
{
int from = m->get_asker();
if (mds->is_rejoin()) {
if (lock->get_parent()->is_rejoining()) {
dout(7) << "handle_simple_lock still rejoining " << *lock->get_parent()
<< ", dropping " << *m << endl;
delete m;
return;
}
}
switch (m->get_action()) {
// -- replica --
case LOCK_AC_SYNC:
@ -796,15 +820,12 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
dout(7) << "handle_simple_lock has reader, waiting before ack on " << *lock
<< " on " << *lock->get_parent() << endl;
lock->set_state(LOCK_GLOCKR);
lock->add_waiter(SimpleLock::WAIT_NOLOCKS, new C_MDS_RetryMessage(mds, m));
return;
}
} else {
// update lock and reply
lock->set_state(LOCK_LOCK);
mds->send_message_mds(new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid()),
from, MDS_PORT_LOCKER);
}
break;
@ -815,6 +836,7 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
MDRequest *mdr = mdcache->request_get(m->get_reqid());
mdr->xlocks.insert(lock);
mdr->locks.insert(lock);
lock->set_state(LOCK_REMOTEXLOCK);
lock->finish_waiters(SimpleLock::WAIT_REMOTEXLOCK);
}
break;
@ -879,40 +901,68 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
}
class C_Locker_SimpleEval : public Context {
Locker *locker;
SimpleLock *lock;
public:
C_Locker_SimpleEval(Locker *l, SimpleLock *lk) : locker(l), lock(lk) {}
void finish(int r) {
locker->simple_eval(lock);
}
};
void Locker::simple_eval(SimpleLock *lock)
{
// finished gather?
if (lock->get_parent()->is_auth() &&
!lock->is_stable() &&
!lock->is_gathering()) {
dout(7) << "simple_eval finished gather on " << *lock << " on " << *lock->get_parent() << endl;
switch (lock->get_state()) {
case LOCK_GLOCKR:
// unstable and ambiguous auth?
if (!lock->is_stable() &&
lock->get_parent()->is_ambiguous_auth()) {
dout(7) << "simple_eval not stable and ambiguous auth, waiting on " << *lock->get_parent() << endl;
//if (!lock->get_parent()->is_waiter(MDSCacheObject::WAIT_SINGLEAUTH))
lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, new C_Locker_SimpleEval(this, lock));
return;
}
// finished remote xlock?
if (lock->get_state() == LOCK_REMOTEXLOCK &&
!lock->is_xlocked()) {
// tell auth
assert(!lock->get_parent()->is_auth()); // should be auth_pinned on the auth
dout(7) << "simple_eval releasing remote xlock on " << *lock->get_parent() << endl;
int auth = lock->get_parent()->authority().first;
if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
mds->send_message_mds(new MLock(lock, LOCK_AC_UNXLOCK, mds->get_nodeid()),
auth, MDS_PORT_LOCKER);
lock->set_state(LOCK_LOCK);
lock->finish_waiters(SimpleLock::WAIT_STABLE);
break;
default:
assert(0);
}
// finished gathering?
if (lock->get_state() == LOCK_GLOCKR &&
!lock->is_gathering() &&
!lock->is_rdlocked()) {
dout(7) << "simple_eval finished gather on " << *lock << " on " << *lock->get_parent() << endl;
// replica: tell auth
if (!lock->get_parent()->is_auth()) {
int auth = lock->get_parent()->authority().first;
if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
mds->send_message_mds(new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid()),
lock->get_parent()->authority().first, MDS_PORT_LOCKER);
}
if (!lock->is_stable()) return;
if (lock->get_parent()->is_auth()) {
lock->set_state(LOCK_LOCK);
lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_WR);
}
// sync?
if (lock->get_state() != LOCK_SYNC &&
lock->get_parent()->is_replicated() &&
// stable -> sync?
if (lock->get_parent()->is_auth() &&
lock->is_stable() &&
lock->get_state() != LOCK_SYNC &&
!lock->is_waiter_for(SimpleLock::WAIT_WR)) {
dout(7) << "simple_eval stable, syncing " << *lock
<< " on " << *lock->get_parent() << endl;
simple_sync(lock);
}
} else {
// replica
}
}
@ -930,12 +980,15 @@ void Locker::simple_sync(SimpleLock *lock)
assert(0); // um... hmm!
assert(lock->get_state() == LOCK_LOCK);
// sync.
if (lock->get_parent()->is_replicated()) {
// hard data
bufferlist data;
lock->encode_locked_state(data);
// bcast to replicas
send_lock_message(lock, LOCK_AC_SYNC, data);
}
// change lock
lock->set_state(LOCK_SYNC);
@ -1015,11 +1068,9 @@ void Locker::simple_rdlock_finish(SimpleLock *lock, MDRequest *mdr)
dout(7) << "simple_rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
if (lock->get_state() == LOCK_GLOCKR &&
!lock->is_rdlocked()) {
lock->set_state(LOCK_SYNC); // return state to sync, in case the unpinner flails
lock->finish_waiters(SimpleLock::WAIT_NOLOCKS);
}
// last one?
if (!lock->is_rdlocked())
simple_eval(lock);
}
bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
@ -1066,6 +1117,7 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
int auth = lock->get_parent()->authority().first;
// wait for sync.
// (???????????)
@ -1075,7 +1127,6 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
}
// send lock request
int auth = lock->get_parent()->authority().first;
MLock *m = new MLock(lock, LOCK_AC_REQXLOCK, mds->get_nodeid());
mds->send_message_mds(m, auth, MDS_PORT_LOCKER);
@ -1091,29 +1142,16 @@ void Locker::simple_xlock_finish(SimpleLock *lock, MDRequest *mdr)
// drop ref
assert(lock->can_xlock(mdr));
lock->put_xlock();
assert(mdr);
mdr->xlocks.erase(lock);
mdr->locks.erase(lock);
dout(7) << "simple_xlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
// slave?
if (!lock->get_parent()->is_auth()) {
mds->send_message_mds(new MLock(lock, LOCK_AC_UNXLOCK, mds->get_nodeid()),
lock->get_parent()->authority().first, MDS_PORT_LOCKER);
}
// others waiting?
if (lock->is_waiter_for(SimpleLock::WAIT_WR)) {
// wake 'em up
lock->finish_waiters(SimpleLock::WAIT_WR, 0);
} else {
// auto-sync if alone.
if (lock->get_parent()->is_auth() &&
!lock->get_parent()->is_replicated() &&
lock->get_state() != LOCK_SYNC)
lock->set_state(LOCK_SYNC);
// eval
simple_eval(lock);
}
}
@ -1261,19 +1299,43 @@ void Locker::scatter_wrlock_finish(ScatterLock *lock, MDRequest *mdr)
scatter_eval(lock);
}
class C_Locker_ScatterEval : public Context {
Locker *locker;
ScatterLock *lock;
public:
C_Locker_ScatterEval(Locker *l, ScatterLock *lk) : locker(l), lock(lk) {}
void finish(int r) {
locker->scatter_eval(lock);
}
};
void Locker::scatter_eval(ScatterLock *lock)
{
// unstable and ambiguous auth?
if (!lock->is_stable() &&
lock->get_parent()->is_ambiguous_auth()) {
dout(7) << "scatter_eval not stable and ambiguous auth, waiting on " << *lock->get_parent() << endl;
//if (!lock->get_parent()->is_waiter(MDSCacheObject::WAIT_SINGLEAUTH))
lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, new C_Locker_ScatterEval(this, lock));
return;
}
if (!lock->get_parent()->is_auth()) {
// REPLICA
if (lock->get_state() == LOCK_GSYNCS &&
!lock->is_wrlocked()) {
dout(10) << "scatter_eval no wrlocks, acking sync" << endl;
int auth = lock->get_parent()->authority().first;
if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) {
bufferlist data;
lock->encode_locked_state(data);
mds->send_message_mds(new MLock(lock, LOCK_AC_SYNCACK, mds->get_nodeid(), data),
lock->get_parent()->authority().first, MDS_PORT_LOCKER);
auth, MDS_PORT_LOCKER);
}
lock->set_state(LOCK_SYNC);
lock->finish_waiters(ScatterLock::WAIT_STABLE); // ?
}
} else {
@ -1286,7 +1348,7 @@ void Locker::scatter_eval(ScatterLock *lock)
dout(7) << "scatter_eval finished gather/un-wrlock on " << *lock
<< " on " << *lock->get_parent() << endl;
lock->set_state(LOCK_SYNC);
lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_RD|SimpleLock::WAIT_NOLOCKS);
lock->finish_waiters(ScatterLock::WAIT_STABLE|ScatterLock::WAIT_RD);
}
// gscatters -> scatter?
@ -1301,13 +1363,15 @@ void Locker::scatter_eval(ScatterLock *lock)
}
lock->set_state(LOCK_SCATTER);
lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
lock->get_wrlock();
lock->finish_waiters(ScatterLock::WAIT_WR|ScatterLock::WAIT_STABLE);
lock->put_wrlock();
}
// waiting for rd?
if (lock->get_state() == LOCK_SCATTER &&
!lock->is_wrlocked() &&
lock->is_waiter_for(SimpleLock::WAIT_RD)) {
lock->is_waiter_for(ScatterLock::WAIT_RD)) {
dout(10) << "scatter_eval no wrlocks, read waiter, syncing" << endl;
scatter_sync(lock);
}
@ -1339,9 +1403,10 @@ void Locker::scatter_sync(ScatterLock *lock)
}
else if (lock->is_wrlocked()) {
lock->set_state(LOCK_GSYNCS);
} else {
}
else {
lock->set_state(LOCK_SYNC);
lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE);
lock->finish_waiters(ScatterLock::WAIT_RD|ScatterLock::WAIT_STABLE);
}
}
@ -1365,7 +1430,7 @@ void Locker::scatter_scatter(ScatterLock *lock)
send_lock_message(lock, LOCK_AC_SCATTER, data);
}
lock->set_state(LOCK_SCATTER);
lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
lock->finish_waiters(ScatterLock::WAIT_WR|ScatterLock::WAIT_STABLE);
}
}
@ -1375,6 +1440,15 @@ void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
{
int from = m->get_asker();
if (mds->is_rejoin()) {
if (lock->get_parent()->is_rejoining()) {
dout(7) << "handle_scatter_lock still rejoining " << *lock->get_parent()
<< ", dropping " << *m << endl;
delete m;
return;
}
}
switch (m->get_action()) {
// -- replica --
case LOCK_AC_SYNC:
@ -1398,7 +1472,7 @@ void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
assert(lock->get_state() == LOCK_SYNC);
lock->decode_locked_state(m->get_data());
lock->set_state(LOCK_SCATTER);
lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
lock->finish_waiters(ScatterLock::WAIT_WR|ScatterLock::WAIT_STABLE);
break;
// -- for auth --
@ -1416,7 +1490,7 @@ void Locker::handle_scatter_lock(ScatterLock *lock, MLock *m)
dout(7) << "handle_scatter_lock " << *lock << " on " << *lock->get_parent()
<< " from " << from << ", last one"
<< endl;
simple_eval(lock);
scatter_eval(lock);
}
break;
}
@ -1508,10 +1582,8 @@ void Locker::file_rdlock_finish(FileLock *lock, MDRequest *mdr)
dout(7) << "rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
if (!lock->is_rdlocked()) {
lock->finish_waiters(SimpleLock::WAIT_NOLOCKS);
if (!lock->is_rdlocked())
file_eval(lock);
}
}
@ -1569,8 +1641,11 @@ void Locker::file_xlock_finish(FileLock *lock, MDRequest *mdr)
assert(lock->get_parent()->is_auth()); // or implement remote xlocks
// drop lock?
if (!lock->is_waiter_for(SimpleLock::WAIT_STABLE))
// others waiting?
lock->finish_waiters(SimpleLock::WAIT_WR, 0);
//// drop lock?
//if (!lock->is_waiter_for(SimpleLock::WAIT_STABLE))
file_eval(lock);
}
@ -1582,11 +1657,31 @@ void Locker::file_xlock_finish(FileLock *lock, MDRequest *mdr)
* - checks if we're in unstable soft state and can now move on to next state
* - checks if soft state should change (eg bc last writer closed)
*/
class C_Locker_FileEval : public Context {
Locker *locker;
FileLock *lock;
public:
C_Locker_FileEval(Locker *l, FileLock *lk) : locker(l), lock(lk) {}
void finish(int r) {
locker->file_eval(lock);
}
};
void Locker::file_eval(FileLock *lock)
{
CInode *in = (CInode*)lock->get_parent();
// unstable and ambiguous auth?
if (!lock->is_stable() &&
in->is_ambiguous_auth()) {
dout(7) << "file_eval not stable and ambiguous auth, waiting on " << *in << endl;
//if (!lock->get_parent()->is_waiter(MDSCacheObject::WAIT_SINGLEAUTH))
in->add_waiter(CInode::WAIT_SINGLEAUTH, new C_Locker_FileEval(this, lock));
return;
}
int issued = in->get_caps_issued();
// [auth] finished gather?
@ -1605,7 +1700,7 @@ void Locker::file_eval(FileLock *lock)
// waiters
lock->get_rdlock();
lock->finish_waiters(SimpleLock::WAIT_STABLE);
lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_WR|SimpleLock::WAIT_RD);
lock->put_rdlock();
}
break;
@ -1789,19 +1884,15 @@ bool Locker::file_sync(FileLock *lock)
if (lock->get_state() == LOCK_LOCK) {
if (in->is_replicated()) {
// soft data
bufferlist softdata;
lock->encode_locked_state(softdata);
// bcast to replicas
send_lock_message(lock, LOCK_AC_SYNC, softdata);
}
// change lock
lock->set_state(LOCK_SYNC);
// reissue caps
issue_caps(in);
issue_caps(in); // reissue caps
return true;
}
@ -1813,10 +1904,10 @@ bool Locker::file_sync(FileLock *lock)
issue_caps(in);
} else {
// no writers, go straight to sync
if (in->is_replicated()) {
// bcast to replicas
send_lock_message(lock, LOCK_AC_SYNC);
bufferlist softdata;
lock->encode_locked_state(softdata);
send_lock_message(lock, LOCK_AC_SYNC, softdata);
}
// change lock
@ -1834,8 +1925,9 @@ bool Locker::file_sync(FileLock *lock)
} else {
// no writers, go straight to sync
if (in->is_replicated()) {
// bcast to replicas
send_lock_message(lock, LOCK_AC_SYNC);
bufferlist softdata;
lock->encode_locked_state(softdata);
send_lock_message(lock, LOCK_AC_SYNC, softdata);
}
// change lock
@ -2070,6 +2162,16 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m)
CInode *in = (CInode*)lock->get_parent();
int from = m->get_asker();
if (mds->is_rejoin()) {
if (in->is_rejoining()) {
dout(7) << "handle_file_lock still rejoining " << *in
<< ", dropping " << *m << endl;
delete m;
return;
}
}
dout(7) << "handle_file_lock a=" << m->get_action() << " from " << from << " "
<< *in << " filelock=" << *lock << endl;
@ -2104,12 +2206,11 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m)
}
if (lock->is_rdlocked()) {
dout(7) << "handle_file_lock rdlocked, waiting before ack on " << *in << endl;
in->add_waiter(SimpleLock::WAIT_NOLOCKS, new C_MDS_RetryMessage(mds, m));
lock->set_state(LOCK_GLOCKR);
assert(0);// i am broken.. why retry message when state captures all the info i need?
return;
break;
}
if (issued & CAP_FILE_RD) {
dout(7) << "handle_file_lock RD cap issued, waiting before ack on " << *in << endl;
lock->set_state(LOCK_GLOCKR);
break;
}

View File

@ -60,7 +60,7 @@ private:
void handle_lock(MLock *m);
void send_lock_message(SimpleLock *lock, int msg);
void send_lock_message(SimpleLock *lock, int msg, bufferlist &data);
void send_lock_message(SimpleLock *lock, int msg, const bufferlist &data);
// -- locks --
bool acquire_locks(MDRequest *mdr,

View File

@ -706,13 +706,22 @@ void MDBalancer::find_exports(CDir *dir,
void MDBalancer::hit_inode(CInode *in, int type)
{
// hit me
in->popularity[MDS_POP_JUSTME].pop[type].hit();
in->popularity[MDS_POP_NESTED].pop[type].hit();
float me = in->popularity[MDS_POP_JUSTME].pop[type].hit();
float nested = in->popularity[MDS_POP_NESTED].pop[type].hit();
float curdom = 0;
float anydom = 0;
if (in->is_auth()) {
in->popularity[MDS_POP_CURDOM].pop[type].hit();
in->popularity[MDS_POP_ANYDOM].pop[type].hit();
curdom = in->popularity[MDS_POP_CURDOM].pop[type].hit();
anydom = in->popularity[MDS_POP_ANYDOM].pop[type].hit();
}
dout(20) << "hit_inode " << type << " pop " << me << " me, "
<< nested << " nested, "
<< curdom << " curdom, "
<< anydom << " anydom"
<< " on " << *in
<< endl;
// hit auth up to import
CDir *dir = in->get_parent_dir();
if (dir) hit_dir(dir, type);
@ -728,7 +737,8 @@ void MDBalancer::hit_dir(CDir *dir, int type)
if (g_conf.num_mds > 2 && // FIXME >2 thing
!dir->inode->is_root() && // not root (for now at least)
dir->is_auth()) {
//dout(-20) << "hit_dir " << type << " pop is " << v << " " << *dir << endl;
dout(20) << "hit_dir " << type << " pop " << v << " me "
<< *dir << endl;
// hash this dir? (later?)
if (((v > g_conf.mds_bal_hash_rd && type == META_POP_IRD) ||
@ -756,6 +766,8 @@ void MDBalancer::hit_recursive(CDir *dir, int type)
// replicate?
float dir_pop = dir->popularity[MDS_POP_CURDOM].pop[type].get(); // hmm??
dout(20) << "hit_recursive " << type << " pop " << dir_pop << " curdom " << *dir << endl;
if (dir->is_auth()) {
if (!dir->is_rep() &&
dir_pop >= g_conf.mds_bal_replicate_threshold) {
@ -764,7 +776,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type)
rd_adj = rdp / mds->get_mds_map()->get_num_mds() - rdp;
rd_adj /= 2.0; // temper somewhat
dout(1) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl;
dout(2) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl;
dir->dir_rep = CDir::REP_ALL;
mds->mdcache->send_dir_updates(dir, true);
@ -777,7 +789,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type)
dir->is_rep() &&
dir_pop < g_conf.mds_bal_unreplicate_threshold) {
// unreplicate
dout(1) << "unreplicating dir " << *dir << " pop " << dir_pop << endl;
dout(2) << "unreplicating dir " << *dir << " pop " << dir_pop << endl;
dir->dir_rep = CDir::REP_NONE;
mds->mdcache->send_dir_updates(dir);

View File

@ -42,6 +42,7 @@
#include "events/ESlaveUpdate.h"
#include "events/EString.h"
#include "events/EPurgeFinish.h"
#include "events/EImportFinish.h"
#include "messages/MGenericMessage.h"
@ -1200,9 +1201,11 @@ void MDCache::disambiguate_imports()
if (dir->authority().first != CDIR_AUTH_UNKNOWN) {
dout(10) << "ambiguous import auth known, must not be me " << *dir << endl;
cancel_ambiguous_import(q->first);
mds->mdlog->submit_entry(new EImportFinish(dir, false));
} else {
dout(10) << "ambiguous import auth unknown, must be me " << *dir << endl;
finish_ambiguous_import(q->first);
mds->mdlog->submit_entry(new EImportFinish(dir, true));
}
}
assert(my_ambiguous_imports.empty());
@ -1262,50 +1265,71 @@ void MDCache::finish_ambiguous_import(dirfrag_t df)
}
/*
/** recalc_auth_bits()
* once subtree auth is disambiguated, we need to adjust all the
* auth (and dirty) bits in our cache before moving on.
* auth and dirty bits in our cache before moving on.
*/
void MDCache::recalc_auth_bits()
{
dout(7) << "recalc_auth_bits" << endl;
for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
p != inode_map.end();
for (map<CDir*,set<CDir*> >::iterator p = subtrees.begin();
p != subtrees.end();
++p) {
CInode *in = p->second;
if (in->authority().first == mds->get_nodeid())
in->state_set(CInode::STATE_AUTH);
else {
in->state_clear(CInode::STATE_AUTH);
if (in->is_dirty())
in->mark_clean();
}
list<CDir*> dfq; // dirfrag queue
dfq.push_back(p->first);
if (in->parent) {
if (in->parent->authority().first == mds->get_nodeid())
in->parent->state_set(CDentry::STATE_AUTH);
else {
in->parent->state_clear(CDentry::STATE_AUTH);
if (in->parent->is_dirty())
in->parent->mark_clean();
}
}
bool auth = p->first->authority().first == mds->get_nodeid();
dout(10) << " subtree auth=" << auth << " for " << *p->first << endl;
list<CDir*> ls;
for (list<CDir*>::iterator p = ls.begin();
p != ls.end();
++p) {
CDir *dir = *p;
if (dir->authority().first == mds->get_nodeid())
while (!dfq.empty()) {
CDir *dir = dfq.front();
dfq.pop_front();
// dir
if (auth)
dir->state_set(CDir::STATE_AUTH);
else {
dir->state_set(CDir::STATE_REJOINING);
dir->state_clear(CDir::STATE_AUTH);
if (dir->is_dirty())
dir->mark_clean();
}
// dentries in this dir
for (map<string,CDentry*>::iterator q = dir->items.begin();
q != dir->items.end();
++q) {
// dn
CDentry *dn = q->second;
if (auth)
dn->state_set(CDentry::STATE_AUTH);
else {
dn->state_set(CDentry::STATE_REJOINING);
dn->state_clear(CDentry::STATE_AUTH);
if (dn->is_dirty())
dn->mark_clean();
}
if (dn->is_primary()) {
// inode
if (auth)
dn->inode->state_set(CInode::STATE_AUTH);
else {
dn->inode->state_set(CInode::STATE_REJOINING);
dn->inode->state_clear(CInode::STATE_AUTH);
if (dn->inode->is_dirty())
dn->inode->mark_clean();
}
// recurse?
if (dn->inode->is_dir())
dn->inode->get_nested_dirfrags(dfq);
}
}
}
}
show_subtrees();
show_cache();
}
@ -1410,7 +1434,8 @@ void MDCache::cache_rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin)
in->authlock.get_state(),
in->linklock.get_state(),
in->dirfragtreelock.get_state(),
in->filelock.get_state());
in->filelock.get_state(),
in->dirlock.get_state());
if (in->authlock.is_xlocked())
rejoin->add_inode_xlock(in->ino(), in->authlock.get_type(),
in->authlock.get_xlocked_by()->reqid);
@ -1489,7 +1514,7 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
if (mds->is_active() || mds->is_stopping()) {
dout(10) << "i am active. removing stale cache replicas" << endl;
// first, scour cache of replica references
// first, scour cache of unmentioned replica references
for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
p != inode_map.end();
++p) {
@ -1548,6 +1573,7 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
if (dn) {
int nonce = dn->add_replica(from);
dout(10) << " have " << *dn << endl;
if (ack)
ack->add_strong_dentry(*p, *q, dn->lock.get_state(), nonce);
} else {
dout(10) << " missing " << *p << " " << *q << endl;
@ -1578,19 +1604,22 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
if (in) {
int nonce = in->add_replica(from);
in->mds_caps_wanted.erase(from);
in->authlock.remove_gather(from); // just in case
in->linklock.remove_gather(from); // just in case
in->dirfragtreelock.remove_gather(from); // just in case
in->filelock.remove_gather(from); // just in case
dout(10) << " have (weak) " << *in << endl;
if (ack)
if (ack) {
in->authlock.remove_gather(from);
in->linklock.remove_gather(from);
in->dirfragtreelock.remove_gather(from);
in->filelock.remove_gather(from);
in->dirlock.remove_gather(from);
ack->add_strong_inode(in->ino(),
nonce,
0,
in->authlock.get_replica_state(),
in->linklock.get_replica_state(),
in->dirfragtreelock.get_replica_state(),
in->filelock.get_replica_state());
in->filelock.get_replica_state(),
in->dirlock.get_replica_state());
}
} else {
dout(10) << " missing " << *p << endl;
if (!missing) missing = new MMDSCacheRejoin(MMDSCacheRejoin::OP_MISSING);
@ -1609,23 +1638,34 @@ void MDCache::handle_cache_rejoin_rejoin(MMDSCacheRejoin *m)
in->mds_caps_wanted[from] = p->second.caps_wanted;
else
in->mds_caps_wanted.erase(from);
in->authlock.remove_gather(from); // just in case
in->linklock.remove_gather(from); // just in case
in->dirfragtreelock.remove_gather(from); // just in case
in->filelock.remove_gather(from); // just in case
dout(10) << " have (strong) " << *in << endl;
if (ack) {
// i had inode, just tell replica the correct state
in->authlock.remove_gather(from);
in->linklock.remove_gather(from);
in->dirfragtreelock.remove_gather(from);
in->filelock.remove_gather(from);
in->dirlock.remove_gather(from);
ack->add_strong_inode(in->ino(),
nonce,
0,
in->authlock.get_replica_state(),
in->linklock.get_replica_state(),
in->dirfragtreelock.get_replica_state(),
in->filelock.get_replica_state());
in->filelock.get_replica_state(),
in->dirlock.get_replica_state());
} else {
// note strong replica filelock state requests
//if (p->second.filelock & CAP_FILE_RD)
//filelock_replica_readers.insert(in);
// take note of replica state values.
// SimpleLock --
// we can ignore; locked replicas can be safely changed to sync.
// FileLock --
// we can also ignore.
// replicas will at most issue RDCACHE|RD, which is covered by the default SYNC,
// so only _locally_ opened files are significant.
// ScatterLock -- adjust accordingly
if (p->second.dirlock == LOCK_SCATTER ||
p->second.dirlock == LOCK_GSCATTERS) // replica still has rdlocks
in->dirlock.set_state(LOCK_SCATTER);
}
} else {
dout(10) << " missing " << p->first << endl;
@ -1711,6 +1751,7 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *m)
assert(dir);
dir->set_replica_nonce(p->second.nonce);
dir->state_clear(CDir::STATE_REJOINING);
dout(10) << " got " << *dir << endl;
// dentries
@ -1721,6 +1762,7 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *m)
assert(dn);
dn->set_replica_nonce(q->second.nonce);
dn->lock.set_state(q->second.lock);
dn->state_clear(CDentry::STATE_REJOINING);
dout(10) << " got " << *dn << endl;
}
}
@ -1736,6 +1778,8 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *m)
in->linklock.set_state(p->second.linklock);
in->dirfragtreelock.set_state(p->second.dirfragtreelock);
in->filelock.set_state(p->second.filelock);
in->dirlock.set_state(p->second.dirlock);
in->state_clear(CInode::STATE_REJOINING);
dout(10) << " got " << *in << endl;
}
@ -1767,7 +1811,11 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
p != m->weak_dirfrags.end();
++p) {
CDir *dir = get_dirfrag(*p);
assert(dir);
if (!dir) {
dout(10) << " don't have dirfrag " << *p << endl;
continue; // we must have trimmed it after the original rejoin
}
dout(10) << " sending " << *dir << endl;
// dentries
@ -1775,7 +1823,10 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
q != m->weak_dentries[*p].end();
++q) {
CDentry *dn = dir->lookup(*q);
assert(dn);
if (!dn) {
dout(10) << " don't have dentry " << *q << " in " << *dir << endl;
continue; // we must have trimmed it after our original rejoin
}
dout(10) << " sending " << *dn << endl;
if (mds->is_rejoin())
full->add_weak_dentry(*p, *q);
@ -1789,7 +1840,10 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
p != m->weak_inodes.end();
++p) {
CInode *in = get_inode(*p);
assert(in);
if (!in) {
dout(10) << " don't have inode " << *p << endl;
continue; // we must have trimmed it after the original rejoin
}
dout(10) << " sending " << *in << endl;
full->add_full_inode(in->inode, in->symlink, in->dirfragtree);
@ -1802,7 +1856,8 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
in->authlock.get_replica_state(),
in->linklock.get_replica_state(),
in->dirfragtreelock.get_replica_state(),
in->filelock.get_replica_state());
in->filelock.get_replica_state(),
in->dirlock.get_replica_state());
}
mds->send_message_mds(full, m->get_source().num(), MDS_PORT_CACHE);
@ -1810,7 +1865,13 @@ void MDCache::handle_cache_rejoin_missing(MMDSCacheRejoin *m)
void MDCache::handle_cache_rejoin_full(MMDSCacheRejoin *m)
{
dout(7) << "handle_cache_rejoin_full from " << m->get_source() << endl;
assert(0); // write me
delete m;
}
void MDCache::send_cache_rejoin_acks()
@ -1893,7 +1954,8 @@ void MDCache::send_cache_rejoin_acks()
in->authlock.get_replica_state(),
in->linklock.get_replica_state(),
in->dirfragtreelock.get_replica_state(),
in->filelock.get_replica_state());
in->filelock.get_replica_state(),
in->dirlock.get_replica_state());
}
// subdirs in this subtree?
@ -1914,6 +1976,7 @@ void MDCache::send_cache_rejoin_acks()
// ===============================================================================
/*
void MDCache::rename_file(CDentry *srcdn,
CDentry *destdn)
{
@ -1928,7 +1991,7 @@ void MDCache::rename_file(CDentry *srcdn,
// link inode w/ dentry
destdn->dir->link_inode( destdn, in );
}
*/
void MDCache::set_root(CInode *in)

View File

@ -441,6 +441,7 @@ void MDS::beacon_kill(utime_t lab)
void MDS::handle_mds_map(MMDSMap *m)
{
version_t hadepoch = mdsmap->get_epoch();
version_t epoch = m->get_epoch();
dout(5) << "handle_mds_map epoch " << epoch << " from " << m->get_source() << endl;
@ -671,6 +672,12 @@ void MDS::handle_mds_map(MMDSMap *m)
}
*/
// just got mdsmap+osdmap?
if (hadepoch == 0 &&
mdsmap->get_epoch() > 0 &&
osdmap->get_epoch() > 0)
boot();
delete m;
}
@ -691,14 +698,22 @@ void MDS::bcast_mds_map()
void MDS::handle_osd_map(MOSDMap *m)
{
version_t had = osdmap->get_epoch();
version_t hadepoch = osdmap->get_epoch();
dout(10) << "handle_osd_map had " << hadepoch << endl;
dout(10) << "handle_osd_map had " << had << endl;
// process locally
// process
objecter->handle_osd_map(m);
if (had == 0 && osdmap->get_epoch() > 0) {
// just got mdsmap+osdmap?
if (hadepoch == 0 &&
osdmap->get_epoch() > 0 &&
mdsmap->get_epoch() > 0)
boot();
}
void MDS::boot()
{
if (is_creating())
boot_create(); // new tables, journal
else if (is_starting())
@ -706,9 +721,7 @@ void MDS::handle_osd_map(MOSDMap *m)
else if (is_replay())
boot_replay(); // replay, join
else
assert(is_standby());
}
assert(0);
}
@ -1050,6 +1063,7 @@ void MDS::my_dispatch(Message *m)
// hack: thrash exports
for (int i=0; i<g_conf.mds_thrash_exports; i++) {
set<int> s;
if (!is_active()) break;
mdsmap->get_mds_set(s, MDSMap::STATE_ACTIVE);
if (s.size() < 2 || mdcache->get_num_inodes() < 10)
break; // need peers for this to work.

View File

@ -189,6 +189,7 @@ class MDS : public Dispatcher {
int init(bool standby=false);
void reopen_logger();
void boot();
void boot_create(); // i am new mds.
void boot_start(); // i am old but empty (was down:out) mds.
void boot_replay(int step=0); // i am recovering existing (down:failed) mds.

View File

@ -84,7 +84,7 @@ public:
void print(ostream& out) {
out << "(";
//out << get_lock_type_name(l.get_type()) << " ";
out << get_lock_type_name(get_type()) << " ";
out << get_scatterlock_state_name(get_state());
if (!get_gather_set().empty()) out << " g=" << get_gather_set();
if (is_rdlocked())

View File

@ -891,6 +891,54 @@ CDir* Server::try_open_dir(CInode *diri, frag_t fg, MDRequest *mdr)
}
*/
/** predirty_dn_diri
* predirty the directory inode for a new dentry, if it is auth (and not root)
* BUG: root inode doesn't get dirtied properly, currently. blech.
*/
version_t Server::predirty_dn_diri(CDentry *dn, EMetaBlob *blob, utime_t mtime)
{
version_t dirpv = 0;
CInode *diri = dn->dir->inode;
if (diri->is_auth() && !diri->is_root()) {
dirpv = diri->pre_dirty();
inode_t *pi = blob->add_primary_dentry(diri->get_parent_dn(), true);
pi->version = dirpv;
pi->ctime = pi->mtime = mtime;
dout(10) << "predirty_dn_diri ctime/mtime " << mtime << " pv " << dirpv << " on " << *diri << endl;
}
return dirpv;
}
/** dirty_dn_diri
* follow-up with actual dirty of inode after journal entry commits.
*/
void Server::dirty_dn_diri(CDentry *dn, version_t dirpv, utime_t mtime)
{
CInode *diri = dn->dir->inode;
// make the update
diri->inode.ctime = diri->inode.mtime = mtime;
if (diri->is_auth() && !diri->is_root()) {
// we're auth.
diri->mark_dirty(dirpv);
dout(10) << "dirty_dn_diri ctime/mtime " << mtime << " v " << diri->inode.version << " on " << *diri << endl;
} else {
// we're not auth. dirlock scatterlock will propagate the update.
}
}
// ===============================================================================
// STAT
@ -1238,10 +1286,11 @@ class C_MDS_mknod_finish : public Context {
CDentry *dn;
CInode *newi;
version_t pv;
version_t dirpv;
public:
C_MDS_mknod_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni) :
C_MDS_mknod_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni, version_t dirpv_) :
mds(m), mdr(r), dn(d), newi(ni),
pv(d->get_projected_version()) {}
pv(d->get_projected_version()), dirpv(dirpv_) {}
void finish(int r) {
assert(r == 0);
@ -1252,8 +1301,7 @@ public:
newi->mark_dirty(pv);
// dir inode's mtime
dn->get_dir()->get_inode()->inode.mtime = MAX(dn->get_dir()->get_inode()->inode.mtime,
newi->inode.ctime);
mds->server->dirty_dn_diri(dn, dirpv, newi->inode.ctime);
// hit pop
mds->balancer->hit_inode(newi, META_POP_IWR);
@ -1265,6 +1313,8 @@ public:
}
};
void Server::handle_client_mknod(MDRequest *mdr)
{
MClientRequest *req = mdr->client_request();
@ -1282,14 +1332,17 @@ void Server::handle_client_mknod(MDRequest *mdr)
newi->inode.mode |= INODE_MODE_FILE;
// prepare finisher
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
EUpdate *le = new EUpdate("mknod");
le->metablob.add_client_req(req->get_reqid());
version_t dirpv = predirty_dn_diri(dn, &le->metablob, newi->inode.ctime); // dir mtime too
le->metablob.add_dir_context(dn->dir);
inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
pi->version = dn->get_projected_version();
// log + wait
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv);
mdlog->submit_entry(le);
mdlog->wait_for_sync(fin);
}
@ -1322,15 +1375,16 @@ void Server::handle_client_mkdir(MDRequest *mdr)
newdir->mark_dirty(newdir->pre_dirty());
// prepare finisher
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
EUpdate *le = new EUpdate("mkdir");
le->metablob.add_client_req(req->get_reqid());
version_t dirpv = predirty_dn_diri(dn, &le->metablob, newi->inode.ctime); // dir mtime too
le->metablob.add_dir_context(dn->dir);
inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
pi->version = dn->get_projected_version();
le->metablob.add_dir(newdir, true);
// log + wait
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv);
mdlog->submit_entry(le);
mdlog->wait_for_sync(fin);
@ -1370,14 +1424,15 @@ void Server::handle_client_symlink(MDRequest *mdr)
newi->symlink = req->get_sarg();
// prepare finisher
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
EUpdate *le = new EUpdate("symlink");
le->metablob.add_client_req(req->get_reqid());
version_t dirpv = predirty_dn_diri(dn, &le->metablob, newi->inode.ctime); // dir mtime too
le->metablob.add_dir_context(dn->dir);
inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
pi->version = dn->get_projected_version();
// log + wait
C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv);
mdlog->submit_entry(le);
mdlog->wait_for_sync(fin);
}
@ -1490,15 +1545,17 @@ class C_MDS_link_local_finish : public Context {
version_t dpv;
utime_t tctime;
version_t tpv;
version_t dirpv;
public:
C_MDS_link_local_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ti, utime_t ct) :
C_MDS_link_local_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ti, version_t dirpv_, utime_t ct) :
mds(m), mdr(r), dn(d), targeti(ti),
dpv(d->get_projected_version()),
tctime(ct),
tpv(targeti->get_parent_dn()->get_projected_version()) {}
tpv(targeti->get_parent_dn()->get_projected_version()),
dirpv(dirpv_) { }
void finish(int r) {
assert(r == 0);
mds->server->_link_local_finish(mdr, dn, targeti, dpv, tctime, tpv);
mds->server->_link_local_finish(mdr, dn, targeti, dpv, tctime, tpv, dirpv);
}
};
@ -1517,6 +1574,8 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)
version_t tpdv = targeti->pre_dirty();
// add to event
utime_t now = g_clock.real_now();
version_t dirpv = predirty_dn_diri(dn, &le->metablob, now); // dir inode's mtime
le->metablob.add_dir_context(dn->get_dir());
le->metablob.add_remote_dentry(dn, true, targeti->ino()); // new remote
le->metablob.add_dir_context(targeti->get_parent_dir());
@ -1524,11 +1583,11 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)
// update journaled target inode
pi->nlink++;
pi->ctime = g_clock.real_now();
pi->ctime = now;
pi->version = tpdv;
// finisher
C_MDS_link_local_finish *fin = new C_MDS_link_local_finish(mds, mdr, dn, targeti, pi->ctime);
C_MDS_link_local_finish *fin = new C_MDS_link_local_finish(mds, mdr, dn, targeti, dirpv, now);
// log + wait
mdlog->submit_entry(le);
@ -1536,7 +1595,7 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)
}
void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti,
version_t dpv, utime_t tctime, version_t tpv)
version_t dpv, utime_t tctime, version_t tpv, version_t dirpv)
{
dout(10) << "_link_local_finish " << *dn << " to " << *targeti << endl;
@ -1551,8 +1610,7 @@ void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti,
targeti->mark_dirty(tpv);
// dir inode's mtime
dn->get_dir()->get_inode()->inode.mtime = MAX(dn->get_dir()->get_inode()->inode.mtime,
tctime);
dirty_dn_diri(dn, dirpv, tctime);
// bump target popularity
mds->balancer->hit_inode(targeti, META_POP_IWR);
@ -1738,16 +1796,17 @@ class C_MDS_unlink_local_finish : public Context {
CDentry *straydn;
version_t ipv; // referred inode
utime_t ictime;
version_t dpv; // deleted dentry
version_t dnpv; // deleted dentry
version_t dirpv;
public:
C_MDS_unlink_local_finish(MDS *m, MDRequest *r, CDentry *d, CDentry *sd,
version_t v, utime_t ct) :
version_t v, version_t dirpv_, utime_t ct) :
mds(m), mdr(r), dn(d), straydn(sd),
ipv(v), ictime(ct),
dpv(d->get_projected_version()) { }
dnpv(d->get_projected_version()), dirpv(dirpv_) { }
void finish(int r) {
assert(r == 0);
mds->server->_unlink_local_finish(mdr, dn, straydn, ipv, ictime, dpv);
mds->server->_unlink_local_finish(mdr, dn, straydn, ipv, ictime, dnpv, dirpv);
}
};
@ -1790,18 +1849,20 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn)
}
// the unlinked dentry
utime_t now = g_clock.real_now();
dn->pre_dirty();
version_t dirpv = predirty_dn_diri(dn, &le->metablob, now);
le->metablob.add_dir_context(dn->get_dir());
le->metablob.add_null_dentry(dn, true);
// update journaled target inode
pi->nlink--;
pi->ctime = g_clock.real_now();
pi->ctime = now;
pi->version = ipv;
// finisher
C_MDS_unlink_local_finish *fin = new C_MDS_unlink_local_finish(mds, mdr, dn, straydn,
ipv, pi->ctime);
ipv, dirpv, now);
journal_opens(); // journal pending opens, just in case
@ -1814,7 +1875,7 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn)
void Server::_unlink_local_finish(MDRequest *mdr,
CDentry *dn, CDentry *straydn,
version_t ipv, utime_t ictime, version_t dpv)
version_t ipv, utime_t ictime, version_t dnpv, version_t dirpv)
{
dout(10) << "_unlink_local " << *dn << endl;
@ -1829,11 +1890,10 @@ void Server::_unlink_local_finish(MDRequest *mdr,
in->inode.ctime = ictime;
in->inode.nlink--;
in->mark_dirty(ipv); // dirty inode
dn->mark_dirty(dpv); // dirty old dentry
dn->mark_dirty(dnpv); // dirty old dentry
// dir inode's mtime
dn->get_dir()->get_inode()->inode.mtime = MAX(dn->get_dir()->get_inode()->inode.mtime,
ictime);
dirty_dn_diri(dn, dirpv, ictime);
// share unlink news with replicas
for (map<int,int>::iterator it = dn->replicas_begin();
@ -2147,25 +2207,27 @@ class C_MDS_rename_local_finish : public Context {
version_t straypv;
version_t destpv;
version_t srcpv;
version_t ddirpv, sdirpv;
utime_t ictime;
public:
version_t atid1;
version_t atid2;
C_MDS_rename_local_finish(MDS *m, MDRequest *r,
CDentry *sdn, CDentry *ddn, CDentry *stdn,
version_t v, utime_t ct) :
version_t v, version_t ddirpv_, version_t sdirpv_, utime_t ct) :
mds(m), mdr(r),
srcdn(sdn), destdn(ddn), straydn(stdn),
ipv(v),
straypv(straydn ? straydn->get_projected_version():0),
destpv(destdn->get_projected_version()),
srcpv(srcdn->get_projected_version()),
ddirpv(ddirpv_), sdirpv(sdirpv_),
ictime(ct),
atid1(0), atid2(0) { }
void finish(int r) {
assert(r == 0);
mds->server->_rename_local_finish(mdr, srcdn, destdn, straydn,
srcpv, destpv, straypv, ipv, ictime,
srcpv, destpv, straypv, ipv, ddirpv, sdirpv, ictime,
atid1, atid2);
}
};
@ -2194,6 +2256,8 @@ void Server::_rename_local(MDRequest *mdr,
EUpdate *le = new EUpdate("rename_local");
le->metablob.add_client_req(mdr->reqid);
utime_t now = g_clock.real_now();
CDentry *straydn = 0;
inode_t *pi = 0;
version_t ipv = 0;
@ -2204,6 +2268,11 @@ void Server::_rename_local(MDRequest *mdr,
// primary+remote link merge?
bool linkmerge = (srcdn->inode == destdn->inode &&
(srcdn->is_primary() || destdn->is_primary()));
// dir mtimes
version_t ddirpv = predirty_dn_diri(destdn, &le->metablob, now);
version_t sdirpv = predirty_dn_diri(srcdn, &le->metablob, now);
if (linkmerge) {
dout(10) << "will merge remote+primary links" << endl;
@ -2300,13 +2369,13 @@ void Server::_rename_local(MDRequest *mdr,
if (pi) {
// update journaled target inode
pi->nlink--;
pi->ctime = g_clock.real_now();
pi->ctime = now;
pi->version = ipv;
}
C_MDS_rename_local_finish *fin = new C_MDS_rename_local_finish(mds, mdr,
srcdn, destdn, straydn,
ipv, pi ? pi->ctime:utime_t());
ipv, ddirpv, sdirpv, now);
journal_opens(); // journal pending opens, just in case
@ -2340,6 +2409,7 @@ void Server::_rename_local_reanchored(LogEvent *le, C_MDS_rename_local_finish *f
void Server::_rename_local_finish(MDRequest *mdr,
CDentry *srcdn, CDentry *destdn, CDentry *straydn,
version_t srcpv, version_t destpv, version_t straypv, version_t ipv,
version_t ddirpv, version_t sdirpv,
utime_t ictime,
version_t atid1, version_t atid2)
{
@ -2352,8 +2422,13 @@ void Server::_rename_local_finish(MDRequest *mdr,
bool linkmerge = (srcdn->inode == destdn->inode &&
(srcdn->is_primary() || destdn->is_primary()));
// dir mtimes
dirty_dn_diri(destdn, ddirpv, ictime);
dirty_dn_diri(srcdn, sdirpv, ictime);
if (linkmerge) {
assert(ipv);
if (destdn->is_primary()) {
dout(10) << "merging remote onto primary link" << endl;
@ -2755,6 +2830,10 @@ void Server::_do_open(MDRequest *mdr, CInode *cur)
<< " on " << *cur << endl;
// hit pop
if (cmode == FILE_MODE_RW ||
cmode == FILE_MODE_W)
mds->balancer->hit_inode(cur, META_POP_IWR);
else
mds->balancer->hit_inode(cur, META_POP_IRD);
// reply

View File

@ -70,6 +70,9 @@ public:
CDir* try_open_auth_dir(CInode *diri, frag_t fg, MDRequest *mdr);
//CDir* try_open_dir(CInode *diri, frag_t fg, MDRequest *mdr);
version_t predirty_dn_diri(CDentry *dn, class EMetaBlob *blob, utime_t mtime);
void dirty_dn_diri(CDentry *dn, version_t dirpv, utime_t mtime);
// requests on existing inodes.
void handle_client_stat(MDRequest *mdr);
void handle_client_utime(MDRequest *mdr);
@ -108,7 +111,7 @@ public:
void _link_local(MDRequest *mdr, CDentry *dn, CInode *targeti);
void _link_local_finish(MDRequest *mdr,
CDentry *dn, CInode *targeti,
version_t, utime_t, version_t);
version_t, utime_t, version_t, version_t);
void _link_remote(MDRequest *mdr, CDentry *dn, CInode *targeti);
// unlink
@ -117,7 +120,7 @@ public:
void _unlink_local(MDRequest *mdr, CDentry *dn);
void _unlink_local_finish(MDRequest *mdr,
CDentry *dn, CDentry *straydn,
version_t, utime_t, version_t);
version_t, utime_t, version_t, version_t);
void _unlink_remote(MDRequest *mdr, CDentry *dn);
// rename
@ -134,7 +137,7 @@ public:
void _rename_local_finish(MDRequest *mdr,
CDentry *srcdn, CDentry *destdn, CDentry *straydn,
version_t srcpv, version_t destpv, version_t straypv, version_t ipv,
utime_t ictime,
version_t ddirpv, version_t sdirpv, utime_t ictime,
version_t atid1, version_t atid2);
};

View File

@ -30,12 +30,12 @@
inline const char *get_lock_type_name(int t) {
switch (t) {
case LOCK_OTYPE_DN: return "dentry";
case LOCK_OTYPE_IFILE: return "inode_file";
case LOCK_OTYPE_IAUTH: return "inode_auth";
case LOCK_OTYPE_ILINK: return "inode_link";
case LOCK_OTYPE_IDIRFRAGTREE: return "inode_dirfragtree";
case LOCK_OTYPE_IDIR: return "inode_dir";
case LOCK_OTYPE_DN: return "dn";
case LOCK_OTYPE_IFILE: return "ifile";
case LOCK_OTYPE_IAUTH: return "iauth";
case LOCK_OTYPE_ILINK: return "ilink";
case LOCK_OTYPE_IDIRFRAGTREE: return "idft";
case LOCK_OTYPE_IDIR: return "idir";
default: assert(0);
}
}
@ -46,13 +46,15 @@ inline const char *get_lock_type_name(int t) {
#define LOCK_SYNC 1 // AR R . R .
#define LOCK_LOCK 2 // AR R W . .
#define LOCK_GLOCKR -3 // AR R . . .
#define LOCK_REMOTEXLOCK -50 // on NON-auth
inline const char *get_simplelock_state_name(int n) {
switch (n) {
case LOCK_UNDEF: return "undef";
case LOCK_UNDEF: return "UNDEF";
case LOCK_SYNC: return "sync";
case LOCK_LOCK: return "lock";
case LOCK_GLOCKR: return "glockr";
case LOCK_REMOTEXLOCK: return "remote_xlock";
default: assert(0);
}
}
@ -63,8 +65,7 @@ class SimpleLock {
public:
static const int WAIT_RD = (1<<0); // to read
static const int WAIT_WR = (1<<1); // to write
static const int WAIT_NOLOCKS = (1<<2); // for last rdlock to finish
//static const int WAIT_LOCK = (1<<3); // for locked state
static const int WAIT_SINGLEAUTH = (1<<2);
static const int WAIT_STABLE = (1<<3); // for a stable state
static const int WAIT_REMOTEXLOCK = (1<<4); // for a remote xlock
static const int WAIT_BITS = 5;
@ -248,7 +249,7 @@ public:
virtual void print(ostream& out) {
out << "(";
//out << get_lock_type_name(l.get_type()) << " ";
out << get_lock_type_name(get_type()) << " ";
out << get_simplelock_state_name(get_state());
if (!get_gather_set().empty()) out << " g=" << get_gather_set();
if (is_rdlocked())

View File

@ -40,7 +40,7 @@ public:
set<dirfrag_t> &get_bounds() { return bounds; }
void print(ostream& out) {
out << "export " << base << " " << metablob;
out << "EExport " << base << " " << metablob;
}
virtual void encode_payload(bufferlist& bl) {

View File

@ -33,7 +33,7 @@ class EImportFinish : public LogEvent {
EImportFinish() : LogEvent(EVENT_IMPORTFINISH) { }
void print(ostream& out) {
out << "import_finish " << base;
out << "EImportFinish " << base;
if (success)
out << " success";
else

View File

@ -294,6 +294,7 @@ class MDSCacheObject {
// -- state --
const static int STATE_AUTH = (1<<30);
const static int STATE_DIRTY = (1<<29);
const static int STATE_REJOINING = (1<<28); // replica has not joined w/ primary copy
// -- wait --
const static int WAIT_SINGLEAUTH = (1<<30);
@ -327,8 +328,9 @@ class MDSCacheObject {
void state_reset(unsigned s) { state = s; }
bool is_auth() { return state_test(STATE_AUTH); }
bool is_dirty() { return state & STATE_DIRTY; }
bool is_dirty() { return state_test(STATE_DIRTY); }
bool is_clean() { return !is_dirty(); }
bool is_rejoining() { return state_test(STATE_REJOINING); }
// --------------------------------------------
// authority
@ -457,7 +459,7 @@ protected:
if (waiting.empty())
get(PIN_WAITER);
waiting.insert(pair<int,Context*>(mask, c));
dout(10) << (mdsco_db_line_prefix(this))
pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
<< "add_waiter " << mask << " " << c
<< " on " << *this
<< endl;
@ -469,14 +471,14 @@ protected:
while (it != waiting.end()) {
if (it->first & mask) {
ls.push_back(it->second);
dout(10) << (mdsco_db_line_prefix(this))
pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
<< "take_waiting mask " << mask << " took " << it->second
<< " tag " << it->first
<< " on " << *this
<< endl;
waiting.erase(it++);
} else {
dout(10) << "take_waiting mask " << mask << " SKIPPING " << it->second
pdout(10,g_conf.debug_mds) << "take_waiting mask " << mask << " SKIPPING " << it->second
<< " tag " << it->first
<< " on " << *this
<< endl;

View File

@ -185,7 +185,9 @@ class MClientReply : public Message {
virtual char *get_type_name() { return "creply"; }
void print(ostream& o) {
o << "creply(" << env.dst.name << "." << st.tid;
if (st.result) o << " = " << st.result;
o << " = " << st.result;
if (st.result <= 0)
o << " " << strerror(-st.result);
o << ")";
}

View File

@ -17,7 +17,7 @@
#define __MLOCK_H
#include "msg/Message.h"
#include "mds/SimpleLock.h"
// for replicas
#define LOCK_AC_SYNC -1
@ -91,6 +91,12 @@ class MLock : public Message {
data.claim(bl);
}
virtual char *get_type_name() { return "ILock"; }
void print(ostream& out) {
out << "lock(a=" << action
<< " " << ino
<< " " << get_lock_type_name(otype)
<< ")";
}
void set_ino(inodeno_t ino, char ot) {
otype = ot;
@ -111,32 +117,27 @@ class MLock : public Message {
this->dn = dn;
}
void set_reqid(metareqid_t ri) { reqid = ri; }
void set_data(bufferlist& data) {
this->data.claim( data );
void set_data(const bufferlist& data) {
this->data = data;
}
void decode_payload() {
int off = 0;
payload.copy(off,sizeof(action), (char*)&action);
off += sizeof(action);
payload.copy(off,sizeof(asker), (char*)&asker);
off += sizeof(asker);
payload.copy(off,sizeof(otype), (char*)&otype);
off += sizeof(otype);
payload.copy(off,sizeof(ino), (char*)&ino);
off += sizeof(ino);
payload.copy(off,sizeof(dirfrag), (char*)&dirfrag);
off += sizeof(dirfrag);
::_decode(action, payload, off);
::_decode(asker, payload, off);
::_decode(otype, payload, off);
::_decode(ino, payload, off);
::_decode(dirfrag, payload, off);
::_decode(reqid, payload, off);
::_decode(dn, payload, off);
::_decode(data, payload, off);
}
virtual void encode_payload() {
payload.append((char*)&action, sizeof(action));
payload.append((char*)&asker, sizeof(asker));
payload.append((char*)&otype, sizeof(otype));
payload.append((char*)&ino, sizeof(ino));
payload.append((char*)&dirfrag, sizeof(dirfrag));
::_encode(action, payload);
::_encode(asker, payload);
::_encode(otype, payload);
::_encode(ino, payload);
::_encode(dirfrag, payload);
::_encode(reqid, payload);
::_encode(dn, payload);
::_encode(data, payload);

View File

@ -45,11 +45,12 @@ class MMDSCacheRejoin : public Message {
int32_t linklock;
int32_t dirfragtreelock;
int32_t filelock;
__int32_t dirlock;
inode_strong() {}
inode_strong(int n, int cw=0, int a=0, int l=0, int dft=0, int f=0) :
inode_strong(int n, int cw=0, int a=0, int l=0, int dft=0, int f=0, int dl=0) :
caps_wanted(cw),
nonce(n),
authlock(a), linklock(l), dirfragtreelock(dft), filelock(f) { }
authlock(a), linklock(l), dirfragtreelock(dft), filelock(f), dirlock(dl) { }
};
struct inode_full {
inode_t inode;
@ -112,8 +113,8 @@ class MMDSCacheRejoin : public Message {
void add_weak_inode(inodeno_t ino) {
weak_inodes.insert(ino);
}
void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f) {
strong_inodes[i] = inode_strong(n, cw, a, l, dft, f);
void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f, int dl) {
strong_inodes[i] = inode_strong(n, cw, a, l, dft, f, dl);
}
void add_full_inode(inode_t &i, const string& s, const fragtree_t &f) {
full_inodes.push_back(inode_full(i, s, f));

View File

@ -169,7 +169,8 @@ void Journaler::write_head(Context *oncommit)
bufferlist bl;
bl.append((char*)&last_written, sizeof(last_written));
filer.write(inode, 0, bl.length(), bl, 0,
0, new C_WriteHead(this, last_written, oncommit));
0,
new C_WriteHead(this, last_written, oncommit));
}
void Journaler::_finish_write_head(Header &wrote, Context *oncommit)
@ -293,8 +294,10 @@ void Journaler::flush(Context *onsync)
dout(10) << "flush flushing " << flush_pos << "~" << len << endl;
// submit write for anything pending
// flush _start_ pos to _finish_flush
filer.write(inode, flush_pos, len, write_buf, 0,
new C_Flush(this, flush_pos), 0); // flush _start_ pos to _finish_flush
g_conf.journaler_safe ? 0:new C_Flush(this, flush_pos), // on ACK
g_conf.journaler_safe ? new C_Flush(this, flush_pos):0); // on COMMIT
pending_flush[flush_pos] = g_clock.now();
// adjust pointers