diff --git a/ceph/config.cc b/ceph/config.cc index 65049023131..76995921b5d 100644 --- a/ceph/config.cc +++ b/ceph/config.cc @@ -14,7 +14,7 @@ md_config_t g_conf = { num_mds: 33, num_osd: 10, - num_client: 1000, + num_client: 10, osd_cow: false, // crashy? true, @@ -33,7 +33,7 @@ md_config_t g_conf = { fake_clock: true, fakemessenger_serialize: true, - debug: 13, + debug: 10, mdcache_size: MDS_CACHE_SIZE, mdcache_mid: .8, diff --git a/ceph/mds/CDentry.h b/ceph/mds/CDentry.h index f1458edb163..a2f675abfe0 100644 --- a/ceph/mds/CDentry.h +++ b/ceph/mds/CDentry.h @@ -8,12 +8,15 @@ using namespace std; class CInode; class CDir; +#define CDENTRY_STATE_FROZEN 1 + // dentry class CDentry { protected: string name; CInode *inode; CDir *dir; + int state; friend class MDCache; friend class MDS; @@ -24,10 +27,12 @@ class CDentry { CDentry() { inode = NULL; dir = NULL; + state = 0; } CDentry(string& n, CInode *in) { name = n; inode = in; + state = 0; } CInode *get_inode() { @@ -46,6 +51,10 @@ class CDentry { bool operator>= (const CDentry& right) const; bool operator<= (const CDentry& right) const; + // -- locking + //bool is_frozen() { return is_frozen_dentry() || dir->is_frozen_dir(); } + //bool is_frozen_dentry() { return state & CDENTRY_STATE_FROZENDENTRY; } + // -- hierarchy void remove(); diff --git a/ceph/mds/CDir.cc b/ceph/mds/CDir.cc index 1ab9fbdb5c3..eaac34ad448 100644 --- a/ceph/mds/CDir.cc +++ b/ceph/mds/CDir.cc @@ -46,7 +46,7 @@ ostream& operator<<(ostream& out, CDir& dir) if (dir.get_dir_auth() != CDIR_AUTH_PARENT) out << " dir_auth=" << dir.get_dir_auth(); - return out << " state=" << dir.get_state(); + out << " state=" << dir.get_state(); return out << "]"; } diff --git a/ceph/mds/CDir.h b/ceph/mds/CDir.h index 5209f3df94a..e11946c2f4d 100644 --- a/ceph/mds/CDir.h +++ b/ceph/mds/CDir.h @@ -33,21 +33,22 @@ class Context; // state bits -#define CDIR_STATE_COMPLETE (1<<0) // the complete contents are in cache -#define CDIR_STATE_DIRTY (1<<1) // has been modified since last commit +#define CDIR_STATE_AUTH (1<<0) // auth for this dir (hashing doesn't count) +#define CDIR_STATE_PROXY (1<<1) // proxy auth -#define CDIR_STATE_FROZENTREE (1<<2) // root of tree (bounded by exports) -#define CDIR_STATE_FREEZINGTREE (1<<3) // in process of freezing -#define CDIR_STATE_FROZENDIR (1<<4) -#define CDIR_STATE_FREEZINGDIR (1<<5) +#define CDIR_STATE_COMPLETE (1<<2) // the complete contents are in cache +#define CDIR_STATE_DIRTY (1<<3) // has been modified since last commit -#define CDIR_STATE_COMMITTING (1<<6) // mid-commit -#define CDIR_STATE_FETCHING (1<<7) // currenting fetching +#define CDIR_STATE_FROZENTREE (1<<4) // root of tree (bounded by exports) +#define CDIR_STATE_FREEZINGTREE (1<<5) // in process of freezing +#define CDIR_STATE_FROZENDIR (1<<6) +#define CDIR_STATE_FREEZINGDIR (1<<7) -#define CDIR_STATE_IMPORT (1<<8) // flag set if this is an import. -#define CDIR_STATE_EXPORT (1<<9) -#define CDIR_STATE_AUTH (1<<10) // auth for this dir (hashing doesn't count) -#define CDIR_STATE_PROXY (1<<11) +#define CDIR_STATE_COMMITTING (1<<8) // mid-commit +#define CDIR_STATE_FETCHING (1<<9) // currenting fetching + +#define CDIR_STATE_IMPORT (1<<10) // flag set if this is an import. +#define CDIR_STATE_EXPORT (1<<11) #define CDIR_STATE_HASHED (1<<12) // if hashed. only hashed+auth on auth node. #define CDIR_STATE_HASHING (1<<13) diff --git a/ceph/mds/CInode.cc b/ceph/mds/CInode.cc index c0b9d351da8..0763478f576 100644 --- a/ceph/mds/CInode.cc +++ b/ceph/mds/CInode.cc @@ -80,7 +80,8 @@ CInode::CInode(bool auth) : LRUObject() { version = 0; - this->auth = auth; // by default. + //this->auth = auth; // by default. + state_set(CINODE_STATE_AUTH); } CInode::~CInode() { @@ -130,14 +131,16 @@ CDir *CInode::set_dir(CDir *newdir) void CInode::set_auth(bool a) { if (!is_dangling() && !is_root() && - auth != a) { + is_auth() != a) { CDir *dir = get_parent_dir(); - if (auth && !a) + if (is_auth() && !a) dir->nauthitems--; else dir->nauthitems++; } - auth = a; + + if (a) state_set(CINODE_STATE_AUTH); + else state_clear(CINODE_STATE_AUTH); } diff --git a/ceph/mds/CInode.h b/ceph/mds/CInode.h index 02a98c17fd2..a55d690657e 100644 --- a/ceph/mds/CInode.h +++ b/ceph/mds/CInode.h @@ -9,6 +9,7 @@ #include #include "CDentry.h" +#include "Lock.h" #include #include @@ -134,16 +135,18 @@ static char *cinode_pin_names[CINODE_NUM_PINS] = { // state -#define CINODE_STATE_ROOT 1 -#define CINODE_STATE_DIRTY 2 -#define CINODE_STATE_UNSAFE 4 // not logged yet -#define CINODE_STATE_DANGLING 8 // delete me when i expire; i have no dentry -#define CINODE_STATE_UNLINKING 16 -#define CINODE_STATE_PROXY 32 // can't expire yet -#define CINODE_STATE_EXPORTING 64 // on nonauth bystander. +#define CINODE_STATE_AUTH (1<<0) +#define CINODE_STATE_ROOT (1<<1) -#define CINODE_STATE_RENAMING 128 // moving me -#define CINODE_STATE_RENAMINGTO 256 // rename target (will be unlinked) +#define CINODE_STATE_DIRTY (1<<2) +#define CINODE_STATE_UNSAFE (1<<3) // not logged yet +#define CINODE_STATE_DANGLING (1<<4) // delete me when i expire; i have no dentry +#define CINODE_STATE_UNLINKING (1<<5) +#define CINODE_STATE_PROXY (1<<6) // can't expire yet +#define CINODE_STATE_EXPORTING (1<<7) // on nonauth bystander. + +#define CINODE_STATE_RENAMING (1<<8) // moving me +#define CINODE_STATE_RENAMINGTO (1<<9) // rename target (will be unlinked) // misc @@ -187,7 +190,7 @@ class CInode : LRUObject { CInode *lru_next, *lru_prev; // -- distributed caching - bool auth; // safety check; true if this is authoritative. + //bool auth; // safety check; true if this is authoritative. set cached_by; // mds's that cache me. /* NOTE: on replicas, this doubles as replicated_by, but the cached_by_* access methods below should NOT be used in those @@ -246,9 +249,9 @@ class CInode : LRUObject { bool is_root() { return state & CINODE_STATE_ROOT; } bool is_proxy() { return state & CINODE_STATE_PROXY; } - bool is_auth() { return auth; } + bool is_auth() { return state & CINODE_STATE_AUTH; } void set_auth(bool auth); - bool is_replica() { return !auth; } + bool is_replica() { return !is_auth(); } int get_replica_nonce() { assert(!is_auth()); return replica_nonce; } inodeno_t ino() { return inode.ino; } diff --git a/ceph/mds/Lock.h b/ceph/mds/Lock.h new file mode 100644 index 00000000000..40f906bd981 --- /dev/null +++ b/ceph/mds/Lock.h @@ -0,0 +1,100 @@ +#ifndef __LOCK_H +#define __LOCK_H + +#include +#include +using namespace std; + +// STATES +// basic lock +#define LOCK_SYNC 0 +#define LOCK_PRELOCK 1 +#define LOCK_LOCK 2 +#define LOCK_DELETING 3 // auth only +#define LOCK_DELETED 4 + +// async lock +#define LOCK_ASYNC 5 +#define LOCK_RESYNC 6 // to sync +#define LOCK_RESYNC2 7 // to lock + + +// -- basic lock + +class BasicLock { + protected: + // lock state + char state; + set gather_set; // auth + + public: + BasicLock() : state(0) { + } + + char get_state() { return state; } + char set_state(char s) { state = s; }; + set& get_gather_set() { return gather_set; } + + void init_gather(set& i) { + gather_set = i; + } + + bool can_read(bool auth) { + if (auth) + return (state == LOCK_SYNC) || (state == LOCK_PRELOCK) || (state == LOCK_LOCK); + if (!auth) + return (state == LOCK_SYNC); + } + + bool can_write(bool auth) { + return auth && state == LOCK_LOCK; + } +}; + +ostream& operator<<(ostream& out, BasicLock& l) { + static char* __lock_states[] = { + "sync", + "prelock", + "lock", + "deleting", + "deleted", + "async", + "resync", + "resync2" + }; + + out << "Lock(" << __lock_states[l.get_state()]; + + if (!l.get_gather_set().empty()) out << " g=" << l.get_gather_set(); + + // rw? + out << " "; + if (l.can_read(true)) out << "r"; + if (l.can_write(true)) out << "w"; + out << "/"; + if (l.can_read(false)) out << "r"; + if (l.can_write(false)) out << "w"; + + out << ")"; + return out; +} + + +// -- async lock + +class AsyncLock : public BasicLock { + public: + AsyncLock() : BasicLock() { + assert(state == 0); + } + bool can_write(bool auth) { + if (auth) + return (state == LOCK_LOCK) + || (state == LOCK_ASYNC) || (state == LOCK_RESYNC) || (state == LOCK_RESYNC2); + if (!auth) + return (state == LOCK_ASYNC); + } +}; + + +#endif diff --git a/ceph/mds/MDCache.cc b/ceph/mds/MDCache.cc index 414032faf1d..e34b148d0f5 100644 --- a/ceph/mds/MDCache.cc +++ b/ceph/mds/MDCache.cc @@ -859,7 +859,6 @@ void MDCache::rename_dir(CInode *from, } show_imports(); - } @@ -1668,13 +1667,91 @@ void MDCache::file_rename_finish(CInode *from, CDir *destdir, CInode *oldin, Con /* +LOCKS: + + three states: + + Auth Replica State + R R normal/sync fw writes to auth + RW - lock ping auth for R/W? + W W async (*) fw reads to auth + + * only defined for soft inode metadata, right? + + we also remember: + auth: + set replicas + bool req_r, req_w + + replica: + last_sync - stamp of last time we were sync + + + + + + INODES: - two types of inode metadata: - hard - uid/gid, mode - soft - m/c/atime, size += two types of inode metadata: + hard - uid/gid, mode + soft - m/ctime, size + ? atime - atime (*) - correspondingly, two types of locks: + * if we want _strict_ atime behavior, atime can be folded into soft. + for lazy atime, should we just leave the atime lock in async state? XXX + += correspondingly, two types of inode locks: + hardlock - hard metadata + softlock - soft metadata + + -> These locks are completely orthogonal! + += metadata ops and how they affect inode metadata: + scma=size ctime mtime atime + HARD SOFT OP + files: + R RRRR stat + RW chmod/chown + R wW touch ?ctime + R openr + W read atime + R openw + R w openwc ?ctime + W W write size mtime + close + dirs: + R W readdir atime + RRRR ( + implied stats on files) + R W W link/unlink/rename/rmdir + R WwW mkdir (ctime on new dir, size+mtime on parent dir) + + + += relationship to client (writers): + + - ops in question are + - stat ... need reasonable value for mtime (+ atime?) + - maybe we want a "quicksync" type operation instead of full lock + - truncate ... need to stop writers for the atomic truncate operation + - need a full lock + + + + + + +ALSO: + + dirlock - no dir changes (prior to unhashing) + denlock - dentry lock (prior to unlink, rename) + + + + + +OLD CRAP: + (old): sync - soft metadata.. no reads/writes can proceed. (eg no stat) lock - hard(+soft) metadata.. path traversals stop etc. (??) @@ -3688,8 +3765,9 @@ void MDCache::handle_export_dir(MExportDir *m) MExportDirNotify *notify = new MExportDirNotify(dir->ino(), m->get_source(), mds->get_nodeid()); notify->copy_exports(m->get_exports()); + if (g_conf.mds_verify_export_dirauth) - notify->copy_subdirs(imported_subdirs); // copy subdir list (debug) + notify->copy_subdirs(imported_subdirs); // copy subdir list (DEBUG) mds->messenger->send_message(notify, MSG_ADDR_MDS( *it ), MDS_PORT_CACHE, @@ -3973,7 +4051,7 @@ void MDCache::handle_export_dir_notify(MExportDirNotify *m) assert(dir->authority() != mds->get_nodeid()); assert(!dir->is_auth()); - // debug: verify subdirs + // DEBUG: verify subdirs if (g_conf.mds_verify_export_dirauth) { dout(7) << "handle_export_dir_notify on " << *dir << " checking " << m->num_subdirs() << " subdirs" << endl;