Merge branch 'mds_dir_hash' into unstable

This commit is contained in:
Sage Weil 2010-11-16 10:01:15 -08:00
commit 05bd6b078d
18 changed files with 107 additions and 33 deletions

View File

@ -494,6 +494,11 @@ Inode * Client::add_update_inode(InodeStat *st, utime_t from, int mds)
in->dirstat = st->dirstat;
in->rstat = st->rstat;
if (in->is_dir()) {
in->dir_layout = st->dir_layout;
dout(20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << dendl;
}
in->layout = st->layout;
in->ctime = st->ctime;
in->max_size = st->max_size; // right?
@ -651,6 +656,10 @@ Inode* Client::insert_trace(MetaRequest *request, utime_t from, int mds)
return NULL;
}
Connection *con = request->reply->get_connection();
int features = con->get_features();
dout(10) << " features 0x" << hex << features << dec << dendl;
// snap trace
if (reply->snapbl.length())
update_snap_trace(reply->snapbl);
@ -667,7 +676,7 @@ Inode* Client::insert_trace(MetaRequest *request, utime_t from, int mds)
InodeStat ist;
if (reply->head.is_dentry) {
dirst.decode(p);
dirst.decode(p, features);
dst.decode(p);
::decode(dname, p);
::decode(dlease, p);
@ -675,7 +684,7 @@ Inode* Client::insert_trace(MetaRequest *request, utime_t from, int mds)
Inode *in = 0;
if (reply->head.is_target) {
ist.decode(p);
ist.decode(p, features);
in = add_update_inode(&ist, from, mds);
}
@ -759,7 +768,7 @@ Inode* Client::insert_trace(MetaRequest *request, utime_t from, int mds)
for (unsigned i=0; i<numdn; i++) {
::decode(dname, p);
::decode(dlease, p);
InodeStat ist(p);
InodeStat ist(p, features);
Inode *in = add_update_inode(&ist, from, mds);
Dentry *dn = insert_dentry_inode(dir, dname, &dlease, in, from, mds, false);
@ -830,13 +839,25 @@ int Client::choose_target_mds(MetaRequest *req)
if (req->inode) {
in = req->inode;
if (req->path.depth()) {
hash = ceph_str_hash(in->dir_layout.dl_dir_hash,
req->path[0].data(),
req->path[0].length());
dout(20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << " on " << req->path[0]
<< " => " << hash << dendl;
is_hash = true;
}
} else if (req->dentry) {
if (req->dentry->inode) {
in = req->dentry->inode;
} else {
in = req->dentry->dir->parent_inode;
hash = ceph_str_hash_linux(req->dentry->name.data(),
req->dentry->name.length());
hash = ceph_str_hash(in->dir_layout.dl_dir_hash,
req->dentry->name.data(),
req->dentry->name.length());
dout(20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << " on " << req->dentry->name
<< " => " << hash << dendl;
is_hash = true;
}
}

View File

@ -387,6 +387,7 @@ class Inode {
int32_t nlink;
// file (data access)
ceph_dir_layout dir_layout;
ceph_file_layout layout;
uint64_t size; // on directory, # dentries
uint32_t truncate_seq;

View File

@ -114,8 +114,9 @@ int main(int argc, const char **argv)
return 1;
uint64_t supported =
CEPH_FEATURE_UID |
CEPH_FEATURE_NOSRCADDR;
CEPH_FEATURE_UID |
CEPH_FEATURE_NOSRCADDR |
CEPH_FEATURE_DIRLAYOUTHASH;
messenger->set_default_policy(SimpleMessenger::Policy::client(supported, 0));
messenger->set_policy(entity_name_t::TYPE_MON,
SimpleMessenger::Policy::client(supported,

View File

@ -96,6 +96,7 @@ std::map<entity_name_t,float> g_fake_kill_after;
md_config_t g_conf;
bool g_daemon = false;
#include <stdlib.h>
#include <string.h>
@ -440,6 +441,7 @@ static struct config_option config_optionsp[] = {
OPTION(mds_early_reply, 0, OPT_BOOL, true),
OPTION(mds_short_reply_trace, 0, OPT_BOOL, true),
OPTION(mds_use_tmap, 0, OPT_BOOL, true), // use trivialmap for dir updates
OPTION(mds_default_dir_hash, 0, OPT_INT, CEPH_STR_HASH_RJENKINS),
OPTION(mds_log, 0, OPT_BOOL, true),
OPTION(mds_log_unsafe, 0, OPT_BOOL, false), // only wait for log sync, when it's mostly safe to do so
OPTION(mds_log_skip_corrupt_events, 0, OPT_BOOL, false),

View File

@ -261,6 +261,8 @@ struct md_config_t {
bool mds_use_tmap;
int mds_default_dir_hash;
bool mds_log;
bool mds_log_unsafe;
bool mds_log_skip_corrupt_events;

View File

@ -46,6 +46,7 @@
#define CEPH_FEATURE_SUBSCRIBE2 (1<<4)
#define CEPH_FEATURE_MONNAMES (1<<5)
#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
/*
@ -58,10 +59,10 @@ struct ceph_file_layout {
__le32 fl_stripe_count; /* over this many objects */
__le32 fl_object_size; /* until objects are this big, then move to
new objects */
__le32 fl_cas_hash; /* 0 = none; 1 = sha256 */
__le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */
/* pg -> disk layout */
__le32 fl_object_stripe_unit; /* for per-object parity, if any */
__le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
/* object -> pg layout */
__le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
@ -72,6 +73,12 @@ struct ceph_file_layout {
int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
/*
 * Per-directory layout.
 *
 * dl_dir_hash holds the id of the string hash used for dentry names in
 * the directory.  The remaining fields are unused, going by their names.
 * The struct is packed; presumably 8 bytes in total (1 + 1 + 2 + 4) --
 * confirm against the __u8/__u16/__u32 definitions in use.
 */
struct ceph_dir_layout {
__u8 dl_dir_hash; /* see ceph_hash.h for ids */
__u8 dl_unused1;
__u16 dl_unused2;
__u32 dl_unused3;
} __attribute__ ((packed));
/* crypto algorithms */
#define CEPH_CRYPTO_NONE 0x0
@ -463,7 +470,7 @@ struct ceph_mds_reply_inode {
struct ceph_timespec rctime;
struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */
} __attribute__ ((packed));
/* followed by frag array, then symlink string, then xattr blob */
/* followed by frag array, symlink string, dir layout, xattr blob */
/* reply_lease follows dname, and reply_inode */
struct ceph_mds_reply_lease {

View File

@ -211,6 +211,7 @@ struct ltstr
WRITE_RAW_ENCODER(ceph_fsid)
WRITE_RAW_ENCODER(ceph_file_layout)
WRITE_RAW_ENCODER(ceph_dir_layout)
WRITE_RAW_ENCODER(ceph_pg_pool)
WRITE_RAW_ENCODER(ceph_mds_session_head)
WRITE_RAW_ENCODER(ceph_mds_request_head)

View File

@ -40,10 +40,6 @@ public:
Anchor() : dn_hash(0), nref(0), updated(0) {}
Anchor(inodeno_t i, inodeno_t di, __u32 hash, int nr, version_t u) :
ino(i), dirino(di), dn_hash(hash), nref(nr), updated(u) { }
Anchor(inodeno_t i, inodeno_t di, const string &dname, int nr, version_t u) :
ino(i), dirino(di),
dn_hash(ceph_str_hash_linux(dname.data(), dname.length())),
nref(nr), updated(u) { }
void encode(bufferlist &bl) const {
__u8 struct_v = 1;

View File

@ -262,7 +262,7 @@ void CDentry::make_anchor_trace(vector<Anchor>& trace, CInode *in)
dir->inode->make_anchor_trace(trace);
// add this inode (in my dirfrag) to the end
trace.push_back(Anchor(in->ino(), dir->ino(), name, 0, 0));
trace.push_back(Anchor(in->ino(), dir->ino(), get_hash(), 0, 0));
dout(10) << "make_anchor_trace added " << trace.back() << dendl;
}

View File

@ -96,6 +96,7 @@ public:
public:
string name;
__u32 hash;
snapid_t first, last;
dentry_key_t key() {
@ -163,9 +164,9 @@ public:
public:
// cons
CDentry(const string& n,
CDentry(const string& n, __u32 h,
snapid_t f, snapid_t l) :
name(n),
name(n), hash(h),
first(f), last(l),
dir(0),
version(0), projected_version(0),
@ -176,9 +177,9 @@ public:
g_num_dn++;
g_num_dna++;
}
CDentry(const string& n, inodeno_t ino, unsigned char dt,
CDentry(const string& n, __u32 h, inodeno_t ino, unsigned char dt,
snapid_t f, snapid_t l) :
name(n),
name(n), hash(h),
first(f), last(l),
dir(0),
version(0), projected_version(0),
@ -200,6 +201,8 @@ public:
CDir *get_dir() const { return dir; }
const string& get_name() const { return name; }
// Returns the value of the 'hash' member unchanged; performs no hashing itself.
__u32 get_hash() const { return hash; }
/*
CInode *get_inode() const { return linkage.inode; }
inodeno_t get_remote_ino() { return linkage.remote_ino; }

View File

@ -229,7 +229,7 @@ CDentry* CDir::add_null_dentry(const string& dname,
assert(lookup_exact_snap(dname, last) == 0);
// create dentry
CDentry* dn = new CDentry(dname, first, last);
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), first, last);
if (is_auth())
dn->state_set(CDentry::STATE_AUTH);
cache->lru.lru_insert_mid(dn);
@ -265,7 +265,7 @@ CDentry* CDir::add_primary_dentry(const string& dname, CInode *in,
assert(lookup_exact_snap(dname, last) == 0);
// create dentry
CDentry* dn = new CDentry(dname, first, last);
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), first, last);
if (is_auth())
dn->state_set(CDentry::STATE_AUTH);
cache->lru.lru_insert_mid(dn);
@ -303,9 +303,9 @@ CDentry* CDir::add_remote_dentry(const string& dname, inodeno_t ino, unsigned ch
{
// foreign
assert(lookup_exact_snap(dname, last) == 0);
// create dentry
CDentry* dn = new CDentry(dname, ino, d_type, first, last);
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), ino, d_type, first, last);
if (is_auth())
dn->state_set(CDentry::STATE_AUTH);
cache->lru.lru_insert_mid(dn);

View File

@ -420,12 +420,20 @@ void CInode::pop_projected_snaprealm(sr_t *next_snaprealm)
// dirfrags
// Hash a dentry name with the hash function selected by this inode's
// directory layout (inode.dir_layout.dl_dir_hash).
//
// A hash id of zero means no hash was recorded in the layout; in that case
// CEPH_STR_HASH_LINUX is used instead.
//
// @param dn  dentry name to hash
// @return    result of ceph_str_hash() over the bytes of dn
__u32 CInode::hash_dentry_name(const string &dn)
{
  const int layout_hash = inode.dir_layout.dl_dir_hash;
  const int hash_type = layout_hash ? layout_hash : CEPH_STR_HASH_LINUX;
  return ceph_str_hash(hash_type, dn.data(), dn.length());
}
// Choose the dirfrag for the dentry name dn.
// Returns a default-constructed frag_t when dirfragtree is empty; otherwise
// hashes the name and returns dirfragtree indexed by that hash.
frag_t CInode::pick_dirfrag(const string& dn)
{
if (dirfragtree.empty())
return frag_t(); // avoid the string hash if we can.
__u32 h = ceph_str_hash_linux(dn.data(), dn.length());
__u32 h = hash_dentry_name(dn);
return dirfragtree[h];
}
@ -2076,6 +2084,8 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
{
int client = session->inst.name.num();
assert(snapid);
assert(session->connection);
bool valid = true;
@ -2299,6 +2309,10 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
::encode(p->second, bl);
}
::encode(symlink, bl);
if (session->connection->has_feature(CEPH_FEATURE_DIRLAYOUTHASH)) {
i = pfile ? pi : oi;
::encode(i->dir_layout, bl);
}
::encode(xbl, bl);
return valid;

View File

@ -358,6 +358,7 @@ private:
int stickydir_ref;
public:
__u32 hash_dentry_name(const string &dn);
frag_t pick_dirfrag(const string &dn);
bool has_dirfrags() { return !dirfrags.empty(); }
CDir* get_dirfrag(frag_t fg) {

View File

@ -256,10 +256,14 @@ CInode *MDCache::create_system_inode(inodeno_t ino, int mode)
in->inode.mtime = g_clock.now();
in->inode.nlink = 1;
in->inode.truncate_size = -1ull;
if (in->inode.is_dir())
memset(&in->inode.dir_layout, 0, sizeof(in->inode.dir_layout));
if (in->inode.is_dir()) {
memset(&in->inode.layout, 0, sizeof(in->inode.layout));
else
in->inode.dir_layout.dl_dir_hash = g_conf.mds_default_dir_hash;
} else {
in->inode.layout = default_file_layout;
}
if (in->is_base()) {
if (in->is_root())
@ -6845,7 +6849,7 @@ void MDCache::anchor_create(MDRequest *mdr, CInode *in, Context *onfinish)
in->make_anchor_trace(trace);
if (!trace.size()) {
assert(MDS_INO_IS_BASE(in->ino()));
trace.push_back(Anchor(in->ino(), in->ino(), "", 0, 0));
trace.push_back(Anchor(in->ino(), in->ino(), 0, 0, 0));
}
// do it

View File

@ -1652,6 +1652,13 @@ CInode* Server::prepare_new_inode(MDRequest *mdr, CDir *dir, inodeno_t useino, u
in->inode.version = 1;
in->inode.nlink = 1; // FIXME
in->inode.mode = mode;
memset(&in->inode.dir_layout, 0, sizeof(in->inode.dir_layout));
if (in->inode.is_dir())
in->inode.dir_layout.dl_dir_hash = g_conf.mds_default_dir_hash;
if (layout)
in->inode.layout = *layout;
else if (in->inode.is_dir())
@ -1677,7 +1684,6 @@ CInode* Server::prepare_new_inode(MDRequest *mdr, CDir *dir, inodeno_t useino, u
in->inode.gid = mdr->client_request->get_caller_gid();
in->inode.uid = mdr->client_request->get_caller_uid();
in->inode.mode = mode;
in->inode.ctime = in->inode.mtime = in->inode.atime = mdr->now; // now

View File

@ -917,6 +917,7 @@ struct inode_t {
bool anchored; // auth only?
// file (data access)
ceph_dir_layout dir_layout; // [dir only]
ceph_file_layout layout;
uint64_t size; // on directory, # dentries
uint32_t truncate_seq;
@ -994,7 +995,7 @@ struct inode_t {
}
void encode(bufferlist &bl) const {
__u8 v = 3;
__u8 v = 4;
::encode(v, bl);
::encode(ino, bl);
@ -1008,6 +1009,7 @@ struct inode_t {
::encode(nlink, bl);
::encode(anchored, bl);
::encode(dir_layout, bl);
::encode(layout, bl);
::encode(size, bl);
::encode(truncate_seq, bl);
@ -1042,6 +1044,10 @@ struct inode_t {
::decode(nlink, p);
::decode(anchored, p);
if (v >= 4)
::decode(dir_layout, p);
else
memset(&dir_layout, 0, sizeof(dir_layout));
::decode(layout, p);
::decode(size, p);
::decode(truncate_seq, p);

View File

@ -113,15 +113,18 @@ struct InodeStat {
version_t xattr_version;
bufferlist xattrbl;
ceph_dir_layout dir_layout;
//map<string, bufferptr> xattrs;
public:
InodeStat() {}
InodeStat(bufferlist::iterator& p) {
decode(p);
InodeStat(bufferlist::iterator& p, int features) {
decode(p, features);
}
void decode(bufferlist::iterator &p) {
void decode(bufferlist::iterator &p, int features) {
struct ceph_mds_reply_inode e;
::decode(e, p);
vino.ino = inodeno_t(e.ino);
@ -160,6 +163,11 @@ struct InodeStat {
}
::decode(symlink, p);
if (features & CEPH_FEATURE_DIRLAYOUTHASH)
::decode(dir_layout, p);
else
memset(&dir_layout, 0, sizeof(dir_layout));
xattr_version = e.xattr_version;
::decode(xattrbl, p);
}

View File

@ -57,7 +57,8 @@ using namespace __gnu_cxx;
CEPH_FEATURE_SUBSCRIBE2 | \
CEPH_FEATURE_MONNAMES | \
CEPH_FEATURE_FLOCK | \
CEPH_FEATURE_RECONNECT_SEQ
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_DIRLAYOUTHASH
class SimpleMessenger : public Messenger {
public: