mirror of
https://github.com/ceph/ceph
synced 2025-01-19 17:41:39 +00:00
Merge branch 'mds_dir_hash' into unstable
This commit is contained in:
commit
05bd6b078d
@ -494,6 +494,11 @@ Inode * Client::add_update_inode(InodeStat *st, utime_t from, int mds)
|
||||
in->dirstat = st->dirstat;
|
||||
in->rstat = st->rstat;
|
||||
|
||||
if (in->is_dir()) {
|
||||
in->dir_layout = st->dir_layout;
|
||||
dout(20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << dendl;
|
||||
}
|
||||
|
||||
in->layout = st->layout;
|
||||
in->ctime = st->ctime;
|
||||
in->max_size = st->max_size; // right?
|
||||
@ -651,6 +656,10 @@ Inode* Client::insert_trace(MetaRequest *request, utime_t from, int mds)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Connection *con = request->reply->get_connection();
|
||||
int features = con->get_features();
|
||||
dout(10) << " features 0x" << hex << features << dec << dendl;
|
||||
|
||||
// snap trace
|
||||
if (reply->snapbl.length())
|
||||
update_snap_trace(reply->snapbl);
|
||||
@ -667,7 +676,7 @@ Inode* Client::insert_trace(MetaRequest *request, utime_t from, int mds)
|
||||
InodeStat ist;
|
||||
|
||||
if (reply->head.is_dentry) {
|
||||
dirst.decode(p);
|
||||
dirst.decode(p, features);
|
||||
dst.decode(p);
|
||||
::decode(dname, p);
|
||||
::decode(dlease, p);
|
||||
@ -675,7 +684,7 @@ Inode* Client::insert_trace(MetaRequest *request, utime_t from, int mds)
|
||||
|
||||
Inode *in = 0;
|
||||
if (reply->head.is_target) {
|
||||
ist.decode(p);
|
||||
ist.decode(p, features);
|
||||
in = add_update_inode(&ist, from, mds);
|
||||
}
|
||||
|
||||
@ -759,7 +768,7 @@ Inode* Client::insert_trace(MetaRequest *request, utime_t from, int mds)
|
||||
for (unsigned i=0; i<numdn; i++) {
|
||||
::decode(dname, p);
|
||||
::decode(dlease, p);
|
||||
InodeStat ist(p);
|
||||
InodeStat ist(p, features);
|
||||
|
||||
Inode *in = add_update_inode(&ist, from, mds);
|
||||
Dentry *dn = insert_dentry_inode(dir, dname, &dlease, in, from, mds, false);
|
||||
@ -830,13 +839,25 @@ int Client::choose_target_mds(MetaRequest *req)
|
||||
|
||||
if (req->inode) {
|
||||
in = req->inode;
|
||||
if (req->path.depth()) {
|
||||
hash = ceph_str_hash(in->dir_layout.dl_dir_hash,
|
||||
req->path[0].data(),
|
||||
req->path[0].length());
|
||||
dout(20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << " on " << req->path[0]
|
||||
<< " => " << hash << dendl;
|
||||
is_hash = true;
|
||||
|
||||
}
|
||||
} else if (req->dentry) {
|
||||
if (req->dentry->inode) {
|
||||
in = req->dentry->inode;
|
||||
} else {
|
||||
in = req->dentry->dir->parent_inode;
|
||||
hash = ceph_str_hash_linux(req->dentry->name.data(),
|
||||
req->dentry->name.length());
|
||||
hash = ceph_str_hash(in->dir_layout.dl_dir_hash,
|
||||
req->dentry->name.data(),
|
||||
req->dentry->name.length());
|
||||
dout(20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << " on " << req->dentry->name
|
||||
<< " => " << hash << dendl;
|
||||
is_hash = true;
|
||||
}
|
||||
}
|
||||
|
@ -387,6 +387,7 @@ class Inode {
|
||||
int32_t nlink;
|
||||
|
||||
// file (data access)
|
||||
ceph_dir_layout dir_layout;
|
||||
ceph_file_layout layout;
|
||||
uint64_t size; // on directory, # dentries
|
||||
uint32_t truncate_seq;
|
||||
|
@ -114,8 +114,9 @@ int main(int argc, const char **argv)
|
||||
return 1;
|
||||
|
||||
uint64_t supported =
|
||||
CEPH_FEATURE_UID |
|
||||
CEPH_FEATURE_NOSRCADDR;
|
||||
CEPH_FEATURE_UID |
|
||||
CEPH_FEATURE_NOSRCADDR |
|
||||
CEPH_FEATURE_DIRLAYOUTHASH;
|
||||
messenger->set_default_policy(SimpleMessenger::Policy::client(supported, 0));
|
||||
messenger->set_policy(entity_name_t::TYPE_MON,
|
||||
SimpleMessenger::Policy::client(supported,
|
||||
|
@ -96,6 +96,7 @@ std::map<entity_name_t,float> g_fake_kill_after;
|
||||
md_config_t g_conf;
|
||||
bool g_daemon = false;
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
@ -440,6 +441,7 @@ static struct config_option config_optionsp[] = {
|
||||
OPTION(mds_early_reply, 0, OPT_BOOL, true),
|
||||
OPTION(mds_short_reply_trace, 0, OPT_BOOL, true),
|
||||
OPTION(mds_use_tmap, 0, OPT_BOOL, true), // use trivialmap for dir updates
|
||||
OPTION(mds_default_dir_hash, 0, OPT_INT, CEPH_STR_HASH_RJENKINS),
|
||||
OPTION(mds_log, 0, OPT_BOOL, true),
|
||||
OPTION(mds_log_unsafe, 0, OPT_BOOL, false), // only wait for log sync, when it's mostly safe to do so
|
||||
OPTION(mds_log_skip_corrupt_events, 0, OPT_BOOL, false),
|
||||
|
@ -261,6 +261,8 @@ struct md_config_t {
|
||||
|
||||
bool mds_use_tmap;
|
||||
|
||||
int mds_default_dir_hash;
|
||||
|
||||
bool mds_log;
|
||||
bool mds_log_unsafe;
|
||||
bool mds_log_skip_corrupt_events;
|
||||
|
@ -46,6 +46,7 @@
|
||||
#define CEPH_FEATURE_SUBSCRIBE2 (1<<4)
|
||||
#define CEPH_FEATURE_MONNAMES (1<<5)
|
||||
#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
|
||||
#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
|
||||
|
||||
|
||||
/*
|
||||
@ -58,10 +59,10 @@ struct ceph_file_layout {
|
||||
__le32 fl_stripe_count; /* over this many objects */
|
||||
__le32 fl_object_size; /* until objects are this big, then move to
|
||||
new objects */
|
||||
__le32 fl_cas_hash; /* 0 = none; 1 = sha256 */
|
||||
__le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */
|
||||
|
||||
/* pg -> disk layout */
|
||||
__le32 fl_object_stripe_unit; /* for per-object parity, if any */
|
||||
__le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
|
||||
|
||||
/* object -> pg layout */
|
||||
__le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
|
||||
@ -72,6 +73,12 @@ struct ceph_file_layout {
|
||||
|
||||
int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
|
||||
|
||||
/*
 * Per-directory layout.  dl_dir_hash selects the string hash function
 * applied to dentry names in the directory; the dl_unused* fields are
 * currently unused.
 *
 * The struct is packed and is encoded with WRITE_RAW_ENCODER, so
 * presumably its in-memory byte layout is also its encoded format --
 * confirm before reordering or resizing fields.
 */
struct ceph_dir_layout {
|
||||
__u8 dl_dir_hash; /* hash function id (see ceph_hash.h for ids); 0 is treated as CEPH_STR_HASH_LINUX by CInode::hash_dentry_name */
|
||||
__u8 dl_unused1;
|
||||
__u16 dl_unused2;
|
||||
__u32 dl_unused3;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
/* crypto algorithms */
|
||||
#define CEPH_CRYPTO_NONE 0x0
|
||||
@ -463,7 +470,7 @@ struct ceph_mds_reply_inode {
|
||||
struct ceph_timespec rctime;
|
||||
struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */
|
||||
} __attribute__ ((packed));
|
||||
/* followed by frag array, then symlink string, then xattr blob */
|
||||
/* followed by frag array, symlink string, dir layout (only when CEPH_FEATURE_DIRLAYOUTHASH is negotiated), xattr blob */
|
||||
|
||||
/* reply_lease follows dname, and reply_inode */
|
||||
struct ceph_mds_reply_lease {
|
||||
|
@ -211,6 +211,7 @@ struct ltstr
|
||||
|
||||
WRITE_RAW_ENCODER(ceph_fsid)
|
||||
WRITE_RAW_ENCODER(ceph_file_layout)
|
||||
WRITE_RAW_ENCODER(ceph_dir_layout)
|
||||
WRITE_RAW_ENCODER(ceph_pg_pool)
|
||||
WRITE_RAW_ENCODER(ceph_mds_session_head)
|
||||
WRITE_RAW_ENCODER(ceph_mds_request_head)
|
||||
|
@ -40,10 +40,6 @@ public:
|
||||
Anchor() : dn_hash(0), nref(0), updated(0) {}
|
||||
Anchor(inodeno_t i, inodeno_t di, __u32 hash, int nr, version_t u) :
|
||||
ino(i), dirino(di), dn_hash(hash), nref(nr), updated(u) { }
|
||||
Anchor(inodeno_t i, inodeno_t di, const string &dname, int nr, version_t u) :
|
||||
ino(i), dirino(di),
|
||||
dn_hash(ceph_str_hash_linux(dname.data(), dname.length())),
|
||||
nref(nr), updated(u) { }
|
||||
|
||||
void encode(bufferlist &bl) const {
|
||||
__u8 struct_v = 1;
|
||||
|
@ -262,7 +262,7 @@ void CDentry::make_anchor_trace(vector<Anchor>& trace, CInode *in)
|
||||
dir->inode->make_anchor_trace(trace);
|
||||
|
||||
// add this inode (in my dirfrag) to the end
|
||||
trace.push_back(Anchor(in->ino(), dir->ino(), name, 0, 0));
|
||||
trace.push_back(Anchor(in->ino(), dir->ino(), get_hash(), 0, 0));
|
||||
dout(10) << "make_anchor_trace added " << trace.back() << dendl;
|
||||
}
|
||||
|
||||
|
@ -96,6 +96,7 @@ public:
|
||||
|
||||
public:
|
||||
string name;
|
||||
__u32 hash;
|
||||
snapid_t first, last;
|
||||
|
||||
dentry_key_t key() {
|
||||
@ -163,9 +164,9 @@ public:
|
||||
|
||||
public:
|
||||
// cons
|
||||
CDentry(const string& n,
|
||||
CDentry(const string& n, __u32 h,
|
||||
snapid_t f, snapid_t l) :
|
||||
name(n),
|
||||
name(n), hash(h),
|
||||
first(f), last(l),
|
||||
dir(0),
|
||||
version(0), projected_version(0),
|
||||
@ -176,9 +177,9 @@ public:
|
||||
g_num_dn++;
|
||||
g_num_dna++;
|
||||
}
|
||||
CDentry(const string& n, inodeno_t ino, unsigned char dt,
|
||||
CDentry(const string& n, __u32 h, inodeno_t ino, unsigned char dt,
|
||||
snapid_t f, snapid_t l) :
|
||||
name(n),
|
||||
name(n), hash(h),
|
||||
first(f), last(l),
|
||||
dir(0),
|
||||
version(0), projected_version(0),
|
||||
@ -200,6 +201,8 @@ public:
|
||||
CDir *get_dir() const { return dir; }
|
||||
const string& get_name() const { return name; }
|
||||
|
||||
// Hash value stored at construction time (the constructor's h argument);
// CDir passes inode->hash_dentry_name(dname) when creating dentries.
__u32 get_hash() const { return hash; }
|
||||
|
||||
/*
|
||||
CInode *get_inode() const { return linkage.inode; }
|
||||
inodeno_t get_remote_ino() { return linkage.remote_ino; }
|
||||
|
@ -229,7 +229,7 @@ CDentry* CDir::add_null_dentry(const string& dname,
|
||||
assert(lookup_exact_snap(dname, last) == 0);
|
||||
|
||||
// create dentry
|
||||
CDentry* dn = new CDentry(dname, first, last);
|
||||
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), first, last);
|
||||
if (is_auth())
|
||||
dn->state_set(CDentry::STATE_AUTH);
|
||||
cache->lru.lru_insert_mid(dn);
|
||||
@ -265,7 +265,7 @@ CDentry* CDir::add_primary_dentry(const string& dname, CInode *in,
|
||||
assert(lookup_exact_snap(dname, last) == 0);
|
||||
|
||||
// create dentry
|
||||
CDentry* dn = new CDentry(dname, first, last);
|
||||
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), first, last);
|
||||
if (is_auth())
|
||||
dn->state_set(CDentry::STATE_AUTH);
|
||||
cache->lru.lru_insert_mid(dn);
|
||||
@ -303,9 +303,9 @@ CDentry* CDir::add_remote_dentry(const string& dname, inodeno_t ino, unsigned ch
|
||||
{
|
||||
// foreign
|
||||
assert(lookup_exact_snap(dname, last) == 0);
|
||||
|
||||
|
||||
// create dentry
|
||||
CDentry* dn = new CDentry(dname, ino, d_type, first, last);
|
||||
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), ino, d_type, first, last);
|
||||
if (is_auth())
|
||||
dn->state_set(CDentry::STATE_AUTH);
|
||||
cache->lru.lru_insert_mid(dn);
|
||||
|
@ -420,12 +420,20 @@ void CInode::pop_projected_snaprealm(sr_t *next_snaprealm)
|
||||
|
||||
// dirfrags
|
||||
|
||||
/*
 * Hash a dentry name with the hash function configured for this
 * directory inode (inode.dir_layout.dl_dir_hash).
 *
 * dn: dentry name to hash.
 * Returns the result of ceph_str_hash() for the selected hash id.
 */
__u32 CInode::hash_dentry_name(const string &dn)
|
||||
{
|
||||
int which = inode.dir_layout.dl_dir_hash;
|
||||
// A hash id of 0 means none was recorded: inode_t::decode zero-fills
// dir_layout for encodings older than v4.  Fall back to the linux hash.
if (!which)
|
||||
which = CEPH_STR_HASH_LINUX;
|
||||
return ceph_str_hash(which, dn.data(), dn.length());
|
||||
}
|
||||
|
||||
// Choose the dirfrag for dentry name dn.  An empty dirfragtree yields the
// default frag_t() without hashing; otherwise the name's hash is looked up
// in dirfragtree.
// NOTE(review): the two declarations of h below look like the pre- and
// post-change versions of one line in a diff view -- confirm which applies.
frag_t CInode::pick_dirfrag(const string& dn)
|
||||
{
|
||||
if (dirfragtree.empty())
|
||||
return frag_t(); // avoid the string hash if we can.
|
||||
|
||||
__u32 h = ceph_str_hash_linux(dn.data(), dn.length());
|
||||
__u32 h = hash_dentry_name(dn);
|
||||
return dirfragtree[h];
|
||||
}
|
||||
|
||||
@ -2076,6 +2084,8 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
|
||||
{
|
||||
int client = session->inst.name.num();
|
||||
assert(snapid);
|
||||
|
||||
assert(session->connection);
|
||||
|
||||
bool valid = true;
|
||||
|
||||
@ -2299,6 +2309,10 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
|
||||
::encode(p->second, bl);
|
||||
}
|
||||
::encode(symlink, bl);
|
||||
if (session->connection->has_feature(CEPH_FEATURE_DIRLAYOUTHASH)) {
|
||||
i = pfile ? pi : oi;
|
||||
::encode(i->dir_layout, bl);
|
||||
}
|
||||
::encode(xbl, bl);
|
||||
|
||||
return valid;
|
||||
|
@ -358,6 +358,7 @@ private:
|
||||
int stickydir_ref;
|
||||
|
||||
public:
|
||||
__u32 hash_dentry_name(const string &dn);
|
||||
frag_t pick_dirfrag(const string &dn);
|
||||
bool has_dirfrags() { return !dirfrags.empty(); }
|
||||
CDir* get_dirfrag(frag_t fg) {
|
||||
|
@ -256,10 +256,14 @@ CInode *MDCache::create_system_inode(inodeno_t ino, int mode)
|
||||
in->inode.mtime = g_clock.now();
|
||||
in->inode.nlink = 1;
|
||||
in->inode.truncate_size = -1ull;
|
||||
if (in->inode.is_dir())
|
||||
|
||||
memset(&in->inode.dir_layout, 0, sizeof(in->inode.dir_layout));
|
||||
if (in->inode.is_dir()) {
|
||||
memset(&in->inode.layout, 0, sizeof(in->inode.layout));
|
||||
else
|
||||
in->inode.dir_layout.dl_dir_hash = g_conf.mds_default_dir_hash;
|
||||
} else {
|
||||
in->inode.layout = default_file_layout;
|
||||
}
|
||||
|
||||
if (in->is_base()) {
|
||||
if (in->is_root())
|
||||
@ -6845,7 +6849,7 @@ void MDCache::anchor_create(MDRequest *mdr, CInode *in, Context *onfinish)
|
||||
in->make_anchor_trace(trace);
|
||||
if (!trace.size()) {
|
||||
assert(MDS_INO_IS_BASE(in->ino()));
|
||||
trace.push_back(Anchor(in->ino(), in->ino(), "", 0, 0));
|
||||
trace.push_back(Anchor(in->ino(), in->ino(), 0, 0, 0));
|
||||
}
|
||||
|
||||
// do it
|
||||
|
@ -1652,6 +1652,13 @@ CInode* Server::prepare_new_inode(MDRequest *mdr, CDir *dir, inodeno_t useino, u
|
||||
|
||||
in->inode.version = 1;
|
||||
in->inode.nlink = 1; // FIXME
|
||||
|
||||
in->inode.mode = mode;
|
||||
|
||||
memset(&in->inode.dir_layout, 0, sizeof(in->inode.dir_layout));
|
||||
if (in->inode.is_dir())
|
||||
in->inode.dir_layout.dl_dir_hash = g_conf.mds_default_dir_hash;
|
||||
|
||||
if (layout)
|
||||
in->inode.layout = *layout;
|
||||
else if (in->inode.is_dir())
|
||||
@ -1677,7 +1684,6 @@ CInode* Server::prepare_new_inode(MDRequest *mdr, CDir *dir, inodeno_t useino, u
|
||||
in->inode.gid = mdr->client_request->get_caller_gid();
|
||||
|
||||
in->inode.uid = mdr->client_request->get_caller_uid();
|
||||
in->inode.mode = mode;
|
||||
|
||||
in->inode.ctime = in->inode.mtime = in->inode.atime = mdr->now; // now
|
||||
|
||||
|
@ -917,6 +917,7 @@ struct inode_t {
|
||||
bool anchored; // auth only?
|
||||
|
||||
// file (data access)
|
||||
ceph_dir_layout dir_layout; // [dir only]
|
||||
ceph_file_layout layout;
|
||||
uint64_t size; // on directory, # dentries
|
||||
uint32_t truncate_seq;
|
||||
@ -994,7 +995,7 @@ struct inode_t {
|
||||
}
|
||||
|
||||
void encode(bufferlist &bl) const {
|
||||
__u8 v = 3;
|
||||
__u8 v = 4;
|
||||
::encode(v, bl);
|
||||
|
||||
::encode(ino, bl);
|
||||
@ -1008,6 +1009,7 @@ struct inode_t {
|
||||
::encode(nlink, bl);
|
||||
::encode(anchored, bl);
|
||||
|
||||
::encode(dir_layout, bl);
|
||||
::encode(layout, bl);
|
||||
::encode(size, bl);
|
||||
::encode(truncate_seq, bl);
|
||||
@ -1042,6 +1044,10 @@ struct inode_t {
|
||||
::decode(nlink, p);
|
||||
::decode(anchored, p);
|
||||
|
||||
if (v >= 4)
|
||||
::decode(dir_layout, p);
|
||||
else
|
||||
memset(&dir_layout, 0, sizeof(dir_layout));
|
||||
::decode(layout, p);
|
||||
::decode(size, p);
|
||||
::decode(truncate_seq, p);
|
||||
|
@ -113,15 +113,18 @@ struct InodeStat {
|
||||
|
||||
version_t xattr_version;
|
||||
bufferlist xattrbl;
|
||||
|
||||
ceph_dir_layout dir_layout;
|
||||
|
||||
//map<string, bufferptr> xattrs;
|
||||
|
||||
public:
|
||||
InodeStat() {}
|
||||
InodeStat(bufferlist::iterator& p) {
|
||||
decode(p);
|
||||
InodeStat(bufferlist::iterator& p, int features) {
|
||||
decode(p, features);
|
||||
}
|
||||
|
||||
void decode(bufferlist::iterator &p) {
|
||||
void decode(bufferlist::iterator &p, int features) {
|
||||
struct ceph_mds_reply_inode e;
|
||||
::decode(e, p);
|
||||
vino.ino = inodeno_t(e.ino);
|
||||
@ -160,6 +163,11 @@ struct InodeStat {
|
||||
}
|
||||
::decode(symlink, p);
|
||||
|
||||
if (features & CEPH_FEATURE_DIRLAYOUTHASH)
|
||||
::decode(dir_layout, p);
|
||||
else
|
||||
memset(&dir_layout, 0, sizeof(dir_layout));
|
||||
|
||||
xattr_version = e.xattr_version;
|
||||
::decode(xattrbl, p);
|
||||
}
|
||||
|
@ -57,7 +57,8 @@ using namespace __gnu_cxx;
|
||||
CEPH_FEATURE_SUBSCRIBE2 | \
|
||||
CEPH_FEATURE_MONNAMES | \
|
||||
CEPH_FEATURE_FLOCK | \
|
||||
CEPH_FEATURE_RECONNECT_SEQ
|
||||
CEPH_FEATURE_RECONNECT_SEQ | \
|
||||
CEPH_FEATURE_DIRLAYOUTHASH
|
||||
|
||||
class SimpleMessenger : public Messenger {
|
||||
public:
|
||||
|
Loading…
Reference in New Issue
Block a user