Merge pull request #5849 from ukernel/wip-32bits-ceph-fuse

Wip 32bits ceph fuse

Reviewed-by: Greg Farnum <gfarnum@redhat.com>
This commit is contained in:
Gregory Farnum 2015-09-28 22:19:11 -07:00
commit 119336fbf6
8 changed files with 163 additions and 43 deletions

View File

@ -171,6 +171,60 @@ dir_result_t::dir_result_t(Inode *in)
buffer(0) {
}
// Put the faked-inode allocator back into its initial state: the whole
// pool is free again, the allocation cursor is rewound, and the decision
// whether faked inode numbers are used at all is re-evaluated.
void Client::_reset_faked_inos()
{
  // Lowest faked ino the pool will ever hand out.
  ino_t first_ino = 1024;
  // Number of values in [first_ino, (uint32_t)-1], both ends inclusive.
  ino_t pool_len = (uint32_t)-1 - first_ino + 1;

  free_faked_inos.clear();
  free_faked_inos.insert(first_ino, pool_len);

  // 0 means "nothing allocated yet"; _assign_faked_ino() searches from
  // last_used_faked_ino + 1.
  last_used_faked_ino = 0;

  // Faked inos are forced on when ino_t is narrower than 8 bytes, and can
  // also be requested through the client_use_faked_inos option.
  _use_faked_inos = sizeof(ino_t) < 8 || cct->_conf->client_use_faked_inos;
}
// Pick a free faked inode number for 'in', record it on the inode, take it
// out of the free pool and remember the faked ino -> vino mapping.
//
// The search starts just past the most recently assigned number and wraps
// around to the bottom of the pool once when nothing is free above it.
// Asserts if the pool has no free number at all.
void Client::_assign_faked_ino(Inode *in)
{
  typedef interval_set<ino_t>::const_iterator free_range_iter;

  // NOTE(review): lower_bound() is presumed to return the free interval
  // containing, or the first one following, its argument -- confirm against
  // interval_set::find_inc().
  free_range_iter range = free_faked_inos.lower_bound(last_used_faked_ino + 1);
  if (range == free_faked_inos.end() && last_used_faked_ino > 0) {
    // Nothing free beyond the cursor: rewind and search once more.
    last_used_faked_ino = 0;
    range = free_faked_inos.lower_bound(last_used_faked_ino + 1);
  }
  assert(range != free_faked_inos.end());

  if (last_used_faked_ino >= range.get_start()) {
    // The cursor already sits inside this free interval: take the next
    // value, which must still lie within the interval.
    ++last_used_faked_ino;
    assert(range.get_start() + range.get_len() > last_used_faked_ino);
  } else {
    // The interval begins above the cursor: jump to its first value.
    assert(range.get_len() > 0);
    last_used_faked_ino = range.get_start();
  }

  ino_t fino = last_used_faked_ino;
  in->faked_ino = fino;
  free_faked_inos.erase(fino);
  faked_ino_map[fino] = in->vino();
}
// Give the faked inode number held by 'in' back to the free pool and drop
// its entry from the faked ino -> vino map.  in->faked_ino itself is left
// untouched.
void Client::_release_faked_ino(Inode *in)
{
  ino_t fino = in->faked_ino;
  free_faked_inos.insert(fino);
  faked_ino_map.erase(fino);
}
// Translate a faked inode number back into the real vinodeno_t.
//
//  - ino 1 always maps to the root inode's vino.
//  - a number present in faked_ino_map maps to the stored vino.
//  - anything else yields vinodeno_t(0, CEPH_NOSNAP).
//
// The leading underscore variant takes no lock itself; map_faked_ino() is
// the wrapper that acquires client_lock before calling here.
vinodeno_t Client::_map_faked_ino(ino_t ino)
{
  vinodeno_t vino;
  if (ino == 1) {
    // NOTE(review): dereferences root unconditionally -- assumes callers
    // only reach this after root has been set; confirm.
    vino = root->vino();
  } else {
    // One find() instead of count() + operator[]: a single hash lookup, and
    // no use of a mutating accessor on what is a read-only query.
    ceph::unordered_map<ino_t, vinodeno_t>::iterator p = faked_ino_map.find(ino);
    if (p != faked_ino_map.end())
      vino = p->second;
    else
      vino = vinodeno_t(0, CEPH_NOSNAP);
  }
  ldout(cct, 10) << "map_faked_ino " << ino << " -> " << vino << dendl;
  return vino;
}
// Locked entry point for faked ino -> vino translation: holds client_lock
// for the duration of the lookup done by _map_faked_ino().
vinodeno_t Client::map_faked_ino(ino_t ino)
{
  Mutex::Locker l(client_lock);
  vinodeno_t vino = _map_faked_ino(ino);
  return vino;
}
// cons/des
Client::Client(Messenger *m, MonClient *mc)
@ -203,6 +257,7 @@ Client::Client(Messenger *m, MonClient *mc)
{
monclient->set_messenger(m);
_reset_faked_inos();
//
root = 0;
@ -286,6 +341,7 @@ void Client::tear_down_cache()
while (!root_parents.empty())
root_parents.erase(root_parents.begin());
inode_map.clear();
_reset_faked_inos();
}
assert(inode_map.empty());
@ -583,6 +639,7 @@ void Client::trim_cache(bool trim_kernel_dcache)
while (!root_parents.empty())
root_parents.erase(root_parents.begin());
inode_map.clear();
_reset_faked_inos();
}
}
@ -755,6 +812,10 @@ Inode * Client::add_update_inode(InodeStat *st, utime_t from,
} else {
in = new Inode(this, st->vino, &st->layout);
inode_map[st->vino] = in;
if (use_faked_inos())
_assign_faked_ino(in);
if (!root) {
root = in;
root_ancestor = in;
@ -2589,6 +2650,9 @@ void Client::put_inode(Inode *in, int n)
assert(!unclean);
put_qtree(in);
inode_map.erase(in->vino());
if (use_faked_inos())
_release_faked_ino(in);
in->cap_item.remove_myself();
in->snaprealm_item.remove_myself();
in->snapdir_parent.reset();
@ -3323,7 +3387,10 @@ public:
void Client::_async_invalidate(InodeRef& in, int64_t off, int64_t len, bool keep_caps)
{
ldout(cct, 10) << "_async_invalidate " << off << "~" << len << (keep_caps ? " keep_caps" : "") << dendl;
ino_invalidate_cb(callback_handle, in->vino(), off, len);
if (use_faked_inos())
ino_invalidate_cb(callback_handle, vinodeno_t(in->faked_ino, CEPH_NOSNAP), off, len);
else
ino_invalidate_cb(callback_handle, in->vino(), off, len);
client_lock.Lock();
if (!keep_caps)
@ -4473,10 +4540,16 @@ private:
public:
C_Client_DentryInvalidate(Client *c, Dentry *dn, bool del) :
client(c), name(dn->name) {
dirino = dn->dir->parent_inode->vino();
if (del)
ino = dn->inode->vino();
else
if (client->use_faked_inos()) {
dirino.ino = dn->dir->parent_inode->faked_ino;
if (del)
ino.ino = dn->inode->faked_ino;
} else {
dirino = dn->dir->parent_inode->vino();
if (del)
ino = dn->inode->vino();
}
if (!del)
ino.ino = inodeno_t();
}
void finish(int r) {
@ -5888,7 +5961,10 @@ int Client::fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat, nest_inf
<< " mode 0" << oct << in->mode << dec
<< " mtime " << in->mtime << " ctime " << in->ctime << dendl;
memset(st, 0, sizeof(struct stat));
st->st_ino = in->ino;
if (use_faked_inos())
st->st_ino = in->faked_ino;
else
st->st_ino = in->ino;
st->st_dev = in->snapid;
st->st_mode = in->mode;
st->st_rdev = in->rdev;
@ -6442,9 +6518,8 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p)
assert(diri->dn_set.size() < 2); // can't have multiple hard-links to a dir
uint64_t next_off = 1;
fill_dirent(&de, ".", S_IFDIR, diri->ino, next_off);
fill_stat(diri, &st);
fill_dirent(&de, ".", S_IFDIR, st.st_ino, next_off);
client_lock.Unlock();
int r = cb(p, &de, &st, -1, next_off);
@ -6461,8 +6536,8 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p)
ldout(cct, 15) << " including .." << dendl;
if (!diri->dn_set.empty()) {
InodeRef& in = diri->get_first_parent()->inode;
fill_dirent(&de, "..", S_IFDIR, in->ino, 2);
fill_stat(in, &st);
fill_dirent(&de, "..", S_IFDIR, st.st_ino, 2);
} else {
/* must be at the root (no parent),
* so we add the dotdot with a special inode (3) */
@ -8595,8 +8670,10 @@ Inode *Client::open_snapdir(Inode *diri)
in->size = diri->size;
in->dirfragtree.clear();
inode_map[vino] = in;
in->snapdir_parent = diri;
inode_map[vino] = in;
if (use_faked_inos())
_assign_faked_ino(in);
ldout(cct, 10) << "open_snapdir created snapshot inode " << *in << dendl;
} else {
in = inode_map[vino];
@ -8742,6 +8819,18 @@ snapid_t Client::ll_get_snapid(Inode *in)
return in->snapid;
}
// Look up an inode by its faked inode number.
//
// Under client_lock, the faked number is translated to a vino and searched
// for in inode_map.  On a hit the inode is passed to _ll_get() and returned;
// on a miss NULL is returned.
Inode *Client::ll_get_inode(ino_t ino)
{
  Mutex::Locker lock(client_lock);

  Inode *found = NULL;
  vinodeno_t vino = _map_faked_ino(ino);
  unordered_map<vinodeno_t,Inode*>::iterator entry = inode_map.find(vino);
  if (entry != inode_map.end()) {
    found = entry->second;
    _ll_get(found);
  }
  return found;
}
Inode *Client::ll_get_inode(vinodeno_t vino)
{
Mutex::Locker lock(client_lock);

View File

@ -351,6 +351,17 @@ protected:
// cache
ceph::unordered_map<vinodeno_t, Inode*> inode_map;
// faked inode numbers, used when ino_t is narrower than 64 bits or when client_use_faked_inos is set
ceph::unordered_map<ino_t, vinodeno_t> faked_ino_map;
interval_set<ino_t> free_faked_inos;
ino_t last_used_faked_ino;
void _assign_faked_ino(Inode *in);
void _release_faked_ino(Inode *in);
bool _use_faked_inos;
void _reset_faked_inos();
vinodeno_t _map_faked_ino(ino_t ino);
Inode* root;
map<Inode*, InodeRef> root_parents;
Inode* root_ancestor;
@ -627,6 +638,8 @@ protected:
Dentry *old_dentry = NULL);
void update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, MetaSession *session);
bool use_faked_inos() { return _use_faked_inos; }
vinodeno_t map_faked_ino(ino_t ino);
// ----------------------
// fs ops.
@ -937,6 +950,8 @@ public:
Mutex::Locker lock(client_lock);
return _get_vino(in);
}
// get inode from faked ino
Inode *ll_get_inode(ino_t ino);
Inode *ll_get_inode(vinodeno_t vino);
int ll_lookup(Inode *parent, const char *name, struct stat *attr,
Inode **out, int uid = -1, int gid = -1);

View File

@ -11,7 +11,8 @@
ostream& operator<<(ostream &out, Inode &in)
{
out << in.vino() << "("
<< "ref=" << in._ref
<< "faked_ino=" << in.faked_ino
<< " ref=" << in._ref
<< " ll_ref=" << in.ll_ref
<< " cap_refs=" << in.cap_refs
<< " open=" << in.open_by_mode

View File

@ -159,6 +159,8 @@ struct Inode {
// -- the actual inode --
inodeno_t ino;
snapid_t snapid;
ino_t faked_ino;
uint32_t rdev; // if special file
// affected by any inode change...
@ -294,7 +296,7 @@ struct Inode {
xlist<MetaRequest*> unsafe_dir_ops;
Inode(Client *c, vinodeno_t vino, ceph_file_layout *newlayout)
: client(c), ino(vino.ino), snapid(vino.snapid),
: client(c), ino(vino.ino), snapid(vino.snapid), faked_ino(0),
rdev(0), mode(0), uid(0), gid(0), nlink(0),
size(0), truncate_seq(1), truncate_size(-1),
time_warp_seq(0), max_size(0), version(0), xattr_version(0),

View File

@ -74,9 +74,8 @@ public:
void finalize();
uint64_t fino_snap(uint64_t fino);
vinodeno_t fino_vino(inodeno_t fino);
uint64_t make_fake_ino(inodeno_t ino, snapid_t snapid);
Inode * iget(inodeno_t fino);
Inode * iget(fuse_ino_t fino);
void iput(Inode *in);
int fd_on_success;
@ -1011,27 +1010,27 @@ int CephFuse::Handle::loop()
uint64_t CephFuse::Handle::fino_snap(uint64_t fino)
{
Mutex::Locker l(stag_lock);
uint64_t stag = FINO_STAG(fino);
assert(stag_snap_map.count(stag));
return stag_snap_map[stag];
}
vinodeno_t CephFuse::Handle::fino_vino(inodeno_t fino)
{
if (fino.val == 1) {
fino = inodeno_t(client->get_root_ino());
if (client->use_faked_inos()) {
vinodeno_t vino = client->map_faked_ino(fino);
return vino.snapid;
} else {
Mutex::Locker l(stag_lock);
uint64_t stag = FINO_STAG(fino);
assert(stag_snap_map.count(stag));
return stag_snap_map[stag];
}
vinodeno_t vino(FINO_INO(fino), fino_snap(fino));
//cout << "fino_vino " << fino << " -> " << vino << std::endl;
return vino;
}
Inode * CephFuse::Handle::iget(inodeno_t fino)
Inode * CephFuse::Handle::iget(fuse_ino_t fino)
{
Inode *in =
client->ll_get_inode(fino_vino(fino));
return in;
if (client->use_faked_inos()) {
return client->ll_get_inode((ino_t)fino);
} else {
if (fino == 1)
fino = inodeno_t(client->get_root_ino());
vinodeno_t vino(FINO_INO(fino), fino_snap(fino));
return client->ll_get_inode(vino);
}
}
void CephFuse::Handle::iput(Inode *in)
@ -1041,17 +1040,22 @@ void CephFuse::Handle::iput(Inode *in)
uint64_t CephFuse::Handle::make_fake_ino(inodeno_t ino, snapid_t snapid)
{
Mutex::Locker l(stag_lock);
uint64_t stag;
if (snap_stag_map.count(snapid) == 0) {
stag = ++last_stag;
snap_stag_map[snapid] = stag;
stag_snap_map[stag] = snapid;
} else
stag = snap_stag_map[snapid];
inodeno_t fino = MAKE_FINO(ino, stag);
//cout << "make_fake_ino " << ino << "." << snapid << " -> " << fino << std::endl;
return fino;
if (client->use_faked_inos()) {
// already faked by libcephfs
return ino;
} else {
Mutex::Locker l(stag_lock);
uint64_t stag;
if (snap_stag_map.count(snapid) == 0) {
stag = ++last_stag;
snap_stag_map[snapid] = stag;
stag_snap_map[stag] = snapid;
} else
stag = snap_stag_map[snapid];
inodeno_t fino = MAKE_FINO(ino, stag);
//cout << "make_fake_ino " << ino << "." << snapid << " -> " << fino << std::endl;
return fino;
}
}
CephFuse::CephFuse(Client *c, int fd) : _handle(new CephFuse::Handle(c, fd))

View File

@ -366,6 +366,7 @@ OPTION(fuse_require_active_mds, OPT_BOOL, true) // if ceph_fuse requires active
OPTION(client_try_dentry_invalidate, OPT_BOOL, true) // the client should try to use dentry invaldation instead of remounting, on kernels it believes that will work for
OPTION(client_die_on_failed_remount, OPT_BOOL, true)
OPTION(client_check_pool_perm, OPT_BOOL, true)
OPTION(client_use_faked_inos, OPT_BOOL, false)
OPTION(crush_location, OPT_STR, "") // whitespace-separated list of key=value pairs describing crush location

View File

@ -164,6 +164,10 @@ class interval_set {
return typename interval_set<T>::iterator(m.begin());
}
// Mutable-iterator lookup by position: wraps the result of find_inc_m(start).
// NOTE(review): presumably this is the interval containing 'start' or, failing
// that, the first interval after it -- confirm against find_inc_m().
typename interval_set<T>::iterator lower_bound(T start) {
  typename interval_set<T>::iterator pos(find_inc_m(start));
  return pos;
}
typename interval_set<T>::iterator end() {
return typename interval_set<T>::iterator(m.end());
}
@ -172,6 +176,10 @@ class interval_set {
return typename interval_set<T>::const_iterator(m.begin());
}
// Const-iterator lookup by position: wraps the result of find_inc(start).
// NOTE(review): presumably this is the interval containing 'start' or, failing
// that, the first interval after it -- confirm against find_inc().
typename interval_set<T>::const_iterator lower_bound(T start) const {
  typename interval_set<T>::const_iterator pos(find_inc(start));
  return pos;
}
typename interval_set<T>::const_iterator end() const {
return typename interval_set<T>::const_iterator(m.end());
}

View File

@ -5927,7 +5927,7 @@ void Server::handle_client_rename(MDRequestRef& mdr)
// dest a child of src?
// e.g. mv /usr /usr/foo
CDentry *pdn = destdir->inode->parent;
CDentry *pdn = destdir->inode->get_projected_parent_dn();
while (pdn) {
if (pdn == srcdn) {
dout(7) << "cannot rename item to be a child of itself" << dendl;