Merge pull request #2070 from somnathr/wip-sd-filestore-optimization

Wip sd filestore optimization

Reviewed-by: Samuel Just <sam.just@inktank.com>
This commit is contained in:
Samuel Just 2014-08-15 13:37:54 -07:00
commit cab479367a
17 changed files with 460 additions and 269 deletions

View File

@ -713,6 +713,7 @@ OPTION(filestore_split_multiple, OPT_INT, 2)
OPTION(filestore_update_to, OPT_INT, 1000)
OPTION(filestore_blackhole, OPT_BOOL, false) // drop any new transactions on the floor
OPTION(filestore_fd_cache_size, OPT_INT, 128) // FD lru size
OPTION(filestore_fd_cache_shards, OPT_INT, 16) // FD number of shards
OPTION(filestore_dump_file, OPT_STR, "") // file onto which store transaction dumps
OPTION(filestore_kill_at, OPT_INT, 0) // inject a failure at the n'th opportunity
OPTION(filestore_inject_stall, OPT_INT, 0) // artificially stall for N seconds in op queue thread

View File

@ -43,7 +43,7 @@ class SharedLRU {
}
}
void lru_remove(K key) {
void lru_remove(const K& key) {
typename map<K, typename list<pair<K, VPtr> >::iterator>::iterator i =
contents.find(key);
if (i == contents.end())
@ -53,7 +53,7 @@ class SharedLRU {
contents.erase(i);
}
void lru_add(K key, VPtr val, list<VPtr> *to_release) {
void lru_add(const K& key, const VPtr& val, list<VPtr> *to_release) {
typename map<K, typename list<pair<K, VPtr> >::iterator>::iterator i =
contents.find(key);
if (i != contents.end()) {
@ -66,7 +66,7 @@ class SharedLRU {
}
}
void remove(K key) {
void remove(const K& key) {
Mutex::Locker l(lock);
weak_refs.erase(key);
cond.Signal();
@ -93,7 +93,7 @@ public:
assert(weak_refs.empty());
}
void clear(K key) {
void clear(const K& key) {
VPtr val; // release any ref we have after we drop the lock
{
Mutex::Locker l(lock);
@ -119,7 +119,7 @@ public:
return weak_refs.begin()->first;
}
VPtr lower_bound(K key) {
VPtr lower_bound(const K& key) {
VPtr val;
list<VPtr> to_release;
{
@ -145,7 +145,7 @@ public:
return val;
}
VPtr lookup(K key) {
VPtr lookup(const K& key) {
VPtr val;
list<VPtr> to_release;
{
@ -153,8 +153,9 @@ public:
bool retry = false;
do {
retry = false;
if (weak_refs.count(key)) {
val = weak_refs[key].lock();
typename map<K, WeakVPtr>::iterator i = weak_refs.find(key);
if (i != weak_refs.end()) {
val = i->second.lock();
if (val) {
lru_add(key, val, &to_release);
} else {
@ -168,12 +169,35 @@ public:
return val;
}
VPtr add(K key, V *value) {
/***
* Inserts a key if not present, or bumps it to the front of the LRU if
* it is, and then gives you a reference to the value. If the key already
* existed, you are responsible for deleting the new value you tried to
* insert.
*
* @param key The key to insert
* @param value The value that goes with the key
* @param existed Set to true if the value was already in the
* map, false otherwise
* @return A reference to the map's value for the given key
*/
VPtr add(const K& key, V *value, bool *existed = NULL) {
VPtr val(value, Cleanup(this, key));
list<VPtr> to_release;
{
Mutex::Locker l(lock);
weak_refs.insert(make_pair(key, val));
typename map<K, WeakVPtr>::iterator actual = weak_refs.lower_bound(key);
if (actual != weak_refs.end() && actual->first == key) {
if (existed)
*existed = true;
return actual->second.lock();
}
if (existed)
*existed = false;
weak_refs.insert(actual, make_pair(key, val));
lru_add(key, val, &to_release);
}
return val;

View File

@ -21,6 +21,7 @@
#include "osd/osd_types.h"
#include "include/object.h"
#include "common/RWLock.h"
/**
* CollectionIndex provides an interface for manipulating indexed collections
@ -43,14 +44,14 @@ protected:
/// Returned path
string full_path;
/// Ref to parent Index
ceph::shared_ptr<CollectionIndex> parent_ref;
CollectionIndex* parent_ref;
/// coll_t for parent Index
coll_t parent_coll;
/// Normal Constructor
Path(
string path, ///< [in] Path to return.
ceph::weak_ptr<CollectionIndex> ref) ///< [in] weak_ptr to parent.
CollectionIndex* ref)
: full_path(path), parent_ref(ref), parent_coll(parent_ref->coll()) {}
/// Debugging Constructor
@ -66,11 +67,13 @@ protected:
coll_t coll() const { return parent_coll; }
/// Getter for parent
ceph::shared_ptr<CollectionIndex> get_index() const {
CollectionIndex* get_index() const {
return parent_ref;
}
};
public:
/// Reader/writer lock for this index. Callers (e.g. FileStore) hold it via
/// RWLock::RLocker / RWLock::WLocker, or get_write()/put_write(), around
/// operations on the index.
RWLock access_lock;
/// Type of returned paths
typedef ceph::shared_ptr<Path> IndexedPath;
@ -94,12 +97,6 @@ protected:
*/
virtual coll_t coll() const = 0;
/**
* For setting the internal weak_ptr to a shared_ptr to this.
*
* @see IndexManager
*/
virtual void set_ref(ceph::shared_ptr<CollectionIndex> ref) = 0;
/**
* Initializes the index.
@ -161,7 +158,7 @@ protected:
virtual int split(
uint32_t match, //< [in] value to match
uint32_t bits, //< [in] bits to check
ceph::shared_ptr<CollectionIndex> dest //< [in] destination index
CollectionIndex* dest //< [in] destination index
) { assert(0); return 0; }
@ -183,6 +180,8 @@ protected:
/// Call prior to removing directory
virtual int prep_delete() { return 0; }
/// Constructor: initializes access_lock with the string
/// "CollectionIndex::access_lock" (presumably the lock's name -- confirm
/// against RWLock's constructor).
CollectionIndex():access_lock("CollectionIndex::access_lock"){}
/// Virtual destructor
virtual ~CollectionIndex() {}
};

View File

@ -23,6 +23,7 @@
#include "common/Cond.h"
#include "common/shared_cache.hpp"
#include "include/compat.h"
#include "include/intarith.h"
/**
* FD Cache
@ -49,32 +50,42 @@ public:
};
private:
SharedLRU<ghobject_t, FD> registry;
CephContext *cct;
const int registry_shards;
SharedLRU<ghobject_t, FD> *registry;
public:
FDCache(CephContext *cct) : cct(cct) {
FDCache(CephContext *cct) : cct(cct),
registry_shards(cct->_conf->filestore_fd_cache_shards) {
assert(cct);
cct->_conf->add_observer(this);
registry.set_size(cct->_conf->filestore_fd_cache_size);
registry = new SharedLRU<ghobject_t, FD>[registry_shards];
for (int i = 0; i < registry_shards; ++i) {
registry[i].set_size(
MAX((cct->_conf->filestore_fd_cache_size / registry_shards), 1));
}
}
~FDCache() {
cct->_conf->remove_observer(this);
delete[] registry;
}
typedef ceph::shared_ptr<FD> FDRef;
FDRef lookup(const ghobject_t &hoid) {
return registry.lookup(hoid);
int registry_id = hoid.hobj.hash % registry_shards;
return registry[registry_id].lookup(hoid);
}
FDRef add(const ghobject_t &hoid, int fd) {
return registry.add(hoid, new FD(fd));
FDRef add(const ghobject_t &hoid, int fd, bool *existed) {
int registry_id = hoid.hobj.hash % registry_shards;
return registry[registry_id].add(hoid, new FD(fd), existed);
}
/// clear cached fd for hoid, subsequent lookups will get an empty FD
void clear(const ghobject_t &hoid) {
registry.clear(hoid);
assert(!registry.lookup(hoid));
int registry_id = hoid.hobj.hash % registry_shards;
registry[registry_id].clear(hoid);
assert(!registry[registry_id].lookup(hoid));
}
/// md_config_obs_t
@ -88,7 +99,9 @@ public:
void handle_conf_change(const md_config_t *conf,
const std::set<std::string> &changed) {
if (changed.count("filestore_fd_cache_size")) {
registry.set_size(conf->filestore_fd_cache_size);
for (int i = 0; i < registry_shards; ++i)
registry[i].set_size(
MAX((conf->filestore_fd_cache_size / registry_shards), 1));
}
}

View File

@ -147,9 +147,7 @@ int FileStore::get_cdir(coll_t cid, char *s, int len)
int FileStore::get_index(coll_t cid, Index *index)
{
char path[PATH_MAX];
get_cdir(cid, path, sizeof(path));
int r = index_manager.get_index(cid, path, index);
int r = index_manager.get_index(cid, basedir, index);
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
@ -163,15 +161,14 @@ int FileStore::init_index(coll_t cid)
return r;
}
int FileStore::lfn_find(coll_t cid, const ghobject_t& oid, IndexedPath *path)
int FileStore::lfn_find(const ghobject_t& oid, const Index& index, IndexedPath *path)
{
Index index;
IndexedPath path2;
if (!path)
path = &path2;
int r, exist;
r = get_index(cid, &index);
if (r < 0)
return r;
r = index->lookup(oid, path, &exist);
assert(NULL != index.index);
r = (index.index)->lookup(oid, path, &exist);
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
@ -183,9 +180,8 @@ int FileStore::lfn_find(coll_t cid, const ghobject_t& oid, IndexedPath *path)
int FileStore::lfn_truncate(coll_t cid, const ghobject_t& oid, off_t length)
{
IndexedPath path;
FDRef fd;
int r = lfn_open(cid, oid, false, &fd, &path);
int r = lfn_open(cid, oid, false, &fd);
if (r < 0)
return r;
r = ::ftruncate(**fd, length);
@ -202,7 +198,15 @@ int FileStore::lfn_truncate(coll_t cid, const ghobject_t& oid, off_t length)
int FileStore::lfn_stat(coll_t cid, const ghobject_t& oid, struct stat *buf)
{
IndexedPath path;
int r = lfn_find(cid, oid, &path);
Index index;
int r = get_index(cid, &index);
if (r < 0)
return r;
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(oid, index, &path);
if (r < 0)
return r;
r = ::stat(path->path(), buf);
@ -215,92 +219,112 @@ int FileStore::lfn_open(coll_t cid,
const ghobject_t& oid,
bool create,
FDRef *outfd,
IndexedPath *path,
Index *index)
Index *index)
{
assert(get_allow_sharded_objects() ||
( oid.shard_id == shard_id_t::NO_SHARD &&
oid.generation == ghobject_t::NO_GEN ));
assert(outfd);
int r = 0;
bool need_lock = true;
if (!replaying) {
*outfd = fdcache.lookup(oid);
if (*outfd) {
if (!index) {
return 0;
} else {
if (!((*index).index)) {
r = get_index(cid, index);
return r;
}
}
}
}
int flags = O_RDWR;
if (create)
flags |= O_CREAT;
Index index2;
if (!index) {
index = &index2;
}
int r = 0;
if (!(*index)) {
if (!((*index).index)) {
r = get_index(cid, index);
} else {
need_lock = false;
}
int fd, exist;
if (!replaying) {
Mutex::Locker l(fdcache_lock);
*outfd = fdcache.lookup(oid);
if (*outfd)
return 0;
assert(NULL != (*index).index);
if (need_lock) {
((*index).index)->access_lock.get_write();
}
{
IndexedPath path2;
if (!path)
path = &path2;
if (r < 0) {
derr << "error getting collection index for " << cid
<< ": " << cpp_strerror(-r) << dendl;
goto fail;
}
r = (*index)->lookup(oid, path, &exist);
if (r < 0) {
derr << "could not find " << oid << " in index: "
<< cpp_strerror(-r) << dendl;
goto fail;
}
r = ::open((*path)->path(), flags, 0644);
if (r < 0) {
r = -errno;
dout(10) << "error opening file " << (*path)->path() << " with flags="
<< flags << ": " << cpp_strerror(-r) << dendl;
goto fail;
}
fd = r;
if (create && (!exist)) {
r = (*index)->created(oid, (*path)->path());
if (r < 0) {
VOID_TEMP_FAILURE_RETRY(::close(fd));
derr << "error creating " << oid << " (" << (*path)->path()
<< ") in index: " << cpp_strerror(-r) << dendl;
goto fail;
}
r = chain_fsetxattr(fd, XATTR_SPILL_OUT_NAME,
XATTR_NO_SPILL_OUT, sizeof(XATTR_NO_SPILL_OUT));
if (r < 0) {
VOID_TEMP_FAILURE_RETRY(::close(fd));
derr << "error setting spillout xattr for oid " << oid << " (" << (*path)->path()
<< "):" << cpp_strerror(-r) << dendl;
goto fail;
}
}
IndexedPath path2;
IndexedPath *path = &path2;
if (r < 0) {
derr << "error getting collection index for " << cid
<< ": " << cpp_strerror(-r) << dendl;
goto fail;
}
r = (*index)->lookup(oid, path, &exist);
if (r < 0) {
derr << "could not find " << oid << " in index: "
<< cpp_strerror(-r) << dendl;
goto fail;
}
if (!replaying) {
Mutex::Locker l(fdcache_lock);
*outfd = fdcache.lookup(oid);
if (*outfd) {
r = ::open((*path)->path(), flags, 0644);
if (r < 0) {
r = -errno;
dout(10) << "error opening file " << (*path)->path() << " with flags="
<< flags << ": " << cpp_strerror(-r) << dendl;
goto fail;
}
fd = r;
if (create && (!exist)) {
r = (*index)->created(oid, (*path)->path());
if (r < 0) {
VOID_TEMP_FAILURE_RETRY(::close(fd));
return 0;
} else {
*outfd = fdcache.add(oid, fd);
derr << "error creating " << oid << " (" << (*path)->path()
<< ") in index: " << cpp_strerror(-r) << dendl;
goto fail;
}
r = chain_fsetxattr(fd, XATTR_SPILL_OUT_NAME,
XATTR_NO_SPILL_OUT, sizeof(XATTR_NO_SPILL_OUT));
if (r < 0) {
VOID_TEMP_FAILURE_RETRY(::close(fd));
derr << "error setting spillout xattr for oid " << oid << " (" << (*path)->path()
<< "):" << cpp_strerror(-r) << dendl;
goto fail;
}
}
if (!replaying) {
bool existed;
*outfd = fdcache.add(oid, fd, &existed);
if (existed) {
TEMP_FAILURE_RETRY(::close(fd));
}
} else {
*outfd = FDRef(new FDCache::FD(fd));
}
if (need_lock) {
((*index).index)->access_lock.put_write();
}
return 0;
fail:
if (need_lock) {
((*index).index)->access_lock.put_write();
}
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
@ -315,6 +339,7 @@ int FileStore::lfn_link(coll_t c, coll_t newcid, const ghobject_t& o, const ghob
IndexedPath path_new, path_old;
int exist;
int r;
bool index_same = false;
if (c < newcid) {
r = get_index(newcid, &index_new);
if (r < 0)
@ -327,6 +352,7 @@ int FileStore::lfn_link(coll_t c, coll_t newcid, const ghobject_t& o, const ghob
if (r < 0)
return r;
index_new = index_old;
index_same = true;
} else {
r = get_index(c, &index_old);
if (r < 0)
@ -336,33 +362,73 @@ int FileStore::lfn_link(coll_t c, coll_t newcid, const ghobject_t& o, const ghob
return r;
}
r = index_old->lookup(o, &path_old, &exist);
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
if (!exist)
return -ENOENT;
assert(NULL != index_old.index);
assert(NULL != index_new.index);
r = index_new->lookup(newoid, &path_new, &exist);
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
if (exist)
return -EEXIST;
if (!index_same) {
dout(25) << "lfn_link path_old: " << path_old << dendl;
dout(25) << "lfn_link path_new: " << path_new << dendl;
r = ::link(path_old->path(), path_new->path());
if (r < 0)
return -errno;
RWLock::RLocker l1((index_old.index)->access_lock);
r = index_new->created(newoid, path_new->path());
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
r = index_old->lookup(o, &path_old, &exist);
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
if (!exist)
return -ENOENT;
RWLock::WLocker l2((index_new.index)->access_lock);
r = index_new->lookup(newoid, &path_new, &exist);
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
if (exist)
return -EEXIST;
dout(25) << "lfn_link path_old: " << path_old << dendl;
dout(25) << "lfn_link path_new: " << path_new << dendl;
r = ::link(path_old->path(), path_new->path());
if (r < 0)
return -errno;
r = index_new->created(newoid, path_new->path());
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
} else {
RWLock::WLocker l1((index_old.index)->access_lock);
r = index_old->lookup(o, &path_old, &exist);
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
if (!exist)
return -ENOENT;
r = index_new->lookup(newoid, &path_new, &exist);
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
if (exist)
return -EEXIST;
dout(25) << "lfn_link path_old: " << path_old << dendl;
dout(25) << "lfn_link path_new: " << path_new << dendl;
r = ::link(path_old->path(), path_new->path());
if (r < 0)
return -errno;
r = index_new->created(newoid, path_new->path());
if (r < 0) {
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
}
return 0;
}
@ -374,7 +440,10 @@ int FileStore::lfn_unlink(coll_t cid, const ghobject_t& o,
int r = get_index(cid, &index);
if (r < 0)
return r;
Mutex::Locker l(fdcache_lock);
assert(NULL != index.index);
RWLock::WLocker l((index.index)->access_lock);
{
IndexedPath path;
int exist;
@ -434,7 +503,6 @@ FileStore::FileStore(const std::string &base, const std::string &jdev, const cha
sync_entry_timeo_lock("sync_entry_timeo_lock"),
timer(g_ceph_context, sync_entry_timeo_lock),
stop(false), sync_thread(this),
fdcache_lock("fdcache_lock"),
fdcache(g_ceph_context),
wbthrottle(g_ceph_context),
default_osr("default"),
@ -1446,6 +1514,9 @@ int FileStore::mount()
<< " with error: " << ret << dendl;
goto close_current_fd;
}
assert(NULL != index.index);
RWLock::WLocker l((index.index)->access_lock);
index->cleanup();
}
}
@ -2885,12 +2956,14 @@ int FileStore::_clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& ne
FDRef o, n;
{
Index index;
IndexedPath from, to;
r = lfn_open(cid, oldoid, false, &o, &from, &index);
r = lfn_open(cid, oldoid, false, &o, &index);
if (r < 0) {
goto out2;
}
r = lfn_open(cid, newoid, true, &n, &to, &index);
assert(NULL != (index.index));
RWLock::WLocker l((index.index)->access_lock);
r = lfn_open(cid, newoid, true, &n, &index);
if (r < 0) {
goto out;
}
@ -3700,19 +3773,21 @@ int FileStore::getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>
dout(10) << __func__ << " could not get index r = " << r << dendl;
goto out;
}
r = object_map->get_all_xattrs(oid, &omap_attrs);
if (r < 0 && r != -ENOENT) {
dout(10) << __func__ << " could not get omap_attrs r = " << r << dendl;
goto out;
}
{
r = object_map->get_all_xattrs(oid, &omap_attrs);
if (r < 0 && r != -ENOENT) {
dout(10) << __func__ << " could not get omap_attrs r = " << r << dendl;
goto out;
}
r = object_map->get_xattrs(oid, omap_attrs, &omap_aset);
if (r < 0 && r != -ENOENT) {
dout(10) << __func__ << " could not get omap_attrs r = " << r << dendl;
goto out;
r = object_map->get_xattrs(oid, omap_attrs, &omap_aset);
if (r < 0 && r != -ENOENT) {
dout(10) << __func__ << " could not get omap_attrs r = " << r << dendl;
goto out;
}
if (r == -ENOENT)
r = 0;
}
if (r == -ENOENT)
r = 0;
assert(omap_attrs.size() == omap_aset.size());
for (map<string, bufferlist>::iterator i = omap_aset.begin();
i != omap_aset.end();
@ -3921,22 +3996,23 @@ int FileStore::_rmattrs(coll_t cid, const ghobject_t& oid,
dout(10) << __func__ << " could not get index r = " << r << dendl;
goto out_close;
}
r = object_map->get_all_xattrs(oid, &omap_attrs);
if (r < 0 && r != -ENOENT) {
dout(10) << __func__ << " could not get omap_attrs r = " << r << dendl;
assert(!m_filestore_fail_eio || r != -EIO);
goto out_close;
}
r = object_map->remove_xattrs(oid, omap_attrs, &spos);
if (r < 0 && r != -ENOENT) {
dout(10) << __func__ << " could not remove omap_attrs r = " << r << dendl;
goto out_close;
}
if (r == -ENOENT)
r = 0;
chain_fsetxattr(**fd, XATTR_SPILL_OUT_NAME, XATTR_NO_SPILL_OUT,
{
r = object_map->get_all_xattrs(oid, &omap_attrs);
if (r < 0 && r != -ENOENT) {
dout(10) << __func__ << " could not get omap_attrs r = " << r << dendl;
assert(!m_filestore_fail_eio || r != -EIO);
goto out_close;
}
r = object_map->remove_xattrs(oid, omap_attrs, &spos);
if (r < 0 && r != -ENOENT) {
dout(10) << __func__ << " could not remove omap_attrs r = " << r << dendl;
goto out_close;
}
if (r == -ENOENT)
r = 0;
chain_fsetxattr(**fd, XATTR_SPILL_OUT_NAME, XATTR_NO_SPILL_OUT,
sizeof(XATTR_NO_SPILL_OUT));
}
out_close:
lfn_close(fd);
@ -4170,6 +4246,10 @@ int FileStore::collection_version_current(coll_t c, uint32_t *version)
int r = get_index(c, &index);
if (r < 0)
return r;
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
*version = index->collection_version();
if (*version == target_version)
return 1;
@ -4264,6 +4344,10 @@ bool FileStore::collection_empty(coll_t c)
int r = get_index(c, &index);
if (r < 0)
return false;
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
vector<ghobject_t> ls;
collection_list_handle_t handle;
r = index->collection_list_partial(ghobject_t(), 1, 1, 0, &ls, NULL);
@ -4317,6 +4401,10 @@ int FileStore::collection_list_partial(coll_t c, ghobject_t start,
int r = get_index(c, &index);
if (r < 0)
return r;
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = index->collection_list_partial(start,
min, max, seq,
ls, next);
@ -4335,6 +4423,10 @@ int FileStore::collection_list(coll_t c, vector<ghobject_t>& ls)
int r = get_index(c, &index);
if (r < 0)
return r;
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = index->collection_list(&ls);
assert(!m_filestore_fail_eio || r != -EIO);
return r;
@ -4345,10 +4437,17 @@ int FileStore::omap_get(coll_t c, const ghobject_t &hoid,
map<string, bufferlist> *out)
{
dout(15) << __func__ << " " << c << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(c, hoid, &path);
Index index;
int r = get_index(c, &index);
if (r < 0)
return r;
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return r;
}
r = object_map->get(hoid, header, out);
if (r < 0 && r != -ENOENT) {
assert(!m_filestore_fail_eio || r != -EIO);
@ -4364,10 +4463,17 @@ int FileStore::omap_get_header(
bool allow_eio)
{
dout(15) << __func__ << " " << c << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(c, hoid, &path);
Index index;
int r = get_index(c, &index);
if (r < 0)
return r;
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return r;
}
r = object_map->get_header(hoid, bl);
if (r < 0 && r != -ENOENT) {
assert(allow_eio || !m_filestore_fail_eio || r != -EIO);
@ -4379,10 +4485,17 @@ int FileStore::omap_get_header(
int FileStore::omap_get_keys(coll_t c, const ghobject_t &hoid, set<string> *keys)
{
dout(15) << __func__ << " " << c << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(c, hoid, &path);
Index index;
int r = get_index(c, &index);
if (r < 0)
return r;
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return r;
}
r = object_map->get_keys(hoid, keys);
if (r < 0 && r != -ENOENT) {
assert(!m_filestore_fail_eio || r != -EIO);
@ -4396,10 +4509,17 @@ int FileStore::omap_get_values(coll_t c, const ghobject_t &hoid,
map<string, bufferlist> *out)
{
dout(15) << __func__ << " " << c << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(c, hoid, &path);
Index index;
int r = get_index(c, &index);
if (r < 0)
return r;
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return r;
}
r = object_map->get_values(hoid, keys, out);
if (r < 0 && r != -ENOENT) {
assert(!m_filestore_fail_eio || r != -EIO);
@ -4413,10 +4533,18 @@ int FileStore::omap_check_keys(coll_t c, const ghobject_t &hoid,
set<string> *out)
{
dout(15) << __func__ << " " << c << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(c, hoid, &path);
Index index;
int r = get_index(c, &index);
if (r < 0)
return r;
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return r;
}
r = object_map->check_keys(hoid, keys, out);
if (r < 0 && r != -ENOENT) {
assert(!m_filestore_fail_eio || r != -EIO);
@ -4429,10 +4557,17 @@ ObjectMap::ObjectMapIterator FileStore::get_omap_iterator(coll_t c,
const ghobject_t &hoid)
{
dout(15) << __func__ << " " << c << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(c, hoid, &path);
Index index;
int r = get_index(c, &index);
if (r < 0)
return ObjectMap::ObjectMapIterator();
return ObjectMap::ObjectMapIterator();
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return ObjectMap::ObjectMapIterator();
}
return object_map->get_iterator(hoid);
}
@ -4482,6 +4617,9 @@ int FileStore::_destroy_collection(coll_t c)
int r = get_index(c, &from);
if (r < 0)
return r;
assert(NULL != from.index);
RWLock::WLocker l((from.index)->access_lock);
r = from->prep_delete();
if (r < 0)
return r;
@ -4651,10 +4789,17 @@ void FileStore::_inject_failure()
int FileStore::_omap_clear(coll_t cid, const ghobject_t &hoid,
const SequencerPosition &spos) {
dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(cid, hoid, &path);
Index index;
int r = get_index(cid, &index);
if (r < 0)
return r;
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return r;
}
r = object_map->clear_keys_header(hoid, &spos);
if (r < 0 && r != -ENOENT)
return r;
@ -4665,10 +4810,17 @@ int FileStore::_omap_setkeys(coll_t cid, const ghobject_t &hoid,
const map<string, bufferlist> &aset,
const SequencerPosition &spos) {
dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(cid, hoid, &path);
Index index;
int r = get_index(cid, &index);
if (r < 0)
return r;
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return r;
}
return object_map->set_keys(hoid, aset, &spos);
}
@ -4676,10 +4828,17 @@ int FileStore::_omap_rmkeys(coll_t cid, const ghobject_t &hoid,
const set<string> &keys,
const SequencerPosition &spos) {
dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(cid, hoid, &path);
Index index;
int r = get_index(cid, &index);
if (r < 0)
return r;
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return r;
}
r = object_map->rm_keys(hoid, keys, &spos);
if (r < 0 && r != -ENOENT)
return r;
@ -4708,10 +4867,17 @@ int FileStore::_omap_setheader(coll_t cid, const ghobject_t &hoid,
const SequencerPosition &spos)
{
dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
IndexedPath path;
int r = lfn_find(cid, hoid, &path);
Index index;
int r = get_index(cid, &index);
if (r < 0)
return r;
{
assert(NULL != index.index);
RWLock::RLocker l((index.index)->access_lock);
r = lfn_find(hoid, index);
if (r < 0)
return r;
}
return object_map->set_header(hoid, bl, &spos);
}
@ -4756,8 +4922,15 @@ int FileStore::_split_collection(coll_t cid,
if (!r)
r = get_index(dest, &to);
if (!r)
r = from->split(rem, bits, to);
if (!r) {
assert(NULL != from.index);
RWLock::WLocker l1((from.index)->access_lock);
assert(NULL != to.index);
RWLock::WLocker l2((to.index)->access_lock);
r = from->split(rem, bits, to.index);
}
_close_replay_guard(cid, spos);
_close_replay_guard(dest, spos);
@ -4837,8 +5010,15 @@ int FileStore::_split_collection_create(coll_t cid,
if (!r)
r = get_index(dest, &to);
if (!r)
r = from->split(rem, bits, to);
if (!r) {
assert(NULL != from.index);
RWLock::WLocker l1((from.index)->access_lock);
assert(NULL != to.index);
RWLock::WLocker l2((to.index)->access_lock);
r = from->split(rem, bits, to.index);
}
_close_replay_guard(cid, spos);
_close_replay_guard(dest, spos);

View File

@ -321,7 +321,6 @@ private:
friend ostream& operator<<(ostream& out, const OpSequencer& s);
Mutex fdcache_lock;
FDCache fdcache;
WBThrottle wbthrottle;
@ -383,7 +382,8 @@ private:
PerfCounters *logger;
public:
int lfn_find(coll_t cid, const ghobject_t& oid, IndexedPath *path);
int lfn_find(const ghobject_t& oid, const Index& index,
IndexedPath *path = NULL);
int lfn_truncate(coll_t cid, const ghobject_t& oid, off_t length);
int lfn_stat(coll_t cid, const ghobject_t& oid, struct stat *buf);
int lfn_open(
@ -391,8 +391,8 @@ public:
const ghobject_t& oid,
bool create,
FDRef *outfd,
IndexedPath *path = 0,
Index *index = 0);
void lfn_close(FDRef fd);
int lfn_link(coll_t c, coll_t newcid, const ghobject_t& o, const ghobject_t& newoid) ;
int lfn_unlink(coll_t cid, const ghobject_t& o, const SequencerPosition &spos,

View File

@ -18,7 +18,6 @@
#endif
#include "FlatIndex.h"
#include "CollectionIndex.h"
#include "common/ceph_crypto.h"
#include "osd/osd_types.h"
#include <errno.h>
@ -49,9 +48,6 @@ using ceph::crypto::SHA1;
#define FILENAME_PREFIX_LEN (FILENAME_SHORT_LEN - FILENAME_HASH_LEN - (sizeof(FILENAME_COOKIE) - 1) - FILENAME_EXTRA)
void FlatIndex::set_ref(ceph::shared_ptr<CollectionIndex> ref) {
self_ref = ref;
}
int FlatIndex::cleanup() {
return 0;
@ -356,7 +352,7 @@ int FlatIndex::lookup(const ghobject_t &hoid, IndexedPath *path, int *exist) {
sizeof(long_fn), exist, &is_lfn);
if (r < 0)
return r;
*path = IndexedPath(new Path(string(short_fn), self_ref));
*path = IndexedPath(new Path(string(short_fn), this));
return 0;
}

View File

@ -29,7 +29,6 @@
* This class should only be used for converting old filestores.
*/
class FlatIndex : public CollectionIndex {
ceph::weak_ptr<CollectionIndex> self_ref;
string base_path;
coll_t collection;
public:
@ -41,9 +40,6 @@ public:
coll_t coll() const { return collection; }
/// @see CollectionIndex
void set_ref(ceph::shared_ptr<CollectionIndex> ref);
/// @see CollectionIndex
int cleanup();

View File

@ -228,12 +228,12 @@ int HashIndex::col_split_level(
int HashIndex::_split(
uint32_t match,
uint32_t bits,
ceph::shared_ptr<CollectionIndex> dest) {
CollectionIndex* dest) {
assert(collection_version() == dest->collection_version());
unsigned mkdirred = 0;
return col_split_level(
*this,
*static_cast<HashIndex*>(dest.get()),
*static_cast<HashIndex*>(dest),
vector<string>(),
bits,
match,

View File

@ -156,7 +156,7 @@ public:
int _split(
uint32_t match,
uint32_t bits,
ceph::shared_ptr<CollectionIndex> dest
CollectionIndex* dest
);
protected:

View File

@ -61,13 +61,18 @@ static int get_version(const char *path, uint32_t *version) {
return 0;
}
void IndexManager::put_index(coll_t c) {
Mutex::Locker l(lock);
assert(col_indices.count(c));
col_indices.erase(c);
cond.Signal();
IndexManager::~IndexManager() {
  // Every CollectionIndex stored in col_indices was allocated with new by
  // build_index(); release each one, null its slot, then empty the map.
  typedef map<coll_t, CollectionIndex* >::iterator index_iter;
  index_iter cur = col_indices.begin();
  while (cur != col_indices.end()) {
    delete cur->second;
    cur->second = NULL;
    cur++;
  }
  col_indices.clear();
}
int IndexManager::init_index(coll_t c, const char *path, uint32_t version) {
Mutex::Locker l(lock);
int r = set_version(path, version);
@ -80,7 +85,7 @@ int IndexManager::init_index(coll_t c, const char *path, uint32_t version) {
return index.init();
}
int IndexManager::build_index(coll_t c, const char *path, Index *index) {
int IndexManager::build_index(coll_t c, const char *path, CollectionIndex **index) {
if (upgrade) {
// Need to check the collection generation
int r;
@ -91,17 +96,15 @@ int IndexManager::build_index(coll_t c, const char *path, Index *index) {
switch (version) {
case CollectionIndex::FLAT_INDEX_TAG: {
*index = Index(new FlatIndex(c, path),
RemoveOnDelete(c, this));
*index = new FlatIndex(c, path);
return 0;
}
case CollectionIndex::HASH_INDEX_TAG: // fall through
case CollectionIndex::HASH_INDEX_TAG_2: // fall through
case CollectionIndex::HOBJECT_WITH_POOL: {
// Must be a HashIndex
*index = Index(new HashIndex(c, path, g_conf->filestore_merge_threshold,
g_conf->filestore_split_multiple, version),
RemoveOnDelete(c, this));
*index = new HashIndex(c, path, g_conf->filestore_merge_threshold,
g_conf->filestore_split_multiple, version);
return 0;
}
default: assert(0);
@ -109,28 +112,29 @@ int IndexManager::build_index(coll_t c, const char *path, Index *index) {
} else {
// No need to check
*index = Index(new HashIndex(c, path, g_conf->filestore_merge_threshold,
*index = new HashIndex(c, path, g_conf->filestore_merge_threshold,
g_conf->filestore_split_multiple,
CollectionIndex::HOBJECT_WITH_POOL,
g_conf->filestore_index_retry_probability),
RemoveOnDelete(c, this));
g_conf->filestore_index_retry_probability);
return 0;
}
}
int IndexManager::get_index(coll_t c, const char *path, Index *index) {
int IndexManager::get_index(coll_t c, const string& baseDir, Index *index) {
Mutex::Locker l(lock);
while (1) {
if (!col_indices.count(c)) {
int r = build_index(c, path, index);
if (r < 0)
return r;
(*index)->set_ref(*index);
col_indices[c] = (*index);
break;
} else {
cond.Wait(lock);
}
map<coll_t, CollectionIndex* > ::iterator it = col_indices.find(c);
if (it == col_indices.end()) {
char path[PATH_MAX];
snprintf(path, sizeof(path), "%s/current/%s", baseDir.c_str(), c.to_str().c_str());
CollectionIndex* colIndex = NULL;
int r = build_index(c, path, &colIndex);
if (r < 0)
return r;
col_indices[c] = colIndex;
index->index = colIndex;
} else {
index->index = it->second;
}
return 0;
}

View File

@ -28,7 +28,17 @@
/// Public type for Index
typedef ceph::shared_ptr<CollectionIndex> Index;
/**
 * Non-owning handle to a CollectionIndex.
 *
 * Wraps a raw pointer (NULL when default-constructed) and forwards
 * operator-> and operator* to it. The handle itself never deletes the
 * pointee.
 */
struct Index {
  /// Wrapped pointer; NULL until an index has been assigned.
  CollectionIndex *index;

  /// Construct an empty handle (index == NULL).
  Index() : index(NULL) {}
  /// Wrap an existing CollectionIndex pointer; no ownership is taken.
  Index(CollectionIndex* index) : index(index) {}

  // Const-qualified so that a `const Index&` can be dereferenced directly.
  // Constness is shallow: the pointee remains mutable through the handle.
  CollectionIndex *operator->() const { return index; }
  CollectionIndex &operator*() const { return *index; }
};
/**
* Encapsulates mutual exclusion for CollectionIndexes.
*
@ -37,39 +47,12 @@ typedef ceph::shared_ptr<CollectionIndex> Index;
* that path) may result in the path becoming invalid. Thus, during
* the lifetime of a CollectionIndex object and any paths returned
* by it, no other concurrent accesses may be allowed.
*
* This is enforced using shared_ptr. A shared_ptr<CollectionIndex>
* is returned from get_index. Any paths generated using that object
* carry a reference to the parrent index. Once all
* shared_ptr<CollectionIndex> references have expired, the destructor
* removes the weak_ptr from col_indices and wakes waiters.
* This is enforced by using CollectionIndex::access_lock
*/
class IndexManager {
Mutex lock; ///< Lock for Index Manager
Cond cond; ///< Cond for waiters on col_indices
bool upgrade;
/// Currently in use CollectionIndices
map<coll_t,ceph::weak_ptr<CollectionIndex> > col_indices;
/// Cleans up state for c @see RemoveOnDelete
void put_index(
coll_t c ///< Put the index for c
);
/// Callback for shared_ptr release @see get_index
class RemoveOnDelete {
public:
coll_t c;
IndexManager *manager;
RemoveOnDelete(coll_t c, IndexManager *manager) :
c(c), manager(manager) {}
void operator()(CollectionIndex *index) {
manager->put_index(c);
delete index;
}
};
map<coll_t, CollectionIndex* > col_indices;
/**
* Index factory
@ -82,12 +65,14 @@ class IndexManager {
* @param [out] index Index for c
* @return error code
*/
int build_index(coll_t c, const char *path, Index *index);
int build_index(coll_t c, const char *path, CollectionIndex **index);
public:
/// Constructor
IndexManager(bool upgrade) : lock("IndexManager lock"),
upgrade(upgrade) {}
~IndexManager();
/**
* Reserve and return index for c
*
@ -96,7 +81,7 @@ public:
* @param [out] index Index for c
* @return error code
*/
int get_index(coll_t c, const char *path, Index *index);
int get_index(coll_t c, const string& baseDir, Index *index);
/**
* Initialize index for collection c at path

View File

@ -74,10 +74,6 @@ struct FDCloser {
/* Public methods */
void LFNIndex::set_ref(ceph::shared_ptr<CollectionIndex> ref)
{
self_ref = ref;
}
int LFNIndex::init()
{
@ -142,7 +138,7 @@ int LFNIndex::lookup(const ghobject_t &oid,
} else {
*exist = 1;
}
*out_path = IndexedPath(new Path(full_path, self_ref));
*out_path = IndexedPath(new Path(full_path, this));
r = 0;
);
}

View File

@ -98,8 +98,6 @@ class LFNIndex : public CollectionIndex {
/// Path to Index base.
const string base_path;
/// For reference counting the collection @see Path
ceph::weak_ptr<CollectionIndex> self_ref;
protected:
const uint32_t index_version;
@ -155,9 +153,6 @@ public:
/// Virtual destructor
virtual ~LFNIndex() {}
/// @see CollectionIndex
void set_ref(ceph::shared_ptr<CollectionIndex> ref);
/// @see CollectionIndex
int init();
@ -200,14 +195,14 @@ public:
virtual int _split(
uint32_t match, //< [in] value to match
uint32_t bits, //< [in] bits to check
ceph::shared_ptr<CollectionIndex> dest //< [in] destination index
CollectionIndex* dest //< [in] destination index
) = 0;
/// @see CollectionIndex
int split(
uint32_t match,
uint32_t bits,
ceph::shared_ptr<CollectionIndex> dest
CollectionIndex* dest
) {
WRAP_RETRY(
r = _split(match, bits, dest);

View File

@ -1154,7 +1154,11 @@ OSDMapRef OSDService::_add_map(OSDMap *o)
OSDMap::dedup(for_dedup.get(), o);
}
}
OSDMapRef l = map_cache.add(e, o);
bool existed;
OSDMapRef l = map_cache.add(e, o, &existed);
if (existed) {
delete o;
}
return l;
}

View File

@ -70,7 +70,6 @@ TEST(FlatIndex, created_unlink) {
//
{
CollectionIndex::IndexedPath indexed_path;
index->set_ref(index);
const std::string object_name(10, 'A');
ghobject_t hoid(hobject_t(object_t(object_name), key, CEPH_NOSNAP, hash, pool, ""));
int exists;
@ -88,7 +87,6 @@ TEST(FlatIndex, created_unlink) {
//
{
CollectionIndex::IndexedPath indexed_path;
index->set_ref(index);
const std::string object_name(1024, 'A');
ghobject_t hoid(hobject_t(object_t(object_name), key, CEPH_NOSNAP, hash, pool, ""));
int exists;

View File

@ -42,7 +42,7 @@ public:
virtual int _split(
uint32_t match,
uint32_t bits,
ceph::shared_ptr<CollectionIndex> dest
CollectionIndex* dest
) { return 0; }
void test_generate_and_parse(const ghobject_t &hoid, const std::string &mangled_expected) {