Merge PR #31778 into master

* refs/pull/31778/head:
	os/bluestore: pin onodes as they are added to the cache
	Revert "Revert "Merge pull request #30964 from markhpc/wip-bs-cache-trim-pinned""

Reviewed-by: Mark Nelson <mnelson@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
Sage Weil 2019-11-23 20:30:28 -06:00
commit d6f5918850
5 changed files with 115 additions and 40 deletions
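The commit message above summarizes the approach: instead of skipping pinned onodes while trimming (the reverted #30964 behavior), each onode cache shard keeps two boost::intrusive lists, and an onode with more than one reference is moved off the LRU onto a separate pin list, so _trim_to() only ever walks evictable entries. Below is a minimal standalone sketch of that two-list idea under those assumptions; the names (Node, Shard) and the simplified trim loop are illustrative stand-ins, not the BlueStore types, and locking is omitted.

#include <boost/intrusive/list.hpp>
#include <atomic>
#include <cassert>
#include <iostream>

struct Node {
  std::atomic_int nref{0};
  bool pinned = false;
  boost::intrusive::list_member_hook<> lru_item, pin_item;
};

typedef boost::intrusive::list<
  Node,
  boost::intrusive::member_hook<
    Node, boost::intrusive::list_member_hook<>, &Node::lru_item>> lru_list_t;
typedef boost::intrusive::list<
  Node,
  boost::intrusive::member_hook<
    Node, boost::intrusive::list_member_hook<>, &Node::pin_item>> pin_list_t;

struct Shard {
  lru_list_t lru;
  pin_list_t pin_list;

  void add(Node& n) {
    // Referenced entries go straight to the pin list and never appear on the LRU.
    if (n.nref > 1) {
      pin_list.push_front(n);
      n.pinned = true;
    } else {
      lru.push_front(n);
    }
  }
  void pin(Node& n) {
    if (n.pinned) return;
    lru.erase(lru.iterator_to(n));
    pin_list.push_front(n);
    n.pinned = true;
  }
  void unpin(Node& n) {
    if (!n.pinned) return;
    pin_list.erase(pin_list.iterator_to(n));
    lru.push_front(n);
    n.pinned = false;
  }
  // Trim only walks the LRU; pinned entries are untouchable by construction.
  size_t trim_to(size_t new_size) {
    size_t evicted = 0;
    while (lru.size() > new_size) {
      lru.pop_back();
      ++evicted;
    }
    return evicted;
  }
};

int main() {
  Shard s;
  Node a, b;
  a.nref = 2;   // one reference beyond the cache's own
  b.nref = 1;
  s.add(a);
  s.add(b);
  s.trim_to(0);
  std::cout << "lru=" << s.lru.size() << " pinned=" << s.pin_list.size() << "\n";
  assert(s.lru.size() == 0 && s.pin_list.size() == 1);
  s.pin_list.clear();  // unlink before the nodes are destroyed
  return 0;
}

Compared with the reverted skip-pinned approach, trim cost in this scheme no longer depends on how many pinned onodes happen to sit at the cold end of the LRU.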

src/common/options.cc

@@ -2774,6 +2774,11 @@ std::vector<Option> get_global_options() {
.set_default(true)
.set_description(""),
Option("osd_num_cache_shards", Option::TYPE_SIZE, Option::LEVEL_ADVANCED)
.set_default(32)
.set_flag(Option::FLAG_STARTUP)
.set_description("The number of cache shards to use in the object store."),
Option("osd_op_num_threads_per_shard", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(0)
.set_flag(Option::FLAG_STARTUP)

src/os/bluestore/BlueStore.cc

@@ -841,58 +841,88 @@ struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard {
BlueStore::Onode,
boost::intrusive::list_member_hook<>,
&BlueStore::Onode::lru_item> > list_t;
typedef boost::intrusive::list<
BlueStore::Onode,
boost::intrusive::member_hook<
BlueStore::Onode,
boost::intrusive::list_member_hook<>,
&BlueStore::Onode::pin_item> > pin_list_t;
list_t lru;
pin_list_t pin_list;
explicit LruOnodeCacheShard(CephContext *cct) : BlueStore::OnodeCacheShard(cct) {}
void _add(BlueStore::OnodeRef& o, int level) override
{
(level > 0) ? lru.push_front(*o) : lru.push_back(*o);
ceph_assert(o->s == nullptr);
o->s = this;
if (o->nref > 1) {
pin_list.push_front(*o);
o->pinned = true;
num_pinned = pin_list.size();
} else {
(level > 0) ? lru.push_front(*o) : lru.push_back(*o);
}
num = lru.size();
}
void _rm(BlueStore::OnodeRef& o) override
{
lru.erase(lru.iterator_to(*o));
o->s = nullptr;
if (o->pinned) {
o->pinned = false;
pin_list.erase(pin_list.iterator_to(*o));
} else {
lru.erase(lru.iterator_to(*o));
}
num = lru.size();
num_pinned = pin_list.size();
}
void _touch(BlueStore::OnodeRef& o) override
{
if (o->pinned) {
return;
}
lru.erase(lru.iterator_to(*o));
lru.push_front(*o);
num = lru.size();
}
void _trim_to(uint64_t max) override
void _pin(BlueStore::Onode& o) override
{
if (max >= lru.size()) {
if (o.pinned == true) {
return;
}
lru.erase(lru.iterator_to(o));
pin_list.push_front(o);
o.pinned = true;
num = lru.size();
num_pinned = pin_list.size();
dout(30) << __func__ << " " << o.oid << " pinned" << dendl;
}
void _unpin(BlueStore::Onode& o) override
{
if (o.pinned == false) {
return;
}
pin_list.erase(pin_list.iterator_to(o));
lru.push_front(o);
o.pinned = false;
num = lru.size();
num_pinned = pin_list.size();
dout(30) << __func__ << " " << o.oid << " unpinned" << dendl;
}
void _trim_to(uint64_t new_size) override
{
if (new_size >= lru.size()) {
return; // don't even try
}
uint64_t n = lru.size() - max;
uint64_t n = lru.size() - new_size;
auto p = lru.end();
ceph_assert(p != lru.begin());
--p;
int skipped = 0;
int max_skipped = g_conf()->bluestore_cache_trim_max_skip_pinned;
while (n > 0) {
BlueStore::Onode *o = &*p;
int refs = o->nref.load();
if (refs > 1) {
dout(20) << __func__ << " " << o->oid << " has " << refs
<< " refs, skipping" << dendl;
if (++skipped >= max_skipped) {
dout(20) << __func__ << " maximum skip pinned reached; stopping with "
<< n << " left to trim" << dendl;
break;
}
if (p == lru.begin()) {
break;
} else {
p--;
n--;
continue;
}
}
dout(30) << __func__ << " rm " << o->oid << dendl;
if (p != lru.begin()) {
lru.erase(p--);
@@ -900,6 +930,7 @@ struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard {
lru.erase(p);
ceph_assert(n == 1);
}
o->s = nullptr;
o->get(); // paranoia
o->c->onode_map.remove(o->oid);
o->put();
@@ -907,9 +938,10 @@ struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard {
}
num = lru.size();
}
void add_stats(uint64_t *onodes) override
void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) override
{
*onodes += num;
*onodes += num + num_pinned;
*pinned_onodes += num_pinned;
}
};
@@ -4553,6 +4585,8 @@ void BlueStore::_init_logger()
b.add_u64(l_bluestore_onodes, "bluestore_onodes",
"Number of onodes in cache");
b.add_u64(l_bluestore_pinned_onodes, "bluestore_pinned_onodes",
"Number of pinned onodes in cache");
b.add_u64_counter(l_bluestore_onode_hits, "bluestore_onode_hits",
"Sum for onode-lookups hit in the cache");
b.add_u64_counter(l_bluestore_onode_misses, "bluestore_onode_misses",
@@ -9173,18 +9207,20 @@ void BlueStore::_reap_collections()
void BlueStore::_update_cache_logger()
{
uint64_t num_onodes = 0;
uint64_t num_pinned_onodes = 0;
uint64_t num_extents = 0;
uint64_t num_blobs = 0;
uint64_t num_buffers = 0;
uint64_t num_buffer_bytes = 0;
for (auto c : onode_cache_shards) {
c->add_stats(&num_onodes);
c->add_stats(&num_onodes, &num_pinned_onodes);
}
for (auto c : buffer_cache_shards) {
c->add_stats(&num_extents, &num_blobs,
&num_buffers, &num_buffer_bytes);
}
logger->set(l_bluestore_onodes, num_onodes);
logger->set(l_bluestore_pinned_onodes, num_pinned_onodes);
logger->set(l_bluestore_extents, num_extents);
logger->set(l_bluestore_blobs, num_blobs);
logger->set(l_bluestore_buffers, num_buffers);

src/os/bluestore/BlueStore.h

@@ -103,6 +103,7 @@ enum {
l_bluestore_compressed_allocated,
l_bluestore_compressed_original,
l_bluestore_onodes,
l_bluestore_pinned_onodes,
l_bluestore_onode_hits,
l_bluestore_onode_misses,
l_bluestore_onode_shard_hits,
@@ -1049,20 +1050,22 @@ public:
};
struct OnodeSpace;
struct OnodeCacheShard;
/// an in-memory object
struct Onode {
MEMPOOL_CLASS_HELPERS();
// Not persisted and updated on cache insertion/removal
OnodeCacheShard *s;
bool pinned = false; // Only to be used by the onode cache shard
std::atomic_int nref; ///< reference count
Collection *c;
ghobject_t oid;
/// key under PREFIX_OBJ where we are stored
mempool::bluestore_cache_other::string key;
boost::intrusive::list_member_hook<> lru_item;
boost::intrusive::list_member_hook<> lru_item, pin_item;
bluestore_onode_t onode; ///< metadata stored as value in kv store
bool exists; ///< true if object logically exists
@@ -1079,7 +1082,8 @@ public:
Onode(Collection *c, const ghobject_t& o,
const mempool::bluestore_cache_other::string& k)
: nref(0),
: s(nullptr),
nref(0),
c(c),
oid(o),
key(k),
@@ -1088,7 +1092,8 @@ public:
}
Onode(Collection* c, const ghobject_t& o,
const string& k)
: nref(0),
: s(nullptr),
nref(0),
c(c),
oid(o),
key(k),
@@ -1097,7 +1102,8 @@ public:
}
Onode(Collection* c, const ghobject_t& o,
const char* k)
: nref(0),
: s(nullptr),
nref(0),
c(c),
oid(o),
key(k),
@@ -1115,11 +1121,18 @@ public:
void flush();
void get() {
++nref;
if (++nref == 2 && s != nullptr) {
s->pin(*this);
}
}
void put() {
if (--nref == 0)
int n = --nref;
if (n == 1 && s != nullptr) {
s->unpin(*this);
}
if (n == 0) {
delete this;
}
}
const string& get_omap_prefix();
@@ -1154,7 +1167,7 @@ public:
return num;
}
virtual void _trim_to(uint64_t max) = 0;
virtual void _trim_to(uint64_t new_size) = 0;
void _trim() {
if (cct->_conf->objectstore_blackhole) {
// do not trim if we are throwing away IOs a layer down
@@ -1162,6 +1175,7 @@ public:
}
_trim_to(max);
}
void trim() {
std::lock_guard l(lock);
_trim();
@@ -1182,6 +1196,8 @@ public:
/// A Generic onode Cache Shard
struct OnodeCacheShard : public CacheShard {
std::atomic<uint64_t> num_pinned = {0};
std::array<std::pair<ghobject_t, mono_clock::time_point>, 64> dumped_onodes;
public:
OnodeCacheShard(CephContext* cct) : CacheShard(cct) {}
@@ -1190,8 +1206,20 @@ public:
virtual void _add(OnodeRef& o, int level) = 0;
virtual void _rm(OnodeRef& o) = 0;
virtual void _touch(OnodeRef& o) = 0;
virtual void add_stats(uint64_t *onodes) = 0;
virtual void _pin(Onode& o) = 0;
virtual void _unpin(Onode& o) = 0;
void pin(Onode& o) {
std::lock_guard l(lock);
_pin(o);
}
void unpin(Onode& o) {
std::lock_guard l(lock);
_unpin(o);
}
virtual void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) = 0;
bool empty() {
return _get_num() == 0;
}

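The Onode::get()/put() hunk above drives pinning purely from the reference count: the onode cache itself holds one reference, so the 1 -> 2 transition in get() means a user now holds the onode and it gets pinned, and the 2 -> 1 transition in put() means only the cache reference remains and it is unpinned. A minimal standalone sketch of that transition logic follows, under the assumption that the cache ref is taken first; Object and Cache are illustrative stand-ins, not the BlueStore classes, and locking is omitted.

#include <atomic>
#include <cassert>
#include <iostream>

struct Cache;

struct Object {
  std::atomic_int nref{0};
  Cache *cache = nullptr;   // set when the object is inserted into the cache
  void get();
  void put();
};

struct Cache {
  int pinned = 0;           // stand-in for the shard's pin list
  void pin(Object&)   { ++pinned; }
  void unpin(Object&) { --pinned; }
};

void Object::get() {
  if (++nref == 2 && cache) {   // 1 -> 2: first reference beyond the cache's own
    cache->pin(*this);
  }
}

void Object::put() {
  int n = --nref;
  if (n == 1 && cache) {        // 2 -> 1: only the cache's reference remains
    cache->unpin(*this);
  }
  if (n == 0) {
    delete this;                // heap-allocated in this sketch
  }
}

int main() {
  Cache c;
  Object *o = new Object;
  o->cache = &c;
  o->get();                     // the cache's own reference: nref = 1
  o->get();                     // a user takes a reference: nref = 2, pinned
  assert(c.pinned == 1);
  o->put();                     // user drops it: nref = 1, unpinned
  assert(c.pinned == 0);
  o->put();                     // cache drops it: nref = 0, object freed
  std::cout << "pinned objects: " << c.pinned << std::endl;
  return 0;
}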
src/osd/OSD.cc

@@ -3204,6 +3204,11 @@ int OSD::enable_disable_fuse(bool stop)
return 0;
}
size_t OSD::get_num_cache_shards()
{
return cct->_conf.get_val<Option::size_t>("osd_num_cache_shards");
}
int OSD::get_num_op_shards()
{
if (cct->_conf->osd_op_num_shards)
@@ -3297,7 +3302,7 @@ int OSD::init()
dout(2) << "journal " << journal_path << dendl;
ceph_assert(store); // call pre_init() first!
store->set_cache_shards(get_num_op_shards());
store->set_cache_shards(get_num_cache_shards());
int r = store->mount();
if (r < 0) {

src/osd/OSD.h

@@ -2039,6 +2039,7 @@ private:
int init_op_flags(OpRequestRef& op);
size_t get_num_cache_shards();
int get_num_op_shards();
int get_num_op_threads();