mirror of
https://github.com/ceph/ceph
synced 2025-02-24 03:27:10 +00:00
Merge PR #31778 into master
* refs/pull/31778/head:
    os/bluestore: pin onodes as they are added to the cache
    Revert "Revert "Merge pull request #30964 from markhpc/wip-bs-cache-trim-pinned""

Reviewed-by: Mark Nelson <mnelson@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
commit
d6f5918850
@ -2774,6 +2774,11 @@ std::vector<Option> get_global_options() {
|
||||
.set_default(true)
|
||||
.set_description(""),
|
||||
|
||||
Option("osd_num_cache_shards", Option::TYPE_SIZE, Option::LEVEL_ADVANCED)
|
||||
.set_default(32)
|
||||
.set_flag(Option::FLAG_STARTUP)
|
||||
.set_description("The number of cache shards to use in the object store."),
|
||||
|
||||
Option("osd_op_num_threads_per_shard", Option::TYPE_INT, Option::LEVEL_ADVANCED)
|
||||
.set_default(0)
|
||||
.set_flag(Option::FLAG_STARTUP)
|
||||
|
@ -841,58 +841,88 @@ struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard {
|
||||
BlueStore::Onode,
|
||||
boost::intrusive::list_member_hook<>,
|
||||
&BlueStore::Onode::lru_item> > list_t;
|
||||
/// Intrusive list of onodes linked through Onode::pin_item; the container
/// type behind `pin_list`.
using pin_list_t = boost::intrusive::list<
  BlueStore::Onode,
  boost::intrusive::member_hook<
    BlueStore::Onode,
    boost::intrusive::list_member_hook<>,
    &BlueStore::Onode::pin_item> >;
|
||||
|
||||
list_t lru;
|
||||
pin_list_t pin_list;
|
||||
|
||||
/// Construct the shard, forwarding `cct` to the OnodeCacheShard base.
explicit LruOnodeCacheShard(CephContext *cct)
  : BlueStore::OnodeCacheShard(cct)
{
}
|
||||
|
||||
/// Insert onode `o` into this shard.
///
/// The onode must not already belong to a shard (`o->s` is asserted to be
/// null); afterwards `o->s` points at this shard. An onode whose reference
/// count is already above one goes onto `pin_list` and is flagged `pinned`;
/// any other onode is linked into `lru`, at the front when `level > 0` and
/// at the back otherwise. `num` / `num_pinned` are refreshed from the list
/// sizes.
void _add(BlueStore::OnodeRef& o, int level) override
{
  ceph_assert(o->s == nullptr);
  o->s = this;
  if (o->nref > 1) {
    pin_list.push_front(*o);
    o->pinned = true;
    num_pinned = pin_list.size();
  } else {
    // Link into the LRU exactly once, and only on the unpinned path:
    // pushing before the pin check as well would put a pinned onode on
    // both lists and link an unpinned one through lru_item twice.
    (level > 0) ? lru.push_front(*o) : lru.push_back(*o);
  }
  num = lru.size();
}
|
||||
/// Remove onode `o` from this shard.
///
/// Clears the onode's back-pointer `o->s`, then unlinks it from whichever
/// list currently holds it: `pin_list` when `o->pinned` is set (the flag is
/// cleared), `lru` otherwise. `num` / `num_pinned` are refreshed from the
/// list sizes.
void _rm(BlueStore::OnodeRef& o) override
{
  o->s = nullptr;
  if (o->pinned) {
    o->pinned = false;
    pin_list.erase(pin_list.iterator_to(*o));
  } else {
    // Only unpinned onodes are linked into lru; erasing from lru
    // unconditionally would touch a hook that is not linked there.
    lru.erase(lru.iterator_to(*o));
  }
  num = lru.size();
  num_pinned = pin_list.size();
}
|
||||
/// Move onode `o` to the front of `lru`.
///
/// Pinned onodes are left alone. For an unpinned onode the entry is
/// unlinked from its current position and re-linked at the front, after
/// which `num` is refreshed from `lru.size()`.
void _touch(BlueStore::OnodeRef& o) override
{
  if (!o->pinned) {
    auto pos = lru.iterator_to(*o);
    lru.erase(pos);
    lru.push_front(*o);
    num = lru.size();
  }
}
|
||||
void _trim_to(uint64_t max) override
|
||||
void _pin(BlueStore::Onode& o) override
|
||||
{
|
||||
if (max >= lru.size()) {
|
||||
if (o.pinned == true) {
|
||||
return;
|
||||
}
|
||||
lru.erase(lru.iterator_to(o));
|
||||
pin_list.push_front(o);
|
||||
o.pinned = true;
|
||||
num = lru.size();
|
||||
num_pinned = pin_list.size();
|
||||
dout(30) << __func__ << " " << o.oid << " pinned" << dendl;
|
||||
|
||||
}
|
||||
/// Move onode `o` from `pin_list` back to the front of `lru`.
///
/// Does nothing when the onode is not pinned. Otherwise the onode is
/// unlinked from `pin_list`, pushed to the front of `lru`, `o.pinned` is
/// cleared, and `num` / `num_pinned` are refreshed from the list sizes.
void _unpin(BlueStore::Onode& o) override
{
  if (!o.pinned) {
    return;
  }
  auto pos = pin_list.iterator_to(o);
  pin_list.erase(pos);
  lru.push_front(o);
  o.pinned = false;
  num = lru.size();
  num_pinned = pin_list.size();
  dout(30) << __func__ << " " << o.oid << " unpinned" << dendl;
}
|
||||
void _trim_to(uint64_t new_size) override
|
||||
{
|
||||
if (new_size >= lru.size()) {
|
||||
return; // don't even try
|
||||
}
|
||||
uint64_t n = lru.size() - max;
|
||||
|
||||
uint64_t n = lru.size() - new_size;
|
||||
auto p = lru.end();
|
||||
ceph_assert(p != lru.begin());
|
||||
--p;
|
||||
int skipped = 0;
|
||||
int max_skipped = g_conf()->bluestore_cache_trim_max_skip_pinned;
|
||||
while (n > 0) {
|
||||
BlueStore::Onode *o = &*p;
|
||||
int refs = o->nref.load();
|
||||
if (refs > 1) {
|
||||
dout(20) << __func__ << " " << o->oid << " has " << refs
|
||||
<< " refs, skipping" << dendl;
|
||||
if (++skipped >= max_skipped) {
|
||||
dout(20) << __func__ << " maximum skip pinned reached; stopping with "
|
||||
<< n << " left to trim" << dendl;
|
||||
break;
|
||||
}
|
||||
|
||||
if (p == lru.begin()) {
|
||||
break;
|
||||
} else {
|
||||
p--;
|
||||
n--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
dout(30) << __func__ << " rm " << o->oid << dendl;
|
||||
if (p != lru.begin()) {
|
||||
lru.erase(p--);
|
||||
@ -900,6 +930,7 @@ struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard {
|
||||
lru.erase(p);
|
||||
ceph_assert(n == 1);
|
||||
}
|
||||
o->s = nullptr;
|
||||
o->get(); // paranoia
|
||||
o->c->onode_map.remove(o->oid);
|
||||
o->put();
|
||||
@ -907,9 +938,10 @@ struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard {
|
||||
}
|
||||
num = lru.size();
|
||||
}
|
||||
void add_stats(uint64_t *onodes) override
|
||||
void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) override
|
||||
{
|
||||
*onodes += num;
|
||||
*onodes += num + num_pinned;
|
||||
*pinned_onodes += num_pinned;
|
||||
}
|
||||
};
|
||||
|
||||
@ -4553,6 +4585,8 @@ void BlueStore::_init_logger()
|
||||
|
||||
b.add_u64(l_bluestore_onodes, "bluestore_onodes",
|
||||
"Number of onodes in cache");
|
||||
b.add_u64(l_bluestore_pinned_onodes, "bluestore_pinned_onodes",
|
||||
"Number of pinned onodes in cache");
|
||||
b.add_u64_counter(l_bluestore_onode_hits, "bluestore_onode_hits",
|
||||
"Sum for onode-lookups hit in the cache");
|
||||
b.add_u64_counter(l_bluestore_onode_misses, "bluestore_onode_misses",
|
||||
@ -9173,18 +9207,20 @@ void BlueStore::_reap_collections()
|
||||
void BlueStore::_update_cache_logger()
|
||||
{
|
||||
uint64_t num_onodes = 0;
|
||||
uint64_t num_pinned_onodes = 0;
|
||||
uint64_t num_extents = 0;
|
||||
uint64_t num_blobs = 0;
|
||||
uint64_t num_buffers = 0;
|
||||
uint64_t num_buffer_bytes = 0;
|
||||
for (auto c : onode_cache_shards) {
|
||||
c->add_stats(&num_onodes);
|
||||
c->add_stats(&num_onodes, &num_pinned_onodes);
|
||||
}
|
||||
for (auto c : buffer_cache_shards) {
|
||||
c->add_stats(&num_extents, &num_blobs,
|
||||
&num_buffers, &num_buffer_bytes);
|
||||
}
|
||||
logger->set(l_bluestore_onodes, num_onodes);
|
||||
logger->set(l_bluestore_pinned_onodes, num_pinned_onodes);
|
||||
logger->set(l_bluestore_extents, num_extents);
|
||||
logger->set(l_bluestore_blobs, num_blobs);
|
||||
logger->set(l_bluestore_buffers, num_buffers);
|
||||
|
@ -103,6 +103,7 @@ enum {
|
||||
l_bluestore_compressed_allocated,
|
||||
l_bluestore_compressed_original,
|
||||
l_bluestore_onodes,
|
||||
l_bluestore_pinned_onodes,
|
||||
l_bluestore_onode_hits,
|
||||
l_bluestore_onode_misses,
|
||||
l_bluestore_onode_shard_hits,
|
||||
@ -1049,20 +1050,22 @@ public:
|
||||
};
|
||||
|
||||
struct OnodeSpace;
|
||||
|
||||
struct OnodeCacheShard;
|
||||
/// an in-memory object
|
||||
struct Onode {
|
||||
MEMPOOL_CLASS_HELPERS();
|
||||
// Not persisted and updated on cache insertion/removal
|
||||
OnodeCacheShard *s;
|
||||
bool pinned = false; // Only to be used by the onode cache shard
|
||||
|
||||
std::atomic_int nref; ///< reference count
|
||||
Collection *c;
|
||||
|
||||
ghobject_t oid;
|
||||
|
||||
/// key under PREFIX_OBJ where we are stored
|
||||
mempool::bluestore_cache_other::string key;
|
||||
|
||||
boost::intrusive::list_member_hook<> lru_item;
|
||||
boost::intrusive::list_member_hook<> lru_item, pin_item;
|
||||
|
||||
bluestore_onode_t onode; ///< metadata stored as value in kv store
|
||||
bool exists; ///< true if object logically exists
|
||||
@ -1079,7 +1082,8 @@ public:
|
||||
|
||||
Onode(Collection *c, const ghobject_t& o,
|
||||
const mempool::bluestore_cache_other::string& k)
|
||||
: nref(0),
|
||||
: s(nullptr),
|
||||
nref(0),
|
||||
c(c),
|
||||
oid(o),
|
||||
key(k),
|
||||
@ -1088,7 +1092,8 @@ public:
|
||||
}
|
||||
Onode(Collection* c, const ghobject_t& o,
|
||||
const string& k)
|
||||
: nref(0),
|
||||
: s(nullptr),
|
||||
nref(0),
|
||||
c(c),
|
||||
oid(o),
|
||||
key(k),
|
||||
@ -1097,7 +1102,8 @@ public:
|
||||
}
|
||||
Onode(Collection* c, const ghobject_t& o,
|
||||
const char* k)
|
||||
: nref(0),
|
||||
: s(nullptr),
|
||||
nref(0),
|
||||
c(c),
|
||||
oid(o),
|
||||
key(k),
|
||||
@ -1115,11 +1121,18 @@ public:
|
||||
|
||||
void flush();
|
||||
void get() {
|
||||
++nref;
|
||||
if (++nref == 2 && s != nullptr) {
|
||||
s->pin(*this);
|
||||
}
|
||||
}
|
||||
void put() {
|
||||
if (--nref == 0)
|
||||
int n = --nref;
|
||||
if (n == 1 && s != nullptr) {
|
||||
s->unpin(*this);
|
||||
}
|
||||
if (n == 0) {
|
||||
delete this;
|
||||
}
|
||||
}
|
||||
|
||||
const string& get_omap_prefix();
|
||||
@ -1154,7 +1167,7 @@ public:
|
||||
return num;
|
||||
}
|
||||
|
||||
virtual void _trim_to(uint64_t max) = 0;
|
||||
virtual void _trim_to(uint64_t new_size) = 0;
|
||||
void _trim() {
|
||||
if (cct->_conf->objectstore_blackhole) {
|
||||
// do not trim if we are throwing away IOs a layer down
|
||||
@ -1162,6 +1175,7 @@ public:
|
||||
}
|
||||
_trim_to(max);
|
||||
}
|
||||
|
||||
void trim() {
|
||||
std::lock_guard l(lock);
|
||||
_trim();
|
||||
@ -1182,6 +1196,8 @@ public:
|
||||
|
||||
/// A Generic onode Cache Shard
|
||||
struct OnodeCacheShard : public CacheShard {
|
||||
std::atomic<uint64_t> num_pinned = {0};
|
||||
|
||||
std::array<std::pair<ghobject_t, mono_clock::time_point>, 64> dumped_onodes;
|
||||
public:
|
||||
OnodeCacheShard(CephContext* cct) : CacheShard(cct) {}
|
||||
@ -1190,8 +1206,20 @@ public:
|
||||
virtual void _add(OnodeRef& o, int level) = 0;
|
||||
virtual void _rm(OnodeRef& o) = 0;
|
||||
virtual void _touch(OnodeRef& o) = 0;
|
||||
virtual void add_stats(uint64_t *onodes) = 0;
|
||||
virtual void _pin(Onode& o) = 0;
|
||||
virtual void _unpin(Onode& o) = 0;
|
||||
|
||||
/// Locked entry point for pinning: holds `lock` for the duration of the
/// call and delegates to `_pin`.
void pin(Onode& o) {
  std::lock_guard guard(lock);
  _pin(o);
}
|
||||
|
||||
/// Locked entry point for unpinning: holds `lock` for the duration of the
/// call and delegates to `_unpin`.
void unpin(Onode& o) {
  std::lock_guard guard(lock);
  _unpin(o);
}
|
||||
|
||||
virtual void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) = 0;
|
||||
bool empty() {
|
||||
return _get_num() == 0;
|
||||
}
|
||||
|
@ -3204,6 +3204,11 @@ int OSD::enable_disable_fuse(bool stop)
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t OSD::get_num_cache_shards()
|
||||
{
|
||||
return cct->_conf.get_val<Option::size_t>("osd_num_cache_shards");
|
||||
}
|
||||
|
||||
int OSD::get_num_op_shards()
|
||||
{
|
||||
if (cct->_conf->osd_op_num_shards)
|
||||
@ -3297,7 +3302,7 @@ int OSD::init()
|
||||
dout(2) << "journal " << journal_path << dendl;
|
||||
ceph_assert(store); // call pre_init() first!
|
||||
|
||||
store->set_cache_shards(get_num_op_shards());
|
||||
store->set_cache_shards(get_num_cache_shards());
|
||||
|
||||
int r = store->mount();
|
||||
if (r < 0) {
|
||||
|
@ -2039,6 +2039,7 @@ private:
|
||||
|
||||
int init_op_flags(OpRequestRef& op);
|
||||
|
||||
size_t get_num_cache_shards();
|
||||
int get_num_op_shards();
|
||||
int get_num_op_threads();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user