diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index fdcf7dde5d8..9b9a83e8c80 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -417,6 +417,7 @@ OSDMonitor::OSDMonitor(
 {
   inc_cache = std::make_shared(this);
   full_cache = std::make_shared(this);
+  cct->_conf.add_observer(this);
   int r = _set_cache_sizes();
   if (r < 0) {
     derr << __func__ << " using default osd cache size - mon_osd_cache_size ("
@@ -426,6 +427,152 @@ OSDMonitor::OSDMonitor(
   }
 }
 
+/**
+ * Config observer hook: list the options whose changes this service
+ * wants to be notified about.
+ *
+ * @return nullptr-terminated array of option names (static storage)
+ */
+const char **OSDMonitor::get_tracked_conf_keys() const
+{
+  static const char* KEYS[] = {
+    "mon_memory_target",
+    "mon_memory_autotune",
+    "rocksdb_cache_size",
+    nullptr
+  };
+  return KEYS;
+}
+
+/**
+ * Config observer hook: react to a change of one of the tracked options.
+ *
+ * @param conf    config proxy handed in by the caller (not used here; the
+ *                values are read through g_conf())
+ * @param changed names of the options that changed
+ */
+void OSDMonitor::handle_conf_change(const ConfigProxy& conf,
+                                    const std::set<std::string> &changed)
+{
+  dout(10) << __func__ << " " << changed << dendl;
+
+  // autotuning is switched before any new sizes are applied
+  if (changed.count("mon_memory_autotune")) {
+    _set_cache_autotuning();
+  }
+  if (changed.count("mon_memory_target") ||
+      changed.count("rocksdb_cache_size")) {
+    int r = _update_mon_cache_settings();
+    if (r < 0) {
+      derr << __func__ << " mon_memory_target:"
+           << g_conf()->mon_memory_target
+           << " rocksdb_cache_size:"
+           << g_conf()->rocksdb_cache_size
+           << ". Invalid size provided."
+           << dendl;
+    }
+  }
+}
+
+/**
+ * Bring cache autotuning in line with the mon_memory_autotune option.
+ *
+ * When the option is off, pcm is dropped; when it is on and there is no
+ * pcm, register_cache_with_pcm() is called.
+ */
+void OSDMonitor::_set_cache_autotuning()
+{
+  bool need_register = false;
+  {
+    // tick() reads mon_memory_autotune and pcm under balancer_lock, so
+    // update the flag and inspect/reset pcm under the same lock.
+    std::lock_guard l(balancer_lock);
+    mon_memory_autotune = g_conf()->mon_memory_autotune;
+    if (!mon_memory_autotune) {
+      // Disable cache autotuning
+      pcm = nullptr;
+    } else {
+      need_register = (pcm == nullptr);
+    }
+  }
+
+  if (need_register) {
+    // NOTE(review): still called without balancer_lock, as before; confirm
+    // whether register_cache_with_pcm() needs it when it installs pcm.
+    int r = register_cache_with_pcm();
+    if (r < 0) {
+      dout(10) << __func__
+               << " Error while registering osdmon caches with pcm."
+               << " Cache auto tuning not enabled."
+               << dendl;
+    }
+  }
+}
+
+/**
+ * Apply the current mon_memory_target / rocksdb_cache_size option values.
+ *
+ * @return 0 on success; -EINVAL if the sizes fail validation or
+ *         _set_cache_ratios() fails (the previous sizes are restored)
+ */
+int OSDMonitor::_update_mon_cache_settings()
+{
+  if (g_conf()->mon_memory_target <= 0 ||
+      g_conf()->mon_memory_target < mon_memory_min ||
+      g_conf()->rocksdb_cache_size <= 0) {
+    return -EINVAL;
+  }
+
+  uint64_t old_mon_memory_target = mon_memory_target;
+  uint64_t old_rocksdb_cache_size = rocksdb_cache_size;
+
+  // Set the new pcm memory cache sizes
+  mon_memory_target = g_conf()->mon_memory_target;
+  rocksdb_cache_size = g_conf()->rocksdb_cache_size;
+
+  uint64_t base = mon_memory_base;
+  double fragmentation = mon_memory_fragmentation;
+  uint64_t target = mon_memory_target;
+  uint64_t min = mon_memory_min;
+  uint64_t max = min;
+
+  // max only rises above min when the target, less the fragmentation
+  // share, exceeds base + min
+  uint64_t ltarget = (1.0 - fragmentation) * target;
+  if (ltarget > base + min) {
+    max = ltarget - base;
+  }
+
+  int r = _set_cache_ratios();
+  if (r < 0) {
+    derr << __func__ << " Cache ratios for pcm could not be set."
+         << " Review the kv (rocksdb) and mon_memory_target sizes."
+         << dendl;
+    mon_memory_target = old_mon_memory_target;
+    rocksdb_cache_size = old_rocksdb_cache_size;
+    return -EINVAL;
+  }
+
+  // take the lock before looking at mon_memory_autotune / pcm, as tick() does
+  std::lock_guard l(balancer_lock);
+  if (mon_memory_autotune && pcm != nullptr) {
+    // set pcm cache levels
+    pcm->set_target_memory(target);
+    pcm->set_min_memory(min);
+    pcm->set_max_memory(max);
+    // tune memory based on new values
+    pcm->tune_memory();
+    pcm->balance();
+    _set_new_cache_sizes();
+    dout(10) << __func__ << " Updated mon cache setting."
+             << " target: " << target
+             << " min: " << min
+             << " max: " << max
+             << dendl;
+  }
+  return 0;
+}
+
 int OSDMonitor::_set_cache_sizes()
 {
   if (g_conf()->mon_memory_autotune) {
@@ -4953,22 +5100,25 @@ void OSDMonitor::tick()
       !pending_inc.new_pg_temp.empty())  // also propose if we adjusted pg_temp
     propose_pending();
 
-  if (ceph_using_tcmalloc() && pcm != nullptr) {
-    pcm->tune_memory();
-    pcm->balance();
-    _set_new_cache_sizes();
-    dout(10) << "tick balancer "
-             << " inc cache_bytes: " << inc_cache->get_cache_bytes()
-             << " inc comtd_bytes: " << inc_cache->get_committed_size()
-             << " inc used_bytes: " << inc_cache->_get_used_bytes()
-             << " inc num_osdmaps: " << inc_cache->_get_num_osdmaps()
-             << dendl;
-    dout(10) << "tick balancer "
-             << " full cache_bytes: " << full_cache->get_cache_bytes()
-             << " full comtd_bytes: " << full_cache->get_committed_size()
-             << " full used_bytes: " << full_cache->_get_used_bytes()
-             << " full num_osdmaps: " << full_cache->_get_num_osdmaps()
-             << dendl;
-  }
+  {
+    std::lock_guard l(balancer_lock);
+    if (ceph_using_tcmalloc() && mon_memory_autotune && pcm != nullptr) {
+      pcm->tune_memory();
+      pcm->balance();
+      _set_new_cache_sizes();
+      dout(10) << "tick balancer "
+               << " inc cache_bytes: " << inc_cache->get_cache_bytes()
+               << " inc comtd_bytes: " << inc_cache->get_committed_size()
+               << " inc used_bytes: " << inc_cache->_get_used_bytes()
+               << " inc num_osdmaps: " << inc_cache->_get_num_osdmaps()
+               << dendl;
+      dout(10) << "tick balancer "
+               << " full cache_bytes: " << full_cache->get_cache_bytes()
+               << " full comtd_bytes: " << full_cache->get_committed_size()
+               << " full used_bytes: " << full_cache->_get_used_bytes()
+               << " full num_osdmaps: " << full_cache->_get_num_osdmaps()
+               << dendl;
+    }
+  }
 }
 
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index cd911465580..c87d5569cc2 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -209,12 +209,17 @@ struct osdmap_manifest_t {
 };
 WRITE_CLASS_ENCODER(osdmap_manifest_t);
 
-class OSDMonitor : public PaxosService {
+class OSDMonitor : public PaxosService,
+                   public md_config_obs_t {
   CephContext *cct;
 
 public:
   OSDMap osdmap;
 
+  // config observer
+  const char** get_tracked_conf_keys() const override;
+  void handle_conf_change(const ConfigProxy& conf,
+    const std::set<std::string> &changed) override;
   // [leader]
   OSDMap::Incremental pending_inc;
   map pending_metadata;
@@ -225,6 +230,8 @@ public:
   map> pending_pseudo_purged_snaps;
   std::shared_ptr rocksdb_binned_kv_cache = nullptr;
   std::shared_ptr pcm = nullptr;
+  // held while tick() or a config change uses pcm / mon_memory_autotune
+  ceph::mutex balancer_lock = ceph::make_mutex("OSDMonitor::balancer_lock");
 
   map osd_weight;
 
@@ -324,6 +331,7 @@ private:
   int _set_cache_ratios();
   void _set_new_cache_sizes();
   void _set_cache_autotuning();
+  int _update_mon_cache_settings();
 
   friend struct OSDMemCache;
   friend struct IncCache;