mirror of
https://github.com/ceph/ceph
synced 2025-01-01 08:32:24 +00:00
Merge PR #25544 into master
* refs/pull/25544/head: common/PriorityCache: Automatic chunk sizing Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
commit
bdb86f4f9c
@ -15,12 +15,38 @@
|
||||
#include "PriorityCache.h"
|
||||
|
||||
namespace PriorityCache {
|
||||
/* Compute the chunk-aligned size a cache should be given for its usage.
 *
 * The allocation granularity ("chunk") is derived from total_bytes: the
 * total is rounded up to a power of 2, divided by 256, and then bounded
 * to the range [4MB, 16MB].  The returned size is usage plus 16 chunks of
 * headroom, rounded up to a whole number of chunks.
 *
 * usage:       bytes the cache currently needs
 * total_bytes: total memory available for caches
 * returns:     the target size in bytes (always a multiple of the chunk)
 */
int64_t get_chunk(uint64_t usage, uint64_t total_bytes) {
  constexpr uint64_t min_chunk = 4ull * 1024 * 1024;
  constexpr uint64_t max_chunk = 16ull * 1024 * 1024;
  constexpr uint64_t chunk_divisor = 256;
  constexpr uint64_t headroom_chunks = 16;

  uint64_t chunk;
  if (total_bytes > max_chunk * chunk_divisor) {
    // Any total above 4GB rounds up past the upper bound.  Deciding this
    // up front also keeps the power of 2 rounding below from wrapping to
    // 0 for totals above 2^63.
    chunk = max_chunk;
  } else {
    // Round up to the nearest power of 2.  A total of 0 wraps here and
    // ends up as 0 after the increment; the lower bound below covers it.
    chunk = total_bytes - 1;
    chunk |= chunk >> 1;
    chunk |= chunk >> 2;
    chunk |= chunk >> 4;
    chunk |= chunk >> 8;
    chunk |= chunk >> 16;
    chunk |= chunk >> 32;
    chunk += 1;

    // shrink it to 1/256 of the rounded up cache size
    chunk /= chunk_divisor;

    // bound the chunk size to be between 4MB and 16MB
    if (chunk < min_chunk) {
      chunk = min_chunk;
    }
    if (chunk > max_chunk) {
      chunk = max_chunk;
    }
  }

  /* Add 16 chunks of headroom and round up to the nearest chunk.  Note that
   * if RocksDB is used, it's a good idea to have N MB of headroom where
   * N is the target_file_size_base value.  RocksDB will read SST files
   * into the block cache during compaction which potentially can force out
   * all existing cached data.  Once compaction is finished, the SST data is
   * released leaving an empty cache.  Having enough headroom to absorb
   * compaction reads allows the kv cache to grow even during extremely heavy
   * compaction workloads.
   */
  uint64_t val = usage + (headroom_chunks * chunk);
  const uint64_t r = val % chunk;
  if (r > 0) {
    val += chunk - r;
  }
  return static_cast<int64_t>(val);
}
|
||||
|
||||
|
@ -27,18 +27,15 @@ namespace PriorityCache {
|
||||
LAST = PRI3,
|
||||
};
|
||||
|
||||
int64_t get_chunk(uint64_t usage, uint64_t chunk_bytes);
|
||||
int64_t get_chunk(uint64_t usage, uint64_t total_bytes);
|
||||
|
||||
struct PriCache {
|
||||
virtual ~PriCache();
|
||||
|
||||
/* Ask the cache to request memory for the given priority rounded up to
|
||||
* the nearest chunk_bytes. This, for example, may return the size of all
|
||||
* items associated with this priority plus some additional space for
|
||||
* future growth. Note that the cache may ultimately be allocated less
|
||||
* memory than it requests here.
|
||||
/* Ask the cache to request memory for the given priority. Note that the
|
||||
* cache may ultimately be allocated less memory than it requests here.
|
||||
*/
|
||||
virtual int64_t request_cache_bytes(PriorityCache::Priority pri, uint64_t chunk_bytes) const = 0;
|
||||
virtual int64_t request_cache_bytes(PriorityCache::Priority pri, uint64_t total_cache) const = 0;
|
||||
|
||||
// Get the number of bytes currently allocated to the given priority.
|
||||
virtual int64_t get_cache_bytes(PriorityCache::Priority pri) const = 0;
|
||||
@ -52,8 +49,15 @@ namespace PriorityCache {
|
||||
// Allocate additional bytes for a given priority.
|
||||
virtual void add_cache_bytes(PriorityCache::Priority pri, int64_t bytes) = 0;
|
||||
|
||||
// Commit the current number of bytes allocated to the cache.
|
||||
virtual int64_t commit_cache_size() = 0;
|
||||
/* Commit the current number of bytes allocated to the cache. Space is
|
||||
* allocated in chunks based on the allocation size and current total size
|
||||
* of memory available for caches. */
|
||||
virtual int64_t commit_cache_size(uint64_t total_cache) = 0;
|
||||
|
||||
/* Get the current number of bytes allocated to the cache. This may be
|
||||
* larger than the value returned by get_cache_bytes as it includes extra
|
||||
* space for future growth. */
|
||||
virtual int64_t get_committed_size() const = 0;
|
||||
|
||||
// Get the ratio of available memory this cache should target.
|
||||
virtual double get_cache_ratio() const = 0;
|
||||
|
@ -4364,11 +4364,6 @@ std::vector<Option> get_global_options() {
|
||||
.add_see_also("bluestore_cache_meta_ratio")
|
||||
.set_description("Automatically tune the ratio of caches while respecting min values."),
|
||||
|
||||
Option("bluestore_cache_autotune_chunk_size", Option::TYPE_SIZE, Option::LEVEL_DEV)
|
||||
.set_default(33554432)
|
||||
.add_see_also("bluestore_cache_autotune")
|
||||
.set_description("The chunk size in bytes to allocate to caches when cache autotune is enabled."),
|
||||
|
||||
Option("bluestore_cache_autotune_interval", Option::TYPE_FLOAT, Option::LEVEL_DEV)
|
||||
.set_default(5)
|
||||
.add_see_also("bluestore_cache_autotune")
|
||||
|
@ -375,7 +375,11 @@ public:
|
||||
cache_bytes[pri] += bytes;
|
||||
}
|
||||
|
||||
virtual int64_t commit_cache_size() {
|
||||
virtual int64_t commit_cache_size(uint64_t total_cache) {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
virtual int64_t get_committed_size() const {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
|
@ -1301,7 +1301,6 @@ int64_t RocksDBStore::request_cache_bytes(PriorityCache::Priority pri, uint64_t
|
||||
default:
|
||||
break;
|
||||
}
|
||||
request = PriorityCache::get_chunk(usage, chunk_bytes);
|
||||
request = (request > assigned) ? request - assigned : 0;
|
||||
dout(10) << __func__ << " Priority: " << static_cast<uint32_t>(pri)
|
||||
<< " Usage: " << usage << " Request: " << request << dendl;
|
||||
@ -1313,28 +1312,26 @@ int64_t RocksDBStore::get_cache_usage() const
|
||||
return static_cast<int64_t>(bbt_opts.block_cache->GetUsage());
|
||||
}
|
||||
|
||||
int64_t RocksDBStore::commit_cache_size()
|
||||
int64_t RocksDBStore::commit_cache_size(uint64_t total_bytes)
|
||||
{
|
||||
size_t old_bytes = bbt_opts.block_cache->GetCapacity();
|
||||
int64_t total_bytes = get_cache_bytes();
|
||||
int64_t new_bytes = PriorityCache::get_chunk(
|
||||
get_cache_bytes(), total_bytes);
|
||||
dout(10) << __func__ << " old: " << old_bytes
|
||||
<< " new: " << total_bytes << dendl;
|
||||
bbt_opts.block_cache->SetCapacity((size_t) total_bytes);
|
||||
<< " new: " << new_bytes << dendl;
|
||||
bbt_opts.block_cache->SetCapacity((size_t) new_bytes);
|
||||
|
||||
// Set the high priority pool ratio is this is the binned LRU cache.
|
||||
if (g_conf()->rocksdb_cache_type == "binned_lru") {
|
||||
auto binned_cache =
|
||||
std::static_pointer_cast<rocksdb_cache::BinnedLRUCache>(bbt_opts.block_cache);
|
||||
int64_t high_pri_bytes = get_cache_bytes(PriorityCache::Priority::PRI0);
|
||||
double ratio = (double) high_pri_bytes / total_bytes;
|
||||
int64_t high_pri_bytes = PriorityCache::get_chunk(
|
||||
binned_cache->GetHighPriPoolUsage()+1, total_bytes);
|
||||
double ratio = (double) high_pri_bytes / new_bytes;
|
||||
dout(10) << __func__ << " High Pri Pool Ratio set to " << ratio << dendl;
|
||||
binned_cache->SetHighPriPoolRatio(ratio);
|
||||
}
|
||||
return total_bytes;
|
||||
}
|
||||
|
||||
int64_t RocksDBStore::get_cache_capacity() {
|
||||
return bbt_opts.block_cache->GetCapacity();
|
||||
return new_bytes;
|
||||
}
|
||||
|
||||
RocksDBStore::RocksDBWholeSpaceIteratorImpl::~RocksDBWholeSpaceIteratorImpl()
|
||||
|
@ -480,7 +480,10 @@ err:
|
||||
|
||||
virtual int64_t request_cache_bytes(
|
||||
PriorityCache::Priority pri, uint64_t cache_bytes) const override;
|
||||
virtual int64_t commit_cache_size() override;
|
||||
virtual int64_t commit_cache_size(uint64_t total_cache) override;
|
||||
virtual int64_t get_committed_size() const override {
|
||||
return bbt_opts.block_cache->GetCapacity();
|
||||
}
|
||||
virtual std::string get_cache_name() const override {
|
||||
return "RocksDB Block Cache";
|
||||
}
|
||||
|
@ -3538,9 +3538,9 @@ void BlueStore::MempoolThread::_trim_shards(bool interval_stats)
|
||||
if (store->cache_autotune) {
|
||||
cache_size = autotune_cache_size;
|
||||
|
||||
kv_alloc = store->db->get_cache_bytes();
|
||||
meta_alloc = meta_cache.get_cache_bytes();
|
||||
data_alloc = data_cache.get_cache_bytes();
|
||||
kv_alloc = store->db->get_committed_size();
|
||||
meta_alloc = meta_cache.get_committed_size();
|
||||
data_alloc = data_cache.get_committed_size();
|
||||
}
|
||||
|
||||
if (interval_stats) {
|
||||
@ -3632,6 +3632,14 @@ void BlueStore::MempoolThread::_balance_cache(
|
||||
const std::list<PriorityCache::PriCache *>& caches)
|
||||
{
|
||||
int64_t mem_avail = autotune_cache_size;
|
||||
/* Each cache is going to get at least 1 chunk's worth of memory from get_chunk
|
||||
* so shrink the available memory here to compensate. Don't shrink the amount of
|
||||
* memory below 0 however.
|
||||
*/
|
||||
mem_avail -= PriorityCache::get_chunk(1, autotune_cache_size) * caches.size();
|
||||
if (mem_avail < 0) {
|
||||
mem_avail = 0;
|
||||
}
|
||||
|
||||
// Assign memory for each priority level
|
||||
for (int i = 0; i < PriorityCache::Priority::LAST + 1; i++) {
|
||||
@ -3654,7 +3662,7 @@ void BlueStore::MempoolThread::_balance_cache(
|
||||
|
||||
// Finally commit the new cache sizes
|
||||
for (auto it = caches.begin(); it != caches.end(); it++) {
|
||||
(*it)->commit_cache_size();
|
||||
(*it)->commit_cache_size(autotune_cache_size);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3678,7 +3686,7 @@ void BlueStore::MempoolThread::_balance_cache_pri(int64_t *mem_avail,
|
||||
uint64_t total_assigned = 0;
|
||||
|
||||
for (auto it = tmp_caches.begin(); it != tmp_caches.end(); ) {
|
||||
int64_t cache_wants = (*it)->request_cache_bytes(pri, store->cache_autotune_chunk_size);
|
||||
int64_t cache_wants = (*it)->request_cache_bytes(pri, autotune_cache_size);
|
||||
|
||||
// Usually the ratio should be set to the fraction of the current caches'
|
||||
// assigned ratio compared to the total ratio of all caches that still
|
||||
@ -4103,8 +4111,6 @@ int BlueStore::_set_cache_sizes()
|
||||
{
|
||||
ceph_assert(bdev);
|
||||
cache_autotune = cct->_conf.get_val<bool>("bluestore_cache_autotune");
|
||||
cache_autotune_chunk_size =
|
||||
cct->_conf.get_val<Option::size_t>("bluestore_cache_autotune_chunk_size");
|
||||
cache_autotune_interval =
|
||||
cct->_conf.get_val<double>("bluestore_cache_autotune_interval");
|
||||
osd_memory_target = cct->_conf.get_val<uint64_t>("osd_memory_target");
|
||||
|
@ -1990,7 +1990,6 @@ private:
|
||||
double cache_kv_ratio = 0; ///< cache ratio dedicated to kv (e.g., rocksdb)
|
||||
double cache_data_ratio = 0; ///< cache ratio dedicated to object data
|
||||
bool cache_autotune = false; ///< cache autotune setting
|
||||
uint64_t cache_autotune_chunk_size = 0; ///< cache autotune chunk size
|
||||
double cache_autotune_interval = 0; ///< time to wait between cache rebalancing
|
||||
uint64_t osd_memory_target = 0; ///< OSD memory target when autotuning cache
|
||||
uint64_t osd_memory_base = 0; ///< OSD base memory when autotuning cache
|
||||
@ -2018,6 +2017,7 @@ private:
|
||||
struct MempoolCache : public PriorityCache::PriCache {
|
||||
BlueStore *store;
|
||||
int64_t cache_bytes[PriorityCache::Priority::LAST+1];
|
||||
int64_t committed_bytes = 0;
|
||||
double cache_ratio = 0;
|
||||
|
||||
MempoolCache(BlueStore *s) : store(s) {};
|
||||
@ -2025,15 +2025,14 @@ private:
|
||||
virtual uint64_t _get_used_bytes() const = 0;
|
||||
|
||||
virtual int64_t request_cache_bytes(
|
||||
PriorityCache::Priority pri, uint64_t chunk_bytes) const {
|
||||
PriorityCache::Priority pri, uint64_t total_cache) const {
|
||||
int64_t assigned = get_cache_bytes(pri);
|
||||
|
||||
switch (pri) {
|
||||
// All cache items are currently shoved into the LAST priority
|
||||
case PriorityCache::Priority::LAST:
|
||||
{
|
||||
uint64_t usage = _get_used_bytes();
|
||||
int64_t request = PriorityCache::get_chunk(usage, chunk_bytes);
|
||||
int64_t request = _get_used_bytes();
|
||||
return(request > assigned) ? request - assigned : 0;
|
||||
}
|
||||
default:
|
||||
@ -2060,8 +2059,13 @@ private:
|
||||
virtual void add_cache_bytes(PriorityCache::Priority pri, int64_t bytes) {
|
||||
cache_bytes[pri] += bytes;
|
||||
}
|
||||
virtual int64_t commit_cache_size() {
|
||||
return get_cache_bytes();
|
||||
virtual int64_t commit_cache_size(uint64_t total_cache) {
|
||||
committed_bytes = PriorityCache::get_chunk(
|
||||
get_cache_bytes(), total_cache);
|
||||
return committed_bytes;
|
||||
}
|
||||
virtual int64_t get_committed_size() const {
|
||||
return committed_bytes;
|
||||
}
|
||||
virtual double get_cache_ratio() const {
|
||||
return cache_ratio;
|
||||
|
Loading…
Reference in New Issue
Block a user