Merge pull request #1342 from ceph/wip-cache-add

mon: add 'osd tier add-cache ...' command (DNM until after wip-tier-add)

Reviewed-by: Loic Dachary <loic@dachary.org>
This commit is contained in:
Sage Weil 2014-03-03 21:37:56 -08:00
commit bcea57d61f
5 changed files with 129 additions and 10 deletions

View File

@@ -88,6 +88,13 @@ ceph osd tier remove data cache2
ceph osd pool delete cache cache --yes-i-really-really-mean-it
ceph osd pool delete cache2 cache2 --yes-i-really-really-mean-it
# convenient add-cache command
ceph osd pool create cache3 2
ceph osd tier add-cache data cache3 1024000
ceph osd dump | grep cache3 | grep bloom | grep 'false_positive_probability: 0.05' | grep 'target_bytes 1024000' | grep '1200s x4'
ceph osd tier remove data cache3
ceph osd pool delete cache3 cache3 --yes-i-really-really-mean-it
# Assumes there are at least 3 MDSes and two OSDs
#

View File

@@ -428,8 +428,15 @@ OPTION(osd_pool_default_erasure_code_properties,
) // default properties of osd pool create
OPTION(osd_pool_default_flags, OPT_INT, 0) // default flags for new pools
OPTION(osd_pool_default_flag_hashpspool, OPT_BOOL, true) // use new pg hashing to prevent pool/pg overlap
OPTION(osd_pool_default_hit_set_bloom_fpp, OPT_FLOAT, .05) // false positive probability for a bloom HitSet; used when a pool's hit_set_type is set to "bloom"
OPTION(osd_hit_set_min_size, OPT_INT, 1000) // min target size for a HitSet
OPTION(osd_hit_set_namespace, OPT_STR, ".ceph-internal") // rados namespace for hit_set tracking
OPTION(osd_tier_default_cache_mode, OPT_STR, "writeback") // cache_mode applied to the tier pool by 'osd tier add-cache'
OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4) // hit_set_count applied to the tier pool by 'osd tier add-cache'
OPTION(osd_tier_default_cache_hit_set_period, OPT_INT, 1200) // hit_set_period (seconds) applied to the tier pool by 'osd tier add-cache'
OPTION(osd_tier_default_cache_hit_set_type, OPT_STR, "bloom") // hit set type for 'osd tier add-cache': "bloom", "explicit_hash", or "explicit_object"
OPTION(osd_map_dedup, OPT_BOOL, true)
OPTION(osd_map_cache_size, OPT_INT, 500)
OPTION(osd_map_message_max, OPT_INT, 100) // max maps per MOSDMap message

View File

@@ -571,6 +571,13 @@ COMMAND("osd tier remove-overlay " \
"name=pool,type=CephPoolname ", \
"remove the overlay pool for base pool <pool>", "osd", "rw", "cli,rest")
// 'osd tier add-cache <pool> <tierpool> <size>': attach <tierpool> as a cache
// tier of <pool> with a target size of <size> bytes, filling in cache
// parameters from the osd_tier_default_cache_* config defaults.
COMMAND("osd tier add-cache " \
"name=pool,type=CephPoolname " \
"name=tierpool,type=CephPoolname " \
"name=size,type=CephInt,range=0", \
"add a cache <tierpool> of size <size> to existing pool <pool>", \
"osd", "rw", "cli,rest")
/*
* mon/ConfigKeyService.cc
*/

View File

@@ -3213,7 +3213,7 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
p.hit_set_params = HitSet::Params();
else if (val == "bloom") {
BloomHitSet::Params *bsp = new BloomHitSet::Params;
bsp->set_fpp(.05);
bsp->set_fpp(g_conf->osd_pool_default_hit_set_bloom_fpp);
p.hit_set_params = HitSet::Params(bsp);
} else if (val == "explicit_hash")
p.hit_set_params = HitSet::Params(new ExplicitHashHitSet::Params);
@@ -4506,8 +4506,14 @@ done:
goto reply;
}
// go
pending_inc.get_new_pool(pool_id, p)->tiers.insert(tierpool_id);
pending_inc.get_new_pool(tierpool_id, tp)->tier_of = pool_id;
pg_pool_t *np = pending_inc.get_new_pool(pool_id, p);
pg_pool_t *ntp = pending_inc.get_new_pool(tierpool_id, tp);
if (np->tiers.count(tierpool_id) || ntp->is_tier()) {
wait_for_finished_proposal(new C_RetryMessage(this, m));
return true;
}
np->tiers.insert(tierpool_id);
ntp->tier_of = pool_id;
ss << "pool '" << tierpoolstr << "' is now (or already was) a tier of '" << poolstr << "'";
wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
get_last_committed() + 1));
@@ -4549,8 +4555,16 @@ done:
goto reply;
}
// go
pending_inc.get_new_pool(pool_id, p)->tiers.erase(tierpool_id);
pending_inc.get_new_pool(tierpool_id, tp)->clear_tier();
pg_pool_t *np = pending_inc.get_new_pool(pool_id, p);
pg_pool_t *ntp = pending_inc.get_new_pool(tierpool_id, tp);
if (np->tiers.count(tierpool_id) == 0 ||
ntp->tier_of != pool_id ||
np->read_tier == tierpool_id) {
wait_for_finished_proposal(new C_RetryMessage(this, m));
return true;
}
np->tiers.erase(tierpool_id);
ntp->clear_tier();
ss << "pool '" << tierpoolstr << "' is now (or already was) not a tier of '" << poolstr << "'";
wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
get_last_committed() + 1));
@@ -4652,6 +4666,90 @@ done:
wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
get_last_committed() + 1));
return true;
} else if (prefix == "osd tier add-cache") {
// Handle 'osd tier add-cache <pool> <tierpool> <size>': make <tierpool> a
// cache tier of <pool>, seeding cache_mode, hit_set, and target size from
// the osd_tier_default_cache_* config options. All validation happens
// against the committed osdmap before any pending change is made.
string poolstr;
cmd_getval(g_ceph_context, cmdmap, "pool", poolstr);
int64_t pool_id = osdmap.lookup_pg_pool_name(poolstr);
if (pool_id < 0) {
ss << "unrecognized pool '" << poolstr << "'";
err = -ENOENT;
goto reply;
}
string tierpoolstr;
cmd_getval(g_ceph_context, cmdmap, "tierpool", tierpoolstr);
int64_t tierpool_id = osdmap.lookup_pg_pool_name(tierpoolstr);
if (tierpool_id < 0) {
ss << "unrecognized pool '" << tierpoolstr << "'";
err = -ENOENT;
goto reply;
}
const pg_pool_t *p = osdmap.get_pg_pool(pool_id);
assert(p);
const pg_pool_t *tp = osdmap.get_pg_pool(tierpool_id);
assert(tp);
// Idempotent success if the tier relationship is already committed.
if (p->tiers.count(tierpool_id)) {
assert(tp->tier_of == pool_id);
err = 0;
ss << "pool '" << tierpoolstr << "' is now (or already was) a tier of '" << poolstr << "'";
goto reply;
}
// A pool can be a tier of at most one base pool.
if (tp->is_tier()) {
ss << "tier pool '" << tierpoolstr << "' is already a tier of '"
<< osdmap.get_pool_name(tp->tier_of) << "'";
err = -EINVAL;
goto reply;
}
int64_t size = 0;
cmd_getval(g_ceph_context, cmdmap, "size", size);
// make sure new tier is empty
const pool_stat_t& tier_stats =
mon->pgmon()->pg_map.get_pg_pool_sum_stat(tierpool_id);
if (tier_stats.stats.sum.num_objects != 0) {
ss << "tier pool '" << tierpoolstr << "' is not empty";
err = -ENOTEMPTY;
goto reply;
}
// Validate the configured defaults up front so we fail cleanly before
// touching the pending map.
string modestr = g_conf->osd_tier_default_cache_mode;
pg_pool_t::cache_mode_t mode = pg_pool_t::get_cache_mode_from_str(modestr);
// NOTE(review): 'mode < 0' assumes cache_mode_t is signed and that
// get_cache_mode_from_str returns a negative value for unknown strings --
// confirm against pg_pool_t's definition.
if (mode < 0) {
ss << "osd tier cache default mode '" << modestr << "' is not a valid cache mode";
err = -EINVAL;
goto reply;
}
// Build the HitSet parameters from the configured default type.
HitSet::Params hsp;
if (g_conf->osd_tier_default_cache_hit_set_type == "bloom") {
BloomHitSet::Params *bsp = new BloomHitSet::Params;
bsp->set_fpp(g_conf->osd_pool_default_hit_set_bloom_fpp);
hsp = HitSet::Params(bsp);
} else if (g_conf->osd_tier_default_cache_hit_set_type == "explicit_hash") {
hsp = HitSet::Params(new ExplicitHashHitSet::Params);
}
else if (g_conf->osd_tier_default_cache_hit_set_type == "explicit_object") {
hsp = HitSet::Params(new ExplicitObjectHitSet::Params);
} else {
ss << "osd tier cache default hit set type '" <<
g_conf->osd_tier_default_cache_hit_set_type << "' is not a known type";
err = -EINVAL;
goto reply;
}
// go
pg_pool_t *np = pending_inc.get_new_pool(pool_id, p);
pg_pool_t *ntp = pending_inc.get_new_pool(tierpool_id, tp);
// If a racing proposal already changed the tier relationship in the
// pending map, retry this command after that proposal commits.
if (np->tiers.count(tierpool_id) || ntp->is_tier()) {
wait_for_finished_proposal(new C_RetryMessage(this, m));
return true;
}
// Wire up the tier relationship and apply the cache defaults; 'size'
// becomes the tier's target_max_bytes.
np->tiers.insert(tierpool_id);
ntp->tier_of = pool_id;
ntp->cache_mode = mode;
ntp->hit_set_count = g_conf->osd_tier_default_cache_hit_set_count;
ntp->hit_set_period = g_conf->osd_tier_default_cache_hit_set_period;
ntp->hit_set_params = hsp;
ntp->target_max_bytes = size;
ss << "pool '" << tierpoolstr << "' is now (or already was) a cache tier of '" << poolstr << "'";
// Reply to the client once this map change has been proposed and committed.
wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
get_last_committed() + 1));
return true;
} else if (prefix == "osd pool set-quota") {
string poolstr;
cmd_getval(g_ceph_context, cmdmap, "pool", poolstr);
@@ -4720,11 +4818,11 @@ done:
ss << "will thrash map for " << thrash_map << " epochs";
ret = thrash();
err = 0;
} else {
err = -EINVAL;
}
} else {
err = -EINVAL;
}
reply:
reply:
getline(ss, rs);
if (err < 0 && rs.length() == 0)
rs = cpp_strerror(err);

View File

@@ -10784,7 +10784,7 @@ void ReplicatedPG::agent_choose_mode()
// get dirty, full ratios
uint64_t dirty_micro = 0;
uint64_t full_micro = 0;
if (pool.info.target_max_bytes) {
if (pool.info.target_max_bytes && info.stats.stats.sum.num_objects) {
uint64_t avg_size = info.stats.stats.sum.num_bytes /
info.stats.stats.sum.num_objects;
dirty_micro =