mon/OSDMonitor: allow pg_num to be adjusted up or down via pg[p]_num_target

The CLI now sets the *_target values, imposing only the subset of constraints that
the user needs to be concerned with.

New "pg_num_actual" and "pgp_num_actual" properties/commands are added that allow
the underlying raw values to be adjusted.  For the merge case, this sets
pg_num_pending instead of pg_num so that the OSDs can go through the
merge prep process.

A controller (in a future commit) will make pg[p]_num converge to pg[p]_num_target.

Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2018-02-15 21:25:32 -06:00
parent 17b270a04f
commit 7f3d156ebf

View File

@ -6587,9 +6587,11 @@ int OSDMonitor::prepare_new_pool(string& name,
pi->crush_rule = crush_rule;
pi->expected_num_objects = expected_num_objects;
pi->object_hash = CEPH_STR_HASH_RJENKINS;
pi->set_pg_num(pg_num);
pi->set_pg_num_pending(pg_num, pending_inc.epoch);
pi->set_pgp_num(pgp_num);
pi->set_pg_num(1);
pi->set_pg_num_pending(pi->get_pg_num(), pending_inc.epoch);
pi->set_pg_num_target(pg_num);
pi->set_pgp_num(pi->get_pg_num());
pi->set_pgp_num_target(pgp_num);
pi->last_change = pending_inc.epoch;
pi->auid = 0;
if (pool_type == pg_pool_t::TYPE_ERASURE) {
@ -6739,6 +6741,49 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
}
}
p.min_size = n;
} else if (var == "pg_num_actual") {
if (interr.length()) {
ss << "error parsing integer value '" << val << "': " << interr;
return -EINVAL;
}
if (n == (int)p.get_pg_num()) {
return 0;
}
if (static_cast<uint64_t>(n) > g_conf().get_val<uint64_t>("mon_max_pool_pg_num")) {
ss << "'pg_num' must be greater than 0 and less than or equal to "
<< g_conf().get_val<uint64_t>("mon_max_pool_pg_num")
<< " (you may adjust 'mon max pool pg num' for higher values)";
return -ERANGE;
}
if (n > (int)p.get_pg_num()) {
p.set_pg_num(n);
} else {
if (osdmap.require_osd_release < CEPH_RELEASE_NAUTILUS) {
ss << "nautilus OSDs are required to adjust pg_num_pending";
return -EPERM;
}
if (osdmap.require_min_compat_client > 0 &&
osdmap.require_min_compat_client < CEPH_RELEASE_MIMIC) {
ss << "require_min_compat_client "
<< ceph_release_name(osdmap.require_min_compat_client)
<< " < mimic, which is required for pg merging. "
<< "Try 'ceph osd set-require-min-compat-client mimic'.";
return -EPERM;
}
if (n < (int)p.get_pgp_num()) {
ss << "specified pg_num " << n << " < pgp_num " << p.get_pgp_num();
return -EINVAL;
}
if (n < (int)p.get_pg_num() - 1) {
ss << "specified pg_num " << n << " < pg_num (" << p.get_pg_num()
<< ") - 1; only single pg decrease is currently supported";
return -EINVAL;
}
p.set_pg_num_pending(n, pending_inc.epoch);
}
// force pre-luminous clients to resend their ops, since they
// don't understand that split PGs now form a new interval.
p.last_force_op_resend_preluminous = pending_inc.epoch;
} else if (var == "pg_num") {
if (p.has_flag(pg_pool_t::FLAG_NOPGCHANGE)) {
ss << "pool pg_num change is disabled; you must unset nopgchange flag for the pool first";
@ -6748,10 +6793,7 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
ss << "error parsing integer value '" << val << "': " << interr;
return -EINVAL;
}
if (n <= (int)p.get_pg_num()) {
ss << "specified pg_num " << n << " <= current " << p.get_pg_num();
if (n < (int)p.get_pg_num())
return -EEXIST;
if (n == (int)p.get_pg_num_target()) {
return 0;
}
if (static_cast<uint64_t>(n) > g_conf().get_val<uint64_t>("mon_max_pool_pg_num")) {
@ -6760,31 +6802,49 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
<< " (you may adjust 'mon max pool pg num' for higher values)";
return -ERANGE;
}
int r = check_pg_num(pool, n, p.get_size(), &ss);
if (r) {
return r;
if (n > (int)p.get_pg_num_target()) {
int r = check_pg_num(pool, n, p.get_size(), &ss);
if (r) {
return r;
}
string force;
cmd_getval(cct,cmdmap, "force", force);
if (p.cache_mode != pg_pool_t::CACHEMODE_NONE &&
force != "--yes-i-really-mean-it") {
ss << "splits in cache pools must be followed by scrubs and leave sufficient free space to avoid overfilling. use --yes-i-really-mean-it to force.";
return -EPERM;
}
int expected_osds = std::min(p.get_pg_num(), osdmap.get_num_osds());
int64_t new_pgs = n - p.get_pg_num_target();
if (new_pgs > g_conf()->mon_osd_max_split_count * expected_osds) {
ss << "specified pg_num " << n << " is too large (creating "
<< new_pgs << " new PGs on ~" << expected_osds
<< " OSDs exceeds per-OSD max with mon_osd_max_split_count of "
<< g_conf()->mon_osd_max_split_count << ')';
return -E2BIG;
}
} else {
if (osdmap.require_osd_release < CEPH_RELEASE_NAUTILUS) {
ss << "nautilus OSDs are required to adjust pg_num_pending";
return -EPERM;
}
if (osdmap.require_min_compat_client > 0 &&
osdmap.require_min_compat_client < CEPH_RELEASE_MIMIC) {
ss << "require_min_compat_client "
<< ceph_release_name(osdmap.require_min_compat_client)
<< " < mimic, which is required for pg merging. "
<< "Try 'ceph osd set-require-min-compat-client mimic'.";
return -EPERM;
}
if (n < (int)p.get_pgp_num_target()) {
ss << "specified pg_num " << n
<< " < pgp_num " << p.get_pgp_num_target();
return -EINVAL;
}
}
string force;
cmd_getval(cct,cmdmap, "force", force);
if (p.cache_mode != pg_pool_t::CACHEMODE_NONE &&
force != "--yes-i-really-mean-it") {
ss << "splits in cache pools must be followed by scrubs and leave sufficient free space to avoid overfilling. use --yes-i-really-mean-it to force.";
return -EPERM;
}
int expected_osds = std::min(p.get_pg_num(), osdmap.get_num_osds());
int64_t new_pgs = n - p.get_pg_num();
if (new_pgs > g_conf()->mon_osd_max_split_count * expected_osds) {
ss << "specified pg_num " << n << " is too large (creating "
<< new_pgs << " new PGs on ~" << expected_osds
<< " OSDs exceeds per-OSD max with mon_osd_max_split_count of "
<< g_conf()->mon_osd_max_split_count << ')';
return -E2BIG;
}
p.set_pg_num(n);
// force pre-luminous clients to resend their ops, since they
// don't understand that split PGs now form a new interval.
p.last_force_op_resend_preluminous = pending_inc.epoch;
} else if (var == "pgp_num") {
// set target; mgr will adjust pg_num_actual later
p.set_pg_num_target(n);
} else if (var == "pgp_num_actual") {
if (p.has_flag(pg_pool_t::FLAG_NOPGCHANGE)) {
ss << "pool pgp_num change is disabled; you must unset nopgchange flag for the pool first";
return -EPERM;
@ -6801,7 +6861,30 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
ss << "specified pgp_num " << n << " > pg_num " << p.get_pg_num();
return -EINVAL;
}
if (n > (int)p.get_pg_num_pending()) {
ss << "specified pgp_num " << n
<< " > pg_num_pending " << p.get_pg_num_pending();
return -EINVAL;
}
p.set_pgp_num(n);
} else if (var == "pgp_num") {
if (p.has_flag(pg_pool_t::FLAG_NOPGCHANGE)) {
ss << "pool pgp_num change is disabled; you must unset nopgchange flag for the pool first";
return -EPERM;
}
if (interr.length()) {
ss << "error parsing integer value '" << val << "': " << interr;
return -EINVAL;
}
if (n <= 0) {
ss << "specified pgp_num must > 0, but you set to " << n;
return -EINVAL;
}
if (n > (int)p.get_pg_num_target()) {
ss << "specified pgp_num " << n << " > pg_num " << p.get_pg_num_target();
return -EINVAL;
}
p.set_pgp_num_target(n);
} else if (var == "crush_rule") {
int id = osdmap.crush->get_rule_id(val);
if (id == -ENOENT) {