mon/OSDMonitor: add 'osd [rm-]pg-remap[-items] ...' commands

Add commands to add and remove pg_remap mappings.

Require that a mon config option is set before this is allowed so that
users don't inadvertently prevent older clients from interacting with the
cluster.

Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2017-03-13 11:59:07 -04:00
parent eebd9a1bc2
commit a6aa42f793
4 changed files with 221 additions and 0 deletions

View File

@ -282,6 +282,7 @@ OPTION(mon_osd_max_op_age, OPT_DOUBLE, 32) // max op age before we get conce
OPTION(mon_osd_max_split_count, OPT_INT, 32) // largest number of PGs per "involved" OSD to let split create
OPTION(mon_osd_allow_primary_temp, OPT_BOOL, false) // allow primary_temp to be set in the osdmap
OPTION(mon_osd_allow_primary_affinity, OPT_BOOL, false) // allow primary_affinity to be set in the osdmap
OPTION(mon_osd_allow_pg_remap, OPT_BOOL, false) // allow pg remap to be set in the osdmap
OPTION(mon_osd_prime_pg_temp, OPT_BOOL, true) // prime osdmap with pg mapping changes
OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT, .5) // max time to spend priming
OPTION(mon_osd_prime_pg_temp_max_estimate, OPT_FLOAT, .25) // max estimate of pg total before we do all pgs in parallel

View File

@ -676,6 +676,25 @@ COMMAND("osd pg-temp " \
"name=id,type=CephOsdName,n=N,req=false", \
"set pg_temp mapping pgid:[<id> [<id>...]] (developers only)", \
"osd", "rw", "cli,rest")
// Set an explicit pg_remap mapping for one PG: a pgid followed by one or more
// OSD ids.  The signature has no separate "primary" argument, so the help
// text must not advertise one.
COMMAND("osd pg-remap " \
"name=pgid,type=CephPgid " \
"name=id,type=CephOsdName,n=N", \
"set pg_remap mapping <pgid>:[<id> [<id>...]] (developers only)", \
"osd", "rw", "cli,rest")
// Clear the pg_remap mapping for a single PG; takes only the pgid.
COMMAND("osd rm-pg-remap " \
"name=pgid,type=CephPgid", \
"clear pg_remap mapping for <pgid> (developers only)", \
"osd", "rw", "cli,rest")
// Set pg_remap_items for a single PG.  The ids are passed as one flat list
// and are consumed as consecutive (from, to) pairs; the handler for this
// command rejects a list with an odd number of ids.
COMMAND("osd pg-remap-items " \
"name=pgid,type=CephPgid " \
"name=id,type=CephOsdName,n=N", \
"set pg_remap_items mapping <pgid>:{<id> to <id>, [...]} (developers only)", \
"osd", "rw", "cli,rest")
// Clear the pg_remap_items mapping for a single PG; takes only the pgid.
COMMAND("osd rm-pg-remap-items " \
"name=pgid,type=CephPgid", \
"clear pg_remap_items mapping for <pgid> (developers only)", \
"osd", "rw", "cli,rest")
COMMAND("osd primary-temp " \
"name=pgid,type=CephPgid " \
"name=id,type=CephOsdName", \

View File

@ -7050,6 +7050,206 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
pending_inc.new_primary_temp[pgid] = osd;
ss << "set " << pgid << " primary_temp mapping to " << osd;
goto update;
} else if (prefix == "osd pg-remap") {
if (!g_conf->mon_osd_allow_pg_remap) {
ss << "you must enable 'mon osd allow pg remap = true' on the mons before you can adjust pg_remap. note that pre-luminous clients will no longer be able to communicate with the cluster.";
err = -EPERM;
goto reply;
}
err = check_cluster_features(CEPH_FEATUREMASK_OSDMAP_REMAP, ss);
if (err == -EAGAIN)
goto wait;
if (err < 0)
goto reply;
string pgidstr;
if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) {
ss << "unable to parse 'pgid' value '"
<< cmd_vartype_stringify(cmdmap["pgid"]) << "'";
err = -EINVAL;
goto reply;
}
pg_t pgid;
if (!pgid.parse(pgidstr.c_str())) {
ss << "invalid pgid '" << pgidstr << "'";
err = -EINVAL;
goto reply;
}
if (!osdmap.pg_exists(pgid)) {
ss << "pg " << pgid << " does not exist";
err = -ENOENT;
goto reply;
}
if (pending_inc.new_pg_remap.count(pgid) ||
pending_inc.old_pg_remap.count(pgid)) {
dout(10) << __func__ << " waiting for pending update on " << pgid << dendl;
wait_for_finished_proposal(op, new C_RetryMessage(this, op));
return true;
}
vector<int64_t> id_vec;
if (!cmd_getval(g_ceph_context, cmdmap, "id", id_vec)) {
ss << "unable to parse 'id' value(s) '"
<< cmd_vartype_stringify(cmdmap["id"]) << "'";
err = -EINVAL;
goto reply;
}
vector<int32_t> new_pg_remap;
for (auto osd : id_vec) {
if (osd != CRUSH_ITEM_NONE && !osdmap.exists(osd)) {
ss << "osd." << osd << " does not exist";
err = -ENOENT;
goto reply;
}
new_pg_remap.push_back(osd);
}
pending_inc.new_pg_remap[pgid] = new_pg_remap;
ss << "set " << pgid << " pg_remap mapping to " << new_pg_remap;
goto update;
} else if (prefix == "osd rm-pg-remap") {
if (!g_conf->mon_osd_allow_pg_remap) {
ss << "you must enable 'mon osd allow pg remap = true' on the mons before you can adjust pg_remap. note that pre-luminous clients will no longer be able to communicate with the cluster.";
err = -EPERM;
goto reply;
}
err = check_cluster_features(CEPH_FEATUREMASK_OSDMAP_REMAP, ss);
if (err == -EAGAIN)
goto wait;
if (err < 0)
goto reply;
string pgidstr;
if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) {
ss << "unable to parse 'pgid' value '"
<< cmd_vartype_stringify(cmdmap["pgid"]) << "'";
err = -EINVAL;
goto reply;
}
pg_t pgid;
if (!pgid.parse(pgidstr.c_str())) {
ss << "invalid pgid '" << pgidstr << "'";
err = -EINVAL;
goto reply;
}
if (!osdmap.pg_exists(pgid)) {
ss << "pg " << pgid << " does not exist";
err = -ENOENT;
goto reply;
}
if (pending_inc.new_pg_remap.count(pgid) ||
pending_inc.old_pg_remap.count(pgid)) {
dout(10) << __func__ << " waiting for pending update on " << pgid << dendl;
wait_for_finished_proposal(op, new C_RetryMessage(this, op));
return true;
}
pending_inc.old_pg_remap.insert(pgid);
ss << "clear " << pgid << " pg_remap mapping";
goto update;
} else if (prefix == "osd pg-remap-items") {
if (!g_conf->mon_osd_allow_pg_remap) {
ss << "you must enable 'mon osd allow pg remap = true' on the mons before you can adjust pg_remap. note that pre-luminous clients will no longer be able to communicate with the cluster.";
err = -EPERM;
goto reply;
}
err = check_cluster_features(CEPH_FEATUREMASK_OSDMAP_REMAP, ss);
if (err == -EAGAIN)
goto wait;
if (err < 0)
goto reply;
string pgidstr;
if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) {
ss << "unable to parse 'pgid' value '"
<< cmd_vartype_stringify(cmdmap["pgid"]) << "'";
err = -EINVAL;
goto reply;
}
pg_t pgid;
if (!pgid.parse(pgidstr.c_str())) {
ss << "invalid pgid '" << pgidstr << "'";
err = -EINVAL;
goto reply;
}
if (!osdmap.pg_exists(pgid)) {
ss << "pg " << pgid << " does not exist";
err = -ENOENT;
goto reply;
}
if (pending_inc.new_pg_remap_items.count(pgid) ||
pending_inc.old_pg_remap_items.count(pgid)) {
dout(10) << __func__ << " waiting for pending update on " << pgid << dendl;
wait_for_finished_proposal(op, new C_RetryMessage(this, op));
return true;
}
vector<int64_t> id_vec;
if (!cmd_getval(g_ceph_context, cmdmap, "id", id_vec)) {
ss << "unable to parse 'id' value(s) '"
<< cmd_vartype_stringify(cmdmap["id"]) << "'";
err = -EINVAL;
goto reply;
}
if (id_vec.size() % 2) {
ss << "you must specify pairs of osd ids to be remapped";
err = -EINVAL;
goto reply;
}
vector<pair<int32_t,int32_t>> new_pg_remap_items;
for (auto p = id_vec.begin(); p != id_vec.end(); ++p) {
int from = *p++;
int to = *p;
if (!osdmap.exists(from)) {
ss << "osd." << from << " does not exist";
err = -ENOENT;
goto reply;
}
if (to != CRUSH_ITEM_NONE && !osdmap.exists(to)) {
ss << "osd." << to << " does not exist";
err = -ENOENT;
goto reply;
}
new_pg_remap_items.push_back(make_pair(from, to));
}
pending_inc.new_pg_remap_items[pgid] = new_pg_remap_items;
ss << "set " << pgid << " pg_remap_items mapping to " << new_pg_remap_items;
goto update;
} else if (prefix == "osd rm-pg-remap-items") {
if (!g_conf->mon_osd_allow_pg_remap) {
ss << "you must enable 'mon osd allow pg remap = true' on the mons before you can adjust pg_remap. note that pre-luminous clients will no longer be able to communicate with the cluster.";
err = -EPERM;
goto reply;
}
err = check_cluster_features(CEPH_FEATUREMASK_OSDMAP_REMAP, ss);
if (err == -EAGAIN)
goto wait;
if (err < 0)
goto reply;
string pgidstr;
if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) {
ss << "unable to parse 'pgid' value '"
<< cmd_vartype_stringify(cmdmap["pgid"]) << "'";
err = -EINVAL;
goto reply;
}
pg_t pgid;
if (!pgid.parse(pgidstr.c_str())) {
ss << "invalid pgid '" << pgidstr << "'";
err = -EINVAL;
goto reply;
}
if (!osdmap.pg_exists(pgid)) {
ss << "pg " << pgid << " does not exist";
err = -ENOENT;
goto reply;
}
if (pending_inc.new_pg_remap_items.count(pgid) ||
pending_inc.old_pg_remap_items.count(pgid)) {
dout(10) << __func__ << " waiting for pending update on " << pgid << dendl;
wait_for_finished_proposal(op, new C_RetryMessage(this, op));
return true;
}
pending_inc.old_pg_remap_items.insert(pgid);
ss << "clear " << pgid << " pg_remap_items mapping";
goto update;
} else if (prefix == "osd primary-affinity") {
int64_t id;
if (!cmd_getval(g_ceph_context, cmdmap, "id", id)) {

View File

@ -472,6 +472,7 @@ $extra_conf
[mon]
mon pg warn min per osd = 3
mon osd allow primary affinity = true
mon osd allow pg remap = true
mon reweight min pgs per osd = 4
mon osd prime pg temp = true
crushtool = $CEPH_BIN/crushtool